1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /*
23 * @OSF_COPYRIGHT@
24 */
25 /*
26 * Mach Operating System
27 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
28 * All Rights Reserved.
29 *
30 * Permission to use, copy, modify and distribute this software and its
31 * documentation is hereby granted, provided that both the copyright
32 * notice and this permission notice appear in all copies of the
33 * software, derivative works or modified versions, and any portions
34 * thereof, and that both notices appear in supporting documentation.
35 *
36 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
37 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
38 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
39 *
40 * Carnegie Mellon requests users of this software to return to
41 *
42 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
43 * School of Computer Science
44 * Carnegie Mellon University
45 * Pittsburgh PA 15213-3890
46 *
47 * any improvements or extensions that they make and grant Carnegie Mellon
48 * the rights to redistribute these changes.
49 */
50 /*
51 * File: kern/lock.c
52 * Author: Avadis Tevanian, Jr., Michael Wayne Young
53 * Date: 1985
54 *
55 * Locking primitives implementation
56 */
57
58 #include <cpus.h>
59 #include <mach_kdb.h>
60 #include <mach_ldebug.h>
61
62 #include <kern/lock.h>
63 #include <kern/etap_macros.h>
64 #include <kern/misc_protos.h>
65 #include <kern/thread.h>
66 #include <kern/sched_prim.h>
67 #include <kern/xpr.h>
68 #include <kern/debug.h>
69 #include <string.h>
70
71 #if MACH_KDB
72 #include <ddb/db_command.h>
73 #include <ddb/db_output.h>
74 #include <ddb/db_sym.h>
75 #include <ddb/db_print.h>
76 #endif /* MACH_KDB */
77
78 #ifdef __ppc__
79 #include <ppc/Firmware.h>
80 #include <ppc/POWERMAC/mp/MPPlugIn.h>
81 #endif
82
83 #define ANY_LOCK_DEBUG (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)
84
85 /*
86 * Some portions of the lock debugging code must run with
87 * interrupts disabled. This can be machine-dependent,
88 * but we don't have any good hooks for that at the moment.
89 * If your architecture is different, add a machine-dependent
90 * ifdef here for these macros. XXX
91 */
92
93 #define DISABLE_INTERRUPTS(s) s = ml_set_interrupts_enabled(FALSE)
94 #define ENABLE_INTERRUPTS(s) (void)ml_set_interrupts_enabled(s)
95
96 #if NCPUS > 1
97 /* Time we loop without holding the interlock.
98 * The former value is used when we cannot sleep, the latter
99 * when our thread can go to sleep (so it loops less).
100 * We shouldn't retake the interlock very frequently
101 * if we cannot go to sleep, since doing so interferes with
102 * the other processors. In particular, 100 is too small
103 * a number for powerpc MP systems because of cache
104 * coherency issues and differing lock fetch times between
105 * the processors.
106 */
107 unsigned int lock_wait_time[2] = { (unsigned int)-1, 100 } ;
108 #else /* NCPUS > 1 */
109
110 /*
111 * It is silly to spin on a uni-processor as if we
112 * thought something magical would happen to the
113 * want_write bit while we are executing.
114 */
115
116 unsigned int lock_wait_time[2] = { 0, 0 };
117 #endif /* NCPUS > 1 */
118
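/*
 * Illustrative sketch only (not compiled): a stripped-down version of the
 * spin-then-block pattern that consumes lock_wait_time[] in the sleep-lock
 * routines later in this file (see lock_write()).  It omits the ETAP
 * bookkeeping and MACH_LDEBUG timeouts of the real code; the function name
 * is hypothetical.
 */
#if 0
static void
example_wait_for_want_write(
	register lock_t	*l)		/* caller holds l->interlock */
{
	register int	i;

	while (l->want_write) {
		/* index 0: cannot sleep (spin hard); index 1: can sleep (spin briefly) */
		i = lock_wait_time[l->can_sleep ? 1 : 0];
		if (i != 0) {
			simple_unlock(&l->interlock);
			while (--i != 0 && l->want_write)
				continue;	/* spin without holding the interlock */
			simple_lock(&l->interlock);
		}
		if (l->can_sleep && l->want_write) {
			l->waiting = TRUE;
			thread_sleep_simple_lock((event_t) l,
				simple_lock_addr(l->interlock), FALSE);
			simple_lock(&l->interlock);
		}
	}
}
#endif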
119 /* Forwards */
120
121 #if MACH_KDB
122 void db_print_simple_lock(
123 simple_lock_t addr);
124
125 void db_print_mutex(
126 mutex_t * addr);
127 #endif /* MACH_KDB */
128
129
130 #if USLOCK_DEBUG
131 /*
132 * Perform simple lock checks.
133 */
134 int uslock_check = 1;
135 int max_lock_loops = 100000000;
136 decl_simple_lock_data(extern , printf_lock)
137 decl_simple_lock_data(extern , panic_lock)
138 #if MACH_KDB && NCPUS > 1
139 decl_simple_lock_data(extern , kdb_lock)
140 #endif /* MACH_KDB && NCPUS > 1 */
141 #endif /* USLOCK_DEBUG */
142
143
144 /*
145 * We often want to know the addresses of the callers
146 * of the various lock routines. However, this information
147 * is only used for debugging and statistics.
148 */
149 typedef void *pc_t;
150 #define INVALID_PC ((void *) VM_MAX_KERNEL_ADDRESS)
151 #define INVALID_THREAD ((void *) VM_MAX_KERNEL_ADDRESS)
152 #if ANY_LOCK_DEBUG || ETAP_LOCK_TRACE
153 #define OBTAIN_PC(pc,l) ((pc) = (void *) GET_RETURN_PC(&(l)))
154 #else /* ANY_LOCK_DEBUG || ETAP_LOCK_TRACE */
155 #ifdef lint
156 /*
157 * Eliminate lint complaints about unused local pc variables.
158 */
159 #define OBTAIN_PC(pc,l) ++pc
160 #else /* lint */
161 #define OBTAIN_PC(pc,l)
162 #endif /* lint */
163 #endif /* ANY_LOCK_DEBUG || ETAP_LOCK_TRACE */
164
165
166 /* #ifndef USIMPLE_LOCK_CALLS
167 * The i386 production version of usimple_locks isn't ready yet.
168 */
169 /*
170 * Portable lock package implementation of usimple_locks.
171 */
172
173 #if ETAP_LOCK_TRACE
174 #define ETAPCALL(stmt) stmt
175 void etap_simplelock_init(simple_lock_t, etap_event_t);
176 void etap_simplelock_unlock(simple_lock_t);
177 void etap_simplelock_hold(simple_lock_t, pc_t, etap_time_t);
178 etap_time_t etap_simplelock_miss(simple_lock_t);
179
180 void etap_mutex_init(mutex_t*, etap_event_t);
181 void etap_mutex_unlock(mutex_t*);
182 void etap_mutex_hold(mutex_t*, pc_t, etap_time_t);
183 etap_time_t etap_mutex_miss(mutex_t*);
184 #else /* ETAP_LOCK_TRACE */
185 #define ETAPCALL(stmt)
186 #endif /* ETAP_LOCK_TRACE */
187
188 #if USLOCK_DEBUG
189 #define USLDBG(stmt) stmt
190 void usld_lock_init(usimple_lock_t, etap_event_t);
191 void usld_lock_pre(usimple_lock_t, pc_t);
192 void usld_lock_post(usimple_lock_t, pc_t);
193 void usld_unlock(usimple_lock_t, pc_t);
194 void usld_lock_try_pre(usimple_lock_t, pc_t);
195 void usld_lock_try_post(usimple_lock_t, pc_t);
196 void usld_lock_held(usimple_lock_t);
197 void usld_lock_none_held(void);
198 int usld_lock_common_checks(usimple_lock_t, char *);
199 #else /* USLOCK_DEBUG */
200 #define USLDBG(stmt)
201 #endif /* USLOCK_DEBUG */
202
203 /*
204 * Initialize a usimple_lock.
205 *
206 * No change in preemption state.
207 */
208 void
209 usimple_lock_init(
210 usimple_lock_t l,
211 etap_event_t event)
212 {
213 USLDBG(usld_lock_init(l, event));
214 ETAPCALL(etap_simplelock_init((l),(event)));
215 hw_lock_init(&l->interlock);
216 }
217
218
219 /*
220 * Acquire a usimple_lock.
221 *
222 * Returns with preemption disabled. Note
223 * that the hw_lock routines are responsible for
224 * maintaining preemption state.
225 */
226 void
227 usimple_lock(
228 usimple_lock_t l)
229 {
230 int i;
231 pc_t pc;
232 #if ETAP_LOCK_TRACE
233 etap_time_t start_wait_time;
234 int no_miss_info = 0;
235 #endif /* ETAP_LOCK_TRACE */
236 #if USLOCK_DEBUG
237 int count = 0;
238 #endif /* USLOCK_DEBUG */
239
240 OBTAIN_PC(pc, l);
241 USLDBG(usld_lock_pre(l, pc));
242 #if ETAP_LOCK_TRACE
243 ETAP_TIME_CLEAR(start_wait_time);
244 #endif /* ETAP_LOCK_TRACE */
245
246 #ifdef __ppc__
247 if(!hw_lock_to(&l->interlock, LockTimeOut)) { /* Try to get the lock with a timeout */
248
249 panic("simple lock deadlock detection - l=%08X, cpu=%d, ret=%08X", l, cpu_number(), pc);
250
251 #else /* __ppc__ */
252 while (!hw_lock_try(&l->interlock)) {
253 ETAPCALL(if (no_miss_info++ == 0)
254 start_wait_time = etap_simplelock_miss(l));
255 while (hw_lock_held(&l->interlock)) {
256 /*
257 * Spin watching the lock value in cache,
258 * without consuming external bus cycles.
259 * On most SMP architectures, the atomic
260 * instruction(s) used by hw_lock_try
261 * cost much, much more than an ordinary
262 * memory read.
263 */
264 #if USLOCK_DEBUG
265 if (count++ > max_lock_loops
266 #if MACH_KDB && NCPUS > 1
267 && l != &kdb_lock
268 #endif /* MACH_KDB && NCPUS > 1 */
269 ) {
270 if (l == &printf_lock) {
271 return;
272 }
273 mp_disable_preemption();
274 panic("simple lock deadlock detection - l=%08X (=%08X), cpu=%d, ret=%08X",
275 l, *hw_lock_addr(l->interlock), cpu_number(), pc);
276 count = 0;
277 mp_enable_preemption();
278 }
279 #endif /* USLOCK_DEBUG */
280 }
281 #endif /* __ppc__ */
282 }
283 ETAPCALL(etap_simplelock_hold(l, pc, start_wait_time));
284 USLDBG(usld_lock_post(l, pc));
285 }
286
287
288 /*
289 * Release a usimple_lock.
290 *
291 * Returns with preemption enabled. Note
292 * that the hw_lock routines are responsible for
293 * maintaining preemption state.
294 */
295 void
296 usimple_unlock(
297 usimple_lock_t l)
298 {
299 pc_t pc;
300
301 // checkNMI(); /* (TEST/DEBUG) */
302
303 OBTAIN_PC(pc, l);
304 USLDBG(usld_unlock(l, pc));
305 ETAPCALL(etap_simplelock_unlock(l));
306 hw_lock_unlock(&l->interlock);
307 }
308
309
310 /*
311 * Conditionally acquire a usimple_lock.
312 *
313 * On success, returns with preemption disabled.
314 * On failure, returns with preemption in the same state
315 * as when first invoked. Note that the hw_lock routines
316 * are responsible for maintaining preemption state.
317 *
318 * XXX No stats are gathered on a miss; I preserved this
319 * behavior from the original assembly-language code, but
320 * doesn't it make sense to log misses? XXX
321 */
322 unsigned int
323 usimple_lock_try(
324 usimple_lock_t l)
325 {
326 pc_t pc;
327 unsigned int success;
328 etap_time_t zero_time;
329
330 OBTAIN_PC(pc, l);
331 USLDBG(usld_lock_try_pre(l, pc));
332 if ((success = hw_lock_try(&l->interlock))) {
333 USLDBG(usld_lock_try_post(l, pc));
334 ETAP_TIME_CLEAR(zero_time);
335 ETAPCALL(etap_simplelock_hold(l, pc, zero_time));
336 }
337 return success;
338 }
339
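/*
 * Usage sketch (not compiled): how a client of the portable simple lock
 * package above might protect a small piece of shared state.  The lock,
 * counter and function names are hypothetical, and the zero passed as the
 * etap_event_t argument is just a placeholder event id.  simple_lock_init(),
 * simple_lock(), simple_lock_try() and simple_unlock() are the client-facing
 * names from kern/lock.h that resolve to the usimple_lock routines above in
 * this configuration.
 */
#if 0
decl_simple_lock_data(static, example_stats_lock)
static unsigned int	example_stats_count;

static void
example_stats_init(void)
{
	simple_lock_init(&example_stats_lock, (etap_event_t) 0);
}

static void
example_stats_bump(void)
{
	simple_lock(&example_stats_lock);	/* returns with preemption disabled */
	example_stats_count++;
	simple_unlock(&example_stats_lock);	/* preemption re-enabled */
}

static boolean_t
example_stats_try_bump(void)
{
	if (!simple_lock_try(&example_stats_lock))
		return FALSE;			/* caller may retry or give up */
	example_stats_count++;
	simple_unlock(&example_stats_lock);
	return TRUE;
}
#endif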
340 #if ETAP_LOCK_TRACE
341 void
342 simple_lock_no_trace(
343 simple_lock_t l)
344 {
345 pc_t pc;
346
347 OBTAIN_PC(pc, l);
348 USLDBG(usld_lock_pre(l, pc));
349 while (!hw_lock_try(&l->interlock)) {
350 while (hw_lock_held(&l->interlock)) {
351 /*
352 * Spin watching the lock value in cache,
353 * without consuming external bus cycles.
354 * On most SMP architectures, the atomic
355 * instruction(s) used by hw_lock_try
356 * cost much, much more than an ordinary
357 * memory read.
358 */
359 }
360 }
361 USLDBG(usld_lock_post(l, pc));
362 }
363
364 void
365 simple_unlock_no_trace(
366 simple_lock_t l)
367 {
368 pc_t pc;
369
370 OBTAIN_PC(pc, l);
371 USLDBG(usld_unlock(l, pc));
372 hw_lock_unlock(&l->interlock);
373 }
374
375 int
376 simple_lock_try_no_trace(
377 simple_lock_t l)
378 {
379 pc_t pc;
380 unsigned int success;
381
382 OBTAIN_PC(pc, l);
383 USLDBG(usld_lock_try_pre(l, pc));
384 if ((success = hw_lock_try(&l->interlock))) {
385 USLDBG(usld_lock_try_post(l, pc));
386 }
387 return success;
388 }
389 #endif /* ETAP_LOCK_TRACE */
390
391
392 #if USLOCK_DEBUG
393 /*
394 * Verify that the lock is locked and owned by
395 * the current thread.
396 */
397 void
398 usimple_lock_held(
399 usimple_lock_t l)
400 {
401 usld_lock_held(l);
402 }
403
404
405 /*
406 * Verify that no usimple_locks are held by
407 * this processor. Typically used in a
408 * trap handler when returning to user mode
409 * or in a path known to relinquish the processor.
410 */
411 void
412 usimple_lock_none_held(void)
413 {
414 usld_lock_none_held();
415 }
416 #endif /* USLOCK_DEBUG */
417
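/*
 * Usage sketch (not compiled): the check above is meant for code that is
 * about to leave the kernel or give up the processor.  The trap-exit
 * routine named here is hypothetical.
 */
#if 0
void
example_return_to_user(void)
{
#if	USLOCK_DEBUG
	usimple_lock_none_held();	/* no simple locks may be held across this point */
#endif
	/* ... restore user-mode state and return from the trap ... */
}
#endif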
418
419 #if USLOCK_DEBUG
420 /*
421 * States of a usimple_lock. The default when initializing
422 * a usimple_lock is setting it up for debug checking.
423 */
424 #define USLOCK_CHECKED 0x0001 /* lock is being checked */
425 #define USLOCK_TAKEN 0x0002 /* lock has been taken */
426 #define USLOCK_INIT 0xBAA0 /* lock has been initialized */
427 #define USLOCK_INITIALIZED (USLOCK_INIT|USLOCK_CHECKED)
428 #define USLOCK_CHECKING(l) (uslock_check && \
429 ((l)->debug.state & USLOCK_CHECKED))
430
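/*
 * Worked example of the state encoding above (illustrative only), assuming
 * the global uslock_check switch is on:
 *
 *	after usld_lock_init():	state == USLOCK_INITIALIZED		(0xBAA1)
 *	after usld_lock_post():	state == USLOCK_INITIALIZED|USLOCK_TAKEN	(0xBAA3)
 *	after usld_unlock():	state == USLOCK_INITIALIZED		(0xBAA1)
 *
 * USLOCK_CHECKING(l) gates every check below: both the global uslock_check
 * flag and the per-lock USLOCK_CHECKED bit must be set.
 */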
431 /*
432 * Maintain a per-cpu stack of acquired usimple_locks.
433 */
434 void usl_stack_push(usimple_lock_t, int);
435 void usl_stack_pop(usimple_lock_t, int);
436
437 /*
438 * Trace activities of a particularly interesting lock.
439 */
440 void usl_trace(usimple_lock_t, int, pc_t, const char *);
441
442
443 /*
444 * Initialize the debugging information contained
445 * in a usimple_lock.
446 */
447 void
448 usld_lock_init(
449 usimple_lock_t l,
450 etap_event_t type)
451 {
452 if (l == USIMPLE_LOCK_NULL)
453 panic("lock initialization: null lock pointer");
454 l->lock_type = USLOCK_TAG;
455 l->debug.state = uslock_check ? USLOCK_INITIALIZED : 0;
456 l->debug.lock_cpu = l->debug.unlock_cpu = 0;
457 l->debug.lock_pc = l->debug.unlock_pc = INVALID_PC;
458 l->debug.lock_thread = l->debug.unlock_thread = INVALID_THREAD;
459 l->debug.duration[0] = l->debug.duration[1] = 0;
460 l->debug.unlock_cpu = 0;
461 l->debug.unlock_pc = INVALID_PC;
462 l->debug.unlock_thread = INVALID_THREAD;
463 }
464
465
466 /*
467 * These checks apply to all usimple_locks, not just
468 * those with USLOCK_CHECKED turned on.
469 */
470 int
471 usld_lock_common_checks(
472 usimple_lock_t l,
473 char *caller)
474 {
475 if (l == USIMPLE_LOCK_NULL)
476 panic("%s: null lock pointer", caller);
477 if (l->lock_type != USLOCK_TAG)
478 panic("%s: 0x%x is not a usimple lock", caller, (integer_t) l);
479 if (!(l->debug.state & USLOCK_INIT))
480 panic("%s: 0x%x is not an initialized lock",
481 caller, (integer_t) l);
482 return USLOCK_CHECKING(l);
483 }
484
485
486 /*
487 * Debug checks on a usimple_lock just before attempting
488 * to acquire it.
489 */
490 /* ARGSUSED */
491 void
492 usld_lock_pre(
493 usimple_lock_t l,
494 pc_t pc)
495 {
496 char *caller = "usimple_lock";
497
498
499 #if 0
500 printf("*** %08X %08X %04X %02X %08X %02X %08X - %s\n", /* (TEST/DEBUG) */
501 l->debug.lock_pc,
502 l->debug.lock_thread,
503 l->debug.state,
504 l->debug.lock_cpu,
505 l->debug.unlock_thread,
506 l->debug.unlock_cpu,
507 l->debug.unlock_pc,
508 caller);
509 #endif
510
511 if (!usld_lock_common_checks(l, caller))
512 return;
513
514 /*
515 * Note that we have a weird case where we are getting a lock when we are
516 * in the process of putting the system to sleep. We are running with no
517 * current thread, so we can't tell whether we are trying to retake a lock
518 * we already hold or someone on another processor holds it. Therefore we
519 * simply skip this test if the locking thread is 0.
520 */
521
522 if ((l->debug.state & USLOCK_TAKEN) && l->debug.lock_thread &&
523 l->debug.lock_thread == (void *) current_thread()) {
524 printf("%s: lock 0x%x already locked (at 0x%x) by",
525 caller, (integer_t) l, l->debug.lock_pc);
526 printf(" current thread 0x%x (new attempt at pc 0x%x)\n",
527 l->debug.lock_thread, pc);
528 panic(caller);
529 }
530 mp_disable_preemption();
531 usl_trace(l, cpu_number(), pc, caller);
532 mp_enable_preemption();
533 }
534
535
536 /*
537 * Debug checks on a usimple_lock just after acquiring it.
538 *
539 * Pre-emption has been disabled at this point,
540 * so we are safe in using cpu_number.
541 */
542 void
543 usld_lock_post(
544 usimple_lock_t l,
545 pc_t pc)
546 {
547 register int mycpu;
548 char *caller = "successful usimple_lock";
549
550
551 #if 0
552 printf("*** %08X %08X %04X %02X %08X %02X %08X - %s\n", /* (TEST/DEBUG) */
553 l->debug.lock_pc,
554 l->debug.lock_thread,
555 l->debug.state,
556 l->debug.lock_cpu,
557 l->debug.unlock_thread,
558 l->debug.unlock_cpu,
559 l->debug.unlock_pc,
560 caller);
561 #endif
562
563 if (!usld_lock_common_checks(l, caller))
564 return;
565
566 if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
567 panic("%s: lock 0x%x became uninitialized",
568 caller, (integer_t) l);
569 if ((l->debug.state & USLOCK_TAKEN))
570 panic("%s: lock 0x%x became TAKEN by someone else",
571 caller, (integer_t) l);
572
573 mycpu = cpu_number();
574 l->debug.lock_thread = (void *)current_thread();
575 l->debug.state |= USLOCK_TAKEN;
576 l->debug.lock_pc = pc;
577 l->debug.lock_cpu = mycpu;
578
579 usl_stack_push(l, mycpu);
580 usl_trace(l, mycpu, pc, caller);
581 }
582
583
584 /*
585 * Debug checks on a usimple_lock just before
586 * releasing it. Note that the caller has not
587 * yet released the hardware lock.
588 *
589 * Preemption is still disabled, so there's
590 * no problem using cpu_number.
591 */
592 void
593 usld_unlock(
594 usimple_lock_t l,
595 pc_t pc)
596 {
597 register int mycpu;
598 char *caller = "usimple_unlock";
599
600
601 #if 0
602 printf("*** %08X %08X %04X %02X %08X %02X %08X - %s\n", /* (TEST/DEBUG) */
603 l->debug.lock_pc,
604 l->debug.lock_thread,
605 l->debug.state,
606 l->debug.lock_cpu,
607 l->debug.unlock_thread,
608 l->debug.unlock_cpu,
609 l->debug.unlock_pc,
610 caller);
611 #endif
612
613 if (!usld_lock_common_checks(l, caller))
614 return;
615
616 mycpu = cpu_number();
617
618 if (!(l->debug.state & USLOCK_TAKEN))
619 panic("%s: lock 0x%x hasn't been taken",
620 caller, (integer_t) l);
621 if (l->debug.lock_thread != (void *) current_thread())
622 panic("%s: unlocking lock 0x%x, owned by thread 0x%x",
623 caller, (integer_t) l, l->debug.lock_thread);
624 if (l->debug.lock_cpu != mycpu) {
625 printf("%s: unlocking lock 0x%x on cpu 0x%x",
626 caller, (integer_t) l, mycpu);
627 printf(" (acquired on cpu 0x%x)\n", l->debug.lock_cpu);
628 panic(caller);
629 }
630 usl_trace(l, mycpu, pc, caller);
631 usl_stack_pop(l, mycpu);
632
633 l->debug.unlock_thread = l->debug.lock_thread;
634 l->debug.lock_thread = INVALID_THREAD;
635 l->debug.state &= ~USLOCK_TAKEN;
636 l->debug.unlock_pc = pc;
637 l->debug.unlock_cpu = mycpu;
638 }
639
640
641 /*
642 * Debug checks on a usimple_lock just before
643 * attempting to acquire it.
644 *
645 * Preemption isn't guaranteed to be disabled.
646 */
647 void
648 usld_lock_try_pre(
649 usimple_lock_t l,
650 pc_t pc)
651 {
652 char *caller = "usimple_lock_try";
653
654 if (!usld_lock_common_checks(l, caller))
655 return;
656 mp_disable_preemption();
657 usl_trace(l, cpu_number(), pc, caller);
658 mp_enable_preemption();
659 }
660
661
662 /*
663 * Debug checks on a usimple_lock just after
664 * successfully attempting to acquire it.
665 *
666 * Preemption has been disabled by the
667 * lock acquisition attempt, so it's safe
668 * to use cpu_number.
669 */
670 void
671 usld_lock_try_post(
672 usimple_lock_t l,
673 pc_t pc)
674 {
675 register int mycpu;
676 char *caller = "successful usimple_lock_try";
677
678 if (!usld_lock_common_checks(l, caller))
679 return;
680
681 if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
682 panic("%s: lock 0x%x became uninitialized",
683 caller, (integer_t) l);
684 if ((l->debug.state & USLOCK_TAKEN))
685 panic("%s: lock 0x%x became TAKEN by someone else",
686 caller, (integer_t) l);
687
688 mycpu = cpu_number();
689 l->debug.lock_thread = (void *) current_thread();
690 l->debug.state |= USLOCK_TAKEN;
691 l->debug.lock_pc = pc;
692 l->debug.lock_cpu = mycpu;
693
694 #if 0
695 printf("*** %08X %08X %04X %02X %08X %02X %08X - %s\n", /* (TEST/DEBUG) */
696 l->debug.lock_pc,
697 l->debug.lock_thread,
698 l->debug.state,
699 l->debug.lock_cpu,
700 l->debug.unlock_thread,
701 l->debug.unlock_cpu,
702 l->debug.unlock_pc,
703 caller);
704 #endif
705
706 usl_stack_push(l, mycpu);
707 usl_trace(l, mycpu, pc, caller);
708 }
709
710
711 /*
712 * Determine whether the lock in question is owned
713 * by the current thread.
714 */
715 void
716 usld_lock_held(
717 usimple_lock_t l)
718 {
719 char *caller = "usimple_lock_held";
720
721
722 #if 0
723 printf("*** %08X %08X %04X %02X %08X %02X %08X - %s\n", /* (TEST/DEBUG) */
724 l->debug.lock_pc,
725 l->debug.lock_thread,
726 l->debug.state,
727 l->debug.lock_cpu,
728 l->debug.unlock_thread,
729 l->debug.unlock_cpu,
730 l->debug.unlock_pc,
731 caller);
732 #endif
733
734 if (!usld_lock_common_checks(l, caller))
735 return;
736
737 if (!(l->debug.state & USLOCK_TAKEN))
738 panic("%s: lock 0x%x hasn't been taken",
739 caller, (integer_t) l);
740 if (l->debug.lock_thread != (void *) current_thread())
741 panic("%s: lock 0x%x is owned by thread 0x%x", caller,
742 (integer_t) l, (integer_t) l->debug.lock_thread);
743
744 /*
745 * The usimple_lock is active, so preemption
746 * is disabled and the current cpu should
747 * match the one recorded at lock acquisition time.
748 */
749 if (l->debug.lock_cpu != cpu_number())
750 panic("%s: current cpu 0x%x isn't acquiring cpu 0x%x",
751 caller, cpu_number(), (integer_t) l->debug.lock_cpu);
752 }
753
754
755 /*
756 * Per-cpu stack of currently active usimple_locks.
757 * Requires spl protection so that interrupt-level
758 * locks plug-n-play with their thread-context friends.
759 */
760 #define USLOCK_STACK_DEPTH 20
761 usimple_lock_t uslock_stack[NCPUS][USLOCK_STACK_DEPTH];
762 unsigned int uslock_stack_index[NCPUS];
763 boolean_t uslock_stack_enabled = FALSE;
764
765
766 /*
767 * Record a usimple_lock just acquired on
768 * the current processor.
769 *
770 * Preemption has been disabled by lock
771 * acquisition, so it's safe to use the cpu number
772 * specified by the caller.
773 */
774 void
775 usl_stack_push(
776 usimple_lock_t l,
777 int mycpu)
778 {
779 boolean_t s;
780
781 if (uslock_stack_enabled == FALSE)
782 return;
783
784 DISABLE_INTERRUPTS(s);
785 assert(uslock_stack_index[mycpu] >= 0);
786 assert(uslock_stack_index[mycpu] < USLOCK_STACK_DEPTH);
787 if (uslock_stack_index[mycpu] >= USLOCK_STACK_DEPTH) {
788 printf("usl_stack_push (cpu 0x%x): too many locks (%d)",
789 mycpu, uslock_stack_index[mycpu]);
790 printf(" disabling stacks\n");
791 uslock_stack_enabled = FALSE;
792 ENABLE_INTERRUPTS(s);
793 return;
794 }
795 uslock_stack[mycpu][uslock_stack_index[mycpu]] = l;
796 uslock_stack_index[mycpu]++;
797 ENABLE_INTERRUPTS(s);
798 }
799
800
801 /*
802 * Eliminate the entry for a usimple_lock
803 * that had been active on the current processor.
804 *
805 * Preemption has been disabled by lock
806 * acquisition, and we haven't yet actually
807 * released the hardware lock associated with
808 * this usimple_lock, so it's safe to use the
809 * cpu number supplied by the caller.
810 */
811 void
812 usl_stack_pop(
813 usimple_lock_t l,
814 int mycpu)
815 {
816 unsigned int i, index;
817 boolean_t s;
818
819 if (uslock_stack_enabled == FALSE)
820 return;
821
822 DISABLE_INTERRUPTS(s);
823 assert(uslock_stack_index[mycpu] > 0);
824 assert(uslock_stack_index[mycpu] <= USLOCK_STACK_DEPTH);
825 if (uslock_stack_index[mycpu] == 0) {
826 printf("usl_stack_pop (cpu 0x%x): not enough locks (%d)",
827 mycpu, uslock_stack_index[mycpu]);
828 printf(" disabling stacks\n");
829 uslock_stack_enabled = FALSE;
830 ENABLE_INTERRUPTS(s);
831 return;
832 }
833 index = --uslock_stack_index[mycpu];
834 for (i = 0; i <= index; ++i) {
835 if (uslock_stack[mycpu][i] == l) {
836 if (i != index)
837 uslock_stack[mycpu][i] =
838 uslock_stack[mycpu][index];
839 ENABLE_INTERRUPTS(s);
840 return;
841 }
842 }
843 ENABLE_INTERRUPTS(s);
844 panic("usl_stack_pop: can't find usimple_lock 0x%x", l);
845 }
846
847
848 /*
849 * Determine whether any usimple_locks are currently held.
850 *
851 * Caller's preemption state is uncertain. If
852 * preemption has been disabled, this check is accurate.
853 * Otherwise, this check is just a guess. We do the best
854 * we can by disabling scheduler interrupts, so at least
855 * the check is accurate w.r.t. whatever cpu we're running
856 * on while in this routine.
857 */
858 void
859 usld_lock_none_held()
860 {
861 register int mycpu;
862 boolean_t s;
863 unsigned int locks_held;
864 char *caller = "usimple_lock_none_held";
865
866 DISABLE_INTERRUPTS(s);
867 mp_disable_preemption();
868 mycpu = cpu_number();
869 locks_held = uslock_stack_index[mycpu];
870 mp_enable_preemption();
871 ENABLE_INTERRUPTS(s);
872 if (locks_held > 0)
873 panic("%s: no locks should be held (0x%x locks held)",
874 caller, (integer_t) locks_held);
875 }
876
877
878 /*
879 * For very special cases, set traced_lock to point to a
880 * specific lock of interest. The result is a series of
881 * XPRs showing lock operations on that lock. The lock_seq
882 * value is used to show the order of those operations.
883 */
884 usimple_lock_t traced_lock;
885 unsigned int lock_seq;
886
887 void
888 usl_trace(
889 usimple_lock_t l,
890 int mycpu,
891 pc_t pc,
892 const char * op_name)
893 {
894 if (traced_lock == l) {
895 XPR(XPR_SLOCK,
896 "seq %d, cpu %d, %s @ %x\n",
897 (integer_t) lock_seq, (integer_t) mycpu,
898 (integer_t) op_name, (integer_t) pc, 0);
899 lock_seq++;
900 }
901 }
902
903
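/*
 * Illustrative sketch (not compiled): arming the trace hook above for one
 * particular lock.  In practice traced_lock is usually set from the
 * debugger or from ad-hoc debug code; the function name is hypothetical.
 */
#if 0
void
example_trace_this_lock(
	usimple_lock_t	l)
{
	traced_lock = l;	/* subsequent operations on l emit XPR_SLOCK records */
	lock_seq = 0;		/* restart the sequence numbering */
}
#endif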
904
905 #if MACH_KDB
906 #define printf kdbprintf
907 void db_show_all_slocks(void);
908 void
909 db_show_all_slocks(void)
910 {
911 unsigned int i, index;
912 int mycpu = cpu_number();
913 usimple_lock_t l;
914
915 if (uslock_stack_enabled == FALSE) {
916 printf("Lock stack not enabled\n");
917 return;
918 }
919
920 #if 0
921 if (!mach_slocks_init)
922 iprintf("WARNING: simple locks stack may not be accurate\n");
923 #endif
924 assert(uslock_stack_index[mycpu] >= 0);
925 assert(uslock_stack_index[mycpu] <= USLOCK_STACK_DEPTH);
926 index = uslock_stack_index[mycpu];
927 for (i = 0; i < index; ++i) {
928 l = uslock_stack[mycpu][i];
929 iprintf("%d: ", i);
930 db_printsym((vm_offset_t)l, DB_STGY_ANY);
931 if (l->debug.lock_pc != INVALID_PC) {
932 printf(" locked by ");
933 db_printsym((int)l->debug.lock_pc, DB_STGY_PROC);
934 }
935 printf("\n");
936 }
937 }
938 #endif /* MACH_KDB */
939
940 #endif /* USLOCK_DEBUG */
941
942 /* #endif USIMPLE_LOCK_CALLS */
943
944 /*
945 * Routine: lock_alloc
946 * Function:
947 * Allocate a lock for external users who cannot
948 * hard-code the structure definition into their
949 * objects.
950 * For now just use kalloc, but a zone is probably
951 * warranted.
952 */
953 lock_t *
954 lock_alloc(
955 boolean_t can_sleep,
956 etap_event_t event,
957 etap_event_t i_event)
958 {
959 lock_t *l;
960
961 if ((l = (lock_t *)kalloc(sizeof(lock_t))) != 0)
962 lock_init(l, can_sleep, event, i_event);
963 return(l);
964 }
965
966 /*
967 * Routine: lock_free
968 * Function:
969 * Free a lock allocated for external users.
970 * For now just use kfree, but a zone is probably
971 * warranted.
972 */
973 void
974 lock_free(
975 lock_t *l)
976 {
977 kfree((vm_offset_t)l, sizeof(lock_t));
978 }
979
980
981 /*
982 * Routine: lock_init
983 * Function:
984 * Initialize a lock; required before use.
985 * Note that clients declare the "struct lock"
986 * variables and then initialize them, rather
987 * than getting a new one from this module.
988 */
989 void
990 lock_init(
991 lock_t *l,
992 boolean_t can_sleep,
993 etap_event_t event,
994 etap_event_t i_event)
995 {
996 (void) memset((void *) l, 0, sizeof(lock_t));
997
998 #if ETAP_LOCK_TRACE
999 etap_event_table_assign(&l->u.event_table_chain, event);
1000 l->u.s.start_list = SD_ENTRY_NULL;
1001 #endif /* ETAP_LOCK_TRACE */
1002
1003 simple_lock_init(&l->interlock, i_event);
1004 l->want_write = FALSE;
1005 l->want_upgrade = FALSE;
1006 l->read_count = 0;
1007 l->can_sleep = can_sleep;
1008
1009 #if ETAP_LOCK_ACCUMULATE
1010 l->cbuff_write = etap_cbuff_reserve(lock_event_table(l));
1011 if (l->cbuff_write != CBUFF_ENTRY_NULL) {
1012 l->cbuff_write->event = event;
1013 l->cbuff_write->instance = (unsigned long) l;
1014 l->cbuff_write->kind = WRITE_LOCK;
1015 }
1016 l->cbuff_read = CBUFF_ENTRY_NULL;
1017 #endif /* ETAP_LOCK_ACCUMULATE */
1018 }
1019
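/*
 * Usage sketch (not compiled): typical use of the read/write lock package
 * implemented below.  The lock and function names are hypothetical, and the
 * zero etap_event_t arguments are placeholder event ids.  Clients embed a
 * "struct lock" (or call lock_alloc()/lock_free()) and initialize it with
 * lock_init() before use.
 */
#if 0
static struct lock	example_table_lock;

static void
example_table_init(void)
{
	lock_init(&example_table_lock, TRUE,		/* can_sleep */
		  (etap_event_t) 0, (etap_event_t) 0);
}

static void
example_table_lookup(void)
{
	lock_read(&example_table_lock);		/* shared: many readers allowed */
	/* ... read-only access to the table ... */
	lock_done(&example_table_lock);		/* lock_done() releases either kind */
}

static void
example_table_update(void)
{
	lock_write(&example_table_lock);	/* exclusive access */
	/* ... modify the table ... */
	lock_done(&example_table_lock);
}
#endif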
1020
1021 /*
1022 * Sleep locks. These use the same data structure and algorithm
1023 * as the spin locks, but the process sleeps while it is waiting
1024 * for the lock. These work on uniprocessor systems.
1025 */
1026
1027 #define DECREMENTER_TIMEOUT 1000000
1028
1029 void
1030 lock_write(
1031 register lock_t * l)
1032 {
1033 register int i;
1034 start_data_node_t entry = {0};
1035 boolean_t lock_miss = FALSE;
1036 unsigned short dynamic = 0;
1037 unsigned short trace = 0;
1038 etap_time_t total_time;
1039 etap_time_t stop_wait_time;
1040 pc_t pc;
1041 #if MACH_LDEBUG
1042 int decrementer;
1043 #endif /* MACH_LDEBUG */
1044
1045
1046 ETAP_STAMP(lock_event_table(l), trace, dynamic);
1047 ETAP_CREATE_ENTRY(entry, trace);
1048 MON_ASSIGN_PC(entry->start_pc, pc, trace);
1049
1050 simple_lock(&l->interlock);
1051
1052 /*
1053 * Link the new start_list entry
1054 */
1055 ETAP_LINK_ENTRY(l, entry, trace);
1056
1057 #if MACH_LDEBUG
1058 decrementer = DECREMENTER_TIMEOUT;
1059 #endif /* MACH_LDEBUG */
1060
1061 /*
1062 * Try to acquire the want_write bit.
1063 */
1064 while (l->want_write) {
1065 if (!lock_miss) {
1066 ETAP_CONTENTION_TIMESTAMP(entry, trace);
1067 lock_miss = TRUE;
1068 }
1069
1070 i = lock_wait_time[l->can_sleep ? 1 : 0];
1071 if (i != 0) {
1072 simple_unlock(&l->interlock);
1073 #if MACH_LDEBUG
1074 if (!--decrementer)
1075 Debugger("timeout - want_write");
1076 #endif /* MACH_LDEBUG */
1077 while (--i != 0 && l->want_write)
1078 continue;
1079 simple_lock(&l->interlock);
1080 }
1081
1082 if (l->can_sleep && l->want_write) {
1083 l->waiting = TRUE;
1084 ETAP_SET_REASON(current_thread(),
1085 BLOCKED_ON_COMPLEX_LOCK);
1086 thread_sleep_simple_lock((event_t) l,
1087 simple_lock_addr(l->interlock), FALSE);
1088 simple_lock(&l->interlock);
1089 }
1090 }
1091 l->want_write = TRUE;
1092
1093 /* Wait for readers (and upgrades) to finish */
1094
1095 #if MACH_LDEBUG
1096 decrementer = DECREMENTER_TIMEOUT;
1097 #endif /* MACH_LDEBUG */
1098 while ((l->read_count != 0) || l->want_upgrade) {
1099 if (!lock_miss) {
1100 ETAP_CONTENTION_TIMESTAMP(entry,trace);
1101 lock_miss = TRUE;
1102 }
1103
1104 i = lock_wait_time[l->can_sleep ? 1 : 0];
1105 if (i != 0) {
1106 simple_unlock(&l->interlock);
1107 #if MACH_LDEBUG
1108 if (!--decrementer)
1109 Debugger("timeout - wait for readers");
1110 #endif /* MACH_LDEBUG */
1111 while (--i != 0 && (l->read_count != 0 ||
1112 l->want_upgrade))
1113 continue;
1114 simple_lock(&l->interlock);
1115 }
1116
1117 if (l->can_sleep && (l->read_count != 0 || l->want_upgrade)) {
1118 l->waiting = TRUE;
1119 ETAP_SET_REASON(current_thread(),
1120 BLOCKED_ON_COMPLEX_LOCK);
1121 thread_sleep_simple_lock((event_t) l,
1122 simple_lock_addr(l->interlock), FALSE);
1123 simple_lock(&l->interlock);
1124 }
1125 }
1126
1127 /*
1128 * do not collect wait data if either the lock
1129 * was free or no wait traces are enabled.
1130 */
1131
1132 if (lock_miss && ETAP_CONTENTION_ENABLED(trace)) {
1133 ETAP_TIMESTAMP(stop_wait_time);
1134 ETAP_TOTAL_TIME(total_time,
1135 stop_wait_time,
1136 entry->start_wait_time);
1137 CUM_WAIT_ACCUMULATE(l->cbuff_write, total_time, dynamic, trace);
1138 MON_DATA_COLLECT(l,
1139 entry,
1140 total_time,
1141 WRITE_LOCK,
1142 MON_CONTENTION,
1143 trace);
1144 }
1145
1146 simple_unlock(&l->interlock);
1147
1148 /*
1149 * Set start hold time if some type of hold tracing is enabled.
1150 *
1151 * Note: if the stop_wait_time was already stamped, use
1152 * it as the start_hold_time instead of doing an
1153 * expensive bus access.
1154 *
1155 */
1156
1157 if (lock_miss && ETAP_CONTENTION_ENABLED(trace))
1158 ETAP_COPY_START_HOLD_TIME(entry, stop_wait_time, trace);
1159 else
1160 ETAP_DURATION_TIMESTAMP(entry, trace);
1161
1162 }
1163
1164 void
1165 lock_done(
1166 register lock_t * l)
1167 {
1168 boolean_t do_wakeup = FALSE;
1169 start_data_node_t entry;
1170 unsigned short dynamic = 0;
1171 unsigned short trace = 0;
1172 etap_time_t stop_hold_time;
1173 etap_time_t total_time;
1174 unsigned long lock_kind;
1175 pc_t pc;
1176
1177
1178 ETAP_STAMP(lock_event_table(l), trace, dynamic);
1179
1180 simple_lock(&l->interlock);
1181
1182 if (l->read_count != 0) {
1183 l->read_count--;
1184 lock_kind = READ_LOCK;
1185 }
1186 else
1187 if (l->want_upgrade) {
1188 l->want_upgrade = FALSE;
1189 lock_kind = WRITE_LOCK;
1190 }
1191 else {
1192 l->want_write = FALSE;
1193 lock_kind = WRITE_LOCK;
1194 }
1195
1196 /*
1197 * There is no reason to wakeup a waiting thread
1198 * if the read-count is non-zero. Consider:
1199 * we must be dropping a read lock
1200 * threads are waiting only if one wants a write lock
1201 * if there are still readers, they can't proceed
1202 */
1203
1204 if (l->waiting && (l->read_count == 0)) {
1205 l->waiting = FALSE;
1206 do_wakeup = TRUE;
1207 }
1208 /*
1209 * Collect hold data if hold tracing is
1210 * enabled.
1211 */
1212
1213 /*
1214 * NOTE: All complex locks whose tracing was on when the
1215 * lock was acquired will have an entry in the start_data
1216 * list.
1217 */
1218
1219 ETAP_UNLINK_ENTRY(l,entry);
1220 if (ETAP_DURATION_ENABLED(trace) && entry != SD_ENTRY_NULL) {
1221 ETAP_TIMESTAMP (stop_hold_time);
1222 ETAP_TOTAL_TIME (total_time,
1223 stop_hold_time,
1224 entry->start_hold_time);
1225
1226 if (lock_kind & WRITE_LOCK)
1227 CUM_HOLD_ACCUMULATE (l->cbuff_write,
1228 total_time,
1229 dynamic,
1230 trace);
1231 else {
1232 CUM_READ_ENTRY_RESERVE(l,l->cbuff_read,trace);
1233 CUM_HOLD_ACCUMULATE (l->cbuff_read,
1234 total_time,
1235 dynamic,
1236 trace);
1237 }
1238 MON_ASSIGN_PC(entry->end_pc,pc,trace);
1239 MON_DATA_COLLECT(l,entry,
1240 total_time,
1241 lock_kind,
1242 MON_DURATION,
1243 trace);
1244 }
1245
1246 simple_unlock(&l->interlock);
1247
1248 ETAP_DESTROY_ENTRY(entry);
1249
1250 if (do_wakeup)
1251 thread_wakeup((event_t) l);
1252 }
1253
1254 void
1255 lock_read(
1256 register lock_t * l)
1257 {
1258 register int i;
1259 start_data_node_t entry = {0};
1260 boolean_t lock_miss = FALSE;
1261 unsigned short dynamic = 0;
1262 unsigned short trace = 0;
1263 etap_time_t total_time;
1264 etap_time_t stop_wait_time;
1265 pc_t pc;
1266 #if MACH_LDEBUG
1267 int decrementer;
1268 #endif /* MACH_LDEBUG */
1269
1270 ETAP_STAMP(lock_event_table(l), trace, dynamic);
1271 ETAP_CREATE_ENTRY(entry, trace);
1272 MON_ASSIGN_PC(entry->start_pc, pc, trace);
1273
1274 simple_lock(&l->interlock);
1275
1276 /*
1277 * Link the new start_list entry
1278 */
1279 ETAP_LINK_ENTRY(l,entry,trace);
1280
1281 #if MACH_LDEBUG
1282 decrementer = DECREMENTER_TIMEOUT;
1283 #endif /* MACH_LDEBUG */
1284 while (l->want_write || l->want_upgrade) {
1285 if (!lock_miss) {
1286 ETAP_CONTENTION_TIMESTAMP(entry, trace);
1287 lock_miss = TRUE;
1288 }
1289
1290 i = lock_wait_time[l->can_sleep ? 1 : 0];
1291
1292 if (i != 0) {
1293 simple_unlock(&l->interlock);
1294 #if MACH_LDEBUG
1295 if (!--decrementer)
1296 Debugger("timeout - wait no writers");
1297 #endif /* MACH_LDEBUG */
1298 while (--i != 0 && (l->want_write || l->want_upgrade))
1299 continue;
1300 simple_lock(&l->interlock);
1301 }
1302
1303 if (l->can_sleep && (l->want_write || l->want_upgrade)) {
1304 l->waiting = TRUE;
1305 thread_sleep_simple_lock((event_t) l,
1306 simple_lock_addr(l->interlock), FALSE);
1307 simple_lock(&l->interlock);
1308 }
1309 }
1310
1311 l->read_count++;
1312
1313 /*
1314 * Do not collect wait data if the lock was free
1315 * or if no wait traces are enabled.
1316 */
1317
1318 if (lock_miss && ETAP_CONTENTION_ENABLED(trace)) {
1319 ETAP_TIMESTAMP(stop_wait_time);
1320 ETAP_TOTAL_TIME(total_time,
1321 stop_wait_time,
1322 entry->start_wait_time);
1323 CUM_READ_ENTRY_RESERVE(l, l->cbuff_read, trace);
1324 CUM_WAIT_ACCUMULATE(l->cbuff_read, total_time, dynamic, trace);
1325 MON_DATA_COLLECT(l,
1326 entry,
1327 total_time,
1328 READ_LOCK,
1329 MON_CONTENTION,
1330 trace);
1331 }
1332 simple_unlock(&l->interlock);
1333
1334 /*
1335 * Set start hold time if some type of hold tracing is enabled.
1336 *
1337 * Note: if the stop_wait_time was already stamped, use
1338 * it instead of doing an expensive bus access.
1339 *
1340 */
1341
1342 if (lock_miss && ETAP_CONTENTION_ENABLED(trace))
1343 ETAP_COPY_START_HOLD_TIME(entry, stop_wait_time, trace);
1344 else
1345 ETAP_DURATION_TIMESTAMP(entry,trace);
1346 }
1347
1348
1349 /*
1350 * Routine: lock_read_to_write
1351 * Function:
1352 * Improves a read-only lock to one with
1353 * write permission. If another reader has
1354 * already requested an upgrade to a write lock,
1355 * no lock is held upon return.
1356 *
1357 * Returns TRUE if the upgrade *failed*.
1358 */
1359
1360 boolean_t
1361 lock_read_to_write(
1362 register lock_t * l)
1363 {
1364 register int i;
1365 boolean_t do_wakeup = FALSE;
1366 start_data_node_t entry = {0};
1367 boolean_t lock_miss = FALSE;
1368 unsigned short dynamic = 0;
1369 unsigned short trace = 0;
1370 etap_time_t total_time;
1371 etap_time_t stop_time;
1372 pc_t pc;
1373 #if MACH_LDEBUG
1374 int decrementer;
1375 #endif /* MACH_LDEBUG */
1376
1377
1378 ETAP_STAMP(lock_event_table(l), trace, dynamic);
1379
1380 simple_lock(&l->interlock);
1381
1382 l->read_count--;
1383
1384 /*
1385 * Since the read lock is lost whether the write lock
1386 * is acquired or not, read hold data is collected here.
1387 * This, of course, is assuming some type of hold
1388 * tracing is enabled.
1389 *
1390 * Note: trace is set to zero if the entry does not exist.
1391 */
1392
1393 ETAP_FIND_ENTRY(l, entry, trace);
1394
1395 if (ETAP_DURATION_ENABLED(trace)) {
1396 ETAP_TIMESTAMP(stop_time);
1397 ETAP_TOTAL_TIME(total_time, stop_time, entry->start_hold_time);
1398 CUM_HOLD_ACCUMULATE(l->cbuff_read, total_time, dynamic, trace);
1399 MON_ASSIGN_PC(entry->end_pc, pc, trace);
1400 MON_DATA_COLLECT(l,
1401 entry,
1402 total_time,
1403 READ_LOCK,
1404 MON_DURATION,
1405 trace);
1406 }
1407
1408 if (l->want_upgrade) {
1409 /*
1410 * Someone else has requested upgrade.
1411 * Since we've released a read lock, wake
1412 * him up.
1413 */
1414 if (l->waiting && (l->read_count == 0)) {
1415 l->waiting = FALSE;
1416 do_wakeup = TRUE;
1417 }
1418
1419 ETAP_UNLINK_ENTRY(l, entry);
1420 simple_unlock(&l->interlock);
1421 ETAP_DESTROY_ENTRY(entry);
1422
1423 if (do_wakeup)
1424 thread_wakeup((event_t) l);
1425 return (TRUE);
1426 }
1427
1428 l->want_upgrade = TRUE;
1429
1430 MON_ASSIGN_PC(entry->start_pc, pc, trace);
1431
1432 #if MACH_LDEBUG
1433 decrementer = DECREMENTER_TIMEOUT;
1434 #endif /* MACH_LDEBUG */
1435 while (l->read_count != 0) {
1436 if (!lock_miss) {
1437 ETAP_CONTENTION_TIMESTAMP(entry, trace);
1438 lock_miss = TRUE;
1439 }
1440
1441 i = lock_wait_time[l->can_sleep ? 1 : 0];
1442
1443 if (i != 0) {
1444 simple_unlock(&l->interlock);
1445 #if MACH_LDEBUG
1446 if (!--decrementer)
1447 Debugger("timeout - read_count");
1448 #endif /* MACH_LDEBUG */
1449 while (--i != 0 && l->read_count != 0)
1450 continue;
1451 simple_lock(&l->interlock);
1452 }
1453
1454 if (l->can_sleep && l->read_count != 0) {
1455 l->waiting = TRUE;
1456 thread_sleep_simple_lock((event_t) l,
1457 simple_lock_addr(l->interlock), FALSE);
1458 simple_lock(&l->interlock);
1459 }
1460 }
1461
1462 /*
1463 * do not collect wait data if the lock was free
1464 * or if no wait traces are enabled.
1465 */
1466
1467 if (lock_miss && ETAP_CONTENTION_ENABLED(trace)) {
1468 ETAP_TIMESTAMP (stop_time);
1469 ETAP_TOTAL_TIME(total_time, stop_time, entry->start_wait_time);
1470 CUM_WAIT_ACCUMULATE(l->cbuff_write, total_time, dynamic, trace);
1471 MON_DATA_COLLECT(l,
1472 entry,
1473 total_time,
1474 WRITE_LOCK,
1475 MON_CONTENTION,
1476 trace);
1477 }
1478
1479 simple_unlock(&l->interlock);
1480
1481 /*
1482 * Set start hold time if some type of hold tracing is enabled
1483 *
1484 * Note: if the stop_time was already stamped, use
1485 * it as the new start_hold_time instead of doing
1486 * an expensive VME access.
1487 *
1488 */
1489
1490 if (lock_miss && ETAP_CONTENTION_ENABLED(trace))
1491 ETAP_COPY_START_HOLD_TIME(entry, stop_time, trace);
1492 else
1493 ETAP_DURATION_TIMESTAMP(entry, trace);
1494
1495 return (FALSE);
1496 }
1497
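/*
 * Usage sketch (not compiled): the retry pattern lock_read_to_write()
 * callers are expected to follow, using the hypothetical example_table_lock
 * from the sketch after lock_init().  Because a TRUE return means the
 * upgrade failed and the read lock has already been dropped, the caller
 * must fall back to lock_write() and re-validate anything it learned while
 * it held only the read lock.
 */
#if 0
static void
example_table_update_if_needed(void)
{
	lock_read(&example_table_lock);
	/* ... decide, under the read lock, that an update is needed ... */
	if (lock_read_to_write(&example_table_lock)) {
		/* Upgrade failed: no lock is held here.  Start over. */
		lock_write(&example_table_lock);
		/* ... re-check the state examined under the read lock ... */
	}
	/* ... perform the update with write access ... */
	lock_done(&example_table_lock);
}
#endif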
1498 void
1499 lock_write_to_read(
1500 register lock_t * l)
1501 {
1502 boolean_t do_wakeup = FALSE;
1503 start_data_node_t entry = {0};
1504 unsigned short dynamic = 0;
1505 unsigned short trace = 0;
1506 etap_time_t stop_hold_time;
1507 etap_time_t total_time;
1508 pc_t pc;
1509
1510 ETAP_STAMP(lock_event_table(l), trace,dynamic);
1511
1512 simple_lock(&l->interlock);
1513
1514 l->read_count++;
1515 if (l->want_upgrade)
1516 l->want_upgrade = FALSE;
1517 else
1518 l->want_write = FALSE;
1519
1520 if (l->waiting) {
1521 l->waiting = FALSE;
1522 do_wakeup = TRUE;
1523 }
1524
1525 /*
1526 * Since we are switching from a write lock to a read lock,
1527 * the write lock data is stored and the read lock data
1528 * collection begins.
1529 *
1530 * Note: trace is set to zero if the entry does not exist.
1531 */
1532
1533 ETAP_FIND_ENTRY(l, entry, trace);
1534
1535 if (ETAP_DURATION_ENABLED(trace)) {
1536 ETAP_TIMESTAMP (stop_hold_time);
1537 ETAP_TOTAL_TIME(total_time, stop_hold_time, entry->start_hold_time);
1538 CUM_HOLD_ACCUMULATE(l->cbuff_write, total_time, dynamic, trace);
1539 MON_ASSIGN_PC(entry->end_pc, pc, trace);
1540 MON_DATA_COLLECT(l,
1541 entry,
1542 total_time,
1543 WRITE_LOCK,
1544 MON_DURATION,
1545 trace);
1546 }
1547
1548 simple_unlock(&l->interlock);
1549
1550 /*
1551 * Set start hold time if some type of hold tracing is enabled
1552 *
1553 * Note: if the stop_hold_time was already stamped, use
1554 * it as the new start_hold_time instead of doing
1555 * an expensive bus access.
1556 *
1557 */
1558
1559 if (ETAP_DURATION_ENABLED(trace))
1560 ETAP_COPY_START_HOLD_TIME(entry, stop_hold_time, trace);
1561 else
1562 ETAP_DURATION_TIMESTAMP(entry, trace);
1563
1564 MON_ASSIGN_PC(entry->start_pc, pc, trace);
1565
1566 if (do_wakeup)
1567 thread_wakeup((event_t) l);
1568 }
1569
1570
1571 #if 0 /* Unused */
1572 /*
1573 * Routine: lock_try_write
1574 * Function:
1575 * Tries to get a write lock.
1576 *
1577 * Returns FALSE if the lock is not held on return.
1578 */
1579
1580 boolean_t
1581 lock_try_write(
1582 register lock_t * l)
1583 {
1584 start_data_node_t entry = {0};
1585 unsigned short trace = 0;
1586 pc_t pc;
1587
1588 ETAP_STAMP(lock_event_table(l), trace, trace);
1589 ETAP_CREATE_ENTRY(entry, trace);
1590
1591 simple_lock(&l->interlock);
1592
1593 if (l->want_write || l->want_upgrade || l->read_count) {
1594 /*
1595 * Can't get lock.
1596 */
1597 simple_unlock(&l->interlock);
1598 ETAP_DESTROY_ENTRY(entry);
1599 return(FALSE);
1600 }
1601
1602 /*
1603 * Have lock.
1604 */
1605
1606 l->want_write = TRUE;
1607
1608 ETAP_LINK_ENTRY(l, entry, trace);
1609
1610 simple_unlock(&l->interlock);
1611
1612 MON_ASSIGN_PC(entry->start_pc, pc, trace);
1613 ETAP_DURATION_TIMESTAMP(entry, trace);
1614
1615 return(TRUE);
1616 }
1617
1618 /*
1619 * Routine: lock_try_read
1620 * Function:
1621 * Tries to get a read lock.
1622 *
1623 * Returns FALSE if the lock is not held on return.
1624 */
1625
1626 boolean_t
1627 lock_try_read(
1628 register lock_t * l)
1629 {
1630 start_data_node_t entry = {0};
1631 unsigned short trace = 0;
1632 pc_t pc;
1633
1634 ETAP_STAMP(lock_event_table(l), trace, trace);
1635 ETAP_CREATE_ENTRY(entry, trace);
1636
1637 simple_lock(&l->interlock);
1638
1639 if (l->want_write || l->want_upgrade) {
1640 simple_unlock(&l->interlock);
1641 ETAP_DESTROY_ENTRY(entry);
1642 return(FALSE);
1643 }
1644
1645 l->read_count++;
1646
1647 ETAP_LINK_ENTRY(l, entry, trace);
1648
1649 simple_unlock(&l->interlock);
1650
1651 MON_ASSIGN_PC(entry->start_pc, pc, trace);
1652 ETAP_DURATION_TIMESTAMP(entry, trace);
1653
1654 return(TRUE);
1655 }
1656 #endif /* Unused */
1657
1658 #if MACH_KDB
1659
1660 void db_show_one_lock(lock_t *);
1661
1662
1663 void
1664 db_show_one_lock(
1665 lock_t *lock)
1666 {
1667 db_printf("Read_count = 0x%x, %swant_upgrade, %swant_write, ",
1668 lock->read_count,
1669 lock->want_upgrade ? "" : "!",
1670 lock->want_write ? "" : "!");
1671 db_printf("%swaiting, %scan_sleep\n",
1672 lock->waiting ? "" : "!", lock->can_sleep ? "" : "!");
1673 db_printf("Interlock:\n");
1674 db_show_one_simple_lock((db_expr_t)simple_lock_addr(lock->interlock),
1675 TRUE, (db_expr_t)0, (char *)0);
1676 }
1677 #endif /* MACH_KDB */
1678
1679 /*
1680 * The C portion of the mutex package. These routines are only invoked
1681 * if the optimized assembler routines can't do the work.
1682 */
1683
1684 /*
1685 * Routine: mutex_alloc
1686 * Function:
1687 * Allocate a mutex for external users who cannot
1688 * hard-code the structure definition into their
1689 * objects.
1690 * For now just use kalloc, but a zone is probably
1691 * warranted.
1692 */
1693 mutex_t *
1694 mutex_alloc(
1695 etap_event_t event)
1696 {
1697 mutex_t *m;
1698
1699 if ((m = (mutex_t *)kalloc(sizeof(mutex_t))) != 0)
1700 mutex_init(m, event);
1701 return(m);
1702 }
1703
1704 /*
1705 * Routine: mutex_free
1706 * Function:
1707 * Free a mutex allocated for external users.
1708 * For now just use kfree, but a zone is probably
1709 * warranted.
1710 */
1711 void
1712 mutex_free(
1713 mutex_t *m)
1714 {
1715 kfree((vm_offset_t)m, sizeof(mutex_t));
1716 }
1717
1718
1719 /*
1720 * mutex_lock_wait: Invoked if the assembler routine mutex_lock () fails
1721 * because the mutex is already held by another thread. Called with the
1722 * interlock locked and returns with the interlock unlocked.
1723 */
1724
1725 void
1726 mutex_lock_wait (
1727 mutex_t * m)
1728 {
1729 m->waiters++;
1730 ETAP_SET_REASON(current_thread(), BLOCKED_ON_MUTEX_LOCK);
1731 thread_sleep_interlock ((event_t) m, &m->interlock, THREAD_UNINT);
1732 }
1733
1734 /*
1735 * mutex_unlock_wakeup: Invoked if the assembler routine mutex_unlock ()
1736 * fails because there are thread(s) waiting for this mutex. Called and
1737 * returns with the interlock locked.
1738 */
1739
1740 void
1741 mutex_unlock_wakeup (
1742 mutex_t * m)
1743 {
1744 assert(m->waiters);
1745 m->waiters--;
1746 thread_wakeup_one ((event_t) m);
1747 }
1748
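/*
 * Conceptual sketch only (not compiled, and not the real implementation):
 * roughly how the machine-dependent assembler mutex_lock()/mutex_unlock()
 * fast paths hand off to the two C slow-path helpers above.  The
 * interlock_lock()/interlock_unlock() helpers and the treatment of
 * m->locked as a simple flag are simplifications of the machine-dependent
 * code, shown only to illustrate the interlock hand-off.
 */
#if 0
void
sketch_mutex_lock(
	mutex_t		*m)
{
	for (;;) {
		interlock_lock(&m->interlock);		/* MD spin interlock */
		if (!m->locked) {
			m->locked = TRUE;		/* uncontended fast path */
			interlock_unlock(&m->interlock);
			return;
		}
		/*
		 * Contended: record ourselves as a waiter and block.
		 * mutex_lock_wait() releases the interlock for us.
		 */
		mutex_lock_wait(m);
	}
}

void
sketch_mutex_unlock(
	mutex_t		*m)
{
	interlock_lock(&m->interlock);
	m->locked = FALSE;
	if (m->waiters)
		mutex_unlock_wakeup(m);			/* returns with interlock still held */
	interlock_unlock(&m->interlock);
}
#endif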
1749 /*
1750 * mutex_pause: Called by former callers of simple_lock_pause().
1751 */
1752
1753 void
1754 mutex_pause(void)
1755 {
1756 int wait_result;
1757
1758 assert_wait_timeout( 1, THREAD_INTERRUPTIBLE);
1759 ETAP_SET_REASON(current_thread(), BLOCKED_ON_MUTEX_LOCK);
1760 wait_result = thread_block((void (*)(void))0);
1761 if (wait_result != THREAD_TIMED_OUT)
1762 thread_cancel_timer();
1763 }
1764
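/*
 * Usage sketch (not compiled): the kind of try/back-off loop mutex_pause()
 * is intended for.  The mutex and function names are hypothetical;
 * mutex_try() and mutex_unlock() are assumed to be the usual kern/lock.h
 * mutex primitives.
 */
#if 0
static mutex_t	example_object_mutex;	/* initialized elsewhere with mutex_init() */

static void
example_poll_object(void)
{
	while (!mutex_try(&example_object_mutex))
		mutex_pause();		/* give up the processor briefly, then retry */
	/* ... example_object_mutex is now held ... */
	mutex_unlock(&example_object_mutex);
}
#endif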
1765 #if MACH_KDB
1766 /*
1767 * Routines to print out simple_locks and mutexes in a nicely-formatted
1768 * fashion.
1769 */
1770
1771 char *simple_lock_labels = "ENTRY ILK THREAD DURATION CALLER";
1772 char *mutex_labels = "ENTRY LOCKED WAITERS THREAD CALLER";
1773
1774 void
1775 db_show_one_simple_lock (
1776 db_expr_t addr,
1777 boolean_t have_addr,
1778 db_expr_t count,
1779 char * modif)
1780 {
1781 simple_lock_t saddr = (simple_lock_t)addr;
1782
1783 if (saddr == (simple_lock_t)0 || !have_addr) {
1784 db_error ("No simple_lock\n");
1785 }
1786 #if USLOCK_DEBUG
1787 else if (saddr->lock_type != USLOCK_TAG)
1788 db_error ("Not a simple_lock\n");
1789 #endif /* USLOCK_DEBUG */
1790
1791 db_printf ("%s\n", simple_lock_labels);
1792 db_print_simple_lock (saddr);
1793 }
1794
1795 void
1796 db_print_simple_lock (
1797 simple_lock_t addr)
1798 {
1799
1800 db_printf ("%08x %3d", addr, *hw_lock_addr(addr->interlock));
1801 #if USLOCK_DEBUG
1802 db_printf (" %08x", addr->debug.lock_thread);
1803 db_printf (" %08x ", addr->debug.duration[1]);
1804 db_printsym ((int)addr->debug.lock_pc, DB_STGY_ANY);
1805 #endif /* USLOCK_DEBUG */
1806 db_printf ("\n");
1807 }
1808
1809 void
1810 db_show_one_mutex (
1811 db_expr_t addr,
1812 boolean_t have_addr,
1813 db_expr_t count,
1814 char * modif)
1815 {
1816 mutex_t * maddr = (mutex_t *)addr;
1817
1818 if (maddr == (mutex_t *)0 || !have_addr)
1819 db_error ("No mutex\n");
1820 #if MACH_LDEBUG
1821 else if (maddr->type != MUTEX_TAG)
1822 db_error ("Not a mutex\n");
1823 #endif /* MACH_LDEBUG */
1824
1825 db_printf ("%s\n", mutex_labels);
1826 db_print_mutex (maddr);
1827 }
1828
1829 void
1830 db_print_mutex (
1831 mutex_t * addr)
1832 {
1833 db_printf ("%08x %6d %7d",
1834 addr, *hw_lock_addr(addr->locked), addr->waiters);
1835 #if MACH_LDEBUG
1836 db_printf (" %08x ", addr->thread);
1837 db_printsym (addr->pc, DB_STGY_ANY);
1838 #endif /* MACH_LDEBUG */
1839 db_printf ("\n");
1840 }
1841 #endif /* MACH_KDB */
1842
1843 #if MACH_LDEBUG
1844 extern void meter_simple_lock (
1845 simple_lock_t l);
1846 extern void meter_simple_unlock (
1847 simple_lock_t l);
1848 extern void cyctm05_stamp (
1849 unsigned long * start);
1850 extern void cyctm05_diff (
1851 unsigned long * start,
1852 unsigned long * end,
1853 unsigned long * diff);
1854
1855 #if 0
1856 simple_lock_data_t loser;
1857 #endif
1858
1859 void
1860 meter_simple_lock(
1861 simple_lock_t lp)
1862 {
1863 #if 0
1864 cyctm05_stamp (lp->duration);
1865 #endif
1866 }
1867
1868 int long_simple_lock_crash;
1869 int long_simple_lock_time = 0x600;
1870 /*
1871 * This is pretty gawd-awful. XXX
1872 */
1873 decl_simple_lock_data(extern,kd_tty)
1874
1875 void
1876 meter_simple_unlock(
1877 simple_lock_t lp)
1878 {
1879 #if 0
1880 unsigned long stime[2], etime[2], delta[2];
1881
1882 if (lp == &kd_tty) /* XXX */
1883 return; /* XXX */
1884
1885 stime[0] = lp->duration[0];
1886 stime[1] = lp->duration[1];
1887
1888 cyctm05_stamp (etime);
1889
1890 if (etime[1] < stime[1]) /* XXX */
1891 return; /* XXX */
1892
1893 cyctm05_diff (stime, etime, delta);
1894
1895 if (delta[1] >= 0x10000) /* XXX */
1896 return; /* XXX */
1897
1898 lp->duration[0] = delta[0];
1899 lp->duration[1] = delta[1];
1900
1901 if (loser.duration[1] < lp->duration[1])
1902 loser = *lp;
1903
1904 assert (!long_simple_lock_crash || delta[1] < long_simple_lock_time);
1905 #endif
1906 }
1907 #endif /* MACH_LDEBUG */
1908
1909
1910 #if ETAP_LOCK_TRACE
1911
1912 /*
1913 * ==============================================================
1914 * ETAP hook when initializing a usimple_lock. May be invoked
1915 * from the portable lock package or from an optimized machine-
1916 * dependent implementation.
1917 * ==============================================================
1918 */
1919
1920 void
1921 etap_simplelock_init (
1922 simple_lock_t l,
1923 etap_event_t event)
1924 {
1925 ETAP_CLEAR_TRACE_DATA(l);
1926 etap_event_table_assign(&l->u.event_table_chain, event);
1927
1928 #if ETAP_LOCK_ACCUMULATE
1929 /* reserve an entry in the cumulative buffer */
1930 l->cbuff_entry = etap_cbuff_reserve(lock_event_table(l));
1931 /* initialize the entry if one was returned */
1932 if (l->cbuff_entry != CBUFF_ENTRY_NULL) {
1933 l->cbuff_entry->event = event;
1934 l->cbuff_entry->instance = (unsigned long) l;
1935 l->cbuff_entry->kind = SPIN_LOCK;
1936 }
1937 #endif /* ETAP_LOCK_ACCUMULATE */
1938 }
1939
1940
1941 void
1942 etap_simplelock_unlock(
1943 simple_lock_t l)
1944 {
1945 unsigned short dynamic = 0;
1946 unsigned short trace = 0;
1947 etap_time_t total_time;
1948 etap_time_t stop_hold_time;
1949 pc_t pc;
1950
1951 OBTAIN_PC(pc, l);
1952 ETAP_STAMP(lock_event_table(l), trace, dynamic);
1953
1954 /*
1955 * Calculate & collect hold time data only if
1956 * the hold tracing was enabled throughout the
1957 * whole operation. This prevents collection of
1958 * bogus data caused by mid-operation trace changes.
1959 *
1960 */
1961
1962 if (ETAP_DURATION_ENABLED(trace) && ETAP_WHOLE_OP(l)) {
1963 ETAP_TIMESTAMP (stop_hold_time);
1964 ETAP_TOTAL_TIME(total_time, stop_hold_time,
1965 l->u.s.start_hold_time);
1966 CUM_HOLD_ACCUMULATE(l->cbuff_entry, total_time, dynamic, trace);
1967 MON_ASSIGN_PC(l->end_pc, pc, trace);
1968 MON_DATA_COLLECT(l,
1969 l,
1970 total_time,
1971 SPIN_LOCK,
1972 MON_DURATION,
1973 trace);
1974 }
1975 ETAP_CLEAR_TRACE_DATA(l);
1976 }
1977
1978 /* ========================================================================
1979 * Since the simple_lock() routine is machine dependent, it must always
1980 * be coded in assembly. The two hook routines below are used to collect
1981 * lock_stat data.
1982 * ========================================================================
1983 */
1984
1985 /*
1986 * ROUTINE: etap_simplelock_miss()
1987 *
1988 * FUNCTION: This spin lock routine is called upon the first
1989 * spin (miss) of the lock.
1990 *
1991 * A timestamp is taken at the beginning of the wait period,
1992 * if wait tracing is enabled.
1993 *
1994 *
1995 * PARAMETERS:
1996 * - lock address.
1997 *
1998 *
1999 * RETURNS: Wait timestamp value. The timestamp value is later used
2000 * by etap_simplelock_hold().
2001 *
2002 * NOTES: This routine is NOT ALWAYS called. The lock may be free
2003 * (never spinning). For this reason the pc is collected in
2004 * etap_simplelock_hold().
2005 *
2006 */
2007 etap_time_t
2008 etap_simplelock_miss (
2009 simple_lock_t l)
2010
2011 {
2012 unsigned short trace = 0;
2013 unsigned short dynamic = 0;
2014 etap_time_t start_miss_time;
2015
2016 ETAP_STAMP(lock_event_table(l), trace, dynamic);
2017
2018 if (trace & ETAP_CONTENTION)
2019 ETAP_TIMESTAMP(start_miss_time);
2020 else ETAP_TIME_CLEAR(start_miss_time); /* don't return an uninitialized time */
2021 return(start_miss_time);
2022 }
2023
2024 /*
2025 * ROUTINE: etap_simplelock_hold()
2026 *
2027 * FUNCTION: This spin lock routine is ALWAYS called once the lock
2028 * is acquired. Here, the contention time is calculated and
2029 * the start hold time is stamped.
2030 *
2031 * PARAMETERS:
2032 * - lock address.
2033 * - PC of the calling function.
2034 * - start wait timestamp.
2035 *
2036 */
2037
2038 void
2039 etap_simplelock_hold (
2040 simple_lock_t l,
2041 pc_t pc,
2042 etap_time_t start_hold_time)
2043 {
2044 unsigned short dynamic = 0;
2045 unsigned short trace = 0;
2046 etap_time_t total_time;
2047 etap_time_t stop_hold_time;
2048
2049 ETAP_STAMP(lock_event_table(l), trace, dynamic);
2050
2051 MON_ASSIGN_PC(l->start_pc, pc, trace);
2052
2053 /* do not collect wait data if lock was free */
2054 if (!ETAP_TIME_IS_ZERO(start_hold_time) && (trace & ETAP_CONTENTION)) {
2055 ETAP_TIMESTAMP(stop_hold_time);
2056 ETAP_TOTAL_TIME(total_time,
2057 stop_hold_time,
2058 start_hold_time);
2059 CUM_WAIT_ACCUMULATE(l->cbuff_entry, total_time, dynamic, trace);
2060 MON_DATA_COLLECT(l,
2061 l,
2062 total_time,
2063 SPIN_LOCK,
2064 MON_CONTENTION,
2065 trace);
2066 ETAP_COPY_START_HOLD_TIME(&l->u.s, stop_hold_time, trace);
2067 }
2068 else
2069 ETAP_DURATION_TIMESTAMP(&l->u.s, trace);
2070 }
2071
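/*
 * Conceptual sketch only (not compiled): how a machine-dependent,
 * assembly-coded simple_lock() is expected to call the two hooks above.
 * hw_lock_try() stands in for whatever primitive the MD fast path really
 * uses, and the function name is hypothetical.
 */
#if 0
void
sketch_md_simple_lock(
	simple_lock_t	l)
{
	etap_time_t	start_wait_time;

	ETAP_TIME_CLEAR(start_wait_time);
	if (!hw_lock_try(&l->interlock)) {
		/* First miss: stamp the start of the wait, then keep spinning. */
		start_wait_time = etap_simplelock_miss(l);
		while (!hw_lock_try(&l->interlock))
			continue;
	}
	/* Always called once the lock is held; accounts for any contention. */
	etap_simplelock_hold(l, (pc_t) GET_RETURN_PC(&l), start_wait_time);
}
#endif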
2072 void
2073 etap_mutex_init (
2074 mutex_t *l,
2075 etap_event_t event)
2076 {
2077 ETAP_CLEAR_TRACE_DATA(l);
2078 etap_event_table_assign(&l->u.event_table_chain, event);
2079
2080 #if ETAP_LOCK_ACCUMULATE
2081 /* reserve an entry in the cumulative buffer */
2082 l->cbuff_entry = etap_cbuff_reserve(lock_event_table(l));
2083 /* initialize the entry if one was returned */
2084 if (l->cbuff_entry != CBUFF_ENTRY_NULL) {
2085 l->cbuff_entry->event = event;
2086 l->cbuff_entry->instance = (unsigned long) l;
2087 l->cbuff_entry->kind = MUTEX_LOCK;
2088 }
2089 #endif /* ETAP_LOCK_ACCUMULATE */
2090 }
2091
2092 etap_time_t
2093 etap_mutex_miss (
2094 mutex_t *l)
2095 {
2096 unsigned short trace = 0;
2097 unsigned short dynamic = 0;
2098 etap_time_t start_miss_time;
2099
2100 ETAP_STAMP(lock_event_table(l), trace, dynamic);
2101
2102 if (trace & ETAP_CONTENTION)
2103 ETAP_TIMESTAMP(start_miss_time);
2104 else
2105 ETAP_TIME_CLEAR(start_miss_time);
2106
2107 return(start_miss_time);
2108 }
2109
2110 void
2111 etap_mutex_hold (
2112 mutex_t *l,
2113 pc_t pc,
2114 etap_time_t start_hold_time)
2115 {
2116 unsigned short dynamic = 0;
2117 unsigned short trace = 0;
2118 etap_time_t total_time;
2119 etap_time_t stop_hold_time;
2120
2121 ETAP_STAMP(lock_event_table(l), trace, dynamic);
2122
2123 MON_ASSIGN_PC(l->start_pc, pc, trace);
2124
2125 /* do not collect wait data if lock was free */
2126 if (!ETAP_TIME_IS_ZERO(start_hold_time) && (trace & ETAP_CONTENTION)) {
2127 ETAP_TIMESTAMP(stop_hold_time);
2128 ETAP_TOTAL_TIME(total_time,
2129 stop_hold_time,
2130 start_hold_time);
2131 CUM_WAIT_ACCUMULATE(l->cbuff_entry, total_time, dynamic, trace);
2132 MON_DATA_COLLECT(l,
2133 l,
2134 total_time,
2135 MUTEX_LOCK,
2136 MON_CONTENTION,
2137 trace);
2138 ETAP_COPY_START_HOLD_TIME(&l->u.s, stop_hold_time, trace);
2139 }
2140 else
2141 ETAP_DURATION_TIMESTAMP(&l->u.s, trace);
2142 }
2143
2144 void
2145 etap_mutex_unlock(
2146 mutex_t *l)
2147 {
2148 unsigned short dynamic = 0;
2149 unsigned short trace = 0;
2150 etap_time_t total_time;
2151 etap_time_t stop_hold_time;
2152 pc_t pc;
2153
2154 OBTAIN_PC(pc, l);
2155 ETAP_STAMP(lock_event_table(l), trace, dynamic);
2156
2157 /*
2158 * Calculate & collect hold time data only if
2159 * the hold tracing was enabled throughout the
2160 * whole operation. This prevents collection of
2161 * bogus data caused by mid-operation trace changes.
2162 *
2163 */
2164
2165 if (ETAP_DURATION_ENABLED(trace) && ETAP_WHOLE_OP(l)) {
2166 ETAP_TIMESTAMP(stop_hold_time);
2167 ETAP_TOTAL_TIME(total_time, stop_hold_time,
2168 l->u.s.start_hold_time);
2169 CUM_HOLD_ACCUMULATE(l->cbuff_entry, total_time, dynamic, trace);
2170 MON_ASSIGN_PC(l->end_pc, pc, trace);
2171 MON_DATA_COLLECT(l,
2172 l,
2173 total_time,
2174 MUTEX_LOCK,
2175 MON_DURATION,
2176 trace);
2177 }
2178 ETAP_CLEAR_TRACE_DATA(l);
2179 }
2180
2181 #endif /* ETAP_LOCK_TRACE */