1 /*
2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3 *
4 * @Apple_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
21 */
22
23 #include <sys/errno.h>
24 #include <sys/param.h>
25 #include <sys/systm.h>
26 #include <sys/proc_internal.h>
27 #include <sys/vm.h>
28 #include <sys/sysctl.h>
29 #include <sys/kdebug.h>
30 #include <sys/kauth.h>
31 #include <sys/ktrace.h>
32 #include <sys/sysproto.h>
33 #include <sys/bsdtask_info.h>
34 #include <sys/random.h>
35
36 #include <mach/clock_types.h>
37 #include <mach/mach_types.h>
38 #include <mach/mach_time.h>
39 #include <mach/mach_vm.h>
40 #include <machine/machine_routines.h>
41
42 #include <mach/machine.h>
43 #include <mach/vm_map.h>
44
45 #if defined(__i386__) || defined(__x86_64__)
46 #include <i386/rtclock_protos.h>
47 #include <i386/mp.h>
48 #include <i386/machine_routines.h>
49 #include <i386/tsc.h>
50 #endif
51
52 #include <kern/clock.h>
53
54 #include <kern/thread.h>
55 #include <kern/task.h>
56 #include <kern/debug.h>
57 #include <kern/kalloc.h>
58 #include <kern/cpu_data.h>
59 #include <kern/assert.h>
60 #include <kern/telemetry.h>
61 #include <kern/sched_prim.h>
62 #include <vm/vm_kern.h>
63 #include <sys/lock.h>
64 #include <kperf/kperf.h>
65 #include <pexpert/device_tree.h>
66
67 #include <sys/malloc.h>
68 #include <sys/mcache.h>
69
70 #include <sys/vnode.h>
71 #include <sys/vnode_internal.h>
72 #include <sys/fcntl.h>
73 #include <sys/file_internal.h>
74 #include <sys/ubc.h>
75 #include <sys/param.h> /* for isset() */
76
77 #include <mach/mach_host.h> /* for host_info() */
78 #include <libkern/OSAtomic.h>
79
80 #include <machine/pal_routines.h>
81 #include <machine/atomic.h>
82
83 /*
84 * IOP(s)
85 *
86 * https://coreoswiki.apple.com/wiki/pages/U6z3i0q9/Consistent_Logging_Implementers_Guide.html
87 *
88 * IOP(s) are auxiliary cores that want to participate in kdebug event logging.
89 * They are registered dynamically. Each is assigned a cpu_id at registration.
90 *
91 * NOTE: IOP trace events may not use the same clock hardware as "normal"
92 * cpus. There is an effort made to synchronize the IOP timebase with the
93 * AP, but it should be understood that there may be discrepancies.
94 *
95 * Once registered, an IOP is permanent, it cannot be unloaded/unregistered.
96 * The current implementation depends on this for thread safety.
97 *
98 * New registrations occur by allocating a kd_iop struct and assigning
99 * a provisional cpu_id of list_head->cpu_id + 1. Then a CAS to claim the
100 * list_head pointer resolves any races.
101 *
102 * You may safely walk the kd_iops list at any time, without holding locks.
103 *
104 * When allocating buffers, the current kd_iops head is captured. Any operations
105 * that depend on the buffer state (such as flushing IOP traces on reads,
106 * etc.) should use the captured list head. This will allow registrations to
107 * take place while trace is in use.
108 */
109
110 typedef struct kd_iop {
111 kd_callback_t callback;
112 uint32_t cpu_id;
113 uint64_t last_timestamp; /* Prevent timer rollback */
114 struct kd_iop* next;
115 } kd_iop_t;
116
117 static kd_iop_t* kd_iops = NULL;
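/*
 * Illustrative sketch (not part of the build): per the registration notes
 * above, consumers read the list head exactly once and then walk that
 * snapshot without locks.
 *
 *     kd_iop_t *head = kd_iops;                        // single read
 *     for (kd_iop_t *iop = head; iop != NULL; iop = iop->next) {
 *         // iop->cpu_id and iop->callback are stable once registered
 *     }
 *
 * Because registrations only ever prepend to the head and entries are
 * never removed, a captured snapshot stays valid indefinitely.
 */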
118
119 /*
120 * Typefilter(s)
121 *
122 * A typefilter is an 8KB bitmap that is used to selectively filter events
123 * being recorded. It is able to individually address every class & subclass.
124 *
125 * There is a shared typefilter in the kernel which is lazily allocated. Once
126 * allocated, the shared typefilter is never deallocated. The shared typefilter
127 * is also mapped on demand into userspace processes that invoke kdebug_trace
128 * API from Libsyscall. When mapped into a userspace process, the memory is
129 * read only, and does not have a fixed address.
130 *
131 * It is a requirement that the kernel's shared typefilter always pass DBG_TRACE
132 * events. This is enforced automatically, by having the needed bits set any
133 * time the shared typefilter is mutated.
134 */
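/*
 * A hedged illustration of the bitmap layout: 256 classes x 256 subclasses
 * at one bit each is 64K bits, i.e. exactly 8KB. The upper 16 bits of a
 * debugid select the bit, so checking an event reduces to
 *
 *     uint16_t csc = KDBG_EXTRACT_CSC(debugid);   // (class << 8) | subclass
 *     bool allowed = isset(tf, csc);
 *
 * which is what typefilter_is_debugid_allowed() below does.
 */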
135
136 typedef uint8_t* typefilter_t;
137
138 static typefilter_t kdbg_typefilter;
139 static mach_port_t kdbg_typefilter_memory_entry;
140
141 /*
142 * There are 3 combinations of page sizes:
143 *
144 * 4KB / 4KB
145 * 4KB / 16KB
146 * 16KB / 16KB
147 *
148 * The typefilter is exactly 8KB. In the first two scenarios, we would like
149 * to use 2 pages exactly; in the third scenario we must make certain that
150 * a full page is allocated so we do not inadvertently share 8KB of random
151 * data to userspace. The round_page_32 macro rounds to the kernel page size.
152 */
153 #define TYPEFILTER_ALLOC_SIZE MAX(round_page_32(KDBG_TYPEFILTER_BITMAP_SIZE), KDBG_TYPEFILTER_BITMAP_SIZE)
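/*
 * Worked example (hypothetical configurations): with 4KB kernel pages,
 * round_page_32(8KB) is 8KB and exactly two pages are used; with 16KB
 * kernel pages it rounds up to 16KB, and typefilter_create() below zeroes
 * the trailing 8KB so no stale kernel data can be mapped to userspace.
 */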
154
155 static typefilter_t typefilter_create(void)
156 {
157 typefilter_t tf;
158 if (KERN_SUCCESS == kmem_alloc(kernel_map, (vm_offset_t*)&tf, TYPEFILTER_ALLOC_SIZE, VM_KERN_MEMORY_DIAG)) {
159 memset(&tf[KDBG_TYPEFILTER_BITMAP_SIZE], 0, TYPEFILTER_ALLOC_SIZE - KDBG_TYPEFILTER_BITMAP_SIZE);
160 return tf;
161 }
162 return NULL;
163 }
164
165 static void typefilter_deallocate(typefilter_t tf)
166 {
167 assert(tf != NULL);
168 assert(tf != kdbg_typefilter);
169 kmem_free(kernel_map, (vm_offset_t)tf, TYPEFILTER_ALLOC_SIZE);
170 }
171
172 static void typefilter_copy(typefilter_t dst, typefilter_t src)
173 {
174 assert(src != NULL);
175 assert(dst != NULL);
176 memcpy(dst, src, KDBG_TYPEFILTER_BITMAP_SIZE);
177 }
178
179 static void typefilter_reject_all(typefilter_t tf)
180 {
181 assert(tf != NULL);
182 memset(tf, 0, KDBG_TYPEFILTER_BITMAP_SIZE);
183 }
184
185 static void typefilter_allow_class(typefilter_t tf, uint8_t class)
186 {
187 assert(tf != NULL);
188 const uint32_t BYTES_PER_CLASS = 256 / 8; // 256 subclasses, 1 bit each
189 memset(&tf[class * BYTES_PER_CLASS], 0xFF, BYTES_PER_CLASS);
190 }
191
192 static void typefilter_allow_csc(typefilter_t tf, uint16_t csc)
193 {
194 assert(tf != NULL);
195 setbit(tf, csc);
196 }
197
198 static bool typefilter_is_debugid_allowed(typefilter_t tf, uint32_t id)
199 {
200 assert(tf != NULL);
201 return isset(tf, KDBG_EXTRACT_CSC(id));
202 }
203
204 static mach_port_t typefilter_create_memory_entry(typefilter_t tf)
205 {
206 assert(tf != NULL);
207
208 mach_port_t memory_entry = MACH_PORT_NULL;
209 memory_object_size_t size = TYPEFILTER_ALLOC_SIZE;
210
211 mach_make_memory_entry_64(kernel_map,
212 &size,
213 (memory_object_offset_t)tf,
214 VM_PROT_READ,
215 &memory_entry,
216 MACH_PORT_NULL);
217
218 return memory_entry;
219 }
220
221 static int kdbg_copyin_typefilter(user_addr_t addr, size_t size);
222 static void kdbg_enable_typefilter(void);
223 static void kdbg_disable_typefilter(void);
224
225 /*
226 * External prototypes
227 */
228
229 void task_act_iterate_wth_args(task_t, void(*)(thread_t, void *), void *);
230 int cpu_number(void); /* XXX <machine/...> include path broken */
231 void commpage_update_kdebug_state(void); /* XXX sign */
232
233 extern int log_leaks;
234
235 /*
236 * This flag is for testing purposes only -- it's highly experimental and tools
237 * have not been updated to support it.
238 */
239 static bool kdbg_continuous_time = false;
240
241 static inline uint64_t
242 kdbg_timestamp(void)
243 {
244 if (kdbg_continuous_time) {
245 return mach_continuous_time();
246 } else {
247 return mach_absolute_time();
248 }
249 }
250
251 #if KDEBUG_MOJO_TRACE
252 #include <sys/kdebugevents.h>
253 static void kdebug_serial_print( /* forward */
254 uint32_t, uint32_t, uint64_t,
255 uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
256 #endif
257
258 int kdbg_control(int *, u_int, user_addr_t, size_t *);
259
260 static int kdbg_read(user_addr_t, size_t *, vnode_t, vfs_context_t, uint32_t);
261 static int kdbg_readcpumap(user_addr_t, size_t *);
262 static int kdbg_readthrmap_v3(user_addr_t, size_t, int);
263 static int kdbg_readcurthrmap(user_addr_t, size_t *);
264 static int kdbg_setreg(kd_regtype *);
265 static int kdbg_setpidex(kd_regtype *);
266 static int kdbg_setpid(kd_regtype *);
267 static void kdbg_thrmap_init(void);
268 static int kdbg_reinit(boolean_t);
269 static int kdbg_bootstrap(boolean_t);
270 static int kdbg_test(size_t flavor);
271
272 static int kdbg_write_v1_header(boolean_t write_thread_map, vnode_t vp, vfs_context_t ctx);
273 static int kdbg_write_thread_map(vnode_t vp, vfs_context_t ctx);
274 static int kdbg_copyout_thread_map(user_addr_t buffer, size_t *buffer_size);
275 static void kdbg_clear_thread_map(void);
276
277 static boolean_t kdbg_wait(uint64_t timeout_ms, boolean_t locked_wait);
278 static void kdbg_wakeup(void);
279
280 int kdbg_cpumap_init_internal(kd_iop_t* iops, uint32_t cpu_count,
281 uint8_t** cpumap, uint32_t* cpumap_size);
282
283 static kd_threadmap *kdbg_thrmap_init_internal(unsigned int count,
284 unsigned int *mapsize,
285 unsigned int *mapcount);
286
287 static boolean_t kdebug_current_proc_enabled(uint32_t debugid);
288 static errno_t kdebug_check_trace_string(uint32_t debugid, uint64_t str_id);
289
290 int kdbg_write_v3_header(user_addr_t, size_t *, int);
291 int kdbg_write_v3_chunk_header(user_addr_t buffer, uint32_t tag,
292 uint32_t sub_tag, uint64_t length,
293 vnode_t vp, vfs_context_t ctx);
294
295 user_addr_t kdbg_write_v3_event_chunk_header(user_addr_t buffer, uint32_t tag,
296 uint64_t length, vnode_t vp,
297 vfs_context_t ctx);
298
299 // Helper functions
300
301 static int create_buffers(boolean_t);
302 static void delete_buffers(void);
303
304 extern int tasks_count;
305 extern int threads_count;
306 extern char *proc_best_name(proc_t p);
307 extern void IOSleep(int);
308
309 /* trace enable status */
310 unsigned int kdebug_enable = 0;
311
312 /* A static buffer to record events prior to the start of regular logging */
313
314 #define KD_EARLY_BUFFER_SIZE (16 * 1024)
315 #define KD_EARLY_BUFFER_NBUFS (KD_EARLY_BUFFER_SIZE / sizeof(kd_buf))
316 #if CONFIG_EMBEDDED
317 /*
318 * On embedded, the space for this is carved out by osfmk/arm/data.s -- clang
319 * has problems aligning to greater than 4K.
320 */
321 extern kd_buf kd_early_buffer[KD_EARLY_BUFFER_NBUFS];
322 #else /* CONFIG_EMBEDDED */
323 __attribute__((aligned(KD_EARLY_BUFFER_SIZE)))
324 static kd_buf kd_early_buffer[KD_EARLY_BUFFER_NBUFS];
325 #endif /* !CONFIG_EMBEDDED */
326
327 static unsigned int kd_early_index = 0;
328 static bool kd_early_overflow = false;
329 static bool kd_early_done = false;
330
331 #define SLOW_NOLOG 0x01
332 #define SLOW_CHECKS 0x02
333
334 #define EVENTS_PER_STORAGE_UNIT 2048
335 #define MIN_STORAGE_UNITS_PER_CPU 4
336
337 #define POINTER_FROM_KDS_PTR(x) (&kd_bufs[x.buffer_index].kdsb_addr[x.offset])
338
339 union kds_ptr {
340 struct {
341 uint32_t buffer_index:21;
342 uint16_t offset:11;
343 };
344 uint32_t raw;
345 };
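/*
 * Illustrative decoding of a kds_ptr, matching POINTER_FROM_KDS_PTR above:
 * the raw value packs which storage buffer to use and which unit within it.
 *
 *     union kds_ptr p = { .raw = value };
 *     struct kd_storage *ks = &kd_bufs[p.buffer_index].kdsb_addr[p.offset];
 *
 * The offset must fit in 11 bits, which the static_assert on
 * N_STORAGE_UNITS_PER_BUFFER below guarantees.
 */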
346
347 struct kd_storage {
348 union kds_ptr kds_next;
349 uint32_t kds_bufindx;
350 uint32_t kds_bufcnt;
351 uint32_t kds_readlast;
352 boolean_t kds_lostevents;
353 uint64_t kds_timestamp;
354
355 kd_buf kds_records[EVENTS_PER_STORAGE_UNIT];
356 };
357
358 #define MAX_BUFFER_SIZE (1024 * 1024 * 128)
359 #define N_STORAGE_UNITS_PER_BUFFER (MAX_BUFFER_SIZE / sizeof(struct kd_storage))
360 static_assert(N_STORAGE_UNITS_PER_BUFFER <= 0x7ff,
361 "shoudn't overflow kds_ptr.offset");
362
363 struct kd_storage_buffers {
364 struct kd_storage *kdsb_addr;
365 uint32_t kdsb_size;
366 };
367
368 #define KDS_PTR_NULL 0xffffffff
369 struct kd_storage_buffers *kd_bufs = NULL;
370 int n_storage_units = 0;
371 unsigned int n_storage_buffers = 0;
372 int n_storage_threshold = 0;
373 int kds_waiter = 0;
374
375 #pragma pack(0)
376 struct kd_bufinfo {
377 union kds_ptr kd_list_head;
378 union kds_ptr kd_list_tail;
379 boolean_t kd_lostevents;
380 uint32_t _pad;
381 uint64_t kd_prev_timebase;
382 uint32_t num_bufs;
383 } __attribute__(( aligned(MAX_CPU_CACHE_LINE_SIZE) ));
384
385
386 /*
387 * In principle, this control block can be shared in DRAM with other
388 * coprocessors and runtimes, for configuring what tracing is enabled.
389 */
390 struct kd_ctrl_page_t {
391 union kds_ptr kds_free_list;
392 uint32_t enabled :1;
393 uint32_t _pad0 :31;
394 int kds_inuse_count;
395 uint32_t kdebug_flags;
396 uint32_t kdebug_slowcheck;
397 uint64_t oldest_time;
398 /*
399 * The number of kd_bufinfo structs allocated may not match the current
400 * number of active cpus. We capture the iops list head at initialization
401 * which we could use to calculate the number of cpus we allocated data for,
402 * unless it happens to be null. To avoid that case, we explicitly also
403 * capture a cpu count.
404 */
405 kd_iop_t* kdebug_iops;
406 uint32_t kdebug_cpus;
407 } kd_ctrl_page = {
408 .kds_free_list = {.raw = KDS_PTR_NULL},
409 .kdebug_slowcheck = SLOW_NOLOG,
410 .oldest_time = 0
411 };
412
413 #pragma pack()
414
415 struct kd_bufinfo *kdbip = NULL;
416
417 #define KDCOPYBUF_COUNT 8192
418 #define KDCOPYBUF_SIZE (KDCOPYBUF_COUNT * sizeof(kd_buf))
419
420 #define PAGE_4KB 4096
421 #define PAGE_16KB 16384
422
423 kd_buf *kdcopybuf = NULL;
424
425 unsigned int nkdbufs = 0;
426 unsigned int kdlog_beg=0;
427 unsigned int kdlog_end=0;
428 unsigned int kdlog_value1=0;
429 unsigned int kdlog_value2=0;
430 unsigned int kdlog_value3=0;
431 unsigned int kdlog_value4=0;
432
433 static lck_spin_t * kdw_spin_lock;
434 static lck_spin_t * kds_spin_lock;
435
436 kd_threadmap *kd_mapptr = 0;
437 unsigned int kd_mapsize = 0;
438 unsigned int kd_mapcount = 0;
439
440 off_t RAW_file_offset = 0;
441 int RAW_file_written = 0;
442
443 #define RAW_FLUSH_SIZE (2 * 1024 * 1024)
444
445 /*
446 * A globally increasing counter for identifying strings in trace. Starts at
447 * 1 because 0 is a reserved return value.
448 */
449 __attribute__((aligned(MAX_CPU_CACHE_LINE_SIZE)))
450 static uint64_t g_curr_str_id = 1;
451
452 #define STR_ID_SIG_OFFSET (48)
453 #define STR_ID_MASK ((1ULL << STR_ID_SIG_OFFSET) - 1)
454 #define STR_ID_SIG_MASK (~STR_ID_MASK)
455
456 /*
457 * A bit pattern for identifying string IDs generated by
458 * kdebug_trace_string(2).
459 */
460 static uint64_t g_str_id_signature = (0x70acULL << STR_ID_SIG_OFFSET);
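/*
 * Sketch of the ID scheme, mirroring kernel_debug_string_internal() and
 * kdebug_check_trace_string() below: the low 48 bits come from the global
 * counter and the top 16 bits carry the signature.
 *
 *     uint64_t id = OSIncrementAtomic64((SInt64 *)&g_curr_str_id);
 *     id = (id & STR_ID_MASK) | g_str_id_signature;        // mint an ID
 *     bool ours = ((id & STR_ID_SIG_MASK) == g_str_id_signature);
 */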
461
462 #define INTERRUPT 0x01050000
463 #define MACH_vmfault 0x01300008
464 #define BSC_SysCall 0x040c0000
465 #define MACH_SysCall 0x010c0000
466
467 /* task to string structure */
468 struct tts
469 {
470 task_t task; /* from proc's task */
471 pid_t pid; /* from proc's p_pid */
472 char task_comm[20]; /* from proc's p_comm */
473 };
474
475 typedef struct tts tts_t;
476
477 struct krt
478 {
479 kd_threadmap *map; /* pointer to the map buffer */
480 int count;
481 int maxcount;
482 struct tts *atts;
483 };
484
485 typedef struct krt krt_t;
486
487 static uint32_t
488 kdbg_cpu_count(boolean_t early_trace)
489 {
490 if (early_trace) {
491 #if CONFIG_EMBEDDED
492 return ml_get_cpu_count();
493 #else
494 return max_ncpus;
495 #endif
496 }
497
498 host_basic_info_data_t hinfo;
499 mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
500 host_info((host_t)1 /* BSD_HOST */, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
501 assert(hinfo.logical_cpu_max > 0);
502 return hinfo.logical_cpu_max;
503 }
504
505 #if MACH_ASSERT
506 #if CONFIG_EMBEDDED
507 static boolean_t
508 kdbg_iop_list_is_valid(kd_iop_t* iop)
509 {
510 if (iop) {
511 /* Is list sorted by cpu_id? */
512 kd_iop_t* temp = iop;
513 do {
514 assert(!temp->next || temp->next->cpu_id == temp->cpu_id - 1);
515 assert(temp->next || (temp->cpu_id == kdbg_cpu_count(FALSE) || temp->cpu_id == kdbg_cpu_count(TRUE)));
516 } while ((temp = temp->next));
517
518 /* Does each entry have a function and a name? */
519 temp = iop;
520 do {
521 assert(temp->callback.func);
522 assert(strlen(temp->callback.iop_name) < sizeof(temp->callback.iop_name));
523 } while ((temp = temp->next));
524 }
525
526 return TRUE;
527 }
528
529 static boolean_t
530 kdbg_iop_list_contains_cpu_id(kd_iop_t* list, uint32_t cpu_id)
531 {
532 while (list) {
533 if (list->cpu_id == cpu_id)
534 return TRUE;
535 list = list->next;
536 }
537
538 return FALSE;
539 }
540 #endif /* CONFIG_EMBEDDED */
541 #endif /* MACH_ASSERT */
542
543 static void
544 kdbg_iop_list_callback(kd_iop_t* iop, kd_callback_type type, void* arg)
545 {
546 while (iop) {
547 iop->callback.func(iop->callback.context, type, arg);
548 iop = iop->next;
549 }
550 }
551
552 static void
553 kdbg_set_tracing_enabled(boolean_t enabled, uint32_t trace_type)
554 {
555 int s = ml_set_interrupts_enabled(FALSE);
556 lck_spin_lock(kds_spin_lock);
557 if (enabled) {
558 /*
559 * The oldest valid time is now; reject old events from IOPs.
560 */
561 kd_ctrl_page.oldest_time = kdbg_timestamp();
562 kdebug_enable |= trace_type;
563 kd_ctrl_page.kdebug_slowcheck &= ~SLOW_NOLOG;
564 kd_ctrl_page.enabled = 1;
565 commpage_update_kdebug_state();
566 } else {
567 kdebug_enable &= ~(KDEBUG_ENABLE_TRACE|KDEBUG_ENABLE_PPT);
568 kd_ctrl_page.kdebug_slowcheck |= SLOW_NOLOG;
569 kd_ctrl_page.enabled = 0;
570 commpage_update_kdebug_state();
571 }
572 lck_spin_unlock(kds_spin_lock);
573 ml_set_interrupts_enabled(s);
574
575 if (enabled) {
576 kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_KDEBUG_ENABLED, NULL);
577 } else {
578 /*
579 * If you do not flush the IOP trace buffers, they can linger
580 * for a considerable period; consider code which disables and
581 * deallocates without a final sync flush.
582 */
583 kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_KDEBUG_DISABLED, NULL);
584 kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_SYNC_FLUSH, NULL);
585 }
586 }
587
588 static void
589 kdbg_set_flags(int slowflag, int enableflag, boolean_t enabled)
590 {
591 int s = ml_set_interrupts_enabled(FALSE);
592 lck_spin_lock(kds_spin_lock);
593
594 if (enabled) {
595 kd_ctrl_page.kdebug_slowcheck |= slowflag;
596 kdebug_enable |= enableflag;
597 } else {
598 kd_ctrl_page.kdebug_slowcheck &= ~slowflag;
599 kdebug_enable &= ~enableflag;
600 }
601
602 lck_spin_unlock(kds_spin_lock);
603 ml_set_interrupts_enabled(s);
604 }
605
606 /*
607 * Disable wrapping and return true if trace wrapped, false otherwise.
608 */
609 boolean_t
610 disable_wrap(uint32_t *old_slowcheck, uint32_t *old_flags)
611 {
612 boolean_t wrapped;
613 int s = ml_set_interrupts_enabled(FALSE);
614 lck_spin_lock(kds_spin_lock);
615
616 *old_slowcheck = kd_ctrl_page.kdebug_slowcheck;
617 *old_flags = kd_ctrl_page.kdebug_flags;
618
619 wrapped = kd_ctrl_page.kdebug_flags & KDBG_WRAPPED;
620 kd_ctrl_page.kdebug_flags &= ~KDBG_WRAPPED;
621 kd_ctrl_page.kdebug_flags |= KDBG_NOWRAP;
622
623 lck_spin_unlock(kds_spin_lock);
624 ml_set_interrupts_enabled(s);
625
626 return wrapped;
627 }
628
629 void
630 enable_wrap(uint32_t old_slowcheck, boolean_t lostevents)
631 {
632 int s = ml_set_interrupts_enabled(FALSE);
633 lck_spin_lock(kds_spin_lock);
634
635 kd_ctrl_page.kdebug_flags &= ~KDBG_NOWRAP;
636
637 if ( !(old_slowcheck & SLOW_NOLOG))
638 kd_ctrl_page.kdebug_slowcheck &= ~SLOW_NOLOG;
639
640 if (lostevents == TRUE)
641 kd_ctrl_page.kdebug_flags |= KDBG_WRAPPED;
642
643 lck_spin_unlock(kds_spin_lock);
644 ml_set_interrupts_enabled(s);
645 }
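/*
 * Typical pairing, sketched from how a reader is expected to use these
 * helpers (the exact caller logic may differ): wrapping is turned off while
 * buffers are drained, then the previous state is restored.
 *
 *     uint32_t old_slowcheck, old_flags;
 *     boolean_t wrapped = disable_wrap(&old_slowcheck, &old_flags);
 *     // ... copy events out without the writers stealing storage units ...
 *     enable_wrap(old_slowcheck, wrapped);
 */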
646
647 static int
648 create_buffers(boolean_t early_trace)
649 {
650 unsigned int i;
651 unsigned int p_buffer_size;
652 unsigned int f_buffer_size;
653 unsigned int f_buffers;
654 int error = 0;
655
656 /*
657 * For the duration of this allocation, trace code will only reference
658 * kdebug_iops. Any iops registered after this enabling will not be
659 * messaged until the buffers are reallocated.
660 *
661 * TLDR; Must read kd_iops once and only once!
662 */
663 kd_ctrl_page.kdebug_iops = kd_iops;
664
665 #if CONFIG_EMBEDDED
666 assert(kdbg_iop_list_is_valid(kd_ctrl_page.kdebug_iops));
667 #endif
668
669 /*
670 * If the list is valid, it is sorted, newest -> oldest. Each iop entry
671 * has a cpu_id of "the older entry + 1", so the highest cpu_id will
672 * be the list head + 1.
673 */
674
675 kd_ctrl_page.kdebug_cpus = kd_ctrl_page.kdebug_iops ? kd_ctrl_page.kdebug_iops->cpu_id + 1 : kdbg_cpu_count(early_trace);
676
677 if (kmem_alloc(kernel_map, (vm_offset_t *)&kdbip, sizeof(struct kd_bufinfo) * kd_ctrl_page.kdebug_cpus, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
678 error = ENOSPC;
679 goto out;
680 }
681
682 if (nkdbufs < (kd_ctrl_page.kdebug_cpus * EVENTS_PER_STORAGE_UNIT * MIN_STORAGE_UNITS_PER_CPU))
683 n_storage_units = kd_ctrl_page.kdebug_cpus * MIN_STORAGE_UNITS_PER_CPU;
684 else
685 n_storage_units = nkdbufs / EVENTS_PER_STORAGE_UNIT;
686
687 nkdbufs = n_storage_units * EVENTS_PER_STORAGE_UNIT;
688
689 f_buffers = n_storage_units / N_STORAGE_UNITS_PER_BUFFER;
690 n_storage_buffers = f_buffers;
691
692 f_buffer_size = N_STORAGE_UNITS_PER_BUFFER * sizeof(struct kd_storage);
693 p_buffer_size = (n_storage_units % N_STORAGE_UNITS_PER_BUFFER) * sizeof(struct kd_storage);
694
695 if (p_buffer_size)
696 n_storage_buffers++;
697
698 kd_bufs = NULL;
699
700 if (kdcopybuf == 0) {
701 if (kmem_alloc(kernel_map, (vm_offset_t *)&kdcopybuf, (vm_size_t)KDCOPYBUF_SIZE, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
702 error = ENOSPC;
703 goto out;
704 }
705 }
706 if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs, (vm_size_t)(n_storage_buffers * sizeof(struct kd_storage_buffers)), VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
707 error = ENOSPC;
708 goto out;
709 }
710 bzero(kd_bufs, n_storage_buffers * sizeof(struct kd_storage_buffers));
711
712 for (i = 0; i < f_buffers; i++) {
713 if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs[i].kdsb_addr, (vm_size_t)f_buffer_size, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
714 error = ENOSPC;
715 goto out;
716 }
717 bzero(kd_bufs[i].kdsb_addr, f_buffer_size);
718
719 kd_bufs[i].kdsb_size = f_buffer_size;
720 }
721 if (p_buffer_size) {
722 if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs[i].kdsb_addr, (vm_size_t)p_buffer_size, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
723 error = ENOSPC;
724 goto out;
725 }
726 bzero(kd_bufs[i].kdsb_addr, p_buffer_size);
727
728 kd_bufs[i].kdsb_size = p_buffer_size;
729 }
730 n_storage_units = 0;
731
732 for (i = 0; i < n_storage_buffers; i++) {
733 struct kd_storage *kds;
734 int n_elements;
735 int n;
736
737 n_elements = kd_bufs[i].kdsb_size / sizeof(struct kd_storage);
738 kds = kd_bufs[i].kdsb_addr;
739
740 for (n = 0; n < n_elements; n++) {
741 kds[n].kds_next.buffer_index = kd_ctrl_page.kds_free_list.buffer_index;
742 kds[n].kds_next.offset = kd_ctrl_page.kds_free_list.offset;
743
744 kd_ctrl_page.kds_free_list.buffer_index = i;
745 kd_ctrl_page.kds_free_list.offset = n;
746 }
747 n_storage_units += n_elements;
748 }
749
750 bzero((char *)kdbip, sizeof(struct kd_bufinfo) * kd_ctrl_page.kdebug_cpus);
751
752 for (i = 0; i < kd_ctrl_page.kdebug_cpus; i++) {
753 kdbip[i].kd_list_head.raw = KDS_PTR_NULL;
754 kdbip[i].kd_list_tail.raw = KDS_PTR_NULL;
755 kdbip[i].kd_lostevents = FALSE;
756 kdbip[i].num_bufs = 0;
757 }
758
759 kd_ctrl_page.kdebug_flags |= KDBG_BUFINIT;
760
761 kd_ctrl_page.kds_inuse_count = 0;
762 n_storage_threshold = n_storage_units / 2;
763 out:
764 if (error)
765 delete_buffers();
766
767 return(error);
768 }
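/*
 * Worked sizing example (hypothetical request): with 8 cpus and
 * nkdbufs = 100000, the minimum is 8 * 4 = 32 storage units, the request
 * maps to 100000 / 2048 = 48 units, and nkdbufs is rounded down to
 * 48 * 2048 = 98304 events. Since 48 is far below
 * N_STORAGE_UNITS_PER_BUFFER, a single partial buffer allocation holds
 * everything, and n_storage_threshold ends up at 24 units.
 */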
769
770 static void
771 delete_buffers(void)
772 {
773 unsigned int i;
774
775 if (kd_bufs) {
776 for (i = 0; i < n_storage_buffers; i++) {
777 if (kd_bufs[i].kdsb_addr) {
778 kmem_free(kernel_map, (vm_offset_t)kd_bufs[i].kdsb_addr, (vm_size_t)kd_bufs[i].kdsb_size);
779 }
780 }
781 kmem_free(kernel_map, (vm_offset_t)kd_bufs, (vm_size_t)(n_storage_buffers * sizeof(struct kd_storage_buffers)));
782
783 kd_bufs = NULL;
784 n_storage_buffers = 0;
785 }
786 if (kdcopybuf) {
787 kmem_free(kernel_map, (vm_offset_t)kdcopybuf, KDCOPYBUF_SIZE);
788
789 kdcopybuf = NULL;
790 }
791 kd_ctrl_page.kds_free_list.raw = KDS_PTR_NULL;
792
793 if (kdbip) {
794 kmem_free(kernel_map, (vm_offset_t)kdbip, sizeof(struct kd_bufinfo) * kd_ctrl_page.kdebug_cpus);
795
796 kdbip = NULL;
797 }
798 kd_ctrl_page.kdebug_iops = NULL;
799 kd_ctrl_page.kdebug_cpus = 0;
800 kd_ctrl_page.kdebug_flags &= ~KDBG_BUFINIT;
801 }
802
803 void
804 release_storage_unit(int cpu, uint32_t kdsp_raw)
805 {
806 int s = 0;
807 struct kd_storage *kdsp_actual;
808 struct kd_bufinfo *kdbp;
809 union kds_ptr kdsp;
810
811 kdsp.raw = kdsp_raw;
812
813 s = ml_set_interrupts_enabled(FALSE);
814 lck_spin_lock(kds_spin_lock);
815
816 kdbp = &kdbip[cpu];
817
818 if (kdsp.raw == kdbp->kd_list_head.raw) {
819 /*
820 * it's possible for the storage unit pointed to
821 * by kdsp to have already been stolen... so
822 * check to see if it's still the head of the list
823 * now that we're behind the lock that protects
824 * adding and removing from the queue...
825 * since we only ever release and steal units from
826 * that position, if it's no longer the head
827 * we have nothing to do in this context
828 */
829 kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
830 kdbp->kd_list_head = kdsp_actual->kds_next;
831
832 kdsp_actual->kds_next = kd_ctrl_page.kds_free_list;
833 kd_ctrl_page.kds_free_list = kdsp;
834
835 kd_ctrl_page.kds_inuse_count--;
836 }
837 lck_spin_unlock(kds_spin_lock);
838 ml_set_interrupts_enabled(s);
839 }
840
841
842 boolean_t
843 allocate_storage_unit(int cpu)
844 {
845 union kds_ptr kdsp;
846 struct kd_storage *kdsp_actual, *kdsp_next_actual;
847 struct kd_bufinfo *kdbp, *kdbp_vict, *kdbp_try;
848 uint64_t oldest_ts, ts;
849 boolean_t retval = TRUE;
850 int s = 0;
851
852 s = ml_set_interrupts_enabled(FALSE);
853 lck_spin_lock(kds_spin_lock);
854
855 kdbp = &kdbip[cpu];
856
857 /* If someone beat us to the allocate, return success */
858 if (kdbp->kd_list_tail.raw != KDS_PTR_NULL) {
859 kdsp_actual = POINTER_FROM_KDS_PTR(kdbp->kd_list_tail);
860
861 if (kdsp_actual->kds_bufindx < EVENTS_PER_STORAGE_UNIT)
862 goto out;
863 }
864
865 if ((kdsp = kd_ctrl_page.kds_free_list).raw != KDS_PTR_NULL) {
866 kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
867 kd_ctrl_page.kds_free_list = kdsp_actual->kds_next;
868
869 kd_ctrl_page.kds_inuse_count++;
870 } else {
871 if (kd_ctrl_page.kdebug_flags & KDBG_NOWRAP) {
872 kd_ctrl_page.kdebug_slowcheck |= SLOW_NOLOG;
873 kdbp->kd_lostevents = TRUE;
874 retval = FALSE;
875 goto out;
876 }
877 kdbp_vict = NULL;
878 oldest_ts = UINT64_MAX;
879
880 for (kdbp_try = &kdbip[0]; kdbp_try < &kdbip[kd_ctrl_page.kdebug_cpus]; kdbp_try++) {
881
882 if (kdbp_try->kd_list_head.raw == KDS_PTR_NULL) {
883 /*
884 * no storage unit to steal
885 */
886 continue;
887 }
888
889 kdsp_actual = POINTER_FROM_KDS_PTR(kdbp_try->kd_list_head);
890
891 if (kdsp_actual->kds_bufcnt < EVENTS_PER_STORAGE_UNIT) {
892 /*
893 * make sure we don't steal the storage unit
894 * being actively recorded to... need to
895 * move on because we don't want an out-of-order
896 * set of events showing up later
897 */
898 continue;
899 }
900
901 /*
902 * When wrapping, steal the storage unit with the
903 * earliest timestamp on its last event, instead of the
904 * earliest timestamp on the first event. This allows a
905 * storage unit with more recent events to be preserved,
906 * even if the storage unit contains events that are
907 * older than those found in other CPUs.
908 */
909 ts = kdbg_get_timestamp(&kdsp_actual->kds_records[EVENTS_PER_STORAGE_UNIT - 1]);
910 if (ts < oldest_ts) {
911 oldest_ts = ts;
912 kdbp_vict = kdbp_try;
913 }
914 }
915 if (kdbp_vict == NULL) {
916 kdebug_enable = 0;
917 kd_ctrl_page.enabled = 0;
918 commpage_update_kdebug_state();
919 retval = FALSE;
920 goto out;
921 }
922 kdsp = kdbp_vict->kd_list_head;
923 kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
924 kdbp_vict->kd_list_head = kdsp_actual->kds_next;
925
926 if (kdbp_vict->kd_list_head.raw != KDS_PTR_NULL) {
927 kdsp_next_actual = POINTER_FROM_KDS_PTR(kdbp_vict->kd_list_head);
928 kdsp_next_actual->kds_lostevents = TRUE;
929 } else
930 kdbp_vict->kd_lostevents = TRUE;
931
932 kd_ctrl_page.oldest_time = oldest_ts;
933 kd_ctrl_page.kdebug_flags |= KDBG_WRAPPED;
934 }
935 kdsp_actual->kds_timestamp = kdbg_timestamp();
936 kdsp_actual->kds_next.raw = KDS_PTR_NULL;
937 kdsp_actual->kds_bufcnt = 0;
938 kdsp_actual->kds_readlast = 0;
939
940 kdsp_actual->kds_lostevents = kdbp->kd_lostevents;
941 kdbp->kd_lostevents = FALSE;
942 kdsp_actual->kds_bufindx = 0;
943
944 if (kdbp->kd_list_head.raw == KDS_PTR_NULL)
945 kdbp->kd_list_head = kdsp;
946 else
947 POINTER_FROM_KDS_PTR(kdbp->kd_list_tail)->kds_next = kdsp;
948 kdbp->kd_list_tail = kdsp;
949 out:
950 lck_spin_unlock(kds_spin_lock);
951 ml_set_interrupts_enabled(s);
952
953 return (retval);
954 }
955
956 int
957 kernel_debug_register_callback(kd_callback_t callback)
958 {
959 kd_iop_t* iop;
960 if (kmem_alloc(kernel_map, (vm_offset_t *)&iop, sizeof(kd_iop_t), VM_KERN_MEMORY_DIAG) == KERN_SUCCESS) {
961 memcpy(&iop->callback, &callback, sizeof(kd_callback_t));
962
963 /*
964 * <rdar://problem/13351477> Some IOP clients are not providing a name.
965 *
966 * Remove when fixed.
967 */
968 {
969 boolean_t is_valid_name = FALSE;
970 for (uint32_t length=0; length<sizeof(callback.iop_name); ++length) {
971 /* This is roughly isprintable(c) */
972 if (callback.iop_name[length] > 0x20 && callback.iop_name[length] < 0x7F)
973 continue;
974 if (callback.iop_name[length] == 0) {
975 if (length)
976 is_valid_name = TRUE;
977 break;
978 }
979 }
980
981 if (!is_valid_name) {
982 strlcpy(iop->callback.iop_name, "IOP-???", sizeof(iop->callback.iop_name));
983 }
984 }
985
986 iop->last_timestamp = 0;
987
988 do {
989 /*
990 * We use two pieces of state, the old list head
991 * pointer, and the value of old_list_head->cpu_id.
992 * If we read kd_iops more than once, it can change
993 * between reads.
994 *
995 * TLDR; Must not read kd_iops more than once per loop.
996 */
997 iop->next = kd_iops;
998 iop->cpu_id = iop->next ? (iop->next->cpu_id+1) : kdbg_cpu_count(FALSE);
999
1000 /*
1001 * Header says OSCompareAndSwapPtr has a memory barrier
1002 */
1003 } while (!OSCompareAndSwapPtr(iop->next, iop, (void* volatile*)&kd_iops));
1004
1005 return iop->cpu_id;
1006 }
1007
1008 return 0;
1009 }
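/*
 * Hedged usage sketch for an IOP client (names are hypothetical): register
 * once at init, remember the returned cpu_id, and stamp every forwarded
 * event with it.
 *
 *     static void my_iop_cb(void *ctx, kd_callback_type type, void *arg)
 *     {
 *         // react to KD_CALLBACK_KDEBUG_ENABLED/DISABLED and SYNC_FLUSH
 *     }
 *
 *     kd_callback_t cb = {
 *         .func = my_iop_cb,
 *         .context = NULL,
 *         .iop_name = "MYIOP",
 *     };
 *     uint32_t coreid = kernel_debug_register_callback(cb);
 *     ...
 *     kernel_debug_enter(coreid, debugid, timestamp,
 *                        arg1, arg2, arg3, arg4, threadid);
 */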
1010
1011 void
1012 kernel_debug_enter(
1013 uint32_t coreid,
1014 uint32_t debugid,
1015 uint64_t timestamp,
1016 uintptr_t arg1,
1017 uintptr_t arg2,
1018 uintptr_t arg3,
1019 uintptr_t arg4,
1020 uintptr_t threadid
1021 )
1022 {
1023 uint32_t bindx;
1024 kd_buf *kd;
1025 struct kd_bufinfo *kdbp;
1026 struct kd_storage *kdsp_actual;
1027 union kds_ptr kds_raw;
1028
1029 if (kd_ctrl_page.kdebug_slowcheck) {
1030
1031 if ( (kd_ctrl_page.kdebug_slowcheck & SLOW_NOLOG) || !(kdebug_enable & (KDEBUG_ENABLE_TRACE|KDEBUG_ENABLE_PPT)))
1032 goto out1;
1033
1034 if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) {
1035 if (typefilter_is_debugid_allowed(kdbg_typefilter, debugid))
1036 goto record_event;
1037 goto out1;
1038 }
1039 else if (kd_ctrl_page.kdebug_flags & KDBG_RANGECHECK) {
1040 if (debugid >= kdlog_beg && debugid <= kdlog_end)
1041 goto record_event;
1042 goto out1;
1043 }
1044 else if (kd_ctrl_page.kdebug_flags & KDBG_VALCHECK) {
1045 if ((debugid & KDBG_EVENTID_MASK) != kdlog_value1 &&
1046 (debugid & KDBG_EVENTID_MASK) != kdlog_value2 &&
1047 (debugid & KDBG_EVENTID_MASK) != kdlog_value3 &&
1048 (debugid & KDBG_EVENTID_MASK) != kdlog_value4)
1049 goto out1;
1050 }
1051 }
1052
1053 record_event:
1054 if (timestamp < kd_ctrl_page.oldest_time) {
1055 goto out1;
1056 }
1057
1058 #if CONFIG_EMBEDDED
1059 /*
1060 * When start_kern_tracing is called by the kernel to trace very
1061 * early kernel events, it saves data to a secondary buffer until
1062 * it is possible to initialize ktrace, and then dumps the events
1063 * into the ktrace buffer using this method. In this case, iops will
1064 * be NULL, and the coreid will be zero. It is not possible to have
1065 * a valid IOP coreid of zero, so the assert passes when iops is NULL and
1066 * the coreid is zero.
1067 */
1068 assert(kdbg_iop_list_contains_cpu_id(kd_ctrl_page.kdebug_iops, coreid) || (kd_ctrl_page.kdebug_iops == NULL && coreid == 0));
1069 #endif
1070
1071 disable_preemption();
1072
1073 if (kd_ctrl_page.enabled == 0)
1074 goto out;
1075
1076 kdbp = &kdbip[coreid];
1077 timestamp &= KDBG_TIMESTAMP_MASK;
1078
1079 #if KDEBUG_MOJO_TRACE
1080 if (kdebug_enable & KDEBUG_ENABLE_SERIAL)
1081 kdebug_serial_print(coreid, debugid, timestamp,
1082 arg1, arg2, arg3, arg4, threadid);
1083 #endif
1084
1085 retry_q:
1086 kds_raw = kdbp->kd_list_tail;
1087
1088 if (kds_raw.raw != KDS_PTR_NULL) {
1089 kdsp_actual = POINTER_FROM_KDS_PTR(kds_raw);
1090 bindx = kdsp_actual->kds_bufindx;
1091 } else {
1092 kdsp_actual = NULL;
1093 bindx = EVENTS_PER_STORAGE_UNIT;
1094 }
1095
1096 if (kdsp_actual == NULL || bindx >= EVENTS_PER_STORAGE_UNIT) {
1097 if (allocate_storage_unit(coreid) == FALSE) {
1098 /*
1099 * this can only happen if wrapping
1100 * has been disabled
1101 */
1102 goto out;
1103 }
1104 goto retry_q;
1105 }
1106 if ( !OSCompareAndSwap(bindx, bindx + 1, &kdsp_actual->kds_bufindx))
1107 goto retry_q;
1108
1109 // IOP entries can be allocated before xnu allocates and inits the buffer
1110 if (timestamp < kdsp_actual->kds_timestamp)
1111 kdsp_actual->kds_timestamp = timestamp;
1112
1113 kd = &kdsp_actual->kds_records[bindx];
1114
1115 kd->debugid = debugid;
1116 kd->arg1 = arg1;
1117 kd->arg2 = arg2;
1118 kd->arg3 = arg3;
1119 kd->arg4 = arg4;
1120 kd->arg5 = threadid;
1121
1122 kdbg_set_timestamp_and_cpu(kd, timestamp, coreid);
1123
1124 OSAddAtomic(1, &kdsp_actual->kds_bufcnt);
1125 out:
1126 enable_preemption();
1127 out1:
1128 if ((kds_waiter && kd_ctrl_page.kds_inuse_count >= n_storage_threshold)) {
1129 kdbg_wakeup();
1130 }
1131 }
1132
1133 static void
1134 kernel_debug_internal(
1135 boolean_t only_filter,
1136 uint32_t debugid,
1137 uintptr_t arg1,
1138 uintptr_t arg2,
1139 uintptr_t arg3,
1140 uintptr_t arg4,
1141 uintptr_t arg5)
1142 {
1143 struct proc *curproc;
1144 uint64_t now;
1145 uint32_t bindx;
1146 kd_buf *kd;
1147 int cpu;
1148 struct kd_bufinfo *kdbp;
1149 struct kd_storage *kdsp_actual;
1150 union kds_ptr kds_raw;
1151
1152 if (kd_ctrl_page.kdebug_slowcheck) {
1153 if ((kd_ctrl_page.kdebug_slowcheck & SLOW_NOLOG) ||
1154 !(kdebug_enable & (KDEBUG_ENABLE_TRACE | KDEBUG_ENABLE_PPT)))
1155 {
1156 goto out1;
1157 }
1158
1159 if ( !ml_at_interrupt_context()) {
1160 if (kd_ctrl_page.kdebug_flags & KDBG_PIDCHECK) {
1161 /*
1162 * If kdebug flag is not set for current proc, return
1163 */
1164 curproc = current_proc();
1165
1166 if ((curproc && !(curproc->p_kdebug)) &&
1167 ((debugid & 0xffff0000) != (MACHDBG_CODE(DBG_MACH_SCHED, 0) | DBG_FUNC_NONE)) &&
1168 (debugid >> 24 != DBG_TRACE))
1169 goto out1;
1170 }
1171 else if (kd_ctrl_page.kdebug_flags & KDBG_PIDEXCLUDE) {
1172 /*
1173 * If kdebug flag is set for current proc, return
1174 */
1175 curproc = current_proc();
1176
1177 if ((curproc && curproc->p_kdebug) &&
1178 ((debugid & 0xffff0000) != (MACHDBG_CODE(DBG_MACH_SCHED, 0) | DBG_FUNC_NONE)) &&
1179 (debugid >> 24 != DBG_TRACE))
1180 goto out1;
1181 }
1182 }
1183
1184 if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) {
1185 if (typefilter_is_debugid_allowed(kdbg_typefilter, debugid))
1186 goto record_event;
1187
1188 goto out1;
1189 } else if (only_filter == TRUE) {
1190 goto out1;
1191 }
1192 else if (kd_ctrl_page.kdebug_flags & KDBG_RANGECHECK) {
1193 /* Always record trace system info */
1194 if (KDBG_EXTRACT_CLASS(debugid) == DBG_TRACE)
1195 goto record_event;
1196
1197 if (debugid < kdlog_beg || debugid > kdlog_end)
1198 goto out1;
1199 }
1200 else if (kd_ctrl_page.kdebug_flags & KDBG_VALCHECK) {
1201 /* Always record trace system info */
1202 if (KDBG_EXTRACT_CLASS(debugid) == DBG_TRACE)
1203 goto record_event;
1204
1205 if ((debugid & KDBG_EVENTID_MASK) != kdlog_value1 &&
1206 (debugid & KDBG_EVENTID_MASK) != kdlog_value2 &&
1207 (debugid & KDBG_EVENTID_MASK) != kdlog_value3 &&
1208 (debugid & KDBG_EVENTID_MASK) != kdlog_value4)
1209 goto out1;
1210 }
1211 } else if (only_filter == TRUE) {
1212 goto out1;
1213 }
1214
1215 record_event:
1216 disable_preemption();
1217
1218 if (kd_ctrl_page.enabled == 0)
1219 goto out;
1220
1221 cpu = cpu_number();
1222 kdbp = &kdbip[cpu];
1223
1224 #if KDEBUG_MOJO_TRACE
1225 if (kdebug_enable & KDEBUG_ENABLE_SERIAL)
1226 kdebug_serial_print(cpu, debugid,
1227 kdbg_timestamp() & KDBG_TIMESTAMP_MASK,
1228 arg1, arg2, arg3, arg4, arg5);
1229 #endif
1230
1231 retry_q:
1232 kds_raw = kdbp->kd_list_tail;
1233
1234 if (kds_raw.raw != KDS_PTR_NULL) {
1235 kdsp_actual = POINTER_FROM_KDS_PTR(kds_raw);
1236 bindx = kdsp_actual->kds_bufindx;
1237 } else {
1238 kdsp_actual = NULL;
1239 bindx = EVENTS_PER_STORAGE_UNIT;
1240 }
1241
1242 if (kdsp_actual == NULL || bindx >= EVENTS_PER_STORAGE_UNIT) {
1243 if (allocate_storage_unit(cpu) == FALSE) {
1244 /*
1245 * this can only happen if wrapping
1246 * has been disabled
1247 */
1248 goto out;
1249 }
1250 goto retry_q;
1251 }
1252 now = kdbg_timestamp() & KDBG_TIMESTAMP_MASK;
1253
1254 if ( !OSCompareAndSwap(bindx, bindx + 1, &kdsp_actual->kds_bufindx))
1255 goto retry_q;
1256
1257 kd = &kdsp_actual->kds_records[bindx];
1258
1259 kd->debugid = debugid;
1260 kd->arg1 = arg1;
1261 kd->arg2 = arg2;
1262 kd->arg3 = arg3;
1263 kd->arg4 = arg4;
1264 kd->arg5 = arg5;
1265
1266 kdbg_set_timestamp_and_cpu(kd, now, cpu);
1267
1268 OSAddAtomic(1, &kdsp_actual->kds_bufcnt);
1269
1270 #if KPERF
1271 kperf_kdebug_callback(debugid, __builtin_frame_address(0));
1272 #endif
1273 out:
1274 enable_preemption();
1275 out1:
1276 if (kds_waiter && kd_ctrl_page.kds_inuse_count >= n_storage_threshold) {
1277 uint32_t etype;
1278 uint32_t stype;
1279
1280 etype = debugid & KDBG_EVENTID_MASK;
1281 stype = debugid & KDBG_CSC_MASK;
1282
1283 if (etype == INTERRUPT || etype == MACH_vmfault ||
1284 stype == BSC_SysCall || stype == MACH_SysCall) {
1285 kdbg_wakeup();
1286 }
1287 }
1288 }
1289
1290 void
1291 kernel_debug(
1292 uint32_t debugid,
1293 uintptr_t arg1,
1294 uintptr_t arg2,
1295 uintptr_t arg3,
1296 uintptr_t arg4,
1297 __unused uintptr_t arg5)
1298 {
1299 kernel_debug_internal(FALSE, debugid, arg1, arg2, arg3, arg4,
1300 (uintptr_t)thread_tid(current_thread()));
1301 }
1302
1303 void
1304 kernel_debug1(
1305 uint32_t debugid,
1306 uintptr_t arg1,
1307 uintptr_t arg2,
1308 uintptr_t arg3,
1309 uintptr_t arg4,
1310 uintptr_t arg5)
1311 {
1312 kernel_debug_internal(FALSE, debugid, arg1, arg2, arg3, arg4, arg5);
1313 }
1314
1315 void
1316 kernel_debug_filtered(
1317 uint32_t debugid,
1318 uintptr_t arg1,
1319 uintptr_t arg2,
1320 uintptr_t arg3,
1321 uintptr_t arg4)
1322 {
1323 kernel_debug_internal(TRUE, debugid, arg1, arg2, arg3, arg4,
1324 (uintptr_t)thread_tid(current_thread()));
1325 }
1326
1327 void
1328 kernel_debug_string_early(const char *message)
1329 {
1330 uintptr_t arg[4] = {0, 0, 0, 0};
1331
1332 /* Stuff the message string in the args and log it. */
1333 strncpy((char *)arg, message, MIN(sizeof(arg), strlen(message)));
1334 KERNEL_DEBUG_EARLY(
1335 TRACE_INFO_STRING,
1336 arg[0], arg[1], arg[2], arg[3]);
1337 }
1338
1339 #define SIMPLE_STR_LEN (64)
1340 static_assert(SIMPLE_STR_LEN % sizeof(uintptr_t) == 0);
1341
1342 void
1343 kernel_debug_string_simple(uint32_t eventid, const char *str)
1344 {
1345 if (!kdebug_enable) {
1346 return;
1347 }
1348
1349 /* array of uintptr_ts simplifies emitting the string as arguments */
1350 uintptr_t str_buf[(SIMPLE_STR_LEN / sizeof(uintptr_t)) + 1] = { 0 };
1351 size_t len = strlcpy((char *)str_buf, str, SIMPLE_STR_LEN + 1);
1352
1353 uintptr_t thread_id = (uintptr_t)thread_tid(current_thread());
1354 uint32_t debugid = eventid | DBG_FUNC_START;
1355
1356 /* string can fit in a single tracepoint */
1357 if (len <= (4 * sizeof(uintptr_t))) {
1358 debugid |= DBG_FUNC_END;
1359 }
1360
1361 kernel_debug_internal(FALSE, debugid, str_buf[0],
1362 str_buf[1],
1363 str_buf[2],
1364 str_buf[3], thread_id);
1365
1366 debugid &= KDBG_EVENTID_MASK;
1367 int i = 4;
1368 size_t written = 4 * sizeof(uintptr_t);
1369
1370 for (; written < len; i += 4, written += 4 * sizeof(uintptr_t)) {
1371 /* if this is the last tracepoint to be emitted */
1372 if ((written + (4 * sizeof(uintptr_t))) >= len) {
1373 debugid |= DBG_FUNC_END;
1374 }
1375 kernel_debug_internal(FALSE, debugid, str_buf[i],
1376 str_buf[i + 1],
1377 str_buf[i + 2],
1378 str_buf[i + 3], thread_id);
1379 }
1380 }
1381
1382 extern int master_cpu; /* MACH_KERNEL_PRIVATE */
1383 /*
1384 * Used prior to start_kern_tracing() being called.
1385 * Log temporarily into a static buffer.
1386 */
1387 void
1388 kernel_debug_early(
1389 uint32_t debugid,
1390 uintptr_t arg1,
1391 uintptr_t arg2,
1392 uintptr_t arg3,
1393 uintptr_t arg4)
1394 {
1395 /* If early tracing is over, use the normal path. */
1396 if (kd_early_done) {
1397 KERNEL_DEBUG_CONSTANT(debugid, arg1, arg2, arg3, arg4, 0);
1398 return;
1399 }
1400
1401 /* Do nothing if the buffer is full or we're not on the boot cpu. */
1402 kd_early_overflow = kd_early_index >= KD_EARLY_BUFFER_NBUFS;
1403 if (kd_early_overflow || cpu_number() != master_cpu) {
1404 return;
1405 }
1406
1407 kd_early_buffer[kd_early_index].debugid = debugid;
1408 kd_early_buffer[kd_early_index].timestamp = mach_absolute_time();
1409 kd_early_buffer[kd_early_index].arg1 = arg1;
1410 kd_early_buffer[kd_early_index].arg2 = arg2;
1411 kd_early_buffer[kd_early_index].arg3 = arg3;
1412 kd_early_buffer[kd_early_index].arg4 = arg4;
1413 kd_early_buffer[kd_early_index].arg5 = 0;
1414 kd_early_index++;
1415 }
1416
1417 /*
1418 * Transfer the contents of the temporary buffer into the trace buffers.
1419 * Precede that by logging the rebase time (offset) - the TSC-based time (in ns)
1420 * when mach_absolute_time is set to 0.
1421 */
1422 static void
1423 kernel_debug_early_end(void)
1424 {
1425 if (cpu_number() != master_cpu) {
1426 panic("kernel_debug_early_end() not call on boot processor");
1427 }
1428
1429 /* reset the current oldest time to allow early events */
1430 kd_ctrl_page.oldest_time = 0;
1431
1432 #if !CONFIG_EMBEDDED
1433 /* Fake sentinel marking the start of kernel time relative to TSC */
1434 kernel_debug_enter(0,
1435 TRACE_TIMESTAMPS,
1436 0,
1437 (uint32_t)(tsc_rebase_abs_time >> 32),
1438 (uint32_t)tsc_rebase_abs_time,
1439 tsc_at_boot,
1440 0,
1441 0);
1442 #endif
1443 for (unsigned int i = 0; i < kd_early_index; i++) {
1444 kernel_debug_enter(0,
1445 kd_early_buffer[i].debugid,
1446 kd_early_buffer[i].timestamp,
1447 kd_early_buffer[i].arg1,
1448 kd_early_buffer[i].arg2,
1449 kd_early_buffer[i].arg3,
1450 kd_early_buffer[i].arg4,
1451 0);
1452 }
1453
1454 /* Cut events-lost event on overflow */
1455 if (kd_early_overflow) {
1456 KDBG_RELEASE(TRACE_LOST_EVENTS, 1);
1457 }
1458
1459 kd_early_done = true;
1460
1461 /* This trace marks the start of kernel tracing */
1462 kernel_debug_string_early("early trace done");
1463 }
1464
1465 void
1466 kernel_debug_disable(void)
1467 {
1468 if (kdebug_enable) {
1469 kdbg_set_tracing_enabled(FALSE, 0);
1470 }
1471 }
1472
1473 /*
1474 * Returns non-zero if debugid is in a reserved class.
1475 */
1476 static int
1477 kdebug_validate_debugid(uint32_t debugid)
1478 {
1479 uint8_t debugid_class;
1480
1481 debugid_class = KDBG_EXTRACT_CLASS(debugid);
1482 switch (debugid_class) {
1483 case DBG_TRACE:
1484 return EPERM;
1485 }
1486
1487 return 0;
1488 }
1489
1490 /*
1491 * Support syscall SYS_kdebug_typefilter.
1492 */
1493 int
1494 kdebug_typefilter(__unused struct proc* p,
1495 struct kdebug_typefilter_args* uap,
1496 __unused int *retval)
1497 {
1498 int ret = KERN_SUCCESS;
1499
1500 if (uap->addr == USER_ADDR_NULL ||
1501 uap->size == USER_ADDR_NULL) {
1502 return EINVAL;
1503 }
1504
1505 /*
1506 * The atomic load is to close a race window with setting the typefilter
1507 * and memory entry values. A description follows:
1508 *
1509 * Thread 1 (writer)
1510 *
1511 * Allocate Typefilter
1512 * Allocate MemoryEntry
1513 * Write Global MemoryEntry Ptr
1514 * Atomic Store (Release) Global Typefilter Ptr
1515 *
1516 * Thread 2 (reader, AKA us)
1517 *
1518 * if ((Atomic Load (Acquire) Global Typefilter Ptr) == NULL)
1519 * return;
1520 *
1521 * Without the atomic store, it isn't guaranteed that the write of
1522 * Global MemoryEntry Ptr is visible before we can see the write of
1523 * Global Typefilter Ptr.
1524 *
1525 * Without the atomic load, it isn't guaranteed that the loads of
1526 * Global MemoryEntry Ptr aren't speculated.
1527 *
1528 * The global pointers transition from NULL -> valid once and only once,
1529 * and never change after becoming valid. This means that having passed
1530 * the first atomic load test of Global Typefilter Ptr, this function
1531 * can then safely use the remaining global state without atomic checks.
1532 */
1533 if (!__c11_atomic_load((_Atomic typefilter_t *)&kdbg_typefilter, memory_order_acquire)) {
1534 return EINVAL;
1535 }
1536
1537 assert(kdbg_typefilter_memory_entry);
1538
1539 mach_vm_offset_t user_addr = 0;
1540 vm_map_t user_map = current_map();
1541
1542 ret = mach_to_bsd_errno(
1543 mach_vm_map_kernel(user_map, // target map
1544 &user_addr, // [in, out] target address
1545 TYPEFILTER_ALLOC_SIZE, // initial size
1546 0, // mask (alignment?)
1547 VM_FLAGS_ANYWHERE, // flags
1548 VM_KERN_MEMORY_NONE,
1549 kdbg_typefilter_memory_entry, // port (memory entry!)
1550 0, // offset (in memory entry)
1551 FALSE, // should copy
1552 VM_PROT_READ, // cur_prot
1553 VM_PROT_READ, // max_prot
1554 VM_INHERIT_SHARE)); // inherit behavior on fork
1555
1556 if (ret == KERN_SUCCESS) {
1557 vm_size_t user_ptr_size = vm_map_is_64bit(user_map) ? 8 : 4;
1558 ret = copyout(CAST_DOWN(void *, &user_addr), uap->addr, user_ptr_size );
1559
1560 if (ret != KERN_SUCCESS) {
1561 mach_vm_deallocate(user_map, user_addr, TYPEFILTER_ALLOC_SIZE);
1562 }
1563 }
1564
1565 return ret;
1566 }
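/*
 * For completeness, a sketch of the writer side of the ordering contract
 * described above. The real work lives in kdbg_copyin_typefilter() and
 * kdbg_enable_typefilter(); the exact code may differ.
 *
 *     kdbg_typefilter_memory_entry = typefilter_create_memory_entry(tf);
 *     __c11_atomic_store((_Atomic typefilter_t *)&kdbg_typefilter, tf,
 *                        memory_order_release);
 *
 * The release store publishes the memory entry before readers of the
 * typefilter pointer acquire-load it in this function.
 */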
1567
1568 /*
1569 * Support syscall SYS_kdebug_trace. U64->K32 args may get truncated in kdebug_trace64
1570 */
1571 int
1572 kdebug_trace(struct proc *p, struct kdebug_trace_args *uap, int32_t *retval)
1573 {
1574 struct kdebug_trace64_args uap64;
1575
1576 uap64.code = uap->code;
1577 uap64.arg1 = uap->arg1;
1578 uap64.arg2 = uap->arg2;
1579 uap64.arg3 = uap->arg3;
1580 uap64.arg4 = uap->arg4;
1581
1582 return kdebug_trace64(p, &uap64, retval);
1583 }
1584
1585 /*
1586 * Support syscall SYS_kdebug_trace64. 64-bit args on K32 will get truncated
1587 * to fit in 32-bit record format.
1588 *
1589 * It is intentional that error conditions are not checked until kdebug is
1590 * enabled. This is to match the userspace wrapper behavior, which is optimizing
1591 * for non-error case performance.
1592 */
1593 int kdebug_trace64(__unused struct proc *p, struct kdebug_trace64_args *uap, __unused int32_t *retval)
1594 {
1595 int err;
1596
1597 if ( __probable(kdebug_enable == 0) )
1598 return(0);
1599
1600 if ((err = kdebug_validate_debugid(uap->code)) != 0) {
1601 return err;
1602 }
1603
1604 kernel_debug_internal(FALSE, uap->code,
1605 (uintptr_t)uap->arg1,
1606 (uintptr_t)uap->arg2,
1607 (uintptr_t)uap->arg3,
1608 (uintptr_t)uap->arg4,
1609 (uintptr_t)thread_tid(current_thread()));
1610
1611 return(0);
1612 }
1613
1614 /*
1615 * Adding enough padding to contain a full tracepoint for the last
1616 * portion of the string greatly simplifies the logic of splitting the
1617 * string between tracepoints. Full tracepoints can be generated using
1618 * the buffer itself, without having to manually add zeros to pad the
1619 * arguments.
1620 */
1621
1622 /* 2 string args in first tracepoint and 9 string data tracepoints */
1623 #define STR_BUF_ARGS (2 + (9 * 4))
1624 /* times the size of each arg on K64 */
1625 #define MAX_STR_LEN (STR_BUF_ARGS * sizeof(uint64_t))
1626 /* on K32, ending straddles a tracepoint, so reserve blanks */
1627 #define STR_BUF_SIZE (MAX_STR_LEN + (2 * sizeof(uint32_t)))
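/*
 * Worked out: STR_BUF_ARGS is 2 + 36 = 38 arguments, so MAX_STR_LEN is
 * 38 * 8 = 304 bytes on K64, and STR_BUF_SIZE pads that with two 32-bit
 * slots so a K32 string that straddles the final tracepoint still has a
 * full set of zeroed arguments available.
 */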
1628
1629 /*
1630 * This function does no error checking and assumes that it is called with
1631 * the correct arguments, including that the buffer pointed to by str is at
1632 * least STR_BUF_SIZE bytes. However, str must be aligned to word-size and
1633 * be NUL-terminated. In cases where a string can fit evenly into a final
1634 * tracepoint without its NUL-terminator, this function will not end those
1635 * strings with a NUL in trace. It's up to clients to look at the function
1636 * qualifier for DBG_FUNC_END in this case, to end the string.
1637 */
1638 static uint64_t
1639 kernel_debug_string_internal(uint32_t debugid, uint64_t str_id, void *vstr,
1640 size_t str_len)
1641 {
1642 /* str must be word-aligned */
1643 uintptr_t *str = vstr;
1644 size_t written = 0;
1645 uintptr_t thread_id;
1646 int i;
1647 uint32_t trace_debugid = TRACEDBG_CODE(DBG_TRACE_STRING,
1648 TRACE_STRING_GLOBAL);
1649
1650 thread_id = (uintptr_t)thread_tid(current_thread());
1651
1652 /* if the ID is being invalidated, just emit that */
1653 if (str_id != 0 && str_len == 0) {
1654 kernel_debug_internal(FALSE, trace_debugid | DBG_FUNC_START | DBG_FUNC_END,
1655 (uintptr_t)debugid, (uintptr_t)str_id, 0, 0,
1656 thread_id);
1657 return str_id;
1658 }
1659
1660 /* generate an ID, if necessary */
1661 if (str_id == 0) {
1662 str_id = OSIncrementAtomic64((SInt64 *)&g_curr_str_id);
1663 str_id = (str_id & STR_ID_MASK) | g_str_id_signature;
1664 }
1665
1666 trace_debugid |= DBG_FUNC_START;
1667 /* string can fit in a single tracepoint */
1668 if (str_len <= (2 * sizeof(uintptr_t))) {
1669 trace_debugid |= DBG_FUNC_END;
1670 }
1671
1672 kernel_debug_internal(FALSE, trace_debugid, (uintptr_t)debugid,
1673 (uintptr_t)str_id, str[0],
1674 str[1], thread_id);
1675
1676 trace_debugid &= KDBG_EVENTID_MASK;
1677 i = 2;
1678 written += 2 * sizeof(uintptr_t);
1679
1680 for (; written < str_len; i += 4, written += 4 * sizeof(uintptr_t)) {
1681 if ((written + (4 * sizeof(uintptr_t))) >= str_len) {
1682 trace_debugid |= DBG_FUNC_END;
1683 }
1684 kernel_debug_internal(FALSE, trace_debugid, str[i],
1685 str[i + 1],
1686 str[i + 2],
1687 str[i + 3], thread_id);
1688 }
1689
1690 return str_id;
1691 }
1692
1693 /*
1694 * Returns true if the current process can emit events, and false otherwise.
1695 * Trace system and scheduling events circumvent this check, as do events
1696 * emitted in interrupt context.
1697 */
1698 static boolean_t
1699 kdebug_current_proc_enabled(uint32_t debugid)
1700 {
1701 /* can't determine current process in interrupt context */
1702 if (ml_at_interrupt_context()) {
1703 return TRUE;
1704 }
1705
1706 /* always emit trace system and scheduling events */
1707 if ((KDBG_EXTRACT_CLASS(debugid) == DBG_TRACE ||
1708 (debugid & KDBG_CSC_MASK) == MACHDBG_CODE(DBG_MACH_SCHED, 0)))
1709 {
1710 return TRUE;
1711 }
1712
1713 if (kd_ctrl_page.kdebug_flags & KDBG_PIDCHECK) {
1714 proc_t cur_proc = current_proc();
1715
1716 /* only the process with the kdebug bit set is allowed */
1717 if (cur_proc && !(cur_proc->p_kdebug)) {
1718 return FALSE;
1719 }
1720 } else if (kd_ctrl_page.kdebug_flags & KDBG_PIDEXCLUDE) {
1721 proc_t cur_proc = current_proc();
1722
1723 /* every process except the one with the kdebug bit set is allowed */
1724 if (cur_proc && cur_proc->p_kdebug) {
1725 return FALSE;
1726 }
1727 }
1728
1729 return TRUE;
1730 }
1731
1732 boolean_t
1733 kdebug_debugid_enabled(uint32_t debugid)
1734 {
1735 /* if no filtering is enabled */
1736 if (!kd_ctrl_page.kdebug_slowcheck) {
1737 return TRUE;
1738 }
1739
1740 return kdebug_debugid_explicitly_enabled(debugid);
1741 }
1742
1743 boolean_t
1744 kdebug_debugid_explicitly_enabled(uint32_t debugid)
1745 {
1746 if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) {
1747 return typefilter_is_debugid_allowed(kdbg_typefilter, debugid);
1748 } else if (KDBG_EXTRACT_CLASS(debugid) == DBG_TRACE) {
1749 return TRUE;
1750 } else if (kd_ctrl_page.kdebug_flags & KDBG_RANGECHECK) {
1751 if (debugid < kdlog_beg || debugid > kdlog_end) {
1752 return FALSE;
1753 }
1754 } else if (kd_ctrl_page.kdebug_flags & KDBG_VALCHECK) {
1755 if ((debugid & KDBG_EVENTID_MASK) != kdlog_value1 &&
1756 (debugid & KDBG_EVENTID_MASK) != kdlog_value2 &&
1757 (debugid & KDBG_EVENTID_MASK) != kdlog_value3 &&
1758 (debugid & KDBG_EVENTID_MASK) != kdlog_value4)
1759 {
1760 return FALSE;
1761 }
1762 }
1763
1764 return TRUE;
1765 }
1766
1767 /*
1768 * Returns 0 if a string can be traced with these arguments. Returns errno
1769 * value if error occurred.
1770 */
1771 static errno_t
1772 kdebug_check_trace_string(uint32_t debugid, uint64_t str_id)
1773 {
1774 /* if there are function qualifiers on the debugid */
1775 if (debugid & ~KDBG_EVENTID_MASK) {
1776 return EINVAL;
1777 }
1778
1779 if (kdebug_validate_debugid(debugid)) {
1780 return EPERM;
1781 }
1782
1783 if (str_id != 0 && (str_id & STR_ID_SIG_MASK) != g_str_id_signature) {
1784 return EINVAL;
1785 }
1786
1787 return 0;
1788 }
1789
1790 /*
1791 * Implementation of KPI kernel_debug_string.
1792 */
1793 int
1794 kernel_debug_string(uint32_t debugid, uint64_t *str_id, const char *str)
1795 {
1796 /* arguments to tracepoints must be word-aligned */
1797 __attribute__((aligned(sizeof(uintptr_t)))) char str_buf[STR_BUF_SIZE];
1798 static_assert(sizeof(str_buf) > MAX_STR_LEN);
1799 vm_size_t len_copied;
1800 int err;
1801
1802 assert(str_id);
1803
1804 if (__probable(kdebug_enable == 0)) {
1805 return 0;
1806 }
1807
1808 if (!kdebug_current_proc_enabled(debugid)) {
1809 return 0;
1810 }
1811
1812 if (!kdebug_debugid_enabled(debugid)) {
1813 return 0;
1814 }
1815
1816 if ((err = kdebug_check_trace_string(debugid, *str_id)) != 0) {
1817 return err;
1818 }
1819
1820 if (str == NULL) {
1821 if (*str_id == 0) {
1822 return EINVAL;
1823 }
1824
1825 *str_id = kernel_debug_string_internal(debugid, *str_id, NULL, 0);
1826 return 0;
1827 }
1828
1829 memset(str_buf, 0, sizeof(str_buf));
1830 len_copied = strlcpy(str_buf, str, MAX_STR_LEN + 1);
1831 *str_id = kernel_debug_string_internal(debugid, *str_id, str_buf,
1832 len_copied);
1833 return 0;
1834 }
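/*
 * Minimal usage sketch for the KPI above (MY_EVENT_ID is hypothetical):
 *
 *     uint64_t str_id = 0;
 *     kernel_debug_string(MY_EVENT_ID, &str_id, "worker thread name");
 *     ...
 *     kernel_debug_string(MY_EVENT_ID, &str_id, NULL);    // invalidate it
 *
 * Passing a NULL string with a previously returned str_id emits the
 * invalidation tracepoint rather than new string data.
 */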
1835
1836 /*
1837 * Support syscall kdebug_trace_string.
1838 */
1839 int
1840 kdebug_trace_string(__unused struct proc *p,
1841 struct kdebug_trace_string_args *uap,
1842 uint64_t *retval)
1843 {
1844 __attribute__((aligned(sizeof(uintptr_t)))) char str_buf[STR_BUF_SIZE];
1845 static_assert(sizeof(str_buf) > MAX_STR_LEN);
1846 size_t len_copied;
1847 int err;
1848
1849 if (__probable(kdebug_enable == 0)) {
1850 return 0;
1851 }
1852
1853 if (!kdebug_current_proc_enabled(uap->debugid)) {
1854 return 0;
1855 }
1856
1857 if (!kdebug_debugid_enabled(uap->debugid)) {
1858 return 0;
1859 }
1860
1861 if ((err = kdebug_check_trace_string(uap->debugid, uap->str_id)) != 0) {
1862 return err;
1863 }
1864
1865 if (uap->str == USER_ADDR_NULL) {
1866 if (uap->str_id == 0) {
1867 return EINVAL;
1868 }
1869
1870 *retval = kernel_debug_string_internal(uap->debugid, uap->str_id,
1871 NULL, 0);
1872 return 0;
1873 }
1874
1875 memset(str_buf, 0, sizeof(str_buf));
1876 err = copyinstr(uap->str, str_buf, MAX_STR_LEN + 1, &len_copied);
1877
1878 /* it's alright to truncate the string, so allow ENAMETOOLONG */
1879 if (err == ENAMETOOLONG) {
1880 str_buf[MAX_STR_LEN] = '\0';
1881 } else if (err) {
1882 return err;
1883 }
1884
1885 if (len_copied <= 1) {
1886 return EINVAL;
1887 }
1888
1889 /* convert back to a length */
1890 len_copied--;
1891
1892 *retval = kernel_debug_string_internal(uap->debugid, uap->str_id, str_buf,
1893 len_copied);
1894 return 0;
1895 }
1896
1897 static void
1898 kdbg_lock_init(void)
1899 {
1900 static lck_grp_attr_t *kdebug_lck_grp_attr = NULL;
1901 static lck_grp_t *kdebug_lck_grp = NULL;
1902 static lck_attr_t *kdebug_lck_attr = NULL;
1903
1904 if (kd_ctrl_page.kdebug_flags & KDBG_LOCKINIT) {
1905 return;
1906 }
1907
1908 assert(kdebug_lck_grp_attr == NULL);
1909 kdebug_lck_grp_attr = lck_grp_attr_alloc_init();
1910 kdebug_lck_grp = lck_grp_alloc_init("kdebug", kdebug_lck_grp_attr);
1911 kdebug_lck_attr = lck_attr_alloc_init();
1912
1913 kds_spin_lock = lck_spin_alloc_init(kdebug_lck_grp, kdebug_lck_attr);
1914 kdw_spin_lock = lck_spin_alloc_init(kdebug_lck_grp, kdebug_lck_attr);
1915
1916 kd_ctrl_page.kdebug_flags |= KDBG_LOCKINIT;
1917 }
1918
1919 int
1920 kdbg_bootstrap(boolean_t early_trace)
1921 {
1922 kd_ctrl_page.kdebug_flags &= ~KDBG_WRAPPED;
1923
1924 return (create_buffers(early_trace));
1925 }
1926
1927 int
1928 kdbg_reinit(boolean_t early_trace)
1929 {
1930 int ret = 0;
1931
1932 /*
1933 * Disable trace collecting
1934 * First make sure we're not in
1935 * the middle of cutting a trace
1936 */
1937 kernel_debug_disable();
1938
1939 /*
1940 * make sure the SLOW_NOLOG is seen
1941 * by everyone that might be trying
1942 * to cut a trace..
1943 */
1944 IOSleep(100);
1945
1946 delete_buffers();
1947
1948 kdbg_clear_thread_map();
1949 ret = kdbg_bootstrap(early_trace);
1950
1951 RAW_file_offset = 0;
1952 RAW_file_written = 0;
1953
1954 return(ret);
1955 }
1956
1957 void
1958 kdbg_trace_data(struct proc *proc, long *arg_pid, long *arg_uniqueid)
1959 {
1960 if (!proc) {
1961 *arg_pid = 0;
1962 *arg_uniqueid = 0;
1963 } else {
1964 *arg_pid = proc->p_pid;
1965 *arg_uniqueid = proc->p_uniqueid;
1966 if ((uint64_t) *arg_uniqueid != proc->p_uniqueid) {
1967 *arg_uniqueid = 0;
1968 }
1969 }
1970 }
1971
1972
1973 void
1974 kdbg_trace_string(struct proc *proc, long *arg1, long *arg2, long *arg3, long *arg4)
1975 {
1976 char *dbg_nameptr;
1977 int dbg_namelen;
1978 long dbg_parms[4];
1979
1980 if (!proc) {
1981 *arg1 = 0;
1982 *arg2 = 0;
1983 *arg3 = 0;
1984 *arg4 = 0;
1985 return;
1986 }
1987 /*
1988 * Collect the pathname for tracing
1989 */
1990 dbg_nameptr = proc->p_comm;
1991 dbg_namelen = (int)strlen(proc->p_comm);
1992 dbg_parms[0] = 0L;
1993 dbg_parms[1] = 0L;
1994 dbg_parms[2] = 0L;
1995 dbg_parms[3] = 0L;
1996
1997 if (dbg_namelen > (int)sizeof(dbg_parms))
1998 dbg_namelen = (int)sizeof(dbg_parms);
1999
2000 strncpy((char *)dbg_parms, dbg_nameptr, dbg_namelen);
2001
2002 *arg1 = dbg_parms[0];
2003 *arg2 = dbg_parms[1];
2004 *arg3 = dbg_parms[2];
2005 *arg4 = dbg_parms[3];
2006 }
2007
2008 static void
2009 kdbg_resolve_map(thread_t th_act, void *opaque)
2010 {
2011 kd_threadmap *mapptr;
2012 krt_t *t = (krt_t *)opaque;
2013
2014 if (t->count < t->maxcount) {
2015 mapptr = &t->map[t->count];
2016 mapptr->thread = (uintptr_t)thread_tid(th_act);
2017
2018 (void) strlcpy (mapptr->command, t->atts->task_comm,
2019 sizeof(t->atts->task_comm));
2020 /*
2021 * Some kernel threads have no associated pid.
2022 * We still need to mark the entry as valid.
2023 */
2024 if (t->atts->pid)
2025 mapptr->valid = t->atts->pid;
2026 else
2027 mapptr->valid = 1;
2028
2029 t->count++;
2030 }
2031 }
2032
2033 /*
2034 *
2035 * Writes a cpumap for the given iops_list/cpu_count to the provided buffer.
2036 *
2037 * You may provide a buffer and size, or if you set the buffer to NULL, a
2038 * buffer of sufficient size will be allocated.
2039 *
2040 * If you provide a buffer and it is too small, sets cpumap_size to the number
2041 * of bytes required and returns EINVAL.
2042 *
2043 * On success, if you provided a buffer, cpumap_size is set to the number of
2044 * bytes written. If you did not provide a buffer, cpumap is set to the newly
2045 * allocated buffer and cpumap_size is set to the number of bytes allocated.
2046 *
2047 * NOTE: It may seem redundant to pass both iops and a cpu_count.
2048 *
2049 * We may be reporting data from "now", or from the "past".
2050 *
2051 * The "past" data would be for kdbg_readcpumap().
2052 *
2053 * If we do not pass both iops and cpu_count, and iops is NULL, this function
2054 * will need to read "now" state to get the number of cpus, which would be in
2055 * error if we were reporting "past" state.
2056 */
2057
2058 int
2059 kdbg_cpumap_init_internal(kd_iop_t* iops, uint32_t cpu_count, uint8_t** cpumap, uint32_t* cpumap_size)
2060 {
2061 assert(cpumap);
2062 assert(cpumap_size);
2063 assert(cpu_count);
2064 assert(!iops || iops->cpu_id + 1 == cpu_count);
2065
2066 uint32_t bytes_needed = sizeof(kd_cpumap_header) + cpu_count * sizeof(kd_cpumap);
2067 uint32_t bytes_available = *cpumap_size;
2068 *cpumap_size = bytes_needed;
2069
2070 if (*cpumap == NULL) {
2071 if (kmem_alloc(kernel_map, (vm_offset_t*)cpumap, (vm_size_t)*cpumap_size, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
2072 return ENOMEM;
2073 }
2074 bzero(*cpumap, *cpumap_size);
2075 } else if (bytes_available < bytes_needed) {
2076 return EINVAL;
2077 }
2078
2079 kd_cpumap_header* header = (kd_cpumap_header*)(uintptr_t)*cpumap;
2080
2081 header->version_no = RAW_VERSION1;
2082 header->cpu_count = cpu_count;
2083
2084 kd_cpumap* cpus = (kd_cpumap*)&header[1];
2085
2086 int32_t index = cpu_count - 1;
2087 while (iops) {
2088 cpus[index].cpu_id = iops->cpu_id;
2089 cpus[index].flags = KDBG_CPUMAP_IS_IOP;
2090 strlcpy(cpus[index].name, iops->callback.iop_name, sizeof(cpus->name));
2091
2092 iops = iops->next;
2093 index--;
2094 }
2095
2096 while (index >= 0) {
2097 cpus[index].cpu_id = index;
2098 cpus[index].flags = 0;
2099 strlcpy(cpus[index].name, "AP", sizeof(cpus->name));
2100
2101 index--;
2102 }
2103
2104 return KERN_SUCCESS;
2105 }
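
/*
 * Usage sketch for the contract documented above (illustrative only; it
 * mirrors what kdbg_readcpumap() below does): pass a NULL buffer to have one
 * allocated, then free it with kmem_free() when done.
 *
 *	uint8_t *cpumap = NULL;
 *	uint32_t cpumap_size = 0;
 *
 *	if (kdbg_cpumap_init_internal(kd_ctrl_page.kdebug_iops,
 *	    kd_ctrl_page.kdebug_cpus, &cpumap, &cpumap_size) == KERN_SUCCESS) {
 *		// ... consume the kd_cpumap_header and kd_cpumap entries ...
 *		kmem_free(kernel_map, (vm_offset_t)cpumap, cpumap_size);
 *	}
 *
 * Alternatively, hand in an existing buffer; if it is too small the call
 * returns EINVAL and cpumap_size reports the number of bytes required.
 */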
2106
2107 void
2108 kdbg_thrmap_init(void)
2109 {
2110 ktrace_assert_lock_held();
2111
2112 if (kd_ctrl_page.kdebug_flags & KDBG_MAPINIT) {
2113 return;
2114 }
2115
2116 kd_mapptr = kdbg_thrmap_init_internal(0, &kd_mapsize, &kd_mapcount);
2117
2118 if (kd_mapptr) {
2119 kd_ctrl_page.kdebug_flags |= KDBG_MAPINIT;
2120 }
2121 }
2122
2123 static kd_threadmap *
2124 kdbg_thrmap_init_internal(unsigned int count, unsigned int *mapsize, unsigned int *mapcount)
2125 {
2126 kd_threadmap *mapptr;
2127 proc_t p;
2128 struct krt akrt;
2129 int tts_count = 0; /* number of task-to-string structures */
2130 struct tts *tts_mapptr;
2131 unsigned int tts_mapsize = 0;
2132 vm_offset_t kaddr;
2133
2134 assert(mapsize != NULL);
2135 assert(mapcount != NULL);
2136
2137 *mapcount = threads_count;
2138 tts_count = tasks_count;
2139
2140 /*
2141 * The proc count could change during buffer allocation,
2142 * so introduce a small fudge factor to bump up the
2143 * buffer sizes. This gives new tasks some chance of
2144 * making it into the tables. Bump up by 25%.
2145 */
2146 *mapcount += *mapcount / 4;
2147 tts_count += tts_count / 4;
2148
2149 *mapsize = *mapcount * sizeof(kd_threadmap);
2150
2151 if (count && count < *mapcount) {
2152 return 0;
2153 }
2154
2155 if ((kmem_alloc(kernel_map, &kaddr, (vm_size_t)*mapsize, VM_KERN_MEMORY_DIAG) == KERN_SUCCESS)) {
2156 bzero((void *)kaddr, *mapsize);
2157 mapptr = (kd_threadmap *)kaddr;
2158 } else {
2159 return 0;
2160 }
2161
2162 tts_mapsize = tts_count * sizeof(struct tts);
2163
2164 if ((kmem_alloc(kernel_map, &kaddr, (vm_size_t)tts_mapsize, VM_KERN_MEMORY_DIAG) == KERN_SUCCESS)) {
2165 bzero((void *)kaddr, tts_mapsize);
2166 tts_mapptr = (struct tts *)kaddr;
2167 } else {
2168 kmem_free(kernel_map, (vm_offset_t)mapptr, *mapsize);
2169
2170 return 0;
2171 }
2172
2173 /*
2174 * Save the proc's name and take a reference for each task associated
2175 * with a valid process.
2176 */
2177 proc_list_lock();
2178
2179 int i = 0;
2180 ALLPROC_FOREACH(p) {
2181 if (i >= tts_count) {
2182 break;
2183 }
2184 if (p->p_lflag & P_LEXIT) {
2185 continue;
2186 }
2187 if (p->task) {
2188 task_reference(p->task);
2189 tts_mapptr[i].task = p->task;
2190 tts_mapptr[i].pid = p->p_pid;
2191 (void)strlcpy(tts_mapptr[i].task_comm, proc_best_name(p), sizeof(tts_mapptr[i].task_comm));
2192 i++;
2193 }
2194 }
2195 tts_count = i;
2196
2197 proc_list_unlock();
2198
2199 /*
2200 * Initialize thread map data
2201 */
2202 akrt.map = mapptr;
2203 akrt.count = 0;
2204 akrt.maxcount = *mapcount;
2205
2206 for (i = 0; i < tts_count; i++) {
2207 akrt.atts = &tts_mapptr[i];
2208 task_act_iterate_wth_args(tts_mapptr[i].task, kdbg_resolve_map, &akrt);
2209 task_deallocate((task_t)tts_mapptr[i].task);
2210 }
2211 kmem_free(kernel_map, (vm_offset_t)tts_mapptr, tts_mapsize);
2212
2213 *mapcount = akrt.count;
2214
2215 return mapptr;
2216 }
2217
2218 static void
2219 kdbg_clear(void)
2220 {
2221 /*
2222 * Clean up the trace buffer
2223 * First make sure we're not in
2224 * the middle of cutting a trace
2225 */
2226 kernel_debug_disable();
2227 kdbg_disable_typefilter();
2228
2229 /*
2230 * make sure the SLOW_NOLOG is seen
2231 * by everyone that might be trying
2232 * to cut a trace..
2233 */
2234 IOSleep(100);
2235
2236 /* reset kdebug state for each process */
2237 if (kd_ctrl_page.kdebug_flags & (KDBG_PIDCHECK | KDBG_PIDEXCLUDE)) {
2238 proc_list_lock();
2239 proc_t p;
2240 ALLPROC_FOREACH(p) {
2241 p->p_kdebug = 0;
2242 }
2243 proc_list_unlock();
2244 }
2245
2246 kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
2247 kd_ctrl_page.kdebug_flags &= ~(KDBG_NOWRAP | KDBG_RANGECHECK | KDBG_VALCHECK);
2248 kd_ctrl_page.kdebug_flags &= ~(KDBG_PIDCHECK | KDBG_PIDEXCLUDE);
2249
2250 kd_ctrl_page.oldest_time = 0;
2251
2252 delete_buffers();
2253 nkdbufs = 0;
2254
2255 /* Clean up the thread map buffer */
2256 kdbg_clear_thread_map();
2257
2258 RAW_file_offset = 0;
2259 RAW_file_written = 0;
2260 }
2261
2262 void
2263 kdebug_reset(void)
2264 {
2265 ktrace_assert_lock_held();
2266
2267 kdbg_lock_init();
2268
2269 kdbg_clear();
2270 if (kdbg_typefilter) {
2271 typefilter_reject_all(kdbg_typefilter);
2272 typefilter_allow_class(kdbg_typefilter, DBG_TRACE);
2273 }
2274 }
2275
2276 void
2277 kdebug_free_early_buf(void)
2278 {
2279 /* Must be done with the buffer, so release it back to the VM. */
2280 ml_static_mfree((vm_offset_t)&kd_early_buffer, sizeof(kd_early_buffer));
2281 }
2282
2283 int
2284 kdbg_setpid(kd_regtype *kdr)
2285 {
2286 pid_t pid;
2287 int flag, ret=0;
2288 struct proc *p;
2289
2290 pid = (pid_t)kdr->value1;
2291 flag = (int)kdr->value2;
2292
2293 if (pid >= 0) {
2294 if ((p = proc_find(pid)) == NULL)
2295 ret = ESRCH;
2296 else {
2297 if (flag == 1) {
2298 /*
2299 * turn on pid check for this and all pids
2300 */
2301 kd_ctrl_page.kdebug_flags |= KDBG_PIDCHECK;
2302 kd_ctrl_page.kdebug_flags &= ~KDBG_PIDEXCLUDE;
2303 kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
2304
2305 p->p_kdebug = 1;
2306 } else {
2307 /*
2308 * turn off pid check for this pid value
2309 * Don't turn off all pid checking though
2310 *
2311 * kd_ctrl_page.kdebug_flags &= ~KDBG_PIDCHECK;
2312 */
2313 p->p_kdebug = 0;
2314 }
2315 proc_rele(p);
2316 }
2317 }
2318 else
2319 ret = EINVAL;
2320
2321 return(ret);
2322 }
2323
2324 /* This is for pid exclusion in the trace buffer */
2325 int
2326 kdbg_setpidex(kd_regtype *kdr)
2327 {
2328 pid_t pid;
2329 int flag, ret=0;
2330 struct proc *p;
2331
2332 pid = (pid_t)kdr->value1;
2333 flag = (int)kdr->value2;
2334
2335 if (pid >= 0) {
2336 if ((p = proc_find(pid)) == NULL)
2337 ret = ESRCH;
2338 else {
2339 if (flag == 1) {
2340 /*
2341 * turn on pid exclusion
2342 */
2343 kd_ctrl_page.kdebug_flags |= KDBG_PIDEXCLUDE;
2344 kd_ctrl_page.kdebug_flags &= ~KDBG_PIDCHECK;
2345 kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
2346
2347 p->p_kdebug = 1;
2348 }
2349 else {
2350 /*
2351 * turn off pid exclusion for this pid value
2352 * Don't turn off all pid exclusion though
2353 *
2354 * kd_ctrl_page.kdebug_flags &= ~KDBG_PIDEXCLUDE;
2355 */
2356 p->p_kdebug = 0;
2357 }
2358 proc_rele(p);
2359 }
2360 } else
2361 ret = EINVAL;
2362
2363 return(ret);
2364 }
2365
2366 /*
2367 * The following functions all operate on the "global" typefilter singleton.
2368 */
2369
2370 /*
2371 * The tf param is optional; you may pass either a valid typefilter or NULL.
2372 * If you pass a valid typefilter, you release ownership of that typefilter.
2373 */
2374 static int
2375 kdbg_initialize_typefilter(typefilter_t tf)
2376 {
2377 ktrace_assert_lock_held();
2378 assert(!kdbg_typefilter);
2379 assert(!kdbg_typefilter_memory_entry);
2380 typefilter_t deallocate_tf = NULL;
2381
2382 if (!tf && ((tf = deallocate_tf = typefilter_create()) == NULL)) {
2383 return ENOMEM;
2384 }
2385
2386 if ((kdbg_typefilter_memory_entry = typefilter_create_memory_entry(tf)) == MACH_PORT_NULL) {
2387 if (deallocate_tf) {
2388 typefilter_deallocate(deallocate_tf);
2389 }
2390 return ENOMEM;
2391 }
2392
2393 /*
2394 * The atomic store closes a race window with
2395 * the kdebug_typefilter syscall, which assumes
2396 * that any non-null kdbg_typefilter means a
2397 * valid memory_entry is available.
2398 */
2399 __c11_atomic_store(((_Atomic typefilter_t*)&kdbg_typefilter), tf, memory_order_release);
2400
2401 return KERN_SUCCESS;
2402 }
2403
2404 static int
2405 kdbg_copyin_typefilter(user_addr_t addr, size_t size)
2406 {
2407 int ret = ENOMEM;
2408 typefilter_t tf;
2409
2410 ktrace_assert_lock_held();
2411
2412 if (size != KDBG_TYPEFILTER_BITMAP_SIZE) {
2413 return EINVAL;
2414 }
2415
2416 if ((tf = typefilter_create())) {
2417 if ((ret = copyin(addr, tf, KDBG_TYPEFILTER_BITMAP_SIZE)) == 0) {
2418 /* The kernel typefilter must always allow DBG_TRACE */
2419 typefilter_allow_class(tf, DBG_TRACE);
2420
2421 /*
2422 * If this is the first typefilter, claim it.
2423 * Otherwise copy and deallocate.
2424 *
2425 * Allocating a typefilter for the copyin allows
2426 * the kernel to hold the invariant that DBG_TRACE
2427 * must always be allowed.
2428 */
2429 if (!kdbg_typefilter) {
2430 if ((ret = kdbg_initialize_typefilter(tf))) {
2431 return ret;
2432 }
2433 tf = NULL;
2434 } else {
2435 typefilter_copy(kdbg_typefilter, tf);
2436 }
2437
2438 kdbg_enable_typefilter();
2439 kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_TYPEFILTER_CHANGED, kdbg_typefilter);
2440 }
2441
2442 if (tf)
2443 typefilter_deallocate(tf);
2444 }
2445
2446 return ret;
2447 }
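
/*
 * Sketch of the payload this routine expects from user space (illustrative,
 * assuming the typefilter remains a flat bitmap of KDBG_TYPEFILTER_BITMAP_SIZE
 * bytes with one bit per class/subclass pair, and the usual oldp/oldlenp
 * plumbing that routes KERN_KDSET_TYPEFILTER into kdbg_control() below):
 *
 *	#include <sys/sysctl.h>
 *	#include <sys/kdebug.h>
 *
 *	uint8_t filter[KDBG_TYPEFILTER_BITMAP_SIZE] = { 0 };
 *	uint16_t csc = (DBG_MACH << 8) | 0x01;	// hypothetical class/subclass
 *	filter[csc / 8] |= (uint8_t)(1 << (csc % 8));
 *
 *	int mib[3] = { CTL_KERN, KERN_KDEBUG, KERN_KDSET_TYPEFILTER };
 *	size_t size = sizeof(filter);
 *	sysctl(mib, 3, filter, &size, NULL, 0);
 *
 * DBG_TRACE is forced on above, so the kernel's own trace events always pass
 * the filter regardless of what the caller supplies.
 */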
2448
2449 /*
2450 * Enable the flags in the control page for the typefilter. Assumes that
2451 * kdbg_typefilter has already been allocated, so events being written
2452 * don't see a bad typefilter.
2453 */
2454 static void
2455 kdbg_enable_typefilter(void)
2456 {
2457 assert(kdbg_typefilter);
2458 kd_ctrl_page.kdebug_flags &= ~(KDBG_RANGECHECK | KDBG_VALCHECK);
2459 kd_ctrl_page.kdebug_flags |= KDBG_TYPEFILTER_CHECK;
2460 kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
2461 commpage_update_kdebug_state();
2462 }
2463
2464 /*
2465 * Disable the flags in the control page for the typefilter. The typefilter
2466 * may be safely deallocated shortly after this function returns.
2467 */
2468 static void
2469 kdbg_disable_typefilter(void)
2470 {
2471 kd_ctrl_page.kdebug_flags &= ~KDBG_TYPEFILTER_CHECK;
2472
2473 if ((kd_ctrl_page.kdebug_flags & (KDBG_PIDCHECK | KDBG_PIDEXCLUDE))) {
2474 kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
2475 } else {
2476 kdbg_set_flags(SLOW_CHECKS, 0, FALSE);
2477 }
2478 commpage_update_kdebug_state();
2479 }
2480
2481 uint32_t
2482 kdebug_commpage_state(void)
2483 {
2484 if (kdebug_enable) {
2485 if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) {
2486 return KDEBUG_COMMPAGE_ENABLE_TYPEFILTER | KDEBUG_COMMPAGE_ENABLE_TRACE;
2487 }
2488
2489 return KDEBUG_COMMPAGE_ENABLE_TRACE;
2490 }
2491
2492 return 0;
2493 }
2494
2495 int
2496 kdbg_setreg(kd_regtype * kdr)
2497 {
2498 int ret=0;
2499 unsigned int val_1, val_2, val;
2500 switch (kdr->type) {
2501
2502 case KDBG_CLASSTYPE :
2503 val_1 = (kdr->value1 & 0xff);
2504 val_2 = (kdr->value2 & 0xff);
2505 kdlog_beg = (val_1<<24);
2506 kdlog_end = (val_2<<24);
2507 kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
2508 kd_ctrl_page.kdebug_flags &= ~KDBG_VALCHECK; /* Turn off specific value check */
2509 kd_ctrl_page.kdebug_flags |= (KDBG_RANGECHECK | KDBG_CLASSTYPE);
2510 kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
2511 break;
2512 case KDBG_SUBCLSTYPE :
2513 val_1 = (kdr->value1 & 0xff);
2514 val_2 = (kdr->value2 & 0xff);
2515 val = val_2 + 1;
2516 kdlog_beg = ((val_1<<24) | (val_2 << 16));
2517 kdlog_end = ((val_1<<24) | (val << 16));
2518 kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
2519 kd_ctrl_page.kdebug_flags &= ~KDBG_VALCHECK; /* Turn off specific value check */
2520 kd_ctrl_page.kdebug_flags |= (KDBG_RANGECHECK | KDBG_SUBCLSTYPE);
2521 kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
2522 break;
2523 case KDBG_RANGETYPE :
2524 kdlog_beg = (kdr->value1);
2525 kdlog_end = (kdr->value2);
2526 kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
2527 kd_ctrl_page.kdebug_flags &= ~KDBG_VALCHECK; /* Turn off specific value check */
2528 kd_ctrl_page.kdebug_flags |= (KDBG_RANGECHECK | KDBG_RANGETYPE);
2529 kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
2530 break;
2531 case KDBG_VALCHECK:
2532 kdlog_value1 = (kdr->value1);
2533 kdlog_value2 = (kdr->value2);
2534 kdlog_value3 = (kdr->value3);
2535 kdlog_value4 = (kdr->value4);
2536 kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
2537 kd_ctrl_page.kdebug_flags &= ~KDBG_RANGECHECK; /* Turn off range check */
2538 kd_ctrl_page.kdebug_flags |= KDBG_VALCHECK; /* Turn on specific value check */
2539 kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
2540 break;
2541 case KDBG_TYPENONE :
2542 kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
2543
2544 if ( (kd_ctrl_page.kdebug_flags & (KDBG_RANGECHECK | KDBG_VALCHECK |
2545 KDBG_PIDCHECK | KDBG_PIDEXCLUDE |
2546 KDBG_TYPEFILTER_CHECK)) )
2547 kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
2548 else
2549 kdbg_set_flags(SLOW_CHECKS, 0, FALSE);
2550
2551 kdlog_beg = 0;
2552 kdlog_end = 0;
2553 break;
2554 default :
2555 ret = EINVAL;
2556 break;
2557 }
2558 return(ret);
2559 }
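
/*
 * Worked example of the encodings above (illustrative values): debugids are
 * laid out as class(8) | subclass(8) | code(14) | function(2), so a
 * KDBG_SUBCLSTYPE request for class 0x01, subclass 0x34 produces the
 * half-open range kdlog_beg = 0x01340000, kdlog_end = 0x01350000, i.e.
 * every code within that single class/subclass pair.
 *
 *	kd_regtype kr = {
 *		.type   = KDBG_SUBCLSTYPE,
 *		.value1 = 0x01,		// class
 *		.value2 = 0x34,		// subclass
 *	};
 *	kdbg_setreg(&kr);
 */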
2560
2561 static int
2562 kdbg_write_to_vnode(caddr_t buffer, size_t size, vnode_t vp, vfs_context_t ctx, off_t file_offset)
2563 {
2564 return vn_rdwr(UIO_WRITE, vp, buffer, size, file_offset, UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT,
2565 vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
2566 }
2567
2568 int
2569 kdbg_write_v3_chunk_header(user_addr_t buffer, uint32_t tag, uint32_t sub_tag, uint64_t length, vnode_t vp, vfs_context_t ctx)
2570 {
2571 int ret = KERN_SUCCESS;
2572 kd_chunk_header_v3 header = {
2573 .tag = tag,
2574 .sub_tag = sub_tag,
2575 .length = length,
2576 };
2577
2578 // Check that only one of them is valid
2579 assert(!buffer ^ !vp);
2580 assert((vp == NULL) || (ctx != NULL));
2581
2582 // Write the chunk header to the vnode or copy it out to the user buffer
2583 if (buffer || vp) {
2584 if (vp) {
2585 ret = kdbg_write_to_vnode((caddr_t)&header, sizeof(kd_chunk_header_v3), vp, ctx, RAW_file_offset);
2586 if (ret) {
2587 goto write_error;
2588 }
2589 RAW_file_offset += (sizeof(kd_chunk_header_v3));
2590 }
2591 else {
2592 ret = copyout(&header, buffer, sizeof(kd_chunk_header_v3));
2593 if (ret) {
2594 goto write_error;
2595 }
2596 }
2597 }
2598 write_error:
2599 return ret;
2600 }
2601
2602 int
2603 kdbg_write_v3_chunk_header_to_buffer(void * buffer, uint32_t tag, uint32_t sub_tag, uint64_t length)
2604 {
2605 kd_chunk_header_v3 header = {
2606 .tag = tag,
2607 .sub_tag = sub_tag,
2608 .length = length,
2609 };
2610
2611 if (!buffer) {
2612 return 0;
2613 }
2614
2615 memcpy(buffer, &header, sizeof(kd_chunk_header_v3));
2616
2617 return (sizeof(kd_chunk_header_v3));
2618 }
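
/*
 * For orientation, the v3 stream assembled by the writers in this file is a
 * sequence of chunks, each introduced by the fixed-size kd_chunk_header_v3
 * above (a sketch derived from kdbg_write_v3_header() and kdbg_read() below;
 * sys/kdebug.h has the authoritative description):
 *
 *	[tag RAW_VERSION3,  sub_tag V3_HEADER_VERSION] kd_header_v3 body
 *	[tag V3_CPU_MAP,    sub_tag V3_CPUMAP_VERSION] kd_cpumap entries
 *	[tag V3_THREAD_MAP, sub_tag V3_THRMAP_VERSION] kd_threadmap entries
 *	[tag V3_RAW_EVENTS, ...                      ] 8-byte timestamp
 *	                                               placeholder + kd_bufs
 */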
2619
2620 int
2621 kdbg_write_v3_chunk_to_fd(uint32_t tag, uint32_t sub_tag, uint64_t length, void *payload, uint64_t payload_size, int fd)
2622 {
2623 proc_t p;
2624 struct vfs_context context;
2625 struct fileproc *fp;
2626 vnode_t vp;
2627 p = current_proc();
2628
2629 proc_fdlock(p);
2630 if ( (fp_lookup(p, fd, &fp, 1)) ) {
2631 proc_fdunlock(p);
2632 return EFAULT;
2633 }
2634
2635 context.vc_thread = current_thread();
2636 context.vc_ucred = fp->f_fglob->fg_cred;
2637
2638 if (FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_VNODE) {
2639 fp_drop(p, fd, fp, 1);
2640 proc_fdunlock(p);
2641 return EBADF;
2642 }
2643 vp = (struct vnode *) fp->f_fglob->fg_data;
2644 proc_fdunlock(p);
2645
2646 if ( (vnode_getwithref(vp)) == 0 ) {
2647 RAW_file_offset = fp->f_fglob->fg_offset;
2648
2649 kd_chunk_header_v3 chunk_header = {
2650 .tag = tag,
2651 .sub_tag = sub_tag,
2652 .length = length,
2653 };
2654
2655 int ret = kdbg_write_to_vnode((caddr_t) &chunk_header, sizeof(kd_chunk_header_v3), vp, &context, RAW_file_offset);
2656 if (!ret) {
2657 RAW_file_offset += sizeof(kd_chunk_header_v3);
2658 }
2659
2660 ret = kdbg_write_to_vnode((caddr_t) payload, (size_t) payload_size, vp, &context, RAW_file_offset);
2661 if (!ret) {
2662 RAW_file_offset += payload_size;
2663 }
2664
2665 fp->f_fglob->fg_offset = RAW_file_offset;
2666 vnode_put(vp);
2667 }
2668
2669 fp_drop(p, fd, fp, 0);
2670 return KERN_SUCCESS;
2671 }
2672
2673 user_addr_t
2674 kdbg_write_v3_event_chunk_header(user_addr_t buffer, uint32_t tag, uint64_t length, vnode_t vp, vfs_context_t ctx)
2675 {
2676 uint64_t future_chunk_timestamp = 0;
2677 length += sizeof(uint64_t);
2678
2679 if (kdbg_write_v3_chunk_header(buffer, tag, V3_EVENT_DATA_VERSION, length, vp, ctx)) {
2680 return 0;
2681 }
2682 if (buffer) {
2683 buffer += sizeof(kd_chunk_header_v3);
2684 }
2685
2686 // Check that only one of them is valid
2687 assert(!buffer ^ !vp);
2688 assert((vp == NULL) || (ctx != NULL));
2689
2690 // Write the 8-byte future_chunk_timestamp field in the payload
2691 if (buffer || vp) {
2692 if (vp) {
2693 int ret = kdbg_write_to_vnode((caddr_t)&future_chunk_timestamp, sizeof(uint64_t), vp, ctx, RAW_file_offset);
2694 if (!ret) {
2695 RAW_file_offset += (sizeof(uint64_t));
2696 }
2697 }
2698 else {
2699 if (copyout(&future_chunk_timestamp, buffer, sizeof(uint64_t))) {
2700 return 0;
2701 }
2702 }
2703 }
2704
2705 return (buffer + sizeof(uint64_t));
2706 }
2707
2708 int
2709 kdbg_write_v3_header(user_addr_t user_header, size_t *user_header_size, int fd)
2710 {
2711 int ret = KERN_SUCCESS;
2712
2713 uint8_t* cpumap = 0;
2714 uint32_t cpumap_size = 0;
2715 uint32_t thrmap_size = 0;
2716
2717 size_t bytes_needed = 0;
2718
2719 // Check that only one of them is valid
2720 assert(!user_header ^ !fd);
2721 assert(user_header_size);
2722
2723 if ( !(kd_ctrl_page.kdebug_flags & KDBG_BUFINIT) ) {
2724 ret = EINVAL;
2725 goto bail;
2726 }
2727
2728 if ( !(user_header || fd) ) {
2729 ret = EINVAL;
2730 goto bail;
2731 }
2732
2733 // Initialize the cpu map
2734 ret = kdbg_cpumap_init_internal(kd_ctrl_page.kdebug_iops, kd_ctrl_page.kdebug_cpus, &cpumap, &cpumap_size);
2735 if (ret != KERN_SUCCESS) {
2736 goto bail;
2737 }
2738
2739 // Check if a thread map is initialized
2740 if ( !kd_mapptr ) {
2741 ret = EINVAL;
2742 goto bail;
2743 }
2744 thrmap_size = kd_mapcount * sizeof(kd_threadmap);
2745
2746 mach_timebase_info_data_t timebase = {0, 0};
2747 clock_timebase_info(&timebase);
2748
2749 // Set up the header.
2750 // See the v3 header description in sys/kdebug.h for more information.
2751 kd_header_v3 header = {
2752 .tag = RAW_VERSION3,
2753 .sub_tag = V3_HEADER_VERSION,
2754 .length = (sizeof(kd_header_v3) + cpumap_size - sizeof(kd_cpumap_header)),
2755 .timebase_numer = timebase.numer,
2756 .timebase_denom = timebase.denom,
2757 .timestamp = 0, /* FIXME rdar://problem/22053009 */
2758 .walltime_secs = 0,
2759 .walltime_usecs = 0,
2760 .timezone_minuteswest = 0,
2761 .timezone_dst = 0,
2762 #if defined(__LP64__)
2763 .flags = 1,
2764 #else
2765 .flags = 0,
2766 #endif
2767 };
2768
2769 // If it's a buffer, check if we have enough space to copy the header and the maps.
2770 if (user_header) {
2771 bytes_needed = header.length + thrmap_size + (2 * sizeof(kd_chunk_header_v3));
2772 if (*user_header_size < bytes_needed) {
2773 ret = EINVAL;
2774 goto bail;
2775 }
2776 }
2777
2778 // Start writing the header
2779 if (fd) {
2780 void *hdr_ptr = (void *)(((uintptr_t) &header) + sizeof(kd_chunk_header_v3));
2781 size_t payload_size = (sizeof(kd_header_v3) - sizeof(kd_chunk_header_v3));
2782
2783 ret = kdbg_write_v3_chunk_to_fd(RAW_VERSION3, V3_HEADER_VERSION, header.length, hdr_ptr, payload_size, fd);
2784 if (ret) {
2785 goto bail;
2786 }
2787 }
2788 else {
2789 if (copyout(&header, user_header, sizeof(kd_header_v3))) {
2790 ret = EFAULT;
2791 goto bail;
2792 }
2793 // Update the user pointer
2794 user_header += sizeof(kd_header_v3);
2795 }
2796
2797 // Write a cpu map. This is a sub chunk of the header
2798 cpumap = (uint8_t*)((uintptr_t) cpumap + sizeof(kd_cpumap_header));
2799 size_t payload_size = (size_t)(cpumap_size - sizeof(kd_cpumap_header));
2800 if (fd) {
2801 ret = kdbg_write_v3_chunk_to_fd(V3_CPU_MAP, V3_CPUMAP_VERSION, payload_size, (void *)cpumap, payload_size, fd);
2802 if (ret) {
2803 goto bail;
2804 }
2805 }
2806 else {
2807 ret = kdbg_write_v3_chunk_header(user_header, V3_CPU_MAP, V3_CPUMAP_VERSION, payload_size, NULL, NULL);
2808 if (ret) {
2809 goto bail;
2810 }
2811 user_header += sizeof(kd_chunk_header_v3);
2812 if (copyout(cpumap, user_header, payload_size)) {
2813 ret = EFAULT;
2814 goto bail;
2815 }
2816 // Update the user pointer
2817 user_header += payload_size;
2818 }
2819
2820 // Write a thread map
2821 if (fd) {
2822 ret = kdbg_write_v3_chunk_to_fd(V3_THREAD_MAP, V3_THRMAP_VERSION, thrmap_size, (void *)kd_mapptr, thrmap_size, fd);
2823 if (ret) {
2824 goto bail;
2825 }
2826 }
2827 else {
2828 ret = kdbg_write_v3_chunk_header(user_header, V3_THREAD_MAP, V3_THRMAP_VERSION, thrmap_size, NULL, NULL);
2829 if (ret) {
2830 goto bail;
2831 }
2832 user_header += sizeof(kd_chunk_header_v3);
2833 if (copyout(kd_mapptr, user_header, thrmap_size)) {
2834 ret = EFAULT;
2835 goto bail;
2836 }
2837 user_header += thrmap_size;
2838 }
2839
2840 if (fd) {
2841 RAW_file_written += bytes_needed;
2842 }
2843
2844 *user_header_size = bytes_needed;
2845 bail:
2846 if (cpumap) {
2847 kmem_free(kernel_map, (vm_offset_t)cpumap, cpumap_size);
2848 }
2849 return (ret);
2850 }
2851
2852 int
2853 kdbg_readcpumap(user_addr_t user_cpumap, size_t *user_cpumap_size)
2854 {
2855 uint8_t* cpumap = NULL;
2856 uint32_t cpumap_size = 0;
2857 int ret = KERN_SUCCESS;
2858
2859 if (kd_ctrl_page.kdebug_flags & KDBG_BUFINIT) {
2860 if (kdbg_cpumap_init_internal(kd_ctrl_page.kdebug_iops, kd_ctrl_page.kdebug_cpus, &cpumap, &cpumap_size) == KERN_SUCCESS) {
2861 if (user_cpumap) {
2862 size_t bytes_to_copy = (*user_cpumap_size >= cpumap_size) ? cpumap_size : *user_cpumap_size;
2863 if (copyout(cpumap, user_cpumap, (size_t)bytes_to_copy)) {
2864 ret = EFAULT;
2865 }
2866 }
2867 *user_cpumap_size = cpumap_size;
2868 kmem_free(kernel_map, (vm_offset_t)cpumap, cpumap_size);
2869 } else
2870 ret = EINVAL;
2871 } else
2872 ret = EINVAL;
2873
2874 return (ret);
2875 }
2876
2877 int
2878 kdbg_readcurthrmap(user_addr_t buffer, size_t *bufsize)
2879 {
2880 kd_threadmap *mapptr;
2881 unsigned int mapsize;
2882 unsigned int mapcount;
2883 unsigned int count = 0;
2884 int ret = 0;
2885
2886 count = *bufsize/sizeof(kd_threadmap);
2887 *bufsize = 0;
2888
2889 if ( (mapptr = kdbg_thrmap_init_internal(count, &mapsize, &mapcount)) ) {
2890 if (copyout(mapptr, buffer, mapcount * sizeof(kd_threadmap)))
2891 ret = EFAULT;
2892 else
2893 *bufsize = (mapcount * sizeof(kd_threadmap));
2894
2895 kmem_free(kernel_map, (vm_offset_t)mapptr, mapsize);
2896 } else
2897 ret = EINVAL;
2898
2899 return (ret);
2900 }
2901
2902 static int
2903 kdbg_write_v1_header(boolean_t write_thread_map, vnode_t vp, vfs_context_t ctx)
2904 {
2905 int ret = 0;
2906 RAW_header header;
2907 clock_sec_t secs;
2908 clock_usec_t usecs;
2909 char *pad_buf;
2910 uint32_t pad_size;
2911 uint32_t extra_thread_count = 0;
2912 uint32_t cpumap_size;
2913 size_t map_size = 0;
2914 size_t map_count = 0;
2915
2916 if (write_thread_map) {
2917 assert(kd_ctrl_page.kdebug_flags & KDBG_MAPINIT);
2918 map_count = kd_mapcount;
2919 map_size = map_count * sizeof(kd_threadmap);
2920 }
2921
2922 /*
2923 * Without the buffers initialized, we cannot construct a CPU map or a
2924 * thread map, and cannot write a header.
2925 */
2926 if (!(kd_ctrl_page.kdebug_flags & KDBG_BUFINIT)) {
2927 return EINVAL;
2928 }
2929
2930 /*
2931 * To write a RAW_VERSION1+ file, we must embed a cpumap in the
2932 * "padding" used to page align the events following the threadmap. If
2933 * the threadmap happens to not require enough padding, we artificially
2934 * increase its footprint until it needs enough padding.
2935 */
2936
2937 assert(vp);
2938 assert(ctx);
2939
2940 pad_size = PAGE_16KB - ((sizeof(RAW_header) + map_size) & PAGE_MASK_64);
2941 cpumap_size = sizeof(kd_cpumap_header) + kd_ctrl_page.kdebug_cpus * sizeof(kd_cpumap);
2942
2943 if (cpumap_size > pad_size) {
2944 /* If the cpu map doesn't fit in the current available pad_size,
2945 * we increase the pad_size by 16K. We do this so that the event
2946 * data is always available on a page aligned boundary for both
2947 * 4k and 16k systems. We enforce this alignment for the event
2948 * data so that we can take advantage of optimized file/disk writes.
2949 */
2950 pad_size += PAGE_16KB;
2951 }
2952
2953 /* The way we are silently embedding a cpumap in the "padding" is by artificially
2954 * increasing the number of thread entries. However, we'll also need to ensure that
2955 * the cpumap is embedded in the last 4K page before the event data is expected.
2956 * This way the tools can read the data starting at the next page boundary on both
2957 * 4K and 16K systems, preserving compatibility with older versions of the tools.
2958 */
2959 if (pad_size > PAGE_4KB) {
2960 pad_size -= PAGE_4KB;
2961 extra_thread_count = (pad_size / sizeof(kd_threadmap)) + 1;
2962 }
2963
2964 memset(&header, 0, sizeof(header));
2965 header.version_no = RAW_VERSION1;
2966 header.thread_count = map_count + extra_thread_count;
2967
2968 clock_get_calendar_microtime(&secs, &usecs);
2969 header.TOD_secs = secs;
2970 header.TOD_usecs = usecs;
2971
2972 ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)&header, sizeof(RAW_header), RAW_file_offset,
2973 UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
2974 if (ret) {
2975 goto write_error;
2976 }
2977 RAW_file_offset += sizeof(RAW_header);
2978 RAW_file_written += sizeof(RAW_header);
2979
2980 if (write_thread_map) {
2981 ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)kd_mapptr, map_size, RAW_file_offset,
2982 UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
2983 if (ret) {
2984 goto write_error;
2985 }
2986
2987 RAW_file_offset += map_size;
2988 RAW_file_written += map_size;
2989 }
2990
2991 if (extra_thread_count) {
2992 pad_size = extra_thread_count * sizeof(kd_threadmap);
2993 pad_buf = kalloc(pad_size);
2994 if (!pad_buf) {
2995 ret = ENOMEM;
2996 goto write_error;
2997 }
2998 memset(pad_buf, 0, pad_size);
2999
3000 ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)pad_buf, pad_size, RAW_file_offset,
3001 UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
3002 kfree(pad_buf, pad_size);
3003 if (ret) {
3004 goto write_error;
3005 }
3006
3007 RAW_file_offset += pad_size;
3008 RAW_file_written += pad_size;
3009 }
3010
3011 pad_size = PAGE_SIZE - (RAW_file_offset & PAGE_MASK_64);
3012 if (pad_size) {
3013 pad_buf = (char *)kalloc(pad_size);
3014 if (!pad_buf) {
3015 ret = ENOMEM;
3016 goto write_error;
3017 }
3018 memset(pad_buf, 0, pad_size);
3019
3020 /*
3021 * embed a cpumap in the padding bytes.
3022 * older code will skip this.
3023 * newer code will know how to read it.
3024 */
3025 uint32_t temp = pad_size;
3026 if (kdbg_cpumap_init_internal(kd_ctrl_page.kdebug_iops, kd_ctrl_page.kdebug_cpus, (uint8_t**)&pad_buf, &temp) != KERN_SUCCESS) {
3027 memset(pad_buf, 0, pad_size);
3028 }
3029
3030 ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)pad_buf, pad_size, RAW_file_offset,
3031 UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
3032 kfree(pad_buf, pad_size);
3033 if (ret) {
3034 goto write_error;
3035 }
3036
3037 RAW_file_offset += pad_size;
3038 RAW_file_written += pad_size;
3039 }
3040
3041 write_error:
3042 return ret;
3043 }
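
/*
 * Worked example of the padding scheme above (hypothetical sizes, for
 * illustration only): suppose sizeof(RAW_header) + map_size comes to 33000
 * bytes. Then
 *
 *	pad_size = PAGE_16KB - (33000 & PAGE_MASK_64)
 *	         = 16384 - 232 = 16152
 *
 * If the cpumap fits in that, nothing grows; otherwise pad_size gains another
 * 16 KB. All but the trailing 4 KB of the padding is then re-expressed as
 * zeroed extra thread-map entries, so the cpumap ends up in the final stretch
 * of padding immediately before the page-aligned event data, which older
 * readers simply skip over.
 */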
3044
3045 static void
3046 kdbg_clear_thread_map(void)
3047 {
3048 ktrace_assert_lock_held();
3049
3050 if (kd_ctrl_page.kdebug_flags & KDBG_MAPINIT) {
3051 assert(kd_mapptr != NULL);
3052 kmem_free(kernel_map, (vm_offset_t)kd_mapptr, kd_mapsize);
3053 kd_mapptr = NULL;
3054 kd_mapsize = 0;
3055 kd_mapcount = 0;
3056 kd_ctrl_page.kdebug_flags &= ~KDBG_MAPINIT;
3057 }
3058 }
3059
3060 /*
3061 * Write out a version 1 header and the thread map, if it is initialized, to a
3062 * vnode. Used by KDWRITEMAP and kdbg_dump_trace_to_file.
3063 *
3064 * Returns write errors from vn_rdwr if a write fails. Returns ENODATA if the
3065 * thread map has not been initialized, but the header will still be written.
3066 * Returns ENOMEM if padding could not be allocated. Returns 0 otherwise.
3067 */
3068 static int
3069 kdbg_write_thread_map(vnode_t vp, vfs_context_t ctx)
3070 {
3071 int ret = 0;
3072 boolean_t map_initialized;
3073
3074 ktrace_assert_lock_held();
3075 assert(ctx != NULL);
3076
3077 map_initialized = (kd_ctrl_page.kdebug_flags & KDBG_MAPINIT);
3078
3079 ret = kdbg_write_v1_header(map_initialized, vp, ctx);
3080 if (ret == 0) {
3081 if (map_initialized) {
3082 kdbg_clear_thread_map();
3083 } else {
3084 ret = ENODATA;
3085 }
3086 }
3087
3088 return ret;
3089 }
3090
3091 /*
3092 * Copy out the thread map to a user space buffer. Used by KDTHRMAP.
3093 *
3094 * Returns copyout errors if the copyout fails. Returns ENODATA if the thread
3095 * map has not been initialized. Returns EINVAL if the buffer provided is not
3096 * large enough for the entire thread map. Returns 0 otherwise.
3097 */
3098 static int
3099 kdbg_copyout_thread_map(user_addr_t buffer, size_t *buffer_size)
3100 {
3101 boolean_t map_initialized;
3102 size_t map_size;
3103 int ret = 0;
3104
3105 ktrace_assert_lock_held();
3106 assert(buffer_size != NULL);
3107
3108 map_initialized = (kd_ctrl_page.kdebug_flags & KDBG_MAPINIT);
3109 if (!map_initialized) {
3110 return ENODATA;
3111 }
3112
3113 map_size = kd_mapcount * sizeof(kd_threadmap);
3114 if (*buffer_size < map_size) {
3115 return EINVAL;
3116 }
3117
3118 ret = copyout(kd_mapptr, buffer, map_size);
3119 if (ret == 0) {
3120 kdbg_clear_thread_map();
3121 }
3122
3123 return ret;
3124 }
3125
3126 int
3127 kdbg_readthrmap_v3(user_addr_t buffer, size_t buffer_size, int fd)
3128 {
3129 int ret = 0;
3130 boolean_t map_initialized;
3131 size_t map_size;
3132
3133 ktrace_assert_lock_held();
3134
3135 if ((!fd && !buffer) || (fd && buffer)) {
3136 return EINVAL;
3137 }
3138
3139 map_initialized = (kd_ctrl_page.kdebug_flags & KDBG_MAPINIT);
3140 map_size = kd_mapcount * sizeof(kd_threadmap);
3141
3142 if (map_initialized && (buffer_size >= map_size))
3143 {
3144 ret = kdbg_write_v3_header(buffer, &buffer_size, fd);
3145
3146 if (ret == 0) {
3147 kdbg_clear_thread_map();
3148 }
3149 } else {
3150 ret = EINVAL;
3151 }
3152
3153 return ret;
3154 }
3155
3156 static void
3157 kdbg_set_nkdbufs(unsigned int value)
3158 {
3159 /*
3160 * We allow a maximum buffer size of 50% of either RAM or the max mapped
3161 * address, whichever is smaller. 'value' is the desired number of trace
3162 * entries.
3163 */
3164 unsigned int max_entries = (sane_size / 2) / sizeof(kd_buf);
3165
3166 if (value <= max_entries) {
3167 nkdbufs = value;
3168 } else {
3169 nkdbufs = max_entries;
3170 }
3171 }
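
/*
 * Quick arithmetic check of the clamp above (hypothetical figures, assuming
 * the 64-byte LP64 kd_buf): with sane_size == 8 GB,
 *
 *	max_entries = (8 GB / 2) / 64 bytes = 67,108,864 events
 *
 * so a caller asking KERN_KDSETBUF for more than ~67 million events is
 * silently clamped to that ceiling.
 */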
3172
3173 /*
3174 * Block until there are `n_storage_threshold` storage units filled with
3175 * events or `timeout_ms` milliseconds have passed. If `locked_wait` is true,
3176 * `ktrace_lock` is held while waiting. This is necessary while waiting to
3177 * write events out of the buffers.
3178 *
3179 * Returns true if the threshold was reached and false otherwise.
3180 *
3181 * Called with `ktrace_lock` locked and interrupts enabled.
3182 */
3183 static boolean_t
3184 kdbg_wait(uint64_t timeout_ms, boolean_t locked_wait)
3185 {
3186 int wait_result = THREAD_AWAKENED;
3187 uint64_t abstime = 0;
3188
3189 ktrace_assert_lock_held();
3190
3191 if (timeout_ms != 0) {
3192 uint64_t ns = timeout_ms * NSEC_PER_MSEC;
3193 nanoseconds_to_absolutetime(ns, &abstime);
3194 clock_absolutetime_interval_to_deadline(abstime, &abstime);
3195 }
3196
3197 boolean_t s = ml_set_interrupts_enabled(FALSE);
3198 if (!s) {
3199 panic("kdbg_wait() called with interrupts disabled");
3200 }
3201 lck_spin_lock(kdw_spin_lock);
3202
3203 if (!locked_wait) {
3204 /* drop the mutex to allow others to access trace */
3205 ktrace_unlock();
3206 }
3207
3208 while (wait_result == THREAD_AWAKENED &&
3209 kd_ctrl_page.kds_inuse_count < n_storage_threshold)
3210 {
3211 kds_waiter = 1;
3212
3213 if (abstime) {
3214 wait_result = lck_spin_sleep_deadline(kdw_spin_lock, 0, &kds_waiter, THREAD_ABORTSAFE, abstime);
3215 } else {
3216 wait_result = lck_spin_sleep(kdw_spin_lock, 0, &kds_waiter, THREAD_ABORTSAFE);
3217 }
3218
3219 kds_waiter = 0;
3220 }
3221
3222 /* check the count under the spinlock */
3223 boolean_t threshold_exceeded = (kd_ctrl_page.kds_inuse_count >= n_storage_threshold);
3224
3225 lck_spin_unlock(kdw_spin_lock);
3226 ml_set_interrupts_enabled(s);
3227
3228 if (!locked_wait) {
3229 /* pick the mutex back up again */
3230 ktrace_lock();
3231 }
3232
3233 /* write out whether we've exceeded the threshold */
3234 return threshold_exceeded;
3235 }
3236
3237 /*
3238 * Wakeup a thread waiting using `kdbg_wait` if there are at least
3239 * `n_storage_threshold` storage units in use.
3240 */
3241 static void
3242 kdbg_wakeup(void)
3243 {
3244 boolean_t need_kds_wakeup = FALSE;
3245
3246 /*
3247 * Try to take the lock here to synchronize with the waiter entering
3248 * the blocked state. Use the try mode to prevent deadlocks caused by
3249 * re-entering this routine due to various trace points triggered in the
3250 * lck_spin_sleep_xxxx routines used to actually enter one of our 2 wait
3251 * conditions. No problem if we fail, there will be lots of additional
3252 * events coming in that will eventually succeed in grabbing this lock.
3253 */
3254 boolean_t s = ml_set_interrupts_enabled(FALSE);
3255
3256 if (lck_spin_try_lock(kdw_spin_lock)) {
3257 if (kds_waiter &&
3258 (kd_ctrl_page.kds_inuse_count >= n_storage_threshold))
3259 {
3260 kds_waiter = 0;
3261 need_kds_wakeup = TRUE;
3262 }
3263 lck_spin_unlock(kdw_spin_lock);
3264 }
3265
3266 ml_set_interrupts_enabled(s);
3267
3268 if (need_kds_wakeup == TRUE) {
3269 wakeup(&kds_waiter);
3270 }
3271 }
3272
3273 int
3274 kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep)
3275 {
3276 int ret = 0;
3277 size_t size = *sizep;
3278 unsigned int value = 0;
3279 kd_regtype kd_Reg;
3280 kbufinfo_t kd_bufinfo;
3281 proc_t p;
3282
3283 if (name[0] == KERN_KDWRITETR ||
3284 name[0] == KERN_KDWRITETR_V3 ||
3285 name[0] == KERN_KDWRITEMAP ||
3286 name[0] == KERN_KDWRITEMAP_V3 ||
3287 name[0] == KERN_KDEFLAGS ||
3288 name[0] == KERN_KDDFLAGS ||
3289 name[0] == KERN_KDENABLE ||
3290 name[0] == KERN_KDSETBUF)
3291 {
3292 if (namelen < 2) {
3293 return EINVAL;
3294 }
3295 value = name[1];
3296 }
3297
3298 kdbg_lock_init();
3299 assert(kd_ctrl_page.kdebug_flags & KDBG_LOCKINIT);
3300
3301 ktrace_lock();
3302
3303 /*
3304 * Some requests only require "read" access to kdebug trace. Regardless,
3305 * tell ktrace that a configuration or read is occurring (and see if it's
3306 * allowed).
3307 */
3308 if (name[0] != KERN_KDGETBUF &&
3309 name[0] != KERN_KDGETREG &&
3310 name[0] != KERN_KDREADCURTHRMAP)
3311 {
3312 if ((ret = ktrace_configure(KTRACE_KDEBUG))) {
3313 goto out;
3314 }
3315 } else {
3316 if ((ret = ktrace_read_check())) {
3317 goto out;
3318 }
3319 }
3320
3321 switch (name[0]) {
3322 case KERN_KDGETBUF:
3323 if (size < sizeof(kd_bufinfo.nkdbufs)) {
3324 /*
3325 * There is not enough room to return even
3326 * the first element of the info structure.
3327 */
3328 ret = EINVAL;
3329 break;
3330 }
3331
3332 memset(&kd_bufinfo, 0, sizeof(kd_bufinfo));
3333
3334 kd_bufinfo.nkdbufs = nkdbufs;
3335 kd_bufinfo.nkdthreads = kd_mapcount;
3336
3337 if ( (kd_ctrl_page.kdebug_slowcheck & SLOW_NOLOG) )
3338 kd_bufinfo.nolog = 1;
3339 else
3340 kd_bufinfo.nolog = 0;
3341
3342 kd_bufinfo.flags = kd_ctrl_page.kdebug_flags;
3343 #if defined(__LP64__)
3344 kd_bufinfo.flags |= KDBG_LP64;
3345 #endif
3346 {
3347 int pid = ktrace_get_owning_pid();
3348 kd_bufinfo.bufid = (pid == 0 ? -1 : pid);
3349 }
3350
3351 if (size >= sizeof(kd_bufinfo)) {
3352 /*
3353 * Provide all the info we have
3354 */
3355 if (copyout(&kd_bufinfo, where, sizeof(kd_bufinfo)))
3356 ret = EINVAL;
3357 } else {
3358 /*
3359 * For backwards compatibility, only provide
3360 * as much info as there is room for.
3361 */
3362 if (copyout(&kd_bufinfo, where, size))
3363 ret = EINVAL;
3364 }
3365 break;
3366
3367 case KERN_KDREADCURTHRMAP:
3368 ret = kdbg_readcurthrmap(where, sizep);
3369 break;
3370
3371 case KERN_KDEFLAGS:
3372 value &= KDBG_USERFLAGS;
3373 kd_ctrl_page.kdebug_flags |= value;
3374 break;
3375
3376 case KERN_KDDFLAGS:
3377 value &= KDBG_USERFLAGS;
3378 kd_ctrl_page.kdebug_flags &= ~value;
3379 break;
3380
3381 case KERN_KDENABLE:
3382 /*
3383 * Enable tracing mechanism. Two types:
3384 * KDEBUG_TRACE is the standard one,
3385 * and KDEBUG_PPT, which is a carefully
3386 * chosen subset to avoid performance impact.
3387 */
3388 if (value) {
3389 /*
3390 * enable only if buffer is initialized
3391 */
3392 if (!(kd_ctrl_page.kdebug_flags & KDBG_BUFINIT) ||
3393 !(value == KDEBUG_ENABLE_TRACE || value == KDEBUG_ENABLE_PPT)) {
3394 ret = EINVAL;
3395 break;
3396 }
3397 kdbg_thrmap_init();
3398
3399 kdbg_set_tracing_enabled(TRUE, value);
3400 }
3401 else
3402 {
3403 if (!kdebug_enable) {
3404 break;
3405 }
3406
3407 kernel_debug_disable();
3408 }
3409 break;
3410
3411 case KERN_KDSETBUF:
3412 kdbg_set_nkdbufs(value);
3413 break;
3414
3415 case KERN_KDSETUP:
3416 ret = kdbg_reinit(FALSE);
3417 break;
3418
3419 case KERN_KDREMOVE:
3420 ktrace_reset(KTRACE_KDEBUG);
3421 break;
3422
3423 case KERN_KDSETREG:
3424 if (size < sizeof(kd_regtype)) {
3425 ret = EINVAL;
3426 break;
3427 }
3428 if (copyin(where, &kd_Reg, sizeof(kd_regtype))) {
3429 ret = EINVAL;
3430 break;
3431 }
3432
3433 ret = kdbg_setreg(&kd_Reg);
3434 break;
3435
3436 case KERN_KDGETREG:
3437 ret = EINVAL;
3438 break;
3439
3440 case KERN_KDREADTR:
3441 ret = kdbg_read(where, sizep, NULL, NULL, RAW_VERSION1);
3442 break;
3443
3444 case KERN_KDWRITETR:
3445 case KERN_KDWRITETR_V3:
3446 case KERN_KDWRITEMAP:
3447 case KERN_KDWRITEMAP_V3:
3448 {
3449 struct vfs_context context;
3450 struct fileproc *fp;
3451 size_t number;
3452 vnode_t vp;
3453 int fd;
3454
3455 if (name[0] == KERN_KDWRITETR || name[0] == KERN_KDWRITETR_V3) {
3456 (void)kdbg_wait(size, TRUE);
3457 }
3458 p = current_proc();
3459 fd = value;
3460
3461 proc_fdlock(p);
3462 if ( (ret = fp_lookup(p, fd, &fp, 1)) ) {
3463 proc_fdunlock(p);
3464 break;
3465 }
3466 context.vc_thread = current_thread();
3467 context.vc_ucred = fp->f_fglob->fg_cred;
3468
3469 if (FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_VNODE) {
3470 fp_drop(p, fd, fp, 1);
3471 proc_fdunlock(p);
3472
3473 ret = EBADF;
3474 break;
3475 }
3476 vp = (struct vnode *)fp->f_fglob->fg_data;
3477 proc_fdunlock(p);
3478
3479 if ((ret = vnode_getwithref(vp)) == 0) {
3480 RAW_file_offset = fp->f_fglob->fg_offset;
3481 if (name[0] == KERN_KDWRITETR || name[0] == KERN_KDWRITETR_V3) {
3482 number = nkdbufs * sizeof(kd_buf);
3483
3484 KDBG_RELEASE(TRACE_WRITING_EVENTS | DBG_FUNC_START);
3485 if (name[0] == KERN_KDWRITETR_V3)
3486 ret = kdbg_read(0, &number, vp, &context, RAW_VERSION3);
3487 else
3488 ret = kdbg_read(0, &number, vp, &context, RAW_VERSION1);
3489 KDBG_RELEASE(TRACE_WRITING_EVENTS | DBG_FUNC_END, number);
3490
3491 *sizep = number;
3492 } else {
3493 number = kd_mapcount * sizeof(kd_threadmap);
3494 if (name[0] == KERN_KDWRITEMAP_V3) {
3495 ret = kdbg_readthrmap_v3(0, number, fd);
3496 } else {
3497 ret = kdbg_write_thread_map(vp, &context);
3498 }
3499 }
3500 fp->f_fglob->fg_offset = RAW_file_offset;
3501 vnode_put(vp);
3502 }
3503 fp_drop(p, fd, fp, 0);
3504
3505 break;
3506 }
3507 case KERN_KDBUFWAIT:
3508 *sizep = kdbg_wait(size, FALSE);
3509 break;
3510
3511 case KERN_KDPIDTR:
3512 if (size < sizeof(kd_regtype)) {
3513 ret = EINVAL;
3514 break;
3515 }
3516 if (copyin(where, &kd_Reg, sizeof(kd_regtype))) {
3517 ret = EINVAL;
3518 break;
3519 }
3520
3521 ret = kdbg_setpid(&kd_Reg);
3522 break;
3523
3524 case KERN_KDPIDEX:
3525 if (size < sizeof(kd_regtype)) {
3526 ret = EINVAL;
3527 break;
3528 }
3529 if (copyin(where, &kd_Reg, sizeof(kd_regtype))) {
3530 ret = EINVAL;
3531 break;
3532 }
3533
3534 ret = kdbg_setpidex(&kd_Reg);
3535 break;
3536
3537 case KERN_KDCPUMAP:
3538 ret = kdbg_readcpumap(where, sizep);
3539 break;
3540
3541 case KERN_KDTHRMAP:
3542 ret = kdbg_copyout_thread_map(where, sizep);
3543 break;
3544
3545 case KERN_KDSET_TYPEFILTER: {
3546 ret = kdbg_copyin_typefilter(where, size);
3547 break;
3548 }
3549
3550 case KERN_KDTEST:
3551 ret = kdbg_test(size);
3552 break;
3553
3554 default:
3555 ret = EINVAL;
3556 break;
3557 }
3558 out:
3559 ktrace_unlock();
3560
3561 return ret;
3562 }
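
/*
 * Illustrative user-space sequence for driving this interface (a sketch that
 * assumes the long-standing CTL_KERN/KERN_KDEBUG sysctl plumbing dispatches
 * here; error handling omitted):
 *
 *	#include <sys/sysctl.h>
 *	#include <sys/kdebug.h>
 *
 *	int mib[4] = { CTL_KERN, KERN_KDEBUG, 0, 0 };
 *	size_t needed = 0;
 *
 *	mib[2] = KERN_KDSETBUF; mib[3] = 1000000;		// request ~1M events
 *	sysctl(mib, 4, NULL, &needed, NULL, 0);
 *
 *	mib[2] = KERN_KDSETUP;					// allocate the buffers
 *	sysctl(mib, 3, NULL, &needed, NULL, 0);
 *
 *	mib[2] = KERN_KDENABLE; mib[3] = KDEBUG_ENABLE_TRACE;	// start tracing
 *	sysctl(mib, 4, NULL, &needed, NULL, 0);
 *
 *	// ... run the workload, then pull events back out:
 *	kd_buf *events = malloc(1000000 * sizeof(kd_buf));
 *	needed = 1000000 * sizeof(kd_buf);
 *	mib[2] = KERN_KDREADTR;
 *	sysctl(mib, 3, events, &needed, NULL, 0);
 *	// per kdbg_read(), 'needed' now holds the number of kd_buf records
 */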
3563
3564
3565 /*
3566 * This code can run, for the most part, concurrently with kernel_debug_internal()...
3567 * 'release_storage_unit' will take the kds_spin_lock, which may cause us to briefly
3568 * synchronize with the recording side of this puzzle... otherwise, we are able to
3569 * move through the lists without holding any locks.
3570 */
3571 int
3572 kdbg_read(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx, uint32_t file_version)
3573 {
3574 unsigned int count;
3575 unsigned int cpu, min_cpu;
3576 uint64_t barrier_min = 0, barrier_max = 0, t, earliest_time;
3577 int error = 0;
3578 kd_buf *tempbuf;
3579 uint32_t rcursor;
3580 kd_buf lostevent;
3581 union kds_ptr kdsp;
3582 bool traced_retrograde = false;
3583 struct kd_storage *kdsp_actual;
3584 struct kd_bufinfo *kdbp;
3585 struct kd_bufinfo *min_kdbp;
3586 uint32_t tempbuf_count;
3587 uint32_t tempbuf_number;
3588 uint32_t old_kdebug_flags;
3589 uint32_t old_kdebug_slowcheck;
3590 boolean_t lostevents = FALSE;
3591 boolean_t out_of_events = FALSE;
3592 boolean_t wrapped = FALSE;
3593
3594 assert(number);
3595 count = *number/sizeof(kd_buf);
3596 *number = 0;
3597
3598 ktrace_assert_lock_held();
3599
3600 if (count == 0 || !(kd_ctrl_page.kdebug_flags & KDBG_BUFINIT) || kdcopybuf == 0)
3601 return EINVAL;
3602
3603 thread_set_eager_preempt(current_thread());
3604
3605 memset(&lostevent, 0, sizeof(lostevent));
3606 lostevent.debugid = TRACE_LOST_EVENTS;
3607
3608 /*
3609 * Capture the current time. Only sort events that have occurred
3610 * before now. Since the IOPs are being flushed here, it is possible
3611 * that events occur on the AP while running live tracing. If we are
3612 * disabled, no new events should occur on the AP.
3613 */
3614 if (kd_ctrl_page.enabled) {
3615 barrier_max = kdbg_timestamp() & KDBG_TIMESTAMP_MASK;
3616 }
3617
3618 /*
3619 * Request each IOP to provide us with up to date entries before merging
3620 * buffers together.
3621 */
3622 kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_SYNC_FLUSH, NULL);
3623
3624 /*
3625 * Disable wrap so storage units cannot be stolen out from underneath us
3626 * while merging events.
3627 *
3628 * Because we hold ktrace_lock, no other control threads can be playing
3629 * with kdebug_flags. The code that emits new events could be running,
3630 * but it grabs kds_spin_lock if it needs to acquire a new storage
3631 * chunk, which is where it examines kdebug_flags. If it is adding to
3632 * the same chunk we're reading from, check for that below.
3633 */
3634 wrapped = disable_wrap(&old_kdebug_slowcheck, &old_kdebug_flags);
3635
3636 if (count > nkdbufs)
3637 count = nkdbufs;
3638
3639 if ((tempbuf_count = count) > KDCOPYBUF_COUNT) {
3640 tempbuf_count = KDCOPYBUF_COUNT;
3641 }
3642
3643 /*
3644 * If the buffers have wrapped, capture the earliest time where there
3645 * are events for all CPUs and do not emit additional lost events for
3646 * oldest storage units.
3647 */
3648 if (wrapped) {
3649 barrier_min = kd_ctrl_page.oldest_time;
3650 kd_ctrl_page.kdebug_flags &= ~KDBG_WRAPPED;
3651 kd_ctrl_page.oldest_time = 0;
3652
3653 for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_ctrl_page.kdebug_cpus; cpu++, kdbp++) {
3654 if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
3655 continue;
3656 }
3657 kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
3658 kdsp_actual->kds_lostevents = FALSE;
3659 }
3660 }
3661
3662 while (count) {
3663 tempbuf = kdcopybuf;
3664 tempbuf_number = 0;
3665
3666 if (wrapped) {
3667 /* Trace a single lost events event for wrapping. */
3668 kdbg_set_timestamp_and_cpu(&lostevent, barrier_min, 0);
3669 *tempbuf = lostevent;
3670 wrapped = FALSE;
3671 goto nextevent;
3672 }
3673
3674 /* While space left in merged events scratch buffer. */
3675 while (tempbuf_count) {
3676 earliest_time = UINT64_MAX;
3677 min_kdbp = NULL;
3678 min_cpu = 0;
3679
3680 /* Check each CPU's buffers. */
3681 for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_ctrl_page.kdebug_cpus; cpu++, kdbp++) {
3682 /* Skip CPUs without data. */
3683 if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
3684 next_cpu:
3685 continue;
3686 }
3687 /* Debugging aid: maintain a copy of the "kdsp"
3688 * index.
3689 */
3690 volatile union kds_ptr kdsp_shadow;
3691
3692 kdsp_shadow = kdsp;
3693
3694 /* From CPU data to buffer header to buffer. */
3695 kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
3696
3697 volatile struct kd_storage *kdsp_actual_shadow;
3698
3699 kdsp_actual_shadow = kdsp_actual;
3700
3701 /* Skip buffer if there are no events left. */
3702 rcursor = kdsp_actual->kds_readlast;
3703
3704 if (rcursor == kdsp_actual->kds_bufindx) {
3705 continue;
3706 }
3707
3708 t = kdbg_get_timestamp(&kdsp_actual->kds_records[rcursor]);
3709
3710 /* Ignore events that have aged out due to wrapping. */
3711 while (t < barrier_min) {
3712 rcursor = ++kdsp_actual->kds_readlast;
3713
3714 if (rcursor >= EVENTS_PER_STORAGE_UNIT) {
3715 release_storage_unit(cpu, kdsp.raw);
3716
3717 if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
3718 goto next_cpu;
3719 }
3720 kdsp_shadow = kdsp;
3721 kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
3722 kdsp_actual_shadow = kdsp_actual;
3723 rcursor = kdsp_actual->kds_readlast;
3724 }
3725
3726 t = kdbg_get_timestamp(&kdsp_actual->kds_records[rcursor]);
3727 }
3728
3729 if ((t > barrier_max) && (barrier_max > 0)) {
3730 /*
3731 * Need to flush IOPs again before we
3732 * can sort any more data from the
3733 * buffers.
3734 */
3735 out_of_events = TRUE;
3736 break;
3737 }
3738 if (t < kdsp_actual->kds_timestamp) {
3739 /*
3740 * This indicates we've not yet completed filling
3741 * in this event.
3742 * This should only occur when we're looking
3743 * at the buf that the record head is utilizing.
3744 * We'll pick these events up on the next
3745 * call to kdbg_read.
3746 * We bail at this point so that we don't
3747 * get an out-of-order timestream by continuing
3748 * to read events from the other CPUs' timestream(s).
3749 */
3750 out_of_events = TRUE;
3751 break;
3752 }
3753 if (t < earliest_time) {
3754 earliest_time = t;
3755 min_kdbp = kdbp;
3756 min_cpu = cpu;
3757 }
3758 }
3759 if (min_kdbp == NULL || out_of_events == TRUE) {
3760 /*
3761 * all buffers ran empty
3762 */
3763 out_of_events = TRUE;
3764 break;
3765 }
3766
3767 kdsp = min_kdbp->kd_list_head;
3768 kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
3769
3770 /* Copy earliest event into merged events scratch buffer. */
3771 *tempbuf = kdsp_actual->kds_records[kdsp_actual->kds_readlast++];
3772
3773 if (kdsp_actual->kds_readlast == EVENTS_PER_STORAGE_UNIT)
3774 release_storage_unit(min_cpu, kdsp.raw);
3775
3776 /*
3777 * Watch for out of order timestamps
3778 */
3779 if (earliest_time < min_kdbp->kd_prev_timebase) {
3780 /*
3781 * If we haven't already, emit a retrograde events event.
3782 */
3783 if (traced_retrograde) {
3784 continue;
3785 }
3786
3787 kdbg_set_timestamp_and_cpu(tempbuf, min_kdbp->kd_prev_timebase, kdbg_get_cpu(tempbuf));
3788 tempbuf->arg1 = tempbuf->debugid;
3789 tempbuf->arg2 = earliest_time;
3790 tempbuf->arg3 = 0;
3791 tempbuf->arg4 = 0;
3792 tempbuf->debugid = TRACE_RETROGRADE_EVENTS;
3793 traced_retrograde = true;
3794 } else {
3795 min_kdbp->kd_prev_timebase = earliest_time;
3796 }
3797 nextevent:
3798 tempbuf_count--;
3799 tempbuf_number++;
3800 tempbuf++;
3801
3802 if ((RAW_file_written += sizeof(kd_buf)) >= RAW_FLUSH_SIZE)
3803 break;
3804 }
3805 if (tempbuf_number) {
3806 if (file_version == RAW_VERSION3) {
3807 if ( !(kdbg_write_v3_event_chunk_header(buffer, V3_RAW_EVENTS, (tempbuf_number * sizeof(kd_buf)), vp, ctx))) {
3808 error = EFAULT;
3809 goto check_error;
3810 }
3811 if (buffer)
3812 buffer += (sizeof(kd_chunk_header_v3) + sizeof(uint64_t));
3813
3814 assert(count >= (sizeof(kd_chunk_header_v3) + sizeof(uint64_t)));
3815 count -= (sizeof(kd_chunk_header_v3) + sizeof(uint64_t));
3816 *number += (sizeof(kd_chunk_header_v3) + sizeof(uint64_t));
3817 }
3818 if (vp) {
3819 size_t write_size = tempbuf_number * sizeof(kd_buf);
3820 error = kdbg_write_to_vnode((caddr_t)kdcopybuf, write_size, vp, ctx, RAW_file_offset);
3821 if (!error)
3822 RAW_file_offset += write_size;
3823
3824 if (RAW_file_written >= RAW_FLUSH_SIZE) {
3825 error = VNOP_FSYNC(vp, MNT_NOWAIT, ctx);
3826
3827 RAW_file_written = 0;
3828 }
3829 } else {
3830 error = copyout(kdcopybuf, buffer, tempbuf_number * sizeof(kd_buf));
3831 buffer += (tempbuf_number * sizeof(kd_buf));
3832 }
3833 check_error:
3834 if (error) {
3835 *number = 0;
3836 error = EINVAL;
3837 break;
3838 }
3839 count -= tempbuf_number;
3840 *number += tempbuf_number;
3841 }
3842 if (out_of_events == TRUE)
3843 /*
3844 * all trace buffers are empty
3845 */
3846 break;
3847
3848 if ((tempbuf_count = count) > KDCOPYBUF_COUNT)
3849 tempbuf_count = KDCOPYBUF_COUNT;
3850 }
3851 if ( !(old_kdebug_flags & KDBG_NOWRAP)) {
3852 enable_wrap(old_kdebug_slowcheck, lostevents);
3853 }
3854 thread_clear_eager_preempt(current_thread());
3855 return (error);
3856 }
3857
3858 static int
3859 kdbg_test(size_t flavor)
3860 {
3861 int code = 0;
3862 int dummy_iop = 0;
3863
3864 #define KDEBUG_TEST_CODE(code) BSDDBG_CODE(DBG_BSD_KDEBUG_TEST, (code))
3865 switch (flavor) {
3866 case 1:
3867 /* try each macro */
3868 KDBG(KDEBUG_TEST_CODE(code)); code++;
3869 KDBG(KDEBUG_TEST_CODE(code), 1); code++;
3870 KDBG(KDEBUG_TEST_CODE(code), 1, 2); code++;
3871 KDBG(KDEBUG_TEST_CODE(code), 1, 2, 3); code++;
3872 KDBG(KDEBUG_TEST_CODE(code), 1, 2, 3, 4); code++;
3873
3874 KDBG_RELEASE(KDEBUG_TEST_CODE(code)); code++;
3875 KDBG_RELEASE(KDEBUG_TEST_CODE(code), 1); code++;
3876 KDBG_RELEASE(KDEBUG_TEST_CODE(code), 1, 2); code++;
3877 KDBG_RELEASE(KDEBUG_TEST_CODE(code), 1, 2, 3); code++;
3878 KDBG_RELEASE(KDEBUG_TEST_CODE(code), 1, 2, 3, 4); code++;
3879
3880 KDBG_FILTERED(KDEBUG_TEST_CODE(code)); code++;
3881 KDBG_FILTERED(KDEBUG_TEST_CODE(code), 1); code++;
3882 KDBG_FILTERED(KDEBUG_TEST_CODE(code), 1, 2); code++;
3883 KDBG_FILTERED(KDEBUG_TEST_CODE(code), 1, 2, 3); code++;
3884 KDBG_FILTERED(KDEBUG_TEST_CODE(code), 1, 2, 3, 4); code++;
3885
3886 KDBG_DEBUG(KDEBUG_TEST_CODE(code)); code++;
3887 KDBG_DEBUG(KDEBUG_TEST_CODE(code), 1); code++;
3888 KDBG_DEBUG(KDEBUG_TEST_CODE(code), 1, 2); code++;
3889 KDBG_DEBUG(KDEBUG_TEST_CODE(code), 1, 2, 3); code++;
3890 KDBG_DEBUG(KDEBUG_TEST_CODE(code), 1, 2, 3, 4); code++;
3891 break;
3892
3893 case 2:
3894 if (kd_ctrl_page.kdebug_iops) {
3895 /* avoid the assertion in kernel_debug_enter for a valid IOP */
3896 dummy_iop = kd_ctrl_page.kdebug_iops[0].cpu_id;
3897 }
3898
3899 /* ensure old timestamps are not emitted from kernel_debug_enter */
3900 kernel_debug_enter(dummy_iop, KDEBUG_TEST_CODE(code),
3901 100 /* very old timestamp */, 0, 0, 0,
3902 0, (uintptr_t)thread_tid(current_thread()));
3903 code++;
3904 kernel_debug_enter(dummy_iop, KDEBUG_TEST_CODE(code),
3905 kdbg_timestamp(), 0, 0, 0, 0,
3906 (uintptr_t)thread_tid(current_thread()));
3907 code++;
3908 break;
3909 default:
3910 return ENOTSUP;
3911 }
3912 #undef KDEBUG_TEST_CODE
3913
3914 return 0;
3915 }
3916
3917 void
3918 kdebug_init(unsigned int n_events, char *filter_desc, boolean_t wrapping)
3919 {
3920 assert(filter_desc != NULL);
3921
3922 #if defined(__x86_64__)
3923 /* only trace MACH events when outputting kdebug to serial */
3924 if (kdebug_serial) {
3925 n_events = 1;
3926 if (filter_desc[0] == '\0') {
3927 filter_desc[0] = 'C';
3928 filter_desc[1] = '1';
3929 filter_desc[2] = '\0';
3930 }
3931 }
3932 #endif /* defined(__x86_64__) */
3933
3934 if (log_leaks && n_events == 0) {
3935 n_events = 200000;
3936 }
3937
3938 kdebug_trace_start(n_events, filter_desc, wrapping, FALSE);
3939 }
3940
3941 static void
3942 kdbg_set_typefilter_string(const char *filter_desc)
3943 {
3944 char *end = NULL;
3945
3946 ktrace_assert_lock_held();
3947
3948 assert(filter_desc != NULL);
3949
3950 typefilter_reject_all(kdbg_typefilter);
3951 typefilter_allow_class(kdbg_typefilter, DBG_TRACE);
3952
3953 /* if the filter description starts with a number, assume it's a csc */
3954 if (filter_desc[0] >= '0' && filter_desc[0] <= '9') {
3955 unsigned long csc = strtoul(filter_desc, &end, 0);
3956 if (filter_desc != end && csc <= KDBG_CSC_MAX) {
3957 typefilter_allow_csc(kdbg_typefilter, csc);
3958 }
3959 return;
3960 }
3961
3962 while (filter_desc[0] != '\0') {
3963 unsigned long allow_value;
3964
3965 char filter_type = filter_desc[0];
3966 if (filter_type != 'C' && filter_type != 'S') {
3967 return;
3968 }
3969 filter_desc++;
3970
3971 allow_value = strtoul(filter_desc, &end, 0);
3972 if (filter_desc == end) {
3973 /* cannot parse as integer */
3974 return;
3975 }
3976
3977 switch (filter_type) {
3978 case 'C':
3979 if (allow_value <= KDBG_CLASS_MAX) {
3980 typefilter_allow_class(kdbg_typefilter, allow_value);
3981 } else {
3982 /* illegal class */
3983 return;
3984 }
3985 break;
3986 case 'S':
3987 if (allow_value <= KDBG_CSC_MAX) {
3988 typefilter_allow_csc(kdbg_typefilter, allow_value);
3989 } else {
3990 /* illegal class subclass */
3991 return;
3992 }
3993 break;
3994 default:
3995 return;
3996 }
3997
3998 /* advance to next filter entry */
3999 filter_desc = end;
4000 if (filter_desc[0] == ',') {
4001 filter_desc++;
4002 }
4003 }
4004 }
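/*
 * For illustration, some filter descriptions accepted by the parser above
 * (the values are hypothetical):
 *
 *     "C1"              allow every event in class 1
 *     "S0x0534"         allow every event in class-subclass 0x0534
 *     "C1,C9,S0x0320"   entries are comma-separated and may be mixed
 *     "0x0534"          a bare leading digit is treated as a single csc
 *
 * Numbers are parsed with strtoul(..., 0), so decimal, octal, and "0x" hex
 * are all accepted.  An entry that fails to parse or exceeds KDBG_CLASS_MAX /
 * KDBG_CSC_MAX stops parsing; entries accepted before it remain in effect,
 * and DBG_TRACE is always allowed.
 */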
4005
4006 /*
4007 * This function is meant to be called from the bootstrap thread or coming out
4008 * of acpi_idle_kernel.
4009 */
4010 void
4011 kdebug_trace_start(unsigned int n_events, const char *filter_desc,
4012 boolean_t wrapping, boolean_t at_wake)
4013 {
4014 if (!n_events) {
4015 kd_early_done = true;
4016 return;
4017 }
4018
4019 ktrace_start_single_threaded();
4020
4021 kdbg_lock_init();
4022
4023 ktrace_kernel_configure(KTRACE_KDEBUG);
4024
4025 kdbg_set_nkdbufs(n_events);
4026
4027 kernel_debug_string_early("start_kern_tracing");
4028
4029 if (kdbg_reinit(TRUE)) {
4030 printf("error from kdbg_reinit, kernel tracing not started\n");
4031 goto out;
4032 }
4033
4034 /*
4035 * Wrapping is disabled because boot and wake tracing is interested in
4036 * the earliest events, at the expense of later ones.
4037 */
4038 if (!wrapping) {
4039 uint32_t old1, old2;
4040 (void)disable_wrap(&old1, &old2);
4041 }
4042
4043 if (filter_desc && filter_desc[0] != '\0') {
4044 if (kdbg_initialize_typefilter(NULL) == KERN_SUCCESS) {
4045 kdbg_set_typefilter_string(filter_desc);
4046 kdbg_enable_typefilter();
4047 }
4048 }
4049
4050 /*
4051 * Keep interrupts disabled from the point the thread map is captured
4052 * until trace is enabled and the early events have been recorded.
4053 */
4054 boolean_t s = ml_set_interrupts_enabled(FALSE);
4055
4056 if (at_wake) {
4057 kdbg_thrmap_init();
4058 }
4059
4060 kdbg_set_tracing_enabled(TRUE, KDEBUG_ENABLE_TRACE | (kdebug_serial ?
4061 KDEBUG_ENABLE_SERIAL : 0));
4062
4063 if (!at_wake) {
4064 /*
4065 * Transfer all very early events from the static buffer into the real
4066 * buffers.
4067 */
4068 kernel_debug_early_end();
4069 }
4070
4071 ml_set_interrupts_enabled(s);
4072
4073 printf("kernel tracing started with %u events\n", n_events);
4074
4075 #if KDEBUG_MOJO_TRACE
4076 if (kdebug_serial) {
4077 printf("serial output enabled with %lu named events\n",
4078 sizeof(kd_events)/sizeof(kd_event_t));
4079 }
4080 #endif /* KDEBUG_MOJO_TRACE */
4081
4082 out:
4083 ktrace_end_single_threaded();
4084 }
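/*
 * Illustrative invocations (the argument values are hypothetical):
 *
 *     // boot path, reached via kdebug_init(): allocate buffers, install a
 *     // typefilter, and flush the early static-buffer events
 *     kdebug_trace_start(200000, "C1", FALSE, FALSE);
 *
 *     // wake path: rebuild the thread map; no typefilter is installed here
 *     kdebug_trace_start(200000, NULL, FALSE, TRUE);
 */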
4085
4086 void
4087 kdbg_dump_trace_to_file(const char *filename)
4088 {
4089 vfs_context_t ctx;
4090 vnode_t vp;
4091 size_t write_size;
4092 int ret;
4093
4094 ktrace_lock();
4095
4096 if (!(kdebug_enable & KDEBUG_ENABLE_TRACE)) {
4097 goto out;
4098 }
4099
4100 if (ktrace_get_owning_pid() != 0) {
4101 /*
4102 * Another process owns ktrace and is still active; disable tracing
4103 * to prevent the buffer from wrapping.
4104 */
4105 kdebug_enable = 0;
4106 kd_ctrl_page.enabled = 0;
4107 commpage_update_kdebug_state();
4108 goto out;
4109 }
4110
4111 KDBG_RELEASE(TRACE_WRITING_EVENTS | DBG_FUNC_START);
4112
4113 kdebug_enable = 0;
4114 kd_ctrl_page.enabled = 0;
4115 commpage_update_kdebug_state();
4116
4117 ctx = vfs_context_kernel();
4118
4119 if (vnode_open(filename, (O_CREAT | FWRITE | O_NOFOLLOW), 0600, 0, &vp, ctx)) {
4120 goto out;
4121 }
4122
4123 kdbg_write_thread_map(vp, ctx);
4124
4125 write_size = nkdbufs * sizeof(kd_buf);
4126 ret = kdbg_read(0, &write_size, vp, ctx, RAW_VERSION1);
4127 if (ret) {
4128 goto out_close;
4129 }
4130
4131 /*
4132 * Wait to synchronize the file to capture the I/O in the
4133 * TRACE_WRITING_EVENTS interval.
4134 */
4135 ret = VNOP_FSYNC(vp, MNT_WAIT, ctx);
4136
4137 /*
4138 * Balance the starting TRACE_WRITING_EVENTS tracepoint manually.
4139 */
4140 kd_buf end_event = {
4141 .debugid = TRACE_WRITING_EVENTS | DBG_FUNC_END,
4142 .arg1 = write_size,
4143 .arg2 = ret,
4144 .arg5 = thread_tid(current_thread()),
4145 };
4146 kdbg_set_timestamp_and_cpu(&end_event, kdbg_timestamp(),
4147 cpu_number());
4148
4149 /* this is best effort -- ignore any errors */
4150 (void)kdbg_write_to_vnode((caddr_t)&end_event, sizeof(kd_buf), vp, ctx,
4151 RAW_file_offset);
4152
4153 out_close:
4154 vnode_close(vp, FWRITE, ctx);
4155 sync(current_proc(), (void *)NULL, (int *)NULL);
4156
4157 out:
4158 ktrace_unlock();
4159 }
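/*
 * The file produced above is laid out, roughly, as:
 *
 *     thread map (kdbg_write_thread_map)
 *     event buffer in RAW_VERSION1 format (kdbg_read)
 *     one synthetic TRACE_WRITING_EVENTS | DBG_FUNC_END event
 *
 * so readers of the dump see the write itself bracketed by the
 * TRACE_WRITING_EVENTS start and end events.
 */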
4160
4161 static int
4162 kdbg_sysctl_continuous SYSCTL_HANDLER_ARGS
4163 {
4164 #pragma unused(oidp, arg1, arg2)
4165 int value = kdbg_continuous_time;
4166 int ret = sysctl_io_number(req, value, sizeof(value), &value, NULL);
4167
4168 if (ret || !req->newptr) {
4169 return ret;
4170 }
4171
4172 kdbg_continuous_time = value;
4173 return 0;
4174 }
4175
4176 SYSCTL_NODE(_kern, OID_AUTO, kdbg, CTLFLAG_RD | CTLFLAG_LOCKED, 0,
4177 "kdbg");
4178
4179 SYSCTL_PROC(_kern_kdbg, OID_AUTO, experimental_continuous,
4180 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0,
4181 sizeof(int), kdbg_sysctl_continuous, "I",
4182 "Set kdebug to use mach_continuous_time");
4183
4184 SYSCTL_QUAD(_kern_kdbg, OID_AUTO, oldest_time,
4185 CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED,
4186 &kd_ctrl_page.oldest_time,
4187 "Find the oldest timestamp still in trace");
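/*
 * From user space these appear as "kern.kdbg.experimental_continuous" and
 * "kern.kdbg.oldest_time".  A minimal sketch of reading and setting them
 * with sysctlbyname(3) (illustrative only):
 *
 *     #include <sys/sysctl.h>
 *
 *     int on = 1;
 *     uint64_t oldest = 0;
 *     size_t size = sizeof(oldest);
 *
 *     // ask kdebug to timestamp events with mach_continuous_time()
 *     sysctlbyname("kern.kdbg.experimental_continuous", NULL, NULL, &on, sizeof(on));
 *
 *     // read the oldest timestamp still present in the trace buffers
 *     sysctlbyname("kern.kdbg.oldest_time", &oldest, &size, NULL, 0);
 */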
4188
4189 #if KDEBUG_MOJO_TRACE
4190 static kd_event_t *
4191 binary_search(uint32_t id)
4192 {
4193 int low, high, mid;
4194
4195 low = 0;
4196 high = sizeof(kd_events)/sizeof(kd_event_t) - 1;
4197
4198 while (TRUE)
4199 {
4200 mid = (low + high) / 2;
4201
4202 if (low > high)
4203 return NULL; /* failed */
4204 else if (low + 1 >= high) {
4205 /* narrowed to at most two candidates -- check both */
4206 if (kd_events[high].id == id)
4207 return &kd_events[high];
4208 else if (kd_events[low].id == id)
4209 return &kd_events[low];
4210 else
4211 return NULL; /* search failed */
4212 }
4213 else if (id < kd_events[mid].id)
4214 high = mid;
4215 else
4216 low = mid;
4217 }
4218 }
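/*
 * Note: the search above assumes kd_events[] is sorted by id in ascending
 * order; an unsorted table would make lookups fail unpredictably.
 */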
4219
4220 /*
4221 * Look up an event id to get its name string, consulting a per-cpu,
4222 * single-entry cache before resorting to a binary search of the
4223 * full table.
4224 */
4225 #define NCACHE 1
4226 static kd_event_t *last_hit[MAX_CPUS];
4227 static kd_event_t *
4228 event_lookup_cache(uint32_t cpu, uint32_t id)
4229 {
4230 if (last_hit[cpu] == NULL || last_hit[cpu]->id != id)
4231 last_hit[cpu] = binary_search(id);
4232 return last_hit[cpu];
4233 }
4234
4235 static uint64_t kd_last_timestamp;
4236
4237 static void
4238 kdebug_serial_print(
4239 uint32_t cpunum,
4240 uint32_t debugid,
4241 uint64_t timestamp,
4242 uintptr_t arg1,
4243 uintptr_t arg2,
4244 uintptr_t arg3,
4245 uintptr_t arg4,
4246 uintptr_t threadid
4247 )
4248 {
4249 char kprintf_line[192];
4250 char event[40];
4251 uint64_t us = timestamp / NSEC_PER_USEC;
4252 uint64_t us_tenth = (timestamp % NSEC_PER_USEC) / 100;
4253 uint64_t delta = timestamp - kd_last_timestamp;
4254 uint64_t delta_us = delta / NSEC_PER_USEC;
4255 uint64_t delta_us_tenth = (delta % NSEC_PER_USEC) / 100;
4256 uint32_t event_id = debugid & KDBG_EVENTID_MASK;
4257 const char *command;
4258 const char *bra;
4259 const char *ket;
4260 kd_event_t *ep;
4261
4262 /* event time and delta from last */
4263 snprintf(kprintf_line, sizeof(kprintf_line),
4264 "%11llu.%1llu %8llu.%1llu ",
4265 us, us_tenth, delta_us, delta_us_tenth);
4266
4267
4268 /* event (id or name) - start prefixed by "[", end suffixed by "]" */
4269 bra = (debugid & DBG_FUNC_START) ? "[" : " ";
4270 ket = (debugid & DBG_FUNC_END) ? "]" : " ";
4271 ep = event_lookup_cache(cpunum, event_id);
4272 if (ep) {
4273 if (strlen(ep->name) < sizeof(event) - 3)
4274 snprintf(event, sizeof(event), "%s%s%s",
4275 bra, ep->name, ket);
4276 else
4277 snprintf(event, sizeof(event), "%s%x(name too long)%s",
4278 bra, event_id, ket);
4279 } else {
4280 snprintf(event, sizeof(event), "%s%x%s",
4281 bra, event_id, ket);
4282 }
4283 snprintf(kprintf_line + strlen(kprintf_line),
4284 sizeof(kprintf_line) - strlen(kprintf_line),
4285 "%-40s ", event);
4286
4287 /* arg1 .. arg4 with special cases for strings */
4288 switch (event_id) {
4289 case VFS_LOOKUP:
4290 case VFS_LOOKUP_DONE:
4291 if (debugid & DBG_FUNC_START) {
4292 /* arg1 hex then arg2..arg4 chars */
4293 snprintf(kprintf_line + strlen(kprintf_line),
4294 sizeof(kprintf_line) - strlen(kprintf_line),
4295 "%-16lx %-8s%-8s%-8s ",
4296 arg1, (char*)&arg2, (char*)&arg3, (char*)&arg4);
4297 break;
4298 }
4299 /* else fall through for arg1..arg4 chars */
4300 case TRACE_STRING_EXEC:
4301 case TRACE_STRING_NEWTHREAD:
4302 case TRACE_INFO_STRING:
4303 snprintf(kprintf_line + strlen(kprintf_line),
4304 sizeof(kprintf_line) - strlen(kprintf_line),
4305 "%-8s%-8s%-8s%-8s ",
4306 (char*)&arg1, (char*)&arg2, (char*)&arg3, (char*)&arg4);
4307 break;
4308 default:
4309 snprintf(kprintf_line + strlen(kprintf_line),
4310 sizeof(kprintf_line) - strlen(kprintf_line),
4311 "%-16lx %-16lx %-16lx %-16lx",
4312 arg1, arg2, arg3, arg4);
4313 }
4314
4315 /* threadid, cpu and command name */
4316 if (threadid == (uintptr_t)thread_tid(current_thread()) &&
4317 current_proc() &&
4318 current_proc()->p_comm[0])
4319 command = current_proc()->p_comm;
4320 else
4321 command = "-";
4322 snprintf(kprintf_line + strlen(kprintf_line),
4323 sizeof(kprintf_line) - strlen(kprintf_line),
4324 " %-16lx %-2d %s\n",
4325 threadid, cpunum, command);
4326
4327 kprintf("%s", kprintf_line);
4328 kd_last_timestamp = timestamp;
4329 }
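/*
 * The resulting line has, roughly, this shape (fields are space-padded):
 *
 *     <usecs>.<tenth> <delta-usecs>.<tenth> [EVENT_NAME]  arg1 arg2 arg3 arg4  threadid cpu command
 *
 * where "[" and "]" mark DBG_FUNC_START and DBG_FUNC_END, and the VFS_LOOKUP
 * and TRACE_STRING_* events print their arguments as characters rather than
 * hex.
 */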
4330
4331 #endif