1 /*
2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3 *
4 * @Apple_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
21 */
22
23 #include <sys/errno.h>
24 #include <sys/param.h>
25 #include <sys/systm.h>
26 #include <sys/proc_internal.h>
27 #include <sys/vm.h>
28 #include <sys/sysctl.h>
29 #include <sys/kdebug.h>
30 #include <sys/kauth.h>
31 #include <sys/ktrace.h>
32 #include <sys/sysproto.h>
33 #include <sys/bsdtask_info.h>
34 #include <sys/random.h>
35
36 #include <mach/clock_types.h>
37 #include <mach/mach_types.h>
38 #include <mach/mach_time.h>
39 #include <mach/mach_vm.h>
40 #include <machine/machine_routines.h>
41
42 #include <mach/machine.h>
43 #include <mach/vm_map.h>
44
45 #if defined(__i386__) || defined(__x86_64__)
46 #include <i386/rtclock_protos.h>
47 #include <i386/mp.h>
48 #include <i386/machine_routines.h>
49 #include <i386/tsc.h>
50 #endif
51
52 #include <kern/clock.h>
53
54 #include <kern/thread.h>
55 #include <kern/task.h>
56 #include <kern/debug.h>
57 #include <kern/kalloc.h>
58 #include <kern/cpu_data.h>
59 #include <kern/assert.h>
60 #include <kern/telemetry.h>
61 #include <kern/sched_prim.h>
62 #include <vm/vm_kern.h>
63 #include <sys/lock.h>
64 #include <kperf/kperf.h>
65 #include <pexpert/device_tree.h>
66
67 #include <sys/malloc.h>
68 #include <sys/mcache.h>
69
70 #include <sys/vnode.h>
71 #include <sys/vnode_internal.h>
72 #include <sys/fcntl.h>
73 #include <sys/file_internal.h>
74 #include <sys/ubc.h>
75 #include <sys/param.h> /* for isset() */
76
77 #include <mach/mach_host.h> /* for host_info() */
78 #include <libkern/OSAtomic.h>
79
80 #include <machine/pal_routines.h>
81 #include <machine/atomic.h>
82
83 /*
84 * IOP(s)
85 *
86 * https://coreoswiki.apple.com/wiki/pages/U6z3i0q9/Consistent_Logging_Implementers_Guide.html
87 *
88 * IOP(s) are auxiliary cores that want to participate in kdebug event logging.
89 * They are registered dynamically. Each is assigned a cpu_id at registration.
90 *
91 * NOTE: IOP trace events may not use the same clock hardware as "normal"
92 * cpus. There is an effort made to synchronize the IOP timebase with the
93 * AP, but it should be understood that there may be discrepancies.
94 *
95 * Once registered, an IOP is permanent; it cannot be unloaded/unregistered.
96 * The current implementation depends on this for thread safety.
97 *
98 * New registrations occur by allocating a kd_iop struct and assigning
99 * a provisional cpu_id of list_head->cpu_id + 1. Then a CAS to claim the
100 * list_head pointer resolves any races.
101 *
102 * You may safely walk the kd_iops list at any time, without holding locks.
103 *
104 * When allocating buffers, the current kd_iops head is captured. Any operations
105 * that depend on the buffer state (such as flushing IOP traces on reads,
106 * etc.) should use the captured list head. This will allow registrations to
107 * take place while trace is in use.
108 */
109
110 typedef struct kd_iop {
111 kd_callback_t callback;
112 uint32_t cpu_id;
113 uint64_t last_timestamp; /* Prevent timer rollback */
114 struct kd_iop* next;
115 } kd_iop_t;
116
117 static kd_iop_t* kd_iops = NULL;
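
/*
 * Editorial sketch (not part of the original source): how an IOP client might
 * register itself and emit events through the entry points defined in this
 * file. The kd_callback_t fields (func, context, iop_name) are inferred from
 * their use below; the callback body and the "EXAMPLE" name are hypothetical.
 */
#if 0 /* example only */
static void
example_iop_callback(void *context, kd_callback_type type, void *arg)
{
	/* e.g. flush the coprocessor's local buffer on KD_CALLBACK_SYNC_FLUSH */
	(void)context; (void)type; (void)arg;
}

static void
example_iop_register(void)
{
	kd_callback_t cb = {
		.func = example_iop_callback,
		.context = NULL,
		.iop_name = "EXAMPLE",
	};
	/* The returned cpu_id is what the IOP passes as coreid to kernel_debug_enter(). */
	int cpu_id = kernel_debug_register_callback(cb);

	kernel_debug_enter(cpu_id, MACHDBG_CODE(DBG_MACH_SCHED, 0),
	    mach_absolute_time(), 0, 0, 0, 0, 0);
}
#endif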
118
119 /*
120 * Typefilter(s)
121 *
122 * A typefilter is an 8KB bitmap that is used to selectively filter events
123 * being recorded. It is able to individually address every class & subclass.
124 *
125 * There is a shared typefilter in the kernel which is lazily allocated. Once
126 * allocated, the shared typefilter is never deallocated. The shared typefilter
127 * is also mapped on demand into userspace processes that invoke kdebug_trace
128 * API from Libsyscall. When mapped into a userspace process, the memory is
129 * read only, and does not have a fixed address.
130 *
131 * It is a requirement that the kernel's shared typefilter always pass DBG_TRACE
132 * events. This is enforced automatically, by having the needed bits set any
133 * time the shared typefilter is mutated.
134 */
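
/*
 * Editorial note (not part of the original source): the bitmap holds
 * 8 KB * 8 = 65536 bits, one per class/subclass (CSC) pair -- 256 classes
 * times 256 subclasses. A debugid's bit index is its CSC, extracted below by
 * typefilter_is_debugid_allowed() via KDBG_EXTRACT_CSC(). A minimal worked
 * example, assuming the CSC is the upper 16 bits of the debugid
 * (class << 8 | subclass):
 *
 *   uint32_t debugid = MACHDBG_CODE(DBG_MACH_SCHED, 0); // class 0x01, subclass 0x40
 *   uint16_t csc     = KDBG_EXTRACT_CSC(debugid);       // 0x0140
 *   bool     allowed = isset(tf, csc);                  // tests bit 320 of the map
 */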
135
136 typedef uint8_t* typefilter_t;
137
138 static typefilter_t kdbg_typefilter;
139 static mach_port_t kdbg_typefilter_memory_entry;
140
141 /*
142 * There are 3 combinations of page sizes:
143 *
144 * 4KB / 4KB
145 * 4KB / 16KB
146 * 16KB / 16KB
147 *
148 * The typefilter is exactly 8KB. In the first two scenarios, we would like
149 * to use 2 pages exactly; in the third scenario we must make certain that
150 * a full page is allocated so we do not inadvertently share 8KB of random
151 * data to userspace. The round_page_32 macro rounds to kernel page size.
152 */
153 #define TYPEFILTER_ALLOC_SIZE MAX(round_page_32(KDBG_TYPEFILTER_BITMAP_SIZE), KDBG_TYPEFILTER_BITMAP_SIZE)
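
/*
 * Editorial worked example for the cases above (round_page_32 rounds to the
 * kernel page size): with 4 KB kernel pages the 8 KB bitmap rounds to 8 KB,
 * exactly two pages; with 16 KB kernel pages it rounds up to 16 KB, so the
 * trailing 8 KB belongs to the allocation (and is zeroed in
 * typefilter_create) instead of leaking unrelated kernel data to userspace.
 * The MAX() appears to be a defensive guard for a page size smaller than the
 * bitmap.
 */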
154
155 static typefilter_t
156 typefilter_create(void)
157 {
158 typefilter_t tf;
159 if (KERN_SUCCESS == kmem_alloc(kernel_map, (vm_offset_t*)&tf, TYPEFILTER_ALLOC_SIZE, VM_KERN_MEMORY_DIAG)) {
160 memset(&tf[KDBG_TYPEFILTER_BITMAP_SIZE], 0, TYPEFILTER_ALLOC_SIZE - KDBG_TYPEFILTER_BITMAP_SIZE);
161 return tf;
162 }
163 return NULL;
164 }
165
166 static void
167 typefilter_deallocate(typefilter_t tf)
168 {
169 assert(tf != NULL);
170 assert(tf != kdbg_typefilter);
171 kmem_free(kernel_map, (vm_offset_t)tf, TYPEFILTER_ALLOC_SIZE);
172 }
173
174 static void
175 typefilter_copy(typefilter_t dst, typefilter_t src)
176 {
177 assert(src != NULL);
178 assert(dst != NULL);
179 memcpy(dst, src, KDBG_TYPEFILTER_BITMAP_SIZE);
180 }
181
182 static void
183 typefilter_reject_all(typefilter_t tf)
184 {
185 assert(tf != NULL);
186 memset(tf, 0, KDBG_TYPEFILTER_BITMAP_SIZE);
187 }
188
189 static void
190 typefilter_allow_all(typefilter_t tf)
191 {
192 assert(tf != NULL);
193 memset(tf, ~0, KDBG_TYPEFILTER_BITMAP_SIZE);
194 }
195
196 static void
197 typefilter_allow_class(typefilter_t tf, uint8_t class)
198 {
199 assert(tf != NULL);
200 const uint32_t BYTES_PER_CLASS = 256 / 8; // 256 subclasses, 1 bit each
201 memset(&tf[class * BYTES_PER_CLASS], 0xFF, BYTES_PER_CLASS);
202 }
203
204 static void
205 typefilter_allow_csc(typefilter_t tf, uint16_t csc)
206 {
207 assert(tf != NULL);
208 setbit(tf, csc);
209 }
210
211 static bool
212 typefilter_is_debugid_allowed(typefilter_t tf, uint32_t id)
213 {
214 assert(tf != NULL);
215 return isset(tf, KDBG_EXTRACT_CSC(id));
216 }
217
218 static mach_port_t
219 typefilter_create_memory_entry(typefilter_t tf)
220 {
221 assert(tf != NULL);
222
223 mach_port_t memory_entry = MACH_PORT_NULL;
224 memory_object_size_t size = TYPEFILTER_ALLOC_SIZE;
225
226 mach_make_memory_entry_64(kernel_map,
227 &size,
228 (memory_object_offset_t)tf,
229 VM_PROT_READ,
230 &memory_entry,
231 MACH_PORT_NULL);
232
233 return memory_entry;
234 }
235
236 static int kdbg_copyin_typefilter(user_addr_t addr, size_t size);
237 static void kdbg_enable_typefilter(void);
238 static void kdbg_disable_typefilter(void);
239
240 /*
241 * External prototypes
242 */
243
244 void task_act_iterate_wth_args(task_t, void (*)(thread_t, void *), void *);
245 int cpu_number(void); /* XXX <machine/...> include path broken */
246 void commpage_update_kdebug_state(void); /* XXX sign */
247
248 extern int log_leaks;
249
250 /*
251 * This flag is for testing purposes only -- it's highly experimental and tools
252 * have not been updated to support it.
253 */
254 static bool kdbg_continuous_time = false;
255
256 static inline uint64_t
257 kdbg_timestamp(void)
258 {
259 if (kdbg_continuous_time) {
260 return mach_continuous_time();
261 } else {
262 return mach_absolute_time();
263 }
264 }
265
266 static int kdbg_debug = 0;
267
268 #if KDEBUG_MOJO_TRACE
269 #include <sys/kdebugevents.h>
270 static void kdebug_serial_print( /* forward */
271 uint32_t, uint32_t, uint64_t,
272 uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
273 #endif
274
275 int kdbg_control(int *, u_int, user_addr_t, size_t *);
276
277 static int kdbg_read(user_addr_t, size_t *, vnode_t, vfs_context_t, uint32_t);
278 static int kdbg_readcpumap(user_addr_t, size_t *);
279 static int kdbg_readthrmap_v3(user_addr_t, size_t, int);
280 static int kdbg_readcurthrmap(user_addr_t, size_t *);
281 static int kdbg_setreg(kd_regtype *);
282 static int kdbg_setpidex(kd_regtype *);
283 static int kdbg_setpid(kd_regtype *);
284 static void kdbg_thrmap_init(void);
285 static int kdbg_reinit(boolean_t);
286 static int kdbg_bootstrap(boolean_t);
287 static int kdbg_test(size_t flavor);
288
289 static int kdbg_write_v1_header(boolean_t write_thread_map, vnode_t vp, vfs_context_t ctx);
290 static int kdbg_write_thread_map(vnode_t vp, vfs_context_t ctx);
291 static int kdbg_copyout_thread_map(user_addr_t buffer, size_t *buffer_size);
292 static void kdbg_clear_thread_map(void);
293
294 static boolean_t kdbg_wait(uint64_t timeout_ms, boolean_t locked_wait);
295 static void kdbg_wakeup(void);
296
297 int kdbg_cpumap_init_internal(kd_iop_t* iops, uint32_t cpu_count,
298 uint8_t** cpumap, uint32_t* cpumap_size);
299
300 static kd_threadmap *kdbg_thrmap_init_internal(unsigned int count,
301 unsigned int *mapsize,
302 unsigned int *mapcount);
303
304 static boolean_t kdebug_current_proc_enabled(uint32_t debugid);
305 static errno_t kdebug_check_trace_string(uint32_t debugid, uint64_t str_id);
306
307 int kdbg_write_v3_header(user_addr_t, size_t *, int);
308 int kdbg_write_v3_chunk_header(user_addr_t buffer, uint32_t tag,
309 uint32_t sub_tag, uint64_t length,
310 vnode_t vp, vfs_context_t ctx);
311
312 user_addr_t kdbg_write_v3_event_chunk_header(user_addr_t buffer, uint32_t tag,
313 uint64_t length, vnode_t vp,
314 vfs_context_t ctx);
315
316 // Helper functions
317
318 static int create_buffers(boolean_t);
319 static void delete_buffers(void);
320
321 extern int tasks_count;
322 extern int threads_count;
323 extern void IOSleep(int);
324
325 /* trace enable status */
326 unsigned int kdebug_enable = 0;
327
328 /* A static buffer to record events prior to the start of regular logging */
329
330 #define KD_EARLY_BUFFER_SIZE (16 * 1024)
331 #define KD_EARLY_BUFFER_NBUFS (KD_EARLY_BUFFER_SIZE / sizeof(kd_buf))
332 #if CONFIG_EMBEDDED
333 /*
334 * On embedded, the space for this is carved out by osfmk/arm/data.s -- clang
335 * has problems aligning to greater than 4K.
336 */
337 extern kd_buf kd_early_buffer[KD_EARLY_BUFFER_NBUFS];
338 #else /* CONFIG_EMBEDDED */
339 __attribute__((aligned(KD_EARLY_BUFFER_SIZE)))
340 static kd_buf kd_early_buffer[KD_EARLY_BUFFER_NBUFS];
341 #endif /* !CONFIG_EMBEDDED */
342
343 static unsigned int kd_early_index = 0;
344 static bool kd_early_overflow = false;
345 static bool kd_early_done = false;
346
347 #define SLOW_NOLOG 0x01
348 #define SLOW_CHECKS 0x02
349
350 #define EVENTS_PER_STORAGE_UNIT 2048
351 #define MIN_STORAGE_UNITS_PER_CPU 4
352
353 #define POINTER_FROM_KDS_PTR(x) (&kd_bufs[x.buffer_index].kdsb_addr[x.offset])
354
355 union kds_ptr {
356 struct {
357 uint32_t buffer_index:21;
358 uint16_t offset:11;
359 };
360 uint32_t raw;
361 };
362
363 struct kd_storage {
364 union kds_ptr kds_next;
365 uint32_t kds_bufindx;
366 uint32_t kds_bufcnt;
367 uint32_t kds_readlast;
368 boolean_t kds_lostevents;
369 uint64_t kds_timestamp;
370
371 kd_buf kds_records[EVENTS_PER_STORAGE_UNIT];
372 };
373
374 #define MAX_BUFFER_SIZE (1024 * 1024 * 128)
375 #define N_STORAGE_UNITS_PER_BUFFER (MAX_BUFFER_SIZE / sizeof(struct kd_storage))
376 static_assert(N_STORAGE_UNITS_PER_BUFFER <= 0x7ff,
377 "shoudn't overflow kds_ptr.offset");
378
379 struct kd_storage_buffers {
380 struct kd_storage *kdsb_addr;
381 uint32_t kdsb_size;
382 };
383
384 #define KDS_PTR_NULL 0xffffffff
385 struct kd_storage_buffers *kd_bufs = NULL;
386 int n_storage_units = 0;
387 unsigned int n_storage_buffers = 0;
388 int n_storage_threshold = 0;
389 int kds_waiter = 0;
390
391 #pragma pack(0)
392 struct kd_bufinfo {
393 union kds_ptr kd_list_head;
394 union kds_ptr kd_list_tail;
395 boolean_t kd_lostevents;
396 uint32_t _pad;
397 uint64_t kd_prev_timebase;
398 uint32_t num_bufs;
399 } __attribute__((aligned(MAX_CPU_CACHE_LINE_SIZE)));
400
401
402 /*
403 * In principle, this control block can be shared in DRAM with other
404 * coprocessors and runtimes, for configuring what tracing is enabled.
405 */
406 struct kd_ctrl_page_t {
407 union kds_ptr kds_free_list;
408 uint32_t enabled :1;
409 uint32_t _pad0 :31;
410 int kds_inuse_count;
411 uint32_t kdebug_flags;
412 uint32_t kdebug_slowcheck;
413 uint64_t oldest_time;
414 /*
415 * The number of kd_bufinfo structs allocated may not match the current
416 * number of active cpus. We capture the iops list head at initialization
417 * which we could use to calculate the number of cpus we allocated data for,
418 * unless it happens to be null. To avoid that case, we explicitly also
419 * capture a cpu count.
420 */
421 kd_iop_t* kdebug_iops;
422 uint32_t kdebug_cpus;
423 } kd_ctrl_page = {
424 .kds_free_list = {.raw = KDS_PTR_NULL},
425 .kdebug_slowcheck = SLOW_NOLOG,
426 .oldest_time = 0
427 };
428
429 #pragma pack()
430
431 struct kd_bufinfo *kdbip = NULL;
432
433 #define KDCOPYBUF_COUNT 8192
434 #define KDCOPYBUF_SIZE (KDCOPYBUF_COUNT * sizeof(kd_buf))
435
436 #define PAGE_4KB 4096
437 #define PAGE_16KB 16384
438
439 kd_buf *kdcopybuf = NULL;
440
441 unsigned int nkdbufs = 0;
442 unsigned int kdlog_beg = 0;
443 unsigned int kdlog_end = 0;
444 unsigned int kdlog_value1 = 0;
445 unsigned int kdlog_value2 = 0;
446 unsigned int kdlog_value3 = 0;
447 unsigned int kdlog_value4 = 0;
448
449 static lck_spin_t * kdw_spin_lock;
450 static lck_spin_t * kds_spin_lock;
451
452 kd_threadmap *kd_mapptr = 0;
453 unsigned int kd_mapsize = 0;
454 unsigned int kd_mapcount = 0;
455
456 off_t RAW_file_offset = 0;
457 int RAW_file_written = 0;
458
459 #define RAW_FLUSH_SIZE (2 * 1024 * 1024)
460
461 /*
462 * A globally increasing counter for identifying strings in trace. Starts at
463 * 1 because 0 is a reserved return value.
464 */
465 __attribute__((aligned(MAX_CPU_CACHE_LINE_SIZE)))
466 static uint64_t g_curr_str_id = 1;
467
468 #define STR_ID_SIG_OFFSET (48)
469 #define STR_ID_MASK ((1ULL << STR_ID_SIG_OFFSET) - 1)
470 #define STR_ID_SIG_MASK (~STR_ID_MASK)
471
472 /*
473 * A bit pattern for identifying string IDs generated by
474 * kdebug_trace_string(2).
475 */
476 static uint64_t g_str_id_signature = (0x70acULL << STR_ID_SIG_OFFSET);
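
/*
 * Editorial sketch (not part of the original source) of the string-ID layout
 * implied by the masks above and their use in kernel_debug_string_internal()
 * and kdebug_check_trace_string() below: the low 48 bits hold the counter and
 * the high 16 bits hold the 0x70ac signature, letting the kernel distinguish
 * IDs it generated from arbitrary values passed in by userspace.
 */
#if 0 /* example only */
static uint64_t
example_make_str_id(void)
{
	uint64_t id = OSIncrementAtomic64((SInt64 *)&g_curr_str_id);
	return (id & STR_ID_MASK) | g_str_id_signature;
}

static bool
example_str_id_has_signature(uint64_t str_id)
{
	return (str_id & STR_ID_SIG_MASK) == g_str_id_signature;
}
#endif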
477
478 #define INTERRUPT 0x01050000
479 #define MACH_vmfault 0x01300008
480 #define BSC_SysCall 0x040c0000
481 #define MACH_SysCall 0x010c0000
482
483 /* task to string structure */
484 struct tts {
485 task_t task; /* from procs task */
486 pid_t pid; /* from procs p_pid */
487 char task_comm[20];/* from procs p_comm */
488 };
489
490 typedef struct tts tts_t;
491
492 struct krt {
493 kd_threadmap *map; /* pointer to the map buffer */
494 int count;
495 int maxcount;
496 struct tts *atts;
497 };
498
499 typedef struct krt krt_t;
500
501 static uint32_t
502 kdbg_cpu_count(boolean_t early_trace)
503 {
504 if (early_trace) {
505 #if CONFIG_EMBEDDED
506 return ml_get_cpu_count();
507 #else
508 return max_ncpus;
509 #endif
510 }
511
512 host_basic_info_data_t hinfo;
513 mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
514 host_info((host_t)1 /* BSD_HOST */, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
515 assert(hinfo.logical_cpu_max > 0);
516 return hinfo.logical_cpu_max;
517 }
518
519 #if MACH_ASSERT
520 #if CONFIG_EMBEDDED
521 static boolean_t
522 kdbg_iop_list_is_valid(kd_iop_t* iop)
523 {
524 if (iop) {
525 /* Is list sorted by cpu_id? */
526 kd_iop_t* temp = iop;
527 do {
528 assert(!temp->next || temp->next->cpu_id == temp->cpu_id - 1);
529 assert(temp->next || (temp->cpu_id == kdbg_cpu_count(FALSE) || temp->cpu_id == kdbg_cpu_count(TRUE)));
530 } while ((temp = temp->next));
531
532 /* Does each entry have a function and a name? */
533 temp = iop;
534 do {
535 assert(temp->callback.func);
536 assert(strlen(temp->callback.iop_name) < sizeof(temp->callback.iop_name));
537 } while ((temp = temp->next));
538 }
539
540 return TRUE;
541 }
542
543 static boolean_t
544 kdbg_iop_list_contains_cpu_id(kd_iop_t* list, uint32_t cpu_id)
545 {
546 while (list) {
547 if (list->cpu_id == cpu_id) {
548 return TRUE;
549 }
550 list = list->next;
551 }
552
553 return FALSE;
554 }
555 #endif /* CONFIG_EMBEDDED */
556 #endif /* MACH_ASSERT */
557
558 static void
559 kdbg_iop_list_callback(kd_iop_t* iop, kd_callback_type type, void* arg)
560 {
561 while (iop) {
562 iop->callback.func(iop->callback.context, type, arg);
563 iop = iop->next;
564 }
565 }
566
567 static lck_grp_t *kdebug_lck_grp = NULL;
568
569 static void
570 kdbg_set_tracing_enabled(boolean_t enabled, uint32_t trace_type)
571 {
572 int s = ml_set_interrupts_enabled(FALSE);
573 lck_spin_lock_grp(kds_spin_lock, kdebug_lck_grp);
574 if (enabled) {
575 /*
576 * The oldest valid time is now; reject old events from IOPs.
577 */
578 kd_ctrl_page.oldest_time = kdbg_timestamp();
579 kdebug_enable |= trace_type;
580 kd_ctrl_page.kdebug_slowcheck &= ~SLOW_NOLOG;
581 kd_ctrl_page.enabled = 1;
582 commpage_update_kdebug_state();
583 } else {
584 kdebug_enable &= ~(KDEBUG_ENABLE_TRACE | KDEBUG_ENABLE_PPT);
585 kd_ctrl_page.kdebug_slowcheck |= SLOW_NOLOG;
586 kd_ctrl_page.enabled = 0;
587 commpage_update_kdebug_state();
588 }
589 lck_spin_unlock(kds_spin_lock);
590 ml_set_interrupts_enabled(s);
591
592 if (enabled) {
593 kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_KDEBUG_ENABLED, NULL);
594 } else {
595 /*
596 * If you do not flush the IOP trace buffers, they can linger
597 * for a considerable period; consider code which disables and
598 * deallocates without a final sync flush.
599 */
600 kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_KDEBUG_DISABLED, NULL);
601 kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_SYNC_FLUSH, NULL);
602 }
603 }
604
605 static void
606 kdbg_set_flags(int slowflag, int enableflag, boolean_t enabled)
607 {
608 int s = ml_set_interrupts_enabled(FALSE);
609 lck_spin_lock_grp(kds_spin_lock, kdebug_lck_grp);
610
611 if (enabled) {
612 kd_ctrl_page.kdebug_slowcheck |= slowflag;
613 kdebug_enable |= enableflag;
614 } else {
615 kd_ctrl_page.kdebug_slowcheck &= ~slowflag;
616 kdebug_enable &= ~enableflag;
617 }
618
619 lck_spin_unlock(kds_spin_lock);
620 ml_set_interrupts_enabled(s);
621 }
622
623 /*
624 * Disable wrapping and return true if trace wrapped, false otherwise.
625 */
626 static boolean_t
627 disable_wrap(uint32_t *old_slowcheck, uint32_t *old_flags)
628 {
629 boolean_t wrapped;
630 int s = ml_set_interrupts_enabled(FALSE);
631 lck_spin_lock_grp(kds_spin_lock, kdebug_lck_grp);
632
633 *old_slowcheck = kd_ctrl_page.kdebug_slowcheck;
634 *old_flags = kd_ctrl_page.kdebug_flags;
635
636 wrapped = kd_ctrl_page.kdebug_flags & KDBG_WRAPPED;
637 kd_ctrl_page.kdebug_flags &= ~KDBG_WRAPPED;
638 kd_ctrl_page.kdebug_flags |= KDBG_NOWRAP;
639
640 lck_spin_unlock(kds_spin_lock);
641 ml_set_interrupts_enabled(s);
642
643 return wrapped;
644 }
645
646 static void
647 enable_wrap(uint32_t old_slowcheck)
648 {
649 int s = ml_set_interrupts_enabled(FALSE);
650 lck_spin_lock_grp(kds_spin_lock, kdebug_lck_grp);
651
652 kd_ctrl_page.kdebug_flags &= ~KDBG_NOWRAP;
653
654 if (!(old_slowcheck & SLOW_NOLOG)) {
655 kd_ctrl_page.kdebug_slowcheck &= ~SLOW_NOLOG;
656 }
657
658 lck_spin_unlock(kds_spin_lock);
659 ml_set_interrupts_enabled(s);
660 }
661
662 static int
663 create_buffers(boolean_t early_trace)
664 {
665 unsigned int i;
666 unsigned int p_buffer_size;
667 unsigned int f_buffer_size;
668 unsigned int f_buffers;
669 int error = 0;
670
671 /*
672 * For the duration of this allocation, trace code will only reference
673 * kdebug_iops. Any iops registered after this point will not be
674 * messaged until the buffers are reallocated.
675 *
676 * TLDR; Must read kd_iops once and only once!
677 */
678 kd_ctrl_page.kdebug_iops = kd_iops;
679
680 #if CONFIG_EMBEDDED
681 assert(kdbg_iop_list_is_valid(kd_ctrl_page.kdebug_iops));
682 #endif
683
684 /*
685 * If the list is valid, it is sorted, newest -> oldest. Each iop entry
686 * has a cpu_id of "the older entry's cpu_id + 1", so the list head holds the
687 * highest cpu_id, and the cpu count is the head's cpu_id + 1.
688 */
689
690 kd_ctrl_page.kdebug_cpus = kd_ctrl_page.kdebug_iops ? kd_ctrl_page.kdebug_iops->cpu_id + 1 : kdbg_cpu_count(early_trace);
691
692 if (kmem_alloc(kernel_map, (vm_offset_t *)&kdbip, sizeof(struct kd_bufinfo) * kd_ctrl_page.kdebug_cpus, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
693 error = ENOSPC;
694 goto out;
695 }
696
697 if (nkdbufs < (kd_ctrl_page.kdebug_cpus * EVENTS_PER_STORAGE_UNIT * MIN_STORAGE_UNITS_PER_CPU)) {
698 n_storage_units = kd_ctrl_page.kdebug_cpus * MIN_STORAGE_UNITS_PER_CPU;
699 } else {
700 n_storage_units = nkdbufs / EVENTS_PER_STORAGE_UNIT;
701 }
702
703 nkdbufs = n_storage_units * EVENTS_PER_STORAGE_UNIT;
704
705 f_buffers = n_storage_units / N_STORAGE_UNITS_PER_BUFFER;
706 n_storage_buffers = f_buffers;
707
708 f_buffer_size = N_STORAGE_UNITS_PER_BUFFER * sizeof(struct kd_storage);
709 p_buffer_size = (n_storage_units % N_STORAGE_UNITS_PER_BUFFER) * sizeof(struct kd_storage);
710
711 if (p_buffer_size) {
712 n_storage_buffers++;
713 }
714
715 kd_bufs = NULL;
716
717 if (kdcopybuf == 0) {
718 if (kmem_alloc(kernel_map, (vm_offset_t *)&kdcopybuf, (vm_size_t)KDCOPYBUF_SIZE, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
719 error = ENOSPC;
720 goto out;
721 }
722 }
723 if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs, (vm_size_t)(n_storage_buffers * sizeof(struct kd_storage_buffers)), VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
724 error = ENOSPC;
725 goto out;
726 }
727 bzero(kd_bufs, n_storage_buffers * sizeof(struct kd_storage_buffers));
728
729 for (i = 0; i < f_buffers; i++) {
730 if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs[i].kdsb_addr, (vm_size_t)f_buffer_size, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
731 error = ENOSPC;
732 goto out;
733 }
734 bzero(kd_bufs[i].kdsb_addr, f_buffer_size);
735
736 kd_bufs[i].kdsb_size = f_buffer_size;
737 }
738 if (p_buffer_size) {
739 if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs[i].kdsb_addr, (vm_size_t)p_buffer_size, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
740 error = ENOSPC;
741 goto out;
742 }
743 bzero(kd_bufs[i].kdsb_addr, p_buffer_size);
744
745 kd_bufs[i].kdsb_size = p_buffer_size;
746 }
747 n_storage_units = 0;
748
749 for (i = 0; i < n_storage_buffers; i++) {
750 struct kd_storage *kds;
751 int n_elements;
752 int n;
753
754 n_elements = kd_bufs[i].kdsb_size / sizeof(struct kd_storage);
755 kds = kd_bufs[i].kdsb_addr;
756
757 for (n = 0; n < n_elements; n++) {
758 kds[n].kds_next.buffer_index = kd_ctrl_page.kds_free_list.buffer_index;
759 kds[n].kds_next.offset = kd_ctrl_page.kds_free_list.offset;
760
761 kd_ctrl_page.kds_free_list.buffer_index = i;
762 kd_ctrl_page.kds_free_list.offset = n;
763 }
764 n_storage_units += n_elements;
765 }
766
767 bzero((char *)kdbip, sizeof(struct kd_bufinfo) * kd_ctrl_page.kdebug_cpus);
768
769 for (i = 0; i < kd_ctrl_page.kdebug_cpus; i++) {
770 kdbip[i].kd_list_head.raw = KDS_PTR_NULL;
771 kdbip[i].kd_list_tail.raw = KDS_PTR_NULL;
772 kdbip[i].kd_lostevents = FALSE;
773 kdbip[i].num_bufs = 0;
774 }
775
776 kd_ctrl_page.kdebug_flags |= KDBG_BUFINIT;
777
778 kd_ctrl_page.kds_inuse_count = 0;
779 n_storage_threshold = n_storage_units / 2;
780 out:
781 if (error) {
782 delete_buffers();
783 }
784
785 return error;
786 }
787
788 static void
789 delete_buffers(void)
790 {
791 unsigned int i;
792
793 if (kd_bufs) {
794 for (i = 0; i < n_storage_buffers; i++) {
795 if (kd_bufs[i].kdsb_addr) {
796 kmem_free(kernel_map, (vm_offset_t)kd_bufs[i].kdsb_addr, (vm_size_t)kd_bufs[i].kdsb_size);
797 }
798 }
799 kmem_free(kernel_map, (vm_offset_t)kd_bufs, (vm_size_t)(n_storage_buffers * sizeof(struct kd_storage_buffers)));
800
801 kd_bufs = NULL;
802 n_storage_buffers = 0;
803 }
804 if (kdcopybuf) {
805 kmem_free(kernel_map, (vm_offset_t)kdcopybuf, KDCOPYBUF_SIZE);
806
807 kdcopybuf = NULL;
808 }
809 kd_ctrl_page.kds_free_list.raw = KDS_PTR_NULL;
810
811 if (kdbip) {
812 kmem_free(kernel_map, (vm_offset_t)kdbip, sizeof(struct kd_bufinfo) * kd_ctrl_page.kdebug_cpus);
813
814 kdbip = NULL;
815 }
816 kd_ctrl_page.kdebug_iops = NULL;
817 kd_ctrl_page.kdebug_cpus = 0;
818 kd_ctrl_page.kdebug_flags &= ~KDBG_BUFINIT;
819 }
820
821 void
822 release_storage_unit(int cpu, uint32_t kdsp_raw)
823 {
824 int s = 0;
825 struct kd_storage *kdsp_actual;
826 struct kd_bufinfo *kdbp;
827 union kds_ptr kdsp;
828
829 kdsp.raw = kdsp_raw;
830
831 s = ml_set_interrupts_enabled(FALSE);
832 lck_spin_lock_grp(kds_spin_lock, kdebug_lck_grp);
833
834 kdbp = &kdbip[cpu];
835
836 if (kdsp.raw == kdbp->kd_list_head.raw) {
837 /*
838 * it's possible for the storage unit pointed to
839 * by kdsp to have already been stolen... so
840 * check to see if it's still the head of the list
841 * now that we're behind the lock that protects
842 * adding and removing from the queue...
843 * since we only ever release and steal units from
844 * that position, if it's no longer the head
845 * we have nothing to do in this context
846 */
847 kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
848 kdbp->kd_list_head = kdsp_actual->kds_next;
849
850 kdsp_actual->kds_next = kd_ctrl_page.kds_free_list;
851 kd_ctrl_page.kds_free_list = kdsp;
852
853 kd_ctrl_page.kds_inuse_count--;
854 }
855 lck_spin_unlock(kds_spin_lock);
856 ml_set_interrupts_enabled(s);
857 }
858
859
860 boolean_t
861 allocate_storage_unit(int cpu)
862 {
863 union kds_ptr kdsp;
864 struct kd_storage *kdsp_actual, *kdsp_next_actual;
865 struct kd_bufinfo *kdbp, *kdbp_vict, *kdbp_try;
866 uint64_t oldest_ts, ts;
867 boolean_t retval = TRUE;
868 int s = 0;
869
870 s = ml_set_interrupts_enabled(FALSE);
871 lck_spin_lock_grp(kds_spin_lock, kdebug_lck_grp);
872
873 kdbp = &kdbip[cpu];
874
875 /* If someone beat us to the allocate, return success */
876 if (kdbp->kd_list_tail.raw != KDS_PTR_NULL) {
877 kdsp_actual = POINTER_FROM_KDS_PTR(kdbp->kd_list_tail);
878
879 if (kdsp_actual->kds_bufindx < EVENTS_PER_STORAGE_UNIT) {
880 goto out;
881 }
882 }
883
884 if ((kdsp = kd_ctrl_page.kds_free_list).raw != KDS_PTR_NULL) {
885 /*
886 * If there's a free page, grab it from the free list.
887 */
888 kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
889 kd_ctrl_page.kds_free_list = kdsp_actual->kds_next;
890
891 kd_ctrl_page.kds_inuse_count++;
892 } else {
893 /*
894 * Otherwise, we're going to lose events and repurpose the oldest
895 * storage unit we can find.
896 */
897 if (kd_ctrl_page.kdebug_flags & KDBG_NOWRAP) {
898 kd_ctrl_page.kdebug_slowcheck |= SLOW_NOLOG;
899 kdbp->kd_lostevents = TRUE;
900 retval = FALSE;
901 goto out;
902 }
903 kdbp_vict = NULL;
904 oldest_ts = UINT64_MAX;
905
906 for (kdbp_try = &kdbip[0]; kdbp_try < &kdbip[kd_ctrl_page.kdebug_cpus]; kdbp_try++) {
907 if (kdbp_try->kd_list_head.raw == KDS_PTR_NULL) {
908 /*
909 * no storage unit to steal
910 */
911 continue;
912 }
913
914 kdsp_actual = POINTER_FROM_KDS_PTR(kdbp_try->kd_list_head);
915
916 if (kdsp_actual->kds_bufcnt < EVENTS_PER_STORAGE_UNIT) {
917 /*
918 * make sure we don't steal the storage unit
919 * being actively recorded to... need to
920 * move on because we don't want an out-of-order
921 * set of events showing up later
922 */
923 continue;
924 }
925
926 /*
927 * When wrapping, steal the storage unit with the
928 * earliest timestamp on its last event, instead of the
929 * earliest timestamp on the first event. This allows a
930 * storage unit with more recent events to be preserved,
931 * even if the storage unit contains events that are
932 * older than those found in other CPUs.
933 */
934 ts = kdbg_get_timestamp(&kdsp_actual->kds_records[EVENTS_PER_STORAGE_UNIT - 1]);
935 if (ts < oldest_ts) {
936 oldest_ts = ts;
937 kdbp_vict = kdbp_try;
938 }
939 }
940 if (kdbp_vict == NULL) {
941 kdebug_enable = 0;
942 kd_ctrl_page.enabled = 0;
943 commpage_update_kdebug_state();
944 retval = FALSE;
945 goto out;
946 }
947 kdsp = kdbp_vict->kd_list_head;
948 kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
949 kdbp_vict->kd_list_head = kdsp_actual->kds_next;
950
951 if (kdbp_vict->kd_list_head.raw != KDS_PTR_NULL) {
952 kdsp_next_actual = POINTER_FROM_KDS_PTR(kdbp_vict->kd_list_head);
953 kdsp_next_actual->kds_lostevents = TRUE;
954 } else {
955 kdbp_vict->kd_lostevents = TRUE;
956 }
957
958 if (kd_ctrl_page.oldest_time < oldest_ts) {
959 kd_ctrl_page.oldest_time = oldest_ts;
960 }
961 kd_ctrl_page.kdebug_flags |= KDBG_WRAPPED;
962 }
963 kdsp_actual->kds_timestamp = kdbg_timestamp();
964 kdsp_actual->kds_next.raw = KDS_PTR_NULL;
965 kdsp_actual->kds_bufcnt = 0;
966 kdsp_actual->kds_readlast = 0;
967
968 kdsp_actual->kds_lostevents = kdbp->kd_lostevents;
969 kdbp->kd_lostevents = FALSE;
970 kdsp_actual->kds_bufindx = 0;
971
972 if (kdbp->kd_list_head.raw == KDS_PTR_NULL) {
973 kdbp->kd_list_head = kdsp;
974 } else {
975 POINTER_FROM_KDS_PTR(kdbp->kd_list_tail)->kds_next = kdsp;
976 }
977 kdbp->kd_list_tail = kdsp;
978 out:
979 lck_spin_unlock(kds_spin_lock);
980 ml_set_interrupts_enabled(s);
981
982 return retval;
983 }
984
985 int
986 kernel_debug_register_callback(kd_callback_t callback)
987 {
988 kd_iop_t* iop;
989 if (kmem_alloc(kernel_map, (vm_offset_t *)&iop, sizeof(kd_iop_t), VM_KERN_MEMORY_DIAG) == KERN_SUCCESS) {
990 memcpy(&iop->callback, &callback, sizeof(kd_callback_t));
991
992 /*
993 * <rdar://problem/13351477> Some IOP clients are not providing a name.
994 *
995 * Remove when fixed.
996 */
997 {
998 boolean_t is_valid_name = FALSE;
999 for (uint32_t length = 0; length < sizeof(callback.iop_name); ++length) {
1000 /* This is roughly isprintable(c) */
1001 if (callback.iop_name[length] > 0x20 && callback.iop_name[length] < 0x7F) {
1002 continue;
1003 }
1004 if (callback.iop_name[length] == 0) {
1005 if (length) {
1006 is_valid_name = TRUE;
1007 }
1008 break;
1009 }
1010 }
1011
1012 if (!is_valid_name) {
1013 strlcpy(iop->callback.iop_name, "IOP-???", sizeof(iop->callback.iop_name));
1014 }
1015 }
1016
1017 iop->last_timestamp = 0;
1018
1019 do {
1020 /*
1021 * We use two pieces of state, the old list head
1022 * pointer, and the value of old_list_head->cpu_id.
1023 * If we read kd_iops more than once, it can change
1024 * between reads.
1025 *
1026 * TLDR; Must not read kd_iops more than once per loop.
1027 */
1028 iop->next = kd_iops;
1029 iop->cpu_id = iop->next ? (iop->next->cpu_id + 1) : kdbg_cpu_count(FALSE);
1030
1031 /*
1032 * Header says OSCompareAndSwapPtr has a memory barrier
1033 */
1034 } while (!OSCompareAndSwapPtr(iop->next, iop, (void* volatile*)&kd_iops));
1035
1036 return iop->cpu_id;
1037 }
1038
1039 return 0;
1040 }
1041
1042 void
1043 kernel_debug_enter(
1044 uint32_t coreid,
1045 uint32_t debugid,
1046 uint64_t timestamp,
1047 uintptr_t arg1,
1048 uintptr_t arg2,
1049 uintptr_t arg3,
1050 uintptr_t arg4,
1051 uintptr_t threadid
1052 )
1053 {
1054 uint32_t bindx;
1055 kd_buf *kd;
1056 struct kd_bufinfo *kdbp;
1057 struct kd_storage *kdsp_actual;
1058 union kds_ptr kds_raw;
1059
1060 if (kd_ctrl_page.kdebug_slowcheck) {
1061 if ((kd_ctrl_page.kdebug_slowcheck & SLOW_NOLOG) || !(kdebug_enable & (KDEBUG_ENABLE_TRACE | KDEBUG_ENABLE_PPT))) {
1062 goto out1;
1063 }
1064
1065 if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) {
1066 if (typefilter_is_debugid_allowed(kdbg_typefilter, debugid)) {
1067 goto record_event;
1068 }
1069 goto out1;
1070 } else if (kd_ctrl_page.kdebug_flags & KDBG_RANGECHECK) {
1071 if (debugid >= kdlog_beg && debugid <= kdlog_end) {
1072 goto record_event;
1073 }
1074 goto out1;
1075 } else if (kd_ctrl_page.kdebug_flags & KDBG_VALCHECK) {
1076 if ((debugid & KDBG_EVENTID_MASK) != kdlog_value1 &&
1077 (debugid & KDBG_EVENTID_MASK) != kdlog_value2 &&
1078 (debugid & KDBG_EVENTID_MASK) != kdlog_value3 &&
1079 (debugid & KDBG_EVENTID_MASK) != kdlog_value4) {
1080 goto out1;
1081 }
1082 }
1083 }
1084
1085 record_event:
1086 if (timestamp < kd_ctrl_page.oldest_time) {
1087 goto out1;
1088 }
1089
1090 #if CONFIG_EMBEDDED
1091 /*
1092 * When start_kern_tracing is called by the kernel to trace very
1093 * early kernel events, it saves data to a secondary buffer until
1094 * it is possible to initialize ktrace, and then dumps the events
1095 * into the ktrace buffer using this method. In this case, iops will
1096 * be NULL, and the coreid will be zero. It is not possible to have
1097 * a valid IOP coreid of zero, so the assertion passes when both iops is
1098 * NULL and coreid is zero.
1099 */
1100 assert(kdbg_iop_list_contains_cpu_id(kd_ctrl_page.kdebug_iops, coreid) || (kd_ctrl_page.kdebug_iops == NULL && coreid == 0));
1101 #endif
1102
1103 disable_preemption();
1104
1105 if (kd_ctrl_page.enabled == 0) {
1106 goto out;
1107 }
1108
1109 kdbp = &kdbip[coreid];
1110 timestamp &= KDBG_TIMESTAMP_MASK;
1111
1112 #if KDEBUG_MOJO_TRACE
1113 if (kdebug_enable & KDEBUG_ENABLE_SERIAL) {
1114 kdebug_serial_print(coreid, debugid, timestamp,
1115 arg1, arg2, arg3, arg4, threadid);
1116 }
1117 #endif
1118
1119 retry_q:
1120 kds_raw = kdbp->kd_list_tail;
1121
1122 if (kds_raw.raw != KDS_PTR_NULL) {
1123 kdsp_actual = POINTER_FROM_KDS_PTR(kds_raw);
1124 bindx = kdsp_actual->kds_bufindx;
1125 } else {
1126 kdsp_actual = NULL;
1127 bindx = EVENTS_PER_STORAGE_UNIT;
1128 }
1129
1130 if (kdsp_actual == NULL || bindx >= EVENTS_PER_STORAGE_UNIT) {
1131 if (allocate_storage_unit(coreid) == FALSE) {
1132 /*
1133 * this can only happen if wrapping
1134 * has been disabled
1135 */
1136 goto out;
1137 }
1138 goto retry_q;
1139 }
1140 if (!OSCompareAndSwap(bindx, bindx + 1, &kdsp_actual->kds_bufindx)) {
1141 goto retry_q;
1142 }
1143
1144 // IOP entries can be allocated before xnu allocates and inits the buffer
1145 if (timestamp < kdsp_actual->kds_timestamp) {
1146 kdsp_actual->kds_timestamp = timestamp;
1147 }
1148
1149 kd = &kdsp_actual->kds_records[bindx];
1150
1151 kd->debugid = debugid;
1152 kd->arg1 = arg1;
1153 kd->arg2 = arg2;
1154 kd->arg3 = arg3;
1155 kd->arg4 = arg4;
1156 kd->arg5 = threadid;
1157
1158 kdbg_set_timestamp_and_cpu(kd, timestamp, coreid);
1159
1160 OSAddAtomic(1, &kdsp_actual->kds_bufcnt);
1161 out:
1162 enable_preemption();
1163 out1:
1164 if ((kds_waiter && kd_ctrl_page.kds_inuse_count >= n_storage_threshold)) {
1165 kdbg_wakeup();
1166 }
1167 }
1168
1169 /*
1170 * Check if the given debug ID is allowed to be traced on the current process.
1171 *
1172 * Returns true if allowed and false otherwise.
1173 */
1174 static inline bool
1175 kdebug_debugid_procfilt_allowed(uint32_t debugid)
1176 {
1177 uint32_t procfilt_flags = kd_ctrl_page.kdebug_flags &
1178 (KDBG_PIDCHECK | KDBG_PIDEXCLUDE);
1179
1180 if (!procfilt_flags) {
1181 return true;
1182 }
1183
1184 /*
1185 * DBG_TRACE and MACH_SCHED tracepoints ignore the process filter.
1186 */
1187 if ((debugid & 0xffff0000) == MACHDBG_CODE(DBG_MACH_SCHED, 0) ||
1188 (debugid >> 24 == DBG_TRACE)) {
1189 return true;
1190 }
1191
1192 struct proc *curproc = current_proc();
1193 /*
1194 * If the process is missing (early in boot), allow it.
1195 */
1196 if (!curproc) {
1197 return true;
1198 }
1199
1200 if (procfilt_flags & KDBG_PIDCHECK) {
1201 /*
1202 * Allow only processes marked with the kdebug bit.
1203 */
1204 return curproc->p_kdebug;
1205 } else if (procfilt_flags & KDBG_PIDEXCLUDE) {
1206 /*
1207 * Exclude any process marked with the kdebug bit.
1208 */
1209 return !curproc->p_kdebug;
1210 } else {
1211 panic("kdebug: invalid procfilt flags %x", kd_ctrl_page.kdebug_flags);
1212 __builtin_unreachable();
1213 }
1214 }
1215
1216 static void
1217 kernel_debug_internal(
1218 uint32_t debugid,
1219 uintptr_t arg1,
1220 uintptr_t arg2,
1221 uintptr_t arg3,
1222 uintptr_t arg4,
1223 uintptr_t arg5,
1224 uint64_t flags)
1225 {
1226 uint64_t now;
1227 uint32_t bindx;
1228 kd_buf *kd;
1229 int cpu;
1230 struct kd_bufinfo *kdbp;
1231 struct kd_storage *kdsp_actual;
1232 union kds_ptr kds_raw;
1233 bool only_filter = flags & KDBG_FLAG_FILTERED;
1234 bool observe_procfilt = !(flags & KDBG_FLAG_NOPROCFILT);
1235
1236 if (kd_ctrl_page.kdebug_slowcheck) {
1237 if ((kd_ctrl_page.kdebug_slowcheck & SLOW_NOLOG) ||
1238 !(kdebug_enable & (KDEBUG_ENABLE_TRACE | KDEBUG_ENABLE_PPT))) {
1239 goto out1;
1240 }
1241
1242 if (!ml_at_interrupt_context() && observe_procfilt &&
1243 !kdebug_debugid_procfilt_allowed(debugid)) {
1244 goto out1;
1245 }
1246
1247 if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) {
1248 if (typefilter_is_debugid_allowed(kdbg_typefilter, debugid)) {
1249 goto record_event;
1250 }
1251
1252 goto out1;
1253 } else if (only_filter) {
1254 goto out1;
1255 } else if (kd_ctrl_page.kdebug_flags & KDBG_RANGECHECK) {
1256 /* Always record trace system info */
1257 if (KDBG_EXTRACT_CLASS(debugid) == DBG_TRACE) {
1258 goto record_event;
1259 }
1260
1261 if (debugid < kdlog_beg || debugid > kdlog_end) {
1262 goto out1;
1263 }
1264 } else if (kd_ctrl_page.kdebug_flags & KDBG_VALCHECK) {
1265 /* Always record trace system info */
1266 if (KDBG_EXTRACT_CLASS(debugid) == DBG_TRACE) {
1267 goto record_event;
1268 }
1269
1270 if ((debugid & KDBG_EVENTID_MASK) != kdlog_value1 &&
1271 (debugid & KDBG_EVENTID_MASK) != kdlog_value2 &&
1272 (debugid & KDBG_EVENTID_MASK) != kdlog_value3 &&
1273 (debugid & KDBG_EVENTID_MASK) != kdlog_value4) {
1274 goto out1;
1275 }
1276 }
1277 } else if (only_filter) {
1278 goto out1;
1279 }
1280
1281 record_event:
1282 disable_preemption();
1283
1284 if (kd_ctrl_page.enabled == 0) {
1285 goto out;
1286 }
1287
1288 cpu = cpu_number();
1289 kdbp = &kdbip[cpu];
1290
1291 #if KDEBUG_MOJO_TRACE
1292 if (kdebug_enable & KDEBUG_ENABLE_SERIAL) {
1293 kdebug_serial_print(cpu, debugid,
1294 kdbg_timestamp() & KDBG_TIMESTAMP_MASK,
1295 arg1, arg2, arg3, arg4, arg5);
1296 }
1297 #endif
1298
1299 retry_q:
1300 kds_raw = kdbp->kd_list_tail;
1301
1302 if (kds_raw.raw != KDS_PTR_NULL) {
1303 kdsp_actual = POINTER_FROM_KDS_PTR(kds_raw);
1304 bindx = kdsp_actual->kds_bufindx;
1305 } else {
1306 kdsp_actual = NULL;
1307 bindx = EVENTS_PER_STORAGE_UNIT;
1308 }
1309
1310 if (kdsp_actual == NULL || bindx >= EVENTS_PER_STORAGE_UNIT) {
1311 if (allocate_storage_unit(cpu) == FALSE) {
1312 /*
1313 * this can only happen if wrapping
1314 * has been disabled
1315 */
1316 goto out;
1317 }
1318 goto retry_q;
1319 }
1320
1321 now = kdbg_timestamp() & KDBG_TIMESTAMP_MASK;
1322
1323 if (!OSCompareAndSwap(bindx, bindx + 1, &kdsp_actual->kds_bufindx)) {
1324 goto retry_q;
1325 }
1326
1327 kd = &kdsp_actual->kds_records[bindx];
1328
1329 kd->debugid = debugid;
1330 kd->arg1 = arg1;
1331 kd->arg2 = arg2;
1332 kd->arg3 = arg3;
1333 kd->arg4 = arg4;
1334 kd->arg5 = arg5;
1335
1336 kdbg_set_timestamp_and_cpu(kd, now, cpu);
1337
1338 OSAddAtomic(1, &kdsp_actual->kds_bufcnt);
1339
1340 #if KPERF
1341 kperf_kdebug_callback(debugid, __builtin_frame_address(0));
1342 #endif
1343 out:
1344 enable_preemption();
1345 out1:
1346 if (kds_waiter && kd_ctrl_page.kds_inuse_count >= n_storage_threshold) {
1347 uint32_t etype;
1348 uint32_t stype;
1349
1350 etype = debugid & KDBG_EVENTID_MASK;
1351 stype = debugid & KDBG_CSC_MASK;
1352
1353 if (etype == INTERRUPT || etype == MACH_vmfault ||
1354 stype == BSC_SysCall || stype == MACH_SysCall) {
1355 kdbg_wakeup();
1356 }
1357 }
1358 }
1359
1360 void
1361 kernel_debug(
1362 uint32_t debugid,
1363 uintptr_t arg1,
1364 uintptr_t arg2,
1365 uintptr_t arg3,
1366 uintptr_t arg4,
1367 __unused uintptr_t arg5)
1368 {
1369 kernel_debug_internal(debugid, arg1, arg2, arg3, arg4,
1370 (uintptr_t)thread_tid(current_thread()), 0);
1371 }
1372
1373 void
1374 kernel_debug1(
1375 uint32_t debugid,
1376 uintptr_t arg1,
1377 uintptr_t arg2,
1378 uintptr_t arg3,
1379 uintptr_t arg4,
1380 uintptr_t arg5)
1381 {
1382 kernel_debug_internal(debugid, arg1, arg2, arg3, arg4, arg5, 0);
1383 }
1384
1385 void
1386 kernel_debug_flags(
1387 uint32_t debugid,
1388 uintptr_t arg1,
1389 uintptr_t arg2,
1390 uintptr_t arg3,
1391 uintptr_t arg4,
1392 uint64_t flags)
1393 {
1394 kernel_debug_internal(debugid, arg1, arg2, arg3, arg4,
1395 (uintptr_t)thread_tid(current_thread()), flags);
1396 }
1397
1398 void
1399 kernel_debug_filtered(
1400 uint32_t debugid,
1401 uintptr_t arg1,
1402 uintptr_t arg2,
1403 uintptr_t arg3,
1404 uintptr_t arg4)
1405 {
1406 kernel_debug_flags(debugid, arg1, arg2, arg3, arg4, KDBG_FLAG_FILTERED);
1407 }
1408
1409 void
1410 kernel_debug_string_early(const char *message)
1411 {
1412 uintptr_t arg[4] = {0, 0, 0, 0};
1413
1414 /* Stuff the message string in the args and log it. */
1415 strncpy((char *)arg, message, MIN(sizeof(arg), strlen(message)));
1416 KERNEL_DEBUG_EARLY(
1417 TRACE_INFO_STRING,
1418 arg[0], arg[1], arg[2], arg[3]);
1419 }
1420
1421 #define SIMPLE_STR_LEN (64)
1422 static_assert(SIMPLE_STR_LEN % sizeof(uintptr_t) == 0);
1423
1424 void
1425 kernel_debug_string_simple(uint32_t eventid, const char *str)
1426 {
1427 if (!kdebug_enable) {
1428 return;
1429 }
1430
1431 /* array of uintptr_ts simplifies emitting the string as arguments */
1432 uintptr_t str_buf[(SIMPLE_STR_LEN / sizeof(uintptr_t)) + 1] = { 0 };
1433 size_t len = strlcpy((char *)str_buf, str, SIMPLE_STR_LEN + 1);
1434
1435 uintptr_t thread_id = (uintptr_t)thread_tid(current_thread());
1436 uint32_t debugid = eventid | DBG_FUNC_START;
1437
1438 /* string can fit in a single tracepoint */
1439 if (len <= (4 * sizeof(uintptr_t))) {
1440 debugid |= DBG_FUNC_END;
1441 }
1442
1443 kernel_debug_internal(debugid, str_buf[0],
1444 str_buf[1],
1445 str_buf[2],
1446 str_buf[3], thread_id, 0);
1447
1448 debugid &= KDBG_EVENTID_MASK;
1449 int i = 4;
1450 size_t written = 4 * sizeof(uintptr_t);
1451
1452 for (; written < len; i += 4, written += 4 * sizeof(uintptr_t)) {
1453 /* if this is the last tracepoint to be emitted */
1454 if ((written + (4 * sizeof(uintptr_t))) >= len) {
1455 debugid |= DBG_FUNC_END;
1456 }
1457 kernel_debug_internal(debugid, str_buf[i],
1458 str_buf[i + 1],
1459 str_buf[i + 2],
1460 str_buf[i + 3], thread_id, 0);
1461 }
1462 }
1463
1464 extern int master_cpu; /* MACH_KERNEL_PRIVATE */
1465 /*
1466 * Used prior to start_kern_tracing() being called.
1467 * Log temporarily into a static buffer.
1468 */
1469 void
1470 kernel_debug_early(
1471 uint32_t debugid,
1472 uintptr_t arg1,
1473 uintptr_t arg2,
1474 uintptr_t arg3,
1475 uintptr_t arg4)
1476 {
1477 #if defined(__x86_64__)
1478 extern int early_boot;
1479 /*
1480 * Note that "early" isn't early enough in some cases where
1481 * we're invoked before gsbase is set on x86, hence the
1482 * check of "early_boot".
1483 */
1484 if (early_boot) {
1485 return;
1486 }
1487 #endif
1488
1489 /* If early tracing is over, use the normal path. */
1490 if (kd_early_done) {
1491 KERNEL_DEBUG_CONSTANT(debugid, arg1, arg2, arg3, arg4, 0);
1492 return;
1493 }
1494
1495 /* Do nothing if the buffer is full or we're not on the boot cpu. */
1496 kd_early_overflow = kd_early_index >= KD_EARLY_BUFFER_NBUFS;
1497 if (kd_early_overflow || cpu_number() != master_cpu) {
1498 return;
1499 }
1500
1501 kd_early_buffer[kd_early_index].debugid = debugid;
1502 kd_early_buffer[kd_early_index].timestamp = mach_absolute_time();
1503 kd_early_buffer[kd_early_index].arg1 = arg1;
1504 kd_early_buffer[kd_early_index].arg2 = arg2;
1505 kd_early_buffer[kd_early_index].arg3 = arg3;
1506 kd_early_buffer[kd_early_index].arg4 = arg4;
1507 kd_early_buffer[kd_early_index].arg5 = 0;
1508 kd_early_index++;
1509 }
1510
1511 /*
1512 * Transfer the contents of the temporary buffer into the trace buffers.
1513 * Precede that by logging the rebase time (offset) - the TSC-based time (in ns)
1514 * when mach_absolute_time is set to 0.
1515 */
1516 static void
1517 kernel_debug_early_end(void)
1518 {
1519 if (cpu_number() != master_cpu) {
1520 panic("kernel_debug_early_end() not called on boot processor");
1521 }
1522
1523 /* reset the current oldest time to allow early events */
1524 kd_ctrl_page.oldest_time = 0;
1525
1526 #if !CONFIG_EMBEDDED
1527 /* Fake sentinel marking the start of kernel time relative to TSC */
1528 kernel_debug_enter(0,
1529 TRACE_TIMESTAMPS,
1530 0,
1531 (uint32_t)(tsc_rebase_abs_time >> 32),
1532 (uint32_t)tsc_rebase_abs_time,
1533 tsc_at_boot,
1534 0,
1535 0);
1536 #endif
1537 for (unsigned int i = 0; i < kd_early_index; i++) {
1538 kernel_debug_enter(0,
1539 kd_early_buffer[i].debugid,
1540 kd_early_buffer[i].timestamp,
1541 kd_early_buffer[i].arg1,
1542 kd_early_buffer[i].arg2,
1543 kd_early_buffer[i].arg3,
1544 kd_early_buffer[i].arg4,
1545 0);
1546 }
1547
1548 /* Cut events-lost event on overflow */
1549 if (kd_early_overflow) {
1550 KDBG_RELEASE(TRACE_LOST_EVENTS, 1);
1551 }
1552
1553 kd_early_done = true;
1554
1555 /* This trace marks the start of kernel tracing */
1556 kernel_debug_string_early("early trace done");
1557 }
1558
1559 void
1560 kernel_debug_disable(void)
1561 {
1562 if (kdebug_enable) {
1563 kdbg_set_tracing_enabled(FALSE, 0);
1564 }
1565 }
1566
1567 /*
1568 * Returns non-zero if debugid is in a reserved class.
1569 */
1570 static int
1571 kdebug_validate_debugid(uint32_t debugid)
1572 {
1573 uint8_t debugid_class;
1574
1575 debugid_class = KDBG_EXTRACT_CLASS(debugid);
1576 switch (debugid_class) {
1577 case DBG_TRACE:
1578 return EPERM;
1579 }
1580
1581 return 0;
1582 }
1583
1584 /*
1585 * Support syscall SYS_kdebug_typefilter.
1586 */
1587 int
1588 kdebug_typefilter(__unused struct proc* p,
1589 struct kdebug_typefilter_args* uap,
1590 __unused int *retval)
1591 {
1592 int ret = KERN_SUCCESS;
1593
1594 if (uap->addr == USER_ADDR_NULL ||
1595 uap->size == USER_ADDR_NULL) {
1596 return EINVAL;
1597 }
1598
1599 /*
1600 * The atomic load is to close a race window with setting the typefilter
1601 * and memory entry values. A description follows:
1602 *
1603 * Thread 1 (writer)
1604 *
1605 * Allocate Typefilter
1606 * Allocate MemoryEntry
1607 * Write Global MemoryEntry Ptr
1608 * Atomic Store (Release) Global Typefilter Ptr
1609 *
1610 * Thread 2 (reader, AKA us)
1611 *
1612 * if ((Atomic Load (Acquire) Global Typefilter Ptr) == NULL)
1613 * return;
1614 *
1615 * Without the atomic store, it isn't guaranteed that the write of
1616 * Global MemoryEntry Ptr is visible before we can see the write of
1617 * Global Typefilter Ptr.
1618 *
1619 * Without the atomic load, it isn't guaranteed that the loads of
1620 * Global MemoryEntry Ptr aren't speculated.
1621 *
1622 * The global pointers transition from NULL -> valid once and only once,
1623 * and never change after becoming valid. This means that having passed
1624 * the first atomic load test of Global Typefilter Ptr, this function
1625 * can then safely use the remaining global state without atomic checks.
1626 */
1627 if (!__c11_atomic_load((_Atomic typefilter_t *)&kdbg_typefilter, memory_order_acquire)) {
1628 return EINVAL;
1629 }
1630
1631 assert(kdbg_typefilter_memory_entry);
1632
1633 mach_vm_offset_t user_addr = 0;
1634 vm_map_t user_map = current_map();
1635
1636 ret = mach_to_bsd_errno(
1637 mach_vm_map_kernel(user_map, // target map
1638 &user_addr, // [in, out] target address
1639 TYPEFILTER_ALLOC_SIZE, // initial size
1640 0, // mask (alignment?)
1641 VM_FLAGS_ANYWHERE, // flags
1642 VM_MAP_KERNEL_FLAGS_NONE,
1643 VM_KERN_MEMORY_NONE,
1644 kdbg_typefilter_memory_entry, // port (memory entry!)
1645 0, // offset (in memory entry)
1646 FALSE, // should copy
1647 VM_PROT_READ, // cur_prot
1648 VM_PROT_READ, // max_prot
1649 VM_INHERIT_SHARE)); // inherit behavior on fork
1650
1651 if (ret == KERN_SUCCESS) {
1652 vm_size_t user_ptr_size = vm_map_is_64bit(user_map) ? 8 : 4;
1653 ret = copyout(CAST_DOWN(void *, &user_addr), uap->addr, user_ptr_size );
1654
1655 if (ret != KERN_SUCCESS) {
1656 mach_vm_deallocate(user_map, user_addr, TYPEFILTER_ALLOC_SIZE);
1657 }
1658 }
1659
1660 return ret;
1661 }
1662
1663 /*
1664 * Support syscall SYS_kdebug_trace. U64->K32 args may get truncated in kdebug_trace64
1665 */
1666 int
1667 kdebug_trace(struct proc *p, struct kdebug_trace_args *uap, int32_t *retval)
1668 {
1669 struct kdebug_trace64_args uap64;
1670
1671 uap64.code = uap->code;
1672 uap64.arg1 = uap->arg1;
1673 uap64.arg2 = uap->arg2;
1674 uap64.arg3 = uap->arg3;
1675 uap64.arg4 = uap->arg4;
1676
1677 return kdebug_trace64(p, &uap64, retval);
1678 }
1679
1680 /*
1681 * Support syscall SYS_kdebug_trace64. 64-bit args on K32 will get truncated
1682 * to fit in 32-bit record format.
1683 *
1684 * It is intentional that error conditions are not checked until kdebug is
1685 * enabled. This is to match the userspace wrapper behavior, which is optimizing
1686 * for non-error case performance.
1687 */
1688 int
1689 kdebug_trace64(__unused struct proc *p, struct kdebug_trace64_args *uap, __unused int32_t *retval)
1690 {
1691 int err;
1692
1693 if (__probable(kdebug_enable == 0)) {
1694 return 0;
1695 }
1696
1697 if ((err = kdebug_validate_debugid(uap->code)) != 0) {
1698 return err;
1699 }
1700
1701 kernel_debug_internal(uap->code, (uintptr_t)uap->arg1,
1702 (uintptr_t)uap->arg2, (uintptr_t)uap->arg3, (uintptr_t)uap->arg4,
1703 (uintptr_t)thread_tid(current_thread()), 0);
1704
1705 return 0;
1706 }
1707
1708 /*
1709 * Adding enough padding to contain a full tracepoint for the last
1710 * portion of the string greatly simplifies the logic of splitting the
1711 * string between tracepoints. Full tracepoints can be generated using
1712 * the buffer itself, without having to manually add zeros to pad the
1713 * arguments.
1714 */
1715
1716 /* 2 string args in first tracepoint and 9 string data tracepoints */
1717 #define STR_BUF_ARGS (2 + (9 * 4))
1718 /* times the size of each arg on K64 */
1719 #define MAX_STR_LEN (STR_BUF_ARGS * sizeof(uint64_t))
1720 /* on K32, ending straddles a tracepoint, so reserve blanks */
1721 #define STR_BUF_SIZE (MAX_STR_LEN + (2 * sizeof(uint32_t)))
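
/*
 * Editorial arithmetic for the sizes above, on K64 (8-byte uintptr_t):
 * STR_BUF_ARGS = 2 + 9 * 4 = 38 arguments, so MAX_STR_LEN = 38 * 8 = 304
 * bytes of string data, and STR_BUF_SIZE = 304 + 8 = 312 bytes of buffer.
 * The two extra 32-bit words only matter on K32, where the final tracepoint
 * of a maximum-length string would otherwise read past the end of the buffer.
 */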
1722
1723 /*
1724 * This function does no error checking and assumes that it is called with
1725 * the correct arguments, including that the buffer pointed to by str is at
1726 * least STR_BUF_SIZE bytes. However, str must be aligned to word-size and
1727 * be NUL-terminated. In cases where a string can fit evenly into a final
1728 * tracepoint without its NUL-terminator, this function will not end those
1729 * strings with a NUL in trace. It's up to clients to look at the function
1730 * qualifier for DBG_FUNC_END in this case, to end the string.
1731 */
1732 static uint64_t
1733 kernel_debug_string_internal(uint32_t debugid, uint64_t str_id, void *vstr,
1734 size_t str_len)
1735 {
1736 /* str must be word-aligned */
1737 uintptr_t *str = vstr;
1738 size_t written = 0;
1739 uintptr_t thread_id;
1740 int i;
1741 uint32_t trace_debugid = TRACEDBG_CODE(DBG_TRACE_STRING,
1742 TRACE_STRING_GLOBAL);
1743
1744 thread_id = (uintptr_t)thread_tid(current_thread());
1745
1746 /* if the ID is being invalidated, just emit that */
1747 if (str_id != 0 && str_len == 0) {
1748 kernel_debug_internal(trace_debugid | DBG_FUNC_START | DBG_FUNC_END,
1749 (uintptr_t)debugid, (uintptr_t)str_id, 0, 0, thread_id, 0);
1750 return str_id;
1751 }
1752
1753 /* generate an ID, if necessary */
1754 if (str_id == 0) {
1755 str_id = OSIncrementAtomic64((SInt64 *)&g_curr_str_id);
1756 str_id = (str_id & STR_ID_MASK) | g_str_id_signature;
1757 }
1758
1759 trace_debugid |= DBG_FUNC_START;
1760 /* string can fit in a single tracepoint */
1761 if (str_len <= (2 * sizeof(uintptr_t))) {
1762 trace_debugid |= DBG_FUNC_END;
1763 }
1764
1765 kernel_debug_internal(trace_debugid, (uintptr_t)debugid, (uintptr_t)str_id,
1766 str[0], str[1], thread_id, 0);
1767
1768 trace_debugid &= KDBG_EVENTID_MASK;
1769 i = 2;
1770 written += 2 * sizeof(uintptr_t);
1771
1772 for (; written < str_len; i += 4, written += 4 * sizeof(uintptr_t)) {
1773 if ((written + (4 * sizeof(uintptr_t))) >= str_len) {
1774 trace_debugid |= DBG_FUNC_END;
1775 }
1776 kernel_debug_internal(trace_debugid, str[i],
1777 str[i + 1],
1778 str[i + 2],
1779 str[i + 3], thread_id, 0);
1780 }
1781
1782 return str_id;
1783 }
1784
1785 /*
1786 * Returns true if the current process can emit events, and false otherwise.
1787 * Trace system and scheduling events circumvent this check, as do events
1788 * emitted in interrupt context.
1789 */
1790 static boolean_t
1791 kdebug_current_proc_enabled(uint32_t debugid)
1792 {
1793 /* can't determine current process in interrupt context */
1794 if (ml_at_interrupt_context()) {
1795 return TRUE;
1796 }
1797
1798 /* always emit trace system and scheduling events */
1799 if ((KDBG_EXTRACT_CLASS(debugid) == DBG_TRACE ||
1800 (debugid & KDBG_CSC_MASK) == MACHDBG_CODE(DBG_MACH_SCHED, 0))) {
1801 return TRUE;
1802 }
1803
1804 if (kd_ctrl_page.kdebug_flags & KDBG_PIDCHECK) {
1805 proc_t cur_proc = current_proc();
1806
1807 /* only the process with the kdebug bit set is allowed */
1808 if (cur_proc && !(cur_proc->p_kdebug)) {
1809 return FALSE;
1810 }
1811 } else if (kd_ctrl_page.kdebug_flags & KDBG_PIDEXCLUDE) {
1812 proc_t cur_proc = current_proc();
1813
1814 /* every process except the one with the kdebug bit set is allowed */
1815 if (cur_proc && cur_proc->p_kdebug) {
1816 return FALSE;
1817 }
1818 }
1819
1820 return TRUE;
1821 }
1822
1823 boolean_t
1824 kdebug_debugid_enabled(uint32_t debugid)
1825 {
1826 /* if no filtering is enabled */
1827 if (!kd_ctrl_page.kdebug_slowcheck) {
1828 return TRUE;
1829 }
1830
1831 return kdebug_debugid_explicitly_enabled(debugid);
1832 }
1833
1834 boolean_t
1835 kdebug_debugid_explicitly_enabled(uint32_t debugid)
1836 {
1837 if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) {
1838 return typefilter_is_debugid_allowed(kdbg_typefilter, debugid);
1839 } else if (KDBG_EXTRACT_CLASS(debugid) == DBG_TRACE) {
1840 return TRUE;
1841 } else if (kd_ctrl_page.kdebug_flags & KDBG_RANGECHECK) {
1842 if (debugid < kdlog_beg || debugid > kdlog_end) {
1843 return FALSE;
1844 }
1845 } else if (kd_ctrl_page.kdebug_flags & KDBG_VALCHECK) {
1846 if ((debugid & KDBG_EVENTID_MASK) != kdlog_value1 &&
1847 (debugid & KDBG_EVENTID_MASK) != kdlog_value2 &&
1848 (debugid & KDBG_EVENTID_MASK) != kdlog_value3 &&
1849 (debugid & KDBG_EVENTID_MASK) != kdlog_value4) {
1850 return FALSE;
1851 }
1852 }
1853
1854 return TRUE;
1855 }
1856
1857 /*
1858 * Returns 0 if a string can be traced with these arguments. Returns errno
1859 * value if error occurred.
1860 */
1861 static errno_t
1862 kdebug_check_trace_string(uint32_t debugid, uint64_t str_id)
1863 {
1864 /* if there are function qualifiers on the debugid */
1865 if (debugid & ~KDBG_EVENTID_MASK) {
1866 return EINVAL;
1867 }
1868
1869 if (kdebug_validate_debugid(debugid)) {
1870 return EPERM;
1871 }
1872
1873 if (str_id != 0 && (str_id & STR_ID_SIG_MASK) != g_str_id_signature) {
1874 return EINVAL;
1875 }
1876
1877 return 0;
1878 }
1879
1880 /*
1881 * Implementation of KPI kernel_debug_string.
1882 */
1883 int
1884 kernel_debug_string(uint32_t debugid, uint64_t *str_id, const char *str)
1885 {
1886 /* arguments to tracepoints must be word-aligned */
1887 __attribute__((aligned(sizeof(uintptr_t)))) char str_buf[STR_BUF_SIZE];
1888 static_assert(sizeof(str_buf) > MAX_STR_LEN);
1889 vm_size_t len_copied;
1890 int err;
1891
1892 assert(str_id);
1893
1894 if (__probable(kdebug_enable == 0)) {
1895 return 0;
1896 }
1897
1898 if (!kdebug_current_proc_enabled(debugid)) {
1899 return 0;
1900 }
1901
1902 if (!kdebug_debugid_enabled(debugid)) {
1903 return 0;
1904 }
1905
1906 if ((err = kdebug_check_trace_string(debugid, *str_id)) != 0) {
1907 return err;
1908 }
1909
1910 if (str == NULL) {
1911 if (str_id == 0) {
1912 return EINVAL;
1913 }
1914
1915 *str_id = kernel_debug_string_internal(debugid, *str_id, NULL, 0);
1916 return 0;
1917 }
1918
1919 memset(str_buf, 0, sizeof(str_buf));
1920 len_copied = strlcpy(str_buf, str, MAX_STR_LEN + 1);
1921 *str_id = kernel_debug_string_internal(debugid, *str_id, str_buf,
1922 len_copied);
1923 return 0;
1924 }
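
/*
 * Editorial usage sketch for the KPI above (assuming tracing is enabled): a
 * first call with *str_id == 0 emits the string and writes back a generated
 * ID; a later call with that ID and a NULL string invalidates it. The debugid
 * chosen here is hypothetical.
 */
#if 0 /* example only */
static void
example_emit_and_drop_string(void)
{
	uint64_t str_id = 0;

	(void)kernel_debug_string(MACHDBG_CODE(DBG_MACH_SCHED, 0),
	    &str_id, "example string");
	/* ... later, retire the ID ... */
	(void)kernel_debug_string(MACHDBG_CODE(DBG_MACH_SCHED, 0),
	    &str_id, NULL);
}
#endif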
1925
1926 /*
1927 * Support syscall kdebug_trace_string.
1928 */
1929 int
1930 kdebug_trace_string(__unused struct proc *p,
1931 struct kdebug_trace_string_args *uap,
1932 uint64_t *retval)
1933 {
1934 __attribute__((aligned(sizeof(uintptr_t)))) char str_buf[STR_BUF_SIZE];
1935 static_assert(sizeof(str_buf) > MAX_STR_LEN);
1936 size_t len_copied;
1937 int err;
1938
1939 if (__probable(kdebug_enable == 0)) {
1940 return 0;
1941 }
1942
1943 if (!kdebug_current_proc_enabled(uap->debugid)) {
1944 return 0;
1945 }
1946
1947 if (!kdebug_debugid_enabled(uap->debugid)) {
1948 return 0;
1949 }
1950
1951 if ((err = kdebug_check_trace_string(uap->debugid, uap->str_id)) != 0) {
1952 return err;
1953 }
1954
1955 if (uap->str == USER_ADDR_NULL) {
1956 if (uap->str_id == 0) {
1957 return EINVAL;
1958 }
1959
1960 *retval = kernel_debug_string_internal(uap->debugid, uap->str_id,
1961 NULL, 0);
1962 return 0;
1963 }
1964
1965 memset(str_buf, 0, sizeof(str_buf));
1966 err = copyinstr(uap->str, str_buf, MAX_STR_LEN + 1, &len_copied);
1967
1968 /* it's alright to truncate the string, so allow ENAMETOOLONG */
1969 if (err == ENAMETOOLONG) {
1970 str_buf[MAX_STR_LEN] = '\0';
1971 } else if (err) {
1972 return err;
1973 }
1974
1975 if (len_copied <= 1) {
1976 return EINVAL;
1977 }
1978
1979 /* convert back to a length */
1980 len_copied--;
1981
1982 *retval = kernel_debug_string_internal(uap->debugid, uap->str_id, str_buf,
1983 len_copied);
1984 return 0;
1985 }
1986
1987 static void
1988 kdbg_lock_init(void)
1989 {
1990 static lck_grp_attr_t *kdebug_lck_grp_attr = NULL;
1991 static lck_attr_t *kdebug_lck_attr = NULL;
1992
1993 if (kd_ctrl_page.kdebug_flags & KDBG_LOCKINIT) {
1994 return;
1995 }
1996
1997 assert(kdebug_lck_grp_attr == NULL);
1998 kdebug_lck_grp_attr = lck_grp_attr_alloc_init();
1999 kdebug_lck_grp = lck_grp_alloc_init("kdebug", kdebug_lck_grp_attr);
2000 kdebug_lck_attr = lck_attr_alloc_init();
2001
2002 kds_spin_lock = lck_spin_alloc_init(kdebug_lck_grp, kdebug_lck_attr);
2003 kdw_spin_lock = lck_spin_alloc_init(kdebug_lck_grp, kdebug_lck_attr);
2004
2005 kd_ctrl_page.kdebug_flags |= KDBG_LOCKINIT;
2006 }
2007
2008 int
2009 kdbg_bootstrap(boolean_t early_trace)
2010 {
2011 kd_ctrl_page.kdebug_flags &= ~KDBG_WRAPPED;
2012
2013 return create_buffers(early_trace);
2014 }
2015
2016 int
2017 kdbg_reinit(boolean_t early_trace)
2018 {
2019 int ret = 0;
2020
2021 /*
2022 * Disable trace collecting
2023 * First make sure we're not in
2024 * the middle of cutting a trace
2025 */
2026 kernel_debug_disable();
2027
2028 /*
2029 * make sure the SLOW_NOLOG is seen
2030 * by everyone that might be trying
2031 * to cut a trace.
2032 */
2033 IOSleep(100);
2034
2035 delete_buffers();
2036
2037 kdbg_clear_thread_map();
2038 ret = kdbg_bootstrap(early_trace);
2039
2040 RAW_file_offset = 0;
2041 RAW_file_written = 0;
2042
2043 return ret;
2044 }
2045
2046 void
2047 kdbg_trace_data(struct proc *proc, long *arg_pid, long *arg_uniqueid)
2048 {
2049 if (!proc) {
2050 *arg_pid = 0;
2051 *arg_uniqueid = 0;
2052 } else {
2053 *arg_pid = proc->p_pid;
2054 *arg_uniqueid = proc->p_uniqueid;
2055 if ((uint64_t) *arg_uniqueid != proc->p_uniqueid) {
2056 *arg_uniqueid = 0;
2057 }
2058 }
2059 }
2060
2061
2062 void
2063 kdbg_trace_string(struct proc *proc, long *arg1, long *arg2, long *arg3, long *arg4)
2064 {
2065 char *dbg_nameptr;
2066 int dbg_namelen;
2067 long dbg_parms[4];
2068
2069 if (!proc) {
2070 *arg1 = 0;
2071 *arg2 = 0;
2072 *arg3 = 0;
2073 *arg4 = 0;
2074 return;
2075 }
2076 /*
2077 * Collect the pathname for tracing
2078 */
2079 dbg_nameptr = proc->p_comm;
2080 dbg_namelen = (int)strlen(proc->p_comm);
2081 dbg_parms[0] = 0L;
2082 dbg_parms[1] = 0L;
2083 dbg_parms[2] = 0L;
2084 dbg_parms[3] = 0L;
2085
2086 if (dbg_namelen > (int)sizeof(dbg_parms)) {
2087 dbg_namelen = (int)sizeof(dbg_parms);
2088 }
2089
2090 strncpy((char *)dbg_parms, dbg_nameptr, dbg_namelen);
2091
2092 *arg1 = dbg_parms[0];
2093 *arg2 = dbg_parms[1];
2094 *arg3 = dbg_parms[2];
2095 *arg4 = dbg_parms[3];
2096 }
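/*
 * Worked example of the packing above (editor's addition; assumes LP64 and
 * little-endian byte order, as on shipping Apple hardware): the four longs
 * give 32 bytes of storage, so a short p_comm such as "launchd" lands
 * entirely in the first argument and the rest stay zero:
 *
 *     *arg1 == 0x006468636e75616cL;   // the bytes 'l','a','u','n','c','h','d','\0'
 *     *arg2 == *arg3 == *arg4 == 0;
 *
 * Longer names spill byte-for-byte into arg2..arg4 and are truncated at 32
 * bytes; tools reassemble the name by concatenating the four arguments.
 */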
2097
2098 static void
2099 kdbg_resolve_map(thread_t th_act, void *opaque)
2100 {
2101 kd_threadmap *mapptr;
2102 krt_t *t = (krt_t *)opaque;
2103
2104 if (t->count < t->maxcount) {
2105 mapptr = &t->map[t->count];
2106 mapptr->thread = (uintptr_t)thread_tid(th_act);
2107
2108 (void) strlcpy(mapptr->command, t->atts->task_comm,
2109 sizeof(t->atts->task_comm));
2110 /*
2111 * Some kernel threads have no associated pid.
2112 * We still need to mark the entry as valid.
2113 */
2114 if (t->atts->pid) {
2115 mapptr->valid = t->atts->pid;
2116 } else {
2117 mapptr->valid = 1;
2118 }
2119
2120 t->count++;
2121 }
2122 }
2123
2124 /*
2125 *
2126 * Writes a cpumap for the given iops_list/cpu_count to the provided buffer.
2127 *
2128 * You may provide a buffer and size, or if you set the buffer to NULL, a
2129 * buffer of sufficient size will be allocated.
2130 *
2131 * If you provide a buffer and it is too small, sets cpumap_size to the number
2132 * of bytes required and returns EINVAL.
2133 *
2134 * On success, if you provided a buffer, cpumap_size is set to the number of
2135 * bytes written. If you did not provide a buffer, cpumap is set to the newly
2136 * allocated buffer and cpumap_size is set to the number of bytes allocated.
2137 *
2138 * NOTE: It may seem redundant to pass both iops and a cpu_count.
2139 *
2140 * We may be reporting data from "now", or from the "past".
2141 *
2142 * The "past" data would be for kdbg_readcpumap().
2143 *
2144 * If we do not pass both iops and cpu_count, and iops is NULL, this function
2145 * will need to read "now" state to get the number of cpus, which would be in
2146 * error if we were reporting "past" state.
2147 */
2148
2149 int
2150 kdbg_cpumap_init_internal(kd_iop_t* iops, uint32_t cpu_count, uint8_t** cpumap, uint32_t* cpumap_size)
2151 {
2152 assert(cpumap);
2153 assert(cpumap_size);
2154 assert(cpu_count);
2155 assert(!iops || iops->cpu_id + 1 == cpu_count);
2156
2157 uint32_t bytes_needed = sizeof(kd_cpumap_header) + cpu_count * sizeof(kd_cpumap);
2158 uint32_t bytes_available = *cpumap_size;
2159 *cpumap_size = bytes_needed;
2160
2161 if (*cpumap == NULL) {
2162 if (kmem_alloc(kernel_map, (vm_offset_t*)cpumap, (vm_size_t)*cpumap_size, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
2163 return ENOMEM;
2164 }
2165 bzero(*cpumap, *cpumap_size);
2166 } else if (bytes_available < bytes_needed) {
2167 return EINVAL;
2168 }
2169
2170 kd_cpumap_header* header = (kd_cpumap_header*)(uintptr_t)*cpumap;
2171
2172 header->version_no = RAW_VERSION1;
2173 header->cpu_count = cpu_count;
2174
2175 kd_cpumap* cpus = (kd_cpumap*)&header[1];
2176
2177 int32_t index = cpu_count - 1;
2178 while (iops) {
2179 cpus[index].cpu_id = iops->cpu_id;
2180 cpus[index].flags = KDBG_CPUMAP_IS_IOP;
2181 strlcpy(cpus[index].name, iops->callback.iop_name, sizeof(cpus->name));
2182
2183 iops = iops->next;
2184 index--;
2185 }
2186
2187 while (index >= 0) {
2188 cpus[index].cpu_id = index;
2189 cpus[index].flags = 0;
2190 strlcpy(cpus[index].name, "AP", sizeof(cpus->name));
2191
2192 index--;
2193 }
2194
2195 return KERN_SUCCESS;
2196 }
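/*
 * Usage sketch for kdbg_cpumap_init_internal (editor's addition, following
 * the contract documented above): either let the function allocate the
 * buffer, or hand it one and respect the size it reports back.
 *
 *     uint8_t *cpumap = NULL;
 *     uint32_t cpumap_size = 0;
 *
 *     // Allocation mode: *cpumap is NULL, cpumap_size is filled in on success.
 *     if (kdbg_cpumap_init_internal(kd_ctrl_page.kdebug_iops,
 *             kd_ctrl_page.kdebug_cpus, &cpumap, &cpumap_size) == KERN_SUCCESS) {
 *         // ... copy out or embed the cpumap ...
 *         kmem_free(kernel_map, (vm_offset_t)cpumap, cpumap_size);
 *     }
 *
 *     // Caller-supplied mode: if the provided buffer is too small, EINVAL is
 *     // returned and cpumap_size holds the number of bytes actually required.
 */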
2197
2198 void
2199 kdbg_thrmap_init(void)
2200 {
2201 ktrace_assert_lock_held();
2202
2203 if (kd_ctrl_page.kdebug_flags & KDBG_MAPINIT) {
2204 return;
2205 }
2206
2207 kd_mapptr = kdbg_thrmap_init_internal(0, &kd_mapsize, &kd_mapcount);
2208
2209 if (kd_mapptr) {
2210 kd_ctrl_page.kdebug_flags |= KDBG_MAPINIT;
2211 }
2212 }
2213
2214 static kd_threadmap *
2215 kdbg_thrmap_init_internal(unsigned int count, unsigned int *mapsize, unsigned int *mapcount)
2216 {
2217 kd_threadmap *mapptr;
2218 proc_t p;
2219 struct krt akrt;
2220 int tts_count = 0; /* number of task-to-string structures */
2221 struct tts *tts_mapptr;
2222 unsigned int tts_mapsize = 0;
2223 vm_offset_t kaddr;
2224
2225 assert(mapsize != NULL);
2226 assert(mapcount != NULL);
2227
2228 *mapcount = threads_count;
2229 tts_count = tasks_count;
2230
2231 /*
2232 * The proc count could change during buffer allocation,
2233 * so introduce a small fudge factor to bump up the
2234 * buffer sizes. This gives new tasks some chance of
2235 * making it into the tables. Bump up by 25%.
2236 */
2237 *mapcount += *mapcount / 4;
2238 tts_count += tts_count / 4;
2239
2240 *mapsize = *mapcount * sizeof(kd_threadmap);
2241
2242 if (count && count < *mapcount) {
2243 return 0;
2244 }
2245
2246 if ((kmem_alloc(kernel_map, &kaddr, (vm_size_t)*mapsize, VM_KERN_MEMORY_DIAG) == KERN_SUCCESS)) {
2247 bzero((void *)kaddr, *mapsize);
2248 mapptr = (kd_threadmap *)kaddr;
2249 } else {
2250 return 0;
2251 }
2252
2253 tts_mapsize = tts_count * sizeof(struct tts);
2254
2255 if ((kmem_alloc(kernel_map, &kaddr, (vm_size_t)tts_mapsize, VM_KERN_MEMORY_DIAG) == KERN_SUCCESS)) {
2256 bzero((void *)kaddr, tts_mapsize);
2257 tts_mapptr = (struct tts *)kaddr;
2258 } else {
2259 kmem_free(kernel_map, (vm_offset_t)mapptr, *mapsize);
2260
2261 return 0;
2262 }
2263
2264 /*
2265 * Save the proc's name and take a reference for each task associated
2266 * with a valid process.
2267 */
2268 proc_list_lock();
2269
2270 int i = 0;
2271 ALLPROC_FOREACH(p) {
2272 if (i >= tts_count) {
2273 break;
2274 }
2275 if (p->p_lflag & P_LEXIT) {
2276 continue;
2277 }
2278 if (p->task) {
2279 task_reference(p->task);
2280 tts_mapptr[i].task = p->task;
2281 tts_mapptr[i].pid = p->p_pid;
2282 (void)strlcpy(tts_mapptr[i].task_comm, proc_best_name(p), sizeof(tts_mapptr[i].task_comm));
2283 i++;
2284 }
2285 }
2286 tts_count = i;
2287
2288 proc_list_unlock();
2289
2290 /*
2291 * Initialize thread map data
2292 */
2293 akrt.map = mapptr;
2294 akrt.count = 0;
2295 akrt.maxcount = *mapcount;
2296
2297 for (i = 0; i < tts_count; i++) {
2298 akrt.atts = &tts_mapptr[i];
2299 task_act_iterate_wth_args(tts_mapptr[i].task, kdbg_resolve_map, &akrt);
2300 task_deallocate((task_t)tts_mapptr[i].task);
2301 }
2302 kmem_free(kernel_map, (vm_offset_t)tts_mapptr, tts_mapsize);
2303
2304 *mapcount = akrt.count;
2305
2306 return mapptr;
2307 }
2308
2309 static void
2310 kdbg_clear(void)
2311 {
2312 /*
2313 * Clean up the trace buffer
2314 * First make sure we're not in
2315 * the middle of cutting a trace
2316 */
2317 kernel_debug_disable();
2318 kdbg_disable_typefilter();
2319
2320 /*
2321 * make sure the SLOW_NOLOG is seen
2322 * by everyone that might be trying
2323 * to cut a trace.
2324 */
2325 IOSleep(100);
2326
2327 /* reset kdebug state for each process */
2328 if (kd_ctrl_page.kdebug_flags & (KDBG_PIDCHECK | KDBG_PIDEXCLUDE)) {
2329 proc_list_lock();
2330 proc_t p;
2331 ALLPROC_FOREACH(p) {
2332 p->p_kdebug = 0;
2333 }
2334 proc_list_unlock();
2335 }
2336
2337 kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
2338 kd_ctrl_page.kdebug_flags &= ~(KDBG_NOWRAP | KDBG_RANGECHECK | KDBG_VALCHECK);
2339 kd_ctrl_page.kdebug_flags &= ~(KDBG_PIDCHECK | KDBG_PIDEXCLUDE);
2340
2341 kd_ctrl_page.oldest_time = 0;
2342
2343 delete_buffers();
2344 nkdbufs = 0;
2345
2346 /* Clean up the thread map buffer */
2347 kdbg_clear_thread_map();
2348
2349 RAW_file_offset = 0;
2350 RAW_file_written = 0;
2351 }
2352
2353 void
2354 kdebug_reset(void)
2355 {
2356 ktrace_assert_lock_held();
2357
2358 kdbg_lock_init();
2359
2360 kdbg_clear();
2361 if (kdbg_typefilter) {
2362 typefilter_reject_all(kdbg_typefilter);
2363 typefilter_allow_class(kdbg_typefilter, DBG_TRACE);
2364 }
2365 }
2366
2367 void
2368 kdebug_free_early_buf(void)
2369 {
2370 #if !CONFIG_EMBEDDED
2371 /* We must be done with the buffer by now, so release it back to the VM.
2372 * On embedded targets this buffer is freed when the BOOTDATA segment is freed. */
2373 ml_static_mfree((vm_offset_t)&kd_early_buffer, sizeof(kd_early_buffer));
2374 #endif
2375 }
2376
2377 int
2378 kdbg_setpid(kd_regtype *kdr)
2379 {
2380 pid_t pid;
2381 int flag, ret = 0;
2382 struct proc *p;
2383
2384 pid = (pid_t)kdr->value1;
2385 flag = (int)kdr->value2;
2386
2387 if (pid >= 0) {
2388 if ((p = proc_find(pid)) == NULL) {
2389 ret = ESRCH;
2390 } else {
2391 if (flag == 1) {
2392 /*
2393 * turn on pid check for this and all pids
2394 */
2395 kd_ctrl_page.kdebug_flags |= KDBG_PIDCHECK;
2396 kd_ctrl_page.kdebug_flags &= ~KDBG_PIDEXCLUDE;
2397 kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
2398
2399 p->p_kdebug = 1;
2400 } else {
2401 /*
2402 * turn off pid check for this pid value
2403 * Don't turn off all pid checking though
2404 *
2405 * kd_ctrl_page.kdebug_flags &= ~KDBG_PIDCHECK;
2406 */
2407 p->p_kdebug = 0;
2408 }
2409 proc_rele(p);
2410 }
2411 } else {
2412 ret = EINVAL;
2413 }
2414
2415 return ret;
2416 }
2417
2418 /* This is for pid exclusion in the trace buffer */
2419 int
2420 kdbg_setpidex(kd_regtype *kdr)
2421 {
2422 pid_t pid;
2423 int flag, ret = 0;
2424 struct proc *p;
2425
2426 pid = (pid_t)kdr->value1;
2427 flag = (int)kdr->value2;
2428
2429 if (pid >= 0) {
2430 if ((p = proc_find(pid)) == NULL) {
2431 ret = ESRCH;
2432 } else {
2433 if (flag == 1) {
2434 /*
2435 * turn on pid exclusion
2436 */
2437 kd_ctrl_page.kdebug_flags |= KDBG_PIDEXCLUDE;
2438 kd_ctrl_page.kdebug_flags &= ~KDBG_PIDCHECK;
2439 kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
2440
2441 p->p_kdebug = 1;
2442 } else {
2443 /*
2444 * turn off pid exclusion for this pid value
2445 * Don't turn off all pid exclusion though
2446 *
2447 * kd_ctrl_page.kdebug_flags &= ~KDBG_PIDEXCLUDE;
2448 */
2449 p->p_kdebug = 0;
2450 }
2451 proc_rele(p);
2452 }
2453 } else {
2454 ret = EINVAL;
2455 }
2456
2457 return ret;
2458 }
2459
2460 /*
2461 * The following functions all operate on the "global" typefilter singleton.
2462 */
2463
2464 /*
2465 * The tf param is optional; you may pass either a valid typefilter or NULL.
2466 * If you pass a valid typefilter, you release ownership of that typefilter.
2467 */
2468 static int
2469 kdbg_initialize_typefilter(typefilter_t tf)
2470 {
2471 ktrace_assert_lock_held();
2472 assert(!kdbg_typefilter);
2473 assert(!kdbg_typefilter_memory_entry);
2474 typefilter_t deallocate_tf = NULL;
2475
2476 if (!tf && ((tf = deallocate_tf = typefilter_create()) == NULL)) {
2477 return ENOMEM;
2478 }
2479
2480 if ((kdbg_typefilter_memory_entry = typefilter_create_memory_entry(tf)) == MACH_PORT_NULL) {
2481 if (deallocate_tf) {
2482 typefilter_deallocate(deallocate_tf);
2483 }
2484 return ENOMEM;
2485 }
2486
2487 /*
2488 * The atomic store closes a race window with
2489 * the kdebug_typefilter syscall, which assumes
2490 * that any non-null kdbg_typefilter means a
2491 * valid memory_entry is available.
2492 */
2493 __c11_atomic_store(((_Atomic typefilter_t*)&kdbg_typefilter), tf, memory_order_release);
2494
2495 return KERN_SUCCESS;
2496 }
2497
2498 static int
2499 kdbg_copyin_typefilter(user_addr_t addr, size_t size)
2500 {
2501 int ret = ENOMEM;
2502 typefilter_t tf;
2503
2504 ktrace_assert_lock_held();
2505
2506 if (size != KDBG_TYPEFILTER_BITMAP_SIZE) {
2507 return EINVAL;
2508 }
2509
2510 if ((tf = typefilter_create())) {
2511 if ((ret = copyin(addr, tf, KDBG_TYPEFILTER_BITMAP_SIZE)) == 0) {
2512 /* The kernel typefilter must always allow DBG_TRACE */
2513 typefilter_allow_class(tf, DBG_TRACE);
2514
2515 /*
2516 * If this is the first typefilter, claim it.
2517 * Otherwise copy and deallocate.
2518 *
2519 * Allocating a typefilter for the copyin allows
2520 * the kernel to hold the invariant that DBG_TRACE
2521 * must always be allowed.
2522 */
2523 if (!kdbg_typefilter) {
2524 if ((ret = kdbg_initialize_typefilter(tf))) {
2525 return ret;
2526 }
2527 tf = NULL;
2528 } else {
2529 typefilter_copy(kdbg_typefilter, tf);
2530 }
2531
2532 kdbg_enable_typefilter();
2533 kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_TYPEFILTER_CHANGED, kdbg_typefilter);
2534 }
2535
2536 if (tf) {
2537 typefilter_deallocate(tf);
2538 }
2539 }
2540
2541 return ret;
2542 }
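/*
 * Sketch of the typefilter wire format consumed above (editor's addition;
 * the exact bit indexing is an assumption based on the filter being keyed by
 * the class/subclass pair): userspace passes KDBG_TYPEFILTER_BITMAP_SIZE
 * bytes with one bit per (class, subclass), roughly built as:
 *
 *     uint8_t *tf = calloc(1, KDBG_TYPEFILTER_BITMAP_SIZE);
 *     uint16_t csc = (DBG_BSD << 8) | 0x01;          // a class/subclass pair
 *     tf[csc / 8] |= (uint8_t)(1 << (csc % 8));      // allow that pair
 *
 * Whatever the caller passes, the copyin path above re-allows DBG_TRACE, so
 * the trace subsystem's own events always survive the filter.
 */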
2543
2544 /*
2545 * Enable the flags in the control page for the typefilter. Assumes that
2546 * kdbg_typefilter has already been allocated, so events being written
2547 * don't see a bad typefilter.
2548 */
2549 static void
2550 kdbg_enable_typefilter(void)
2551 {
2552 assert(kdbg_typefilter);
2553 kd_ctrl_page.kdebug_flags &= ~(KDBG_RANGECHECK | KDBG_VALCHECK);
2554 kd_ctrl_page.kdebug_flags |= KDBG_TYPEFILTER_CHECK;
2555 kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
2556 commpage_update_kdebug_state();
2557 }
2558
2559 /*
2560 * Disable the flags in the control page for the typefilter. The typefilter
2561 * may be safely deallocated shortly after this function returns.
2562 */
2563 static void
2564 kdbg_disable_typefilter(void)
2565 {
2566 bool notify_iops = kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK;
2567 kd_ctrl_page.kdebug_flags &= ~KDBG_TYPEFILTER_CHECK;
2568
2569 if ((kd_ctrl_page.kdebug_flags & (KDBG_PIDCHECK | KDBG_PIDEXCLUDE))) {
2570 kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
2571 } else {
2572 kdbg_set_flags(SLOW_CHECKS, 0, FALSE);
2573 }
2574 commpage_update_kdebug_state();
2575
2576 if (notify_iops) {
2577 /*
2578 * Notify IOPs that the typefilter will now allow everything.
2579 * Otherwise, they won't know a typefilter is no longer in
2580 * effect.
2581 */
2582 typefilter_allow_all(kdbg_typefilter);
2583 kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops,
2584 KD_CALLBACK_TYPEFILTER_CHANGED, kdbg_typefilter);
2585 }
2586 }
2587
2588 uint32_t
2589 kdebug_commpage_state(void)
2590 {
2591 if (kdebug_enable) {
2592 if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) {
2593 return KDEBUG_COMMPAGE_ENABLE_TYPEFILTER | KDEBUG_COMMPAGE_ENABLE_TRACE;
2594 }
2595
2596 return KDEBUG_COMMPAGE_ENABLE_TRACE;
2597 }
2598
2599 return 0;
2600 }
2601
2602 int
2603 kdbg_setreg(kd_regtype * kdr)
2604 {
2605 int ret = 0;
2606 unsigned int val_1, val_2, val;
2607 switch (kdr->type) {
2608 case KDBG_CLASSTYPE:
2609 val_1 = (kdr->value1 & 0xff);
2610 val_2 = (kdr->value2 & 0xff);
2611 kdlog_beg = (val_1 << 24);
2612 kdlog_end = (val_2 << 24);
2613 kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
2614 kd_ctrl_page.kdebug_flags &= ~KDBG_VALCHECK; /* Turn off specific value check */
2615 kd_ctrl_page.kdebug_flags |= (KDBG_RANGECHECK | KDBG_CLASSTYPE);
2616 kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
2617 break;
2618 case KDBG_SUBCLSTYPE:
2619 val_1 = (kdr->value1 & 0xff);
2620 val_2 = (kdr->value2 & 0xff);
2621 val = val_2 + 1;
2622 kdlog_beg = ((val_1 << 24) | (val_2 << 16));
2623 kdlog_end = ((val_1 << 24) | (val << 16));
2624 kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
2625 kd_ctrl_page.kdebug_flags &= ~KDBG_VALCHECK; /* Turn off specific value check */
2626 kd_ctrl_page.kdebug_flags |= (KDBG_RANGECHECK | KDBG_SUBCLSTYPE);
2627 kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
2628 break;
2629 case KDBG_RANGETYPE:
2630 kdlog_beg = (kdr->value1);
2631 kdlog_end = (kdr->value2);
2632 kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
2633 kd_ctrl_page.kdebug_flags &= ~KDBG_VALCHECK; /* Turn off specific value check */
2634 kd_ctrl_page.kdebug_flags |= (KDBG_RANGECHECK | KDBG_RANGETYPE);
2635 kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
2636 break;
2637 case KDBG_VALCHECK:
2638 kdlog_value1 = (kdr->value1);
2639 kdlog_value2 = (kdr->value2);
2640 kdlog_value3 = (kdr->value3);
2641 kdlog_value4 = (kdr->value4);
2642 kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
2643 kd_ctrl_page.kdebug_flags &= ~KDBG_RANGECHECK; /* Turn off range check */
2644 kd_ctrl_page.kdebug_flags |= KDBG_VALCHECK; /* Turn on specific value check */
2645 kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
2646 break;
2647 case KDBG_TYPENONE:
2648 kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
2649
2650 if ((kd_ctrl_page.kdebug_flags & (KDBG_RANGECHECK | KDBG_VALCHECK |
2651 KDBG_PIDCHECK | KDBG_PIDEXCLUDE |
2652 KDBG_TYPEFILTER_CHECK))) {
2653 kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
2654 } else {
2655 kdbg_set_flags(SLOW_CHECKS, 0, FALSE);
2656 }
2657
2658 kdlog_beg = 0;
2659 kdlog_end = 0;
2660 break;
2661 default:
2662 ret = EINVAL;
2663 break;
2664 }
2665 return ret;
2666 }
2667
2668 static int
2669 kdbg_write_to_vnode(caddr_t buffer, size_t size, vnode_t vp, vfs_context_t ctx, off_t file_offset)
2670 {
2671 return vn_rdwr(UIO_WRITE, vp, buffer, size, file_offset, UIO_SYSSPACE, IO_NODELOCKED | IO_UNIT,
2672 vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
2673 }
2674
2675 int
2676 kdbg_write_v3_chunk_header(user_addr_t buffer, uint32_t tag, uint32_t sub_tag, uint64_t length, vnode_t vp, vfs_context_t ctx)
2677 {
2678 int ret = KERN_SUCCESS;
2679 kd_chunk_header_v3 header = {
2680 .tag = tag,
2681 .sub_tag = sub_tag,
2682 .length = length,
2683 };
2684
2685 // Check that only one of them is valid
2686 assert(!buffer ^ !vp);
2687 assert((vp == NULL) || (ctx != NULL));
2688
2689 // Write the chunk header to the vnode or copy it out to the user buffer
2690 if (buffer || vp) {
2691 if (vp) {
2692 ret = kdbg_write_to_vnode((caddr_t)&header, sizeof(kd_chunk_header_v3), vp, ctx, RAW_file_offset);
2693 if (ret) {
2694 goto write_error;
2695 }
2696 RAW_file_offset += (sizeof(kd_chunk_header_v3));
2697 } else {
2698 ret = copyout(&header, buffer, sizeof(kd_chunk_header_v3));
2699 if (ret) {
2700 goto write_error;
2701 }
2702 }
2703 }
2704 write_error:
2705 return ret;
2706 }
2707
2708 int
2709 kdbg_write_v3_chunk_header_to_buffer(void * buffer, uint32_t tag, uint32_t sub_tag, uint64_t length)
2710 {
2711 kd_chunk_header_v3 header = {
2712 .tag = tag,
2713 .sub_tag = sub_tag,
2714 .length = length,
2715 };
2716
2717 if (!buffer) {
2718 return 0;
2719 }
2720
2721 memcpy(buffer, &header, sizeof(kd_chunk_header_v3));
2722
2723 return sizeof(kd_chunk_header_v3);
2724 }
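/*
 * Layout sketch of a v3 chunk as produced by the helpers above (editor's
 * addition, derived from the kd_chunk_header_v3 usage in this file): every
 * chunk is a fixed header followed by `length` bytes of payload:
 *
 *     +---------+-------------+------------+---------------------------+
 *     | tag (4) | sub_tag (4) | length (8) | payload (`length` bytes)  |
 *     +---------+-------------+------------+---------------------------+
 *
 * For example, the cpu map is written as { V3_CPU_MAP, V3_CPUMAP_VERSION,
 * size } followed by the kd_cpumap entries, the thread map as
 * { V3_THREAD_MAP, V3_THRMAP_VERSION, size } followed by kd_threadmap
 * entries, and event-data chunks lead their payload with an 8-byte
 * future_chunk_timestamp (see kdbg_write_v3_event_chunk_header below).
 */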
2725
2726 int
2727 kdbg_write_v3_chunk_to_fd(uint32_t tag, uint32_t sub_tag, uint64_t length, void *payload, uint64_t payload_size, int fd)
2728 {
2729 proc_t p;
2730 struct vfs_context context;
2731 struct fileproc *fp;
2732 vnode_t vp;
2733 p = current_proc();
2734
2735 proc_fdlock(p);
2736 if ((fp_lookup(p, fd, &fp, 1))) {
2737 proc_fdunlock(p);
2738 return EFAULT;
2739 }
2740
2741 context.vc_thread = current_thread();
2742 context.vc_ucred = fp->f_fglob->fg_cred;
2743
2744 if (FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_VNODE) {
2745 fp_drop(p, fd, fp, 1);
2746 proc_fdunlock(p);
2747 return EBADF;
2748 }
2749 vp = (struct vnode *) fp->f_fglob->fg_data;
2750 proc_fdunlock(p);
2751
2752 if ((vnode_getwithref(vp)) == 0) {
2753 RAW_file_offset = fp->f_fglob->fg_offset;
2754
2755 kd_chunk_header_v3 chunk_header = {
2756 .tag = tag,
2757 .sub_tag = sub_tag,
2758 .length = length,
2759 };
2760
2761 int ret = kdbg_write_to_vnode((caddr_t) &chunk_header, sizeof(kd_chunk_header_v3), vp, &context, RAW_file_offset);
2762 if (!ret) {
2763 RAW_file_offset += sizeof(kd_chunk_header_v3);
2764 }
2765
2766 ret = kdbg_write_to_vnode((caddr_t) payload, (size_t) payload_size, vp, &context, RAW_file_offset);
2767 if (!ret) {
2768 RAW_file_offset += payload_size;
2769 }
2770
2771 fp->f_fglob->fg_offset = RAW_file_offset;
2772 vnode_put(vp);
2773 }
2774
2775 fp_drop(p, fd, fp, 0);
2776 return KERN_SUCCESS;
2777 }
2778
2779 user_addr_t
2780 kdbg_write_v3_event_chunk_header(user_addr_t buffer, uint32_t tag, uint64_t length, vnode_t vp, vfs_context_t ctx)
2781 {
2782 uint64_t future_chunk_timestamp = 0;
2783 length += sizeof(uint64_t);
2784
2785 if (kdbg_write_v3_chunk_header(buffer, tag, V3_EVENT_DATA_VERSION, length, vp, ctx)) {
2786 return 0;
2787 }
2788 if (buffer) {
2789 buffer += sizeof(kd_chunk_header_v3);
2790 }
2791
2792 // Check that only one of them is valid
2793 assert(!buffer ^ !vp);
2794 assert((vp == NULL) || (ctx != NULL));
2795
2796 // Write the 8-byte future_chunk_timestamp field in the payload
2797 if (buffer || vp) {
2798 if (vp) {
2799 int ret = kdbg_write_to_vnode((caddr_t)&future_chunk_timestamp, sizeof(uint64_t), vp, ctx, RAW_file_offset);
2800 if (!ret) {
2801 RAW_file_offset += (sizeof(uint64_t));
2802 }
2803 } else {
2804 if (copyout(&future_chunk_timestamp, buffer, sizeof(uint64_t))) {
2805 return 0;
2806 }
2807 }
2808 }
2809
2810 return buffer + sizeof(uint64_t);
2811 }
2812
2813 int
2814 kdbg_write_v3_header(user_addr_t user_header, size_t *user_header_size, int fd)
2815 {
2816 int ret = KERN_SUCCESS;
2817
2818 uint8_t* cpumap = 0;
2819 uint32_t cpumap_size = 0;
2820 uint32_t thrmap_size = 0;
2821
2822 size_t bytes_needed = 0;
2823
2824 // Check that only one of them is valid
2825 assert(!user_header ^ !fd);
2826 assert(user_header_size);
2827
2828 if (!(kd_ctrl_page.kdebug_flags & KDBG_BUFINIT)) {
2829 ret = EINVAL;
2830 goto bail;
2831 }
2832
2833 if (!(user_header || fd)) {
2834 ret = EINVAL;
2835 goto bail;
2836 }
2837
2838 // Initialize the cpu map
2839 ret = kdbg_cpumap_init_internal(kd_ctrl_page.kdebug_iops, kd_ctrl_page.kdebug_cpus, &cpumap, &cpumap_size);
2840 if (ret != KERN_SUCCESS) {
2841 goto bail;
2842 }
2843
2844 // Check if a thread map is initialized
2845 if (!kd_mapptr) {
2846 ret = EINVAL;
2847 goto bail;
2848 }
2849 thrmap_size = kd_mapcount * sizeof(kd_threadmap);
2850
2851 mach_timebase_info_data_t timebase = {0, 0};
2852 clock_timebase_info(&timebase);
2853
2854 // Setup the header.
2855 // See the v3 header description in sys/kdebug.h for more information.
2856 kd_header_v3 header = {
2857 .tag = RAW_VERSION3,
2858 .sub_tag = V3_HEADER_VERSION,
2859 .length = (sizeof(kd_header_v3) + cpumap_size - sizeof(kd_cpumap_header)),
2860 .timebase_numer = timebase.numer,
2861 .timebase_denom = timebase.denom,
2862 .timestamp = 0, /* FIXME rdar://problem/22053009 */
2863 .walltime_secs = 0,
2864 .walltime_usecs = 0,
2865 .timezone_minuteswest = 0,
2866 .timezone_dst = 0,
2867 #if defined(__LP64__)
2868 .flags = 1,
2869 #else
2870 .flags = 0,
2871 #endif
2872 };
2873
2874 // If it's a buffer, check that we have enough space to copy out the header and the maps.
2875 if (user_header) {
2876 bytes_needed = header.length + thrmap_size + (2 * sizeof(kd_chunk_header_v3));
2877 if (*user_header_size < bytes_needed) {
2878 ret = EINVAL;
2879 goto bail;
2880 }
2881 }
2882
2883 // Start writing the header
2884 if (fd) {
2885 void *hdr_ptr = (void *)(((uintptr_t) &header) + sizeof(kd_chunk_header_v3));
2886 size_t payload_size = (sizeof(kd_header_v3) - sizeof(kd_chunk_header_v3));
2887
2888 ret = kdbg_write_v3_chunk_to_fd(RAW_VERSION3, V3_HEADER_VERSION, header.length, hdr_ptr, payload_size, fd);
2889 if (ret) {
2890 goto bail;
2891 }
2892 } else {
2893 if (copyout(&header, user_header, sizeof(kd_header_v3))) {
2894 ret = EFAULT;
2895 goto bail;
2896 }
2897 // Update the user pointer
2898 user_header += sizeof(kd_header_v3);
2899 }
2900
2901 // Write a cpu map. This is a sub chunk of the header
2902 cpumap = (uint8_t*)((uintptr_t) cpumap + sizeof(kd_cpumap_header));
2903 size_t payload_size = (size_t)(cpumap_size - sizeof(kd_cpumap_header));
2904 if (fd) {
2905 ret = kdbg_write_v3_chunk_to_fd(V3_CPU_MAP, V3_CPUMAP_VERSION, payload_size, (void *)cpumap, payload_size, fd);
2906 if (ret) {
2907 goto bail;
2908 }
2909 } else {
2910 ret = kdbg_write_v3_chunk_header(user_header, V3_CPU_MAP, V3_CPUMAP_VERSION, payload_size, NULL, NULL);
2911 if (ret) {
2912 goto bail;
2913 }
2914 user_header += sizeof(kd_chunk_header_v3);
2915 if (copyout(cpumap, user_header, payload_size)) {
2916 ret = EFAULT;
2917 goto bail;
2918 }
2919 // Update the user pointer
2920 user_header += payload_size;
2921 }
2922
2923 // Write a thread map
2924 if (fd) {
2925 ret = kdbg_write_v3_chunk_to_fd(V3_THREAD_MAP, V3_THRMAP_VERSION, thrmap_size, (void *)kd_mapptr, thrmap_size, fd);
2926 if (ret) {
2927 goto bail;
2928 }
2929 } else {
2930 ret = kdbg_write_v3_chunk_header(user_header, V3_THREAD_MAP, V3_THRMAP_VERSION, thrmap_size, NULL, NULL);
2931 if (ret) {
2932 goto bail;
2933 }
2934 user_header += sizeof(kd_chunk_header_v3);
2935 if (copyout(kd_mapptr, user_header, thrmap_size)) {
2936 ret = EFAULT;
2937 goto bail;
2938 }
2939 user_header += thrmap_size;
2940 }
2941
2942 if (fd) {
2943 RAW_file_written += bytes_needed;
2944 }
2945
2946 *user_header_size = bytes_needed;
2947 bail:
2948 if (cpumap) {
2949 kmem_free(kernel_map, (vm_offset_t)cpumap, cpumap_size);
2950 }
2951 return ret;
2952 }
2953
2954 int
2955 kdbg_readcpumap(user_addr_t user_cpumap, size_t *user_cpumap_size)
2956 {
2957 uint8_t* cpumap = NULL;
2958 uint32_t cpumap_size = 0;
2959 int ret = KERN_SUCCESS;
2960
2961 if (kd_ctrl_page.kdebug_flags & KDBG_BUFINIT) {
2962 if (kdbg_cpumap_init_internal(kd_ctrl_page.kdebug_iops, kd_ctrl_page.kdebug_cpus, &cpumap, &cpumap_size) == KERN_SUCCESS) {
2963 if (user_cpumap) {
2964 size_t bytes_to_copy = (*user_cpumap_size >= cpumap_size) ? cpumap_size : *user_cpumap_size;
2965 if (copyout(cpumap, user_cpumap, (size_t)bytes_to_copy)) {
2966 ret = EFAULT;
2967 }
2968 }
2969 *user_cpumap_size = cpumap_size;
2970 kmem_free(kernel_map, (vm_offset_t)cpumap, cpumap_size);
2971 } else {
2972 ret = EINVAL;
2973 }
2974 } else {
2975 ret = EINVAL;
2976 }
2977
2978 return ret;
2979 }
2980
2981 int
2982 kdbg_readcurthrmap(user_addr_t buffer, size_t *bufsize)
2983 {
2984 kd_threadmap *mapptr;
2985 unsigned int mapsize;
2986 unsigned int mapcount;
2987 unsigned int count = 0;
2988 int ret = 0;
2989
2990 count = *bufsize / sizeof(kd_threadmap);
2991 *bufsize = 0;
2992
2993 if ((mapptr = kdbg_thrmap_init_internal(count, &mapsize, &mapcount))) {
2994 if (copyout(mapptr, buffer, mapcount * sizeof(kd_threadmap))) {
2995 ret = EFAULT;
2996 } else {
2997 *bufsize = (mapcount * sizeof(kd_threadmap));
2998 }
2999
3000 kmem_free(kernel_map, (vm_offset_t)mapptr, mapsize);
3001 } else {
3002 ret = EINVAL;
3003 }
3004
3005 return ret;
3006 }
3007
3008 static int
3009 kdbg_write_v1_header(boolean_t write_thread_map, vnode_t vp, vfs_context_t ctx)
3010 {
3011 int ret = 0;
3012 RAW_header header;
3013 clock_sec_t secs;
3014 clock_usec_t usecs;
3015 char *pad_buf;
3016 uint32_t pad_size;
3017 uint32_t extra_thread_count = 0;
3018 uint32_t cpumap_size;
3019 size_t map_size = 0;
3020 size_t map_count = 0;
3021
3022 if (write_thread_map) {
3023 assert(kd_ctrl_page.kdebug_flags & KDBG_MAPINIT);
3024 map_count = kd_mapcount;
3025 map_size = map_count * sizeof(kd_threadmap);
3026 }
3027
3028 /*
3029 * Without the buffers initialized, we cannot construct a CPU map or a
3030 * thread map, and cannot write a header.
3031 */
3032 if (!(kd_ctrl_page.kdebug_flags & KDBG_BUFINIT)) {
3033 return EINVAL;
3034 }
3035
3036 /*
3037 * To write a RAW_VERSION1+ file, we must embed a cpumap in the
3038 * "padding" used to page align the events following the threadmap. If
3039 * the threadmap happens to not require enough padding, we artificially
3040 * increase its footprint until it needs enough padding.
3041 */
3042
3043 assert(vp);
3044 assert(ctx);
3045
3046 pad_size = PAGE_16KB - ((sizeof(RAW_header) + map_size) & PAGE_MASK_64);
3047 cpumap_size = sizeof(kd_cpumap_header) + kd_ctrl_page.kdebug_cpus * sizeof(kd_cpumap);
3048
3049 if (cpumap_size > pad_size) {
3050 /* If the cpu map doesn't fit in the current available pad_size,
3051 * we increase the pad_size by 16K. We do this so that the event
3052 * data is always available on a page aligned boundary for both
3053 * 4k and 16k systems. We enforce this alignment for the event
3054 * data so that we can take advantage of optimized file/disk writes.
3055 */
3056 pad_size += PAGE_16KB;
3057 }
3058
3059 /* The way we silently embed a cpumap in the "padding" is by artificially
3060 * increasing the number of thread entries. However, we also need to ensure that
3061 * the cpumap is embedded in the last 4K page before where the event data is expected.
3062 * This way the tools can read the data starting at the next page boundary on both
3063 * 4K and 16K systems, preserving compatibility with older versions of the tools.
3064 */
3065 if (pad_size > PAGE_4KB) {
3066 pad_size -= PAGE_4KB;
3067 extra_thread_count = (pad_size / sizeof(kd_threadmap)) + 1;
3068 }
3069
3070 memset(&header, 0, sizeof(header));
3071 header.version_no = RAW_VERSION1;
3072 header.thread_count = map_count + extra_thread_count;
3073
3074 clock_get_calendar_microtime(&secs, &usecs);
3075 header.TOD_secs = secs;
3076 header.TOD_usecs = usecs;
3077
3078 ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)&header, sizeof(RAW_header), RAW_file_offset,
3079 UIO_SYSSPACE, IO_NODELOCKED | IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
3080 if (ret) {
3081 goto write_error;
3082 }
3083 RAW_file_offset += sizeof(RAW_header);
3084 RAW_file_written += sizeof(RAW_header);
3085
3086 if (write_thread_map) {
3087 ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)kd_mapptr, map_size, RAW_file_offset,
3088 UIO_SYSSPACE, IO_NODELOCKED | IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
3089 if (ret) {
3090 goto write_error;
3091 }
3092
3093 RAW_file_offset += map_size;
3094 RAW_file_written += map_size;
3095 }
3096
3097 if (extra_thread_count) {
3098 pad_size = extra_thread_count * sizeof(kd_threadmap);
3099 pad_buf = kalloc(pad_size);
3100 if (!pad_buf) {
3101 ret = ENOMEM;
3102 goto write_error;
3103 }
3104 memset(pad_buf, 0, pad_size);
3105
3106 ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)pad_buf, pad_size, RAW_file_offset,
3107 UIO_SYSSPACE, IO_NODELOCKED | IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
3108 kfree(pad_buf, pad_size);
3109 if (ret) {
3110 goto write_error;
3111 }
3112
3113 RAW_file_offset += pad_size;
3114 RAW_file_written += pad_size;
3115 }
3116
3117 pad_size = PAGE_SIZE - (RAW_file_offset & PAGE_MASK_64);
3118 if (pad_size) {
3119 pad_buf = (char *)kalloc(pad_size);
3120 if (!pad_buf) {
3121 ret = ENOMEM;
3122 goto write_error;
3123 }
3124 memset(pad_buf, 0, pad_size);
3125
3126 /*
3127 * embed a cpumap in the padding bytes.
3128 * older code will skip this.
3129 * newer code will know how to read it.
3130 */
3131 uint32_t temp = pad_size;
3132 if (kdbg_cpumap_init_internal(kd_ctrl_page.kdebug_iops, kd_ctrl_page.kdebug_cpus, (uint8_t**)&pad_buf, &temp) != KERN_SUCCESS) {
3133 memset(pad_buf, 0, pad_size);
3134 }
3135
3136 ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)pad_buf, pad_size, RAW_file_offset,
3137 UIO_SYSSPACE, IO_NODELOCKED | IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
3138 kfree(pad_buf, pad_size);
3139 if (ret) {
3140 goto write_error;
3141 }
3142
3143 RAW_file_offset += pad_size;
3144 RAW_file_written += pad_size;
3145 }
3146
3147 write_error:
3148 return ret;
3149 }
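/*
 * Resulting RAW_VERSION1 file layout (editor's addition, summarizing the
 * padding logic above):
 *
 *     +------------+----------------------------------+--------------------+
 *     | RAW_header | threadmap entries (plus zeroed   | final pad page     |
 *     |            | "extra" entries used as padding) | with embedded      |
 *     |            |                                  | cpumap             |
 *     +------------+----------------------------------+--------------------+
 *     | event data, starting on the next page boundary ...                 |
 *     +---------------------------------------------------------------------+
 *
 * Older readers treat the cpumap bytes as padding and seek to the
 * page-aligned event data; newer readers recover the cpumap from the last
 * page preceding the events.
 */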
3150
3151 static void
3152 kdbg_clear_thread_map(void)
3153 {
3154 ktrace_assert_lock_held();
3155
3156 if (kd_ctrl_page.kdebug_flags & KDBG_MAPINIT) {
3157 assert(kd_mapptr != NULL);
3158 kmem_free(kernel_map, (vm_offset_t)kd_mapptr, kd_mapsize);
3159 kd_mapptr = NULL;
3160 kd_mapsize = 0;
3161 kd_mapcount = 0;
3162 kd_ctrl_page.kdebug_flags &= ~KDBG_MAPINIT;
3163 }
3164 }
3165
3166 /*
3167 * Write out a version 1 header and the thread map, if it is initialized, to a
3168 * vnode. Used by KDWRITEMAP and kdbg_dump_trace_to_file.
3169 *
3170 * Returns write errors from vn_rdwr if a write fails. Returns ENODATA if the
3171 * thread map has not been initialized, but the header will still be written.
3172 * Returns ENOMEM if padding could not be allocated. Returns 0 otherwise.
3173 */
3174 static int
3175 kdbg_write_thread_map(vnode_t vp, vfs_context_t ctx)
3176 {
3177 int ret = 0;
3178 boolean_t map_initialized;
3179
3180 ktrace_assert_lock_held();
3181 assert(ctx != NULL);
3182
3183 map_initialized = (kd_ctrl_page.kdebug_flags & KDBG_MAPINIT);
3184
3185 ret = kdbg_write_v1_header(map_initialized, vp, ctx);
3186 if (ret == 0) {
3187 if (map_initialized) {
3188 kdbg_clear_thread_map();
3189 } else {
3190 ret = ENODATA;
3191 }
3192 }
3193
3194 return ret;
3195 }
3196
3197 /*
3198 * Copy out the thread map to a user space buffer. Used by KDTHRMAP.
3199 *
3200 * Returns copyout errors if the copyout fails. Returns ENODATA if the thread
3201 * map has not been initialized. Returns EINVAL if the buffer provided is not
3202 * large enough for the entire thread map. Returns 0 otherwise.
3203 */
3204 static int
3205 kdbg_copyout_thread_map(user_addr_t buffer, size_t *buffer_size)
3206 {
3207 boolean_t map_initialized;
3208 size_t map_size;
3209 int ret = 0;
3210
3211 ktrace_assert_lock_held();
3212 assert(buffer_size != NULL);
3213
3214 map_initialized = (kd_ctrl_page.kdebug_flags & KDBG_MAPINIT);
3215 if (!map_initialized) {
3216 return ENODATA;
3217 }
3218
3219 map_size = kd_mapcount * sizeof(kd_threadmap);
3220 if (*buffer_size < map_size) {
3221 return EINVAL;
3222 }
3223
3224 ret = copyout(kd_mapptr, buffer, map_size);
3225 if (ret == 0) {
3226 kdbg_clear_thread_map();
3227 }
3228
3229 return ret;
3230 }
3231
3232 int
3233 kdbg_readthrmap_v3(user_addr_t buffer, size_t buffer_size, int fd)
3234 {
3235 int ret = 0;
3236 boolean_t map_initialized;
3237 size_t map_size;
3238
3239 ktrace_assert_lock_held();
3240
3241 if ((!fd && !buffer) || (fd && buffer)) {
3242 return EINVAL;
3243 }
3244
3245 map_initialized = (kd_ctrl_page.kdebug_flags & KDBG_MAPINIT);
3246 map_size = kd_mapcount * sizeof(kd_threadmap);
3247
3248 if (map_initialized && (buffer_size >= map_size)) {
3249 ret = kdbg_write_v3_header(buffer, &buffer_size, fd);
3250
3251 if (ret == 0) {
3252 kdbg_clear_thread_map();
3253 }
3254 } else {
3255 ret = EINVAL;
3256 }
3257
3258 return ret;
3259 }
3260
3261 static void
3262 kdbg_set_nkdbufs(unsigned int req_nkdbufs)
3263 {
3264 /*
3265 * Only allow allocation up to half the available memory (sane_size).
3266 */
3267 uint64_t max_nkdbufs = (sane_size / 2) / sizeof(kd_buf);
3268 nkdbufs = (req_nkdbufs > max_nkdbufs) ? max_nkdbufs : req_nkdbufs;
3269 }
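/*
 * Worked example (editor's addition; assumes the 64-byte LP64 kd_buf): with
 * sane_size of 8 GB, max_nkdbufs = (8 GB / 2) / 64 = 67,108,864 events, so a
 * larger request is silently clamped to that count.
 */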
3270
3271 /*
3272 * Block until there are `n_storage_threshold` storage units filled with
3273 * events or `timeout_ms` milliseconds have passed. If `locked_wait` is true,
3274 * `ktrace_lock` is held while waiting. This is necessary while waiting to
3275 * write events out of the buffers.
3276 *
3277 * Returns true if the threshold was reached and false otherwise.
3278 *
3279 * Called with `ktrace_lock` locked and interrupts enabled.
3280 */
3281 static boolean_t
3282 kdbg_wait(uint64_t timeout_ms, boolean_t locked_wait)
3283 {
3284 int wait_result = THREAD_AWAKENED;
3285 uint64_t abstime = 0;
3286
3287 ktrace_assert_lock_held();
3288
3289 if (timeout_ms != 0) {
3290 uint64_t ns = timeout_ms * NSEC_PER_MSEC;
3291 nanoseconds_to_absolutetime(ns, &abstime);
3292 clock_absolutetime_interval_to_deadline(abstime, &abstime);
3293 }
3294
3295 boolean_t s = ml_set_interrupts_enabled(FALSE);
3296 if (!s) {
3297 panic("kdbg_wait() called with interrupts disabled");
3298 }
3299 lck_spin_lock_grp(kdw_spin_lock, kdebug_lck_grp);
3300
3301 if (!locked_wait) {
3302 /* drop the mutex to allow others to access trace */
3303 ktrace_unlock();
3304 }
3305
3306 while (wait_result == THREAD_AWAKENED &&
3307 kd_ctrl_page.kds_inuse_count < n_storage_threshold) {
3308 kds_waiter = 1;
3309
3310 if (abstime) {
3311 wait_result = lck_spin_sleep_deadline(kdw_spin_lock, 0, &kds_waiter, THREAD_ABORTSAFE, abstime);
3312 } else {
3313 wait_result = lck_spin_sleep(kdw_spin_lock, 0, &kds_waiter, THREAD_ABORTSAFE);
3314 }
3315
3316 kds_waiter = 0;
3317 }
3318
3319 /* check the count under the spinlock */
3320 boolean_t threshold_exceeded = (kd_ctrl_page.kds_inuse_count >= n_storage_threshold);
3321
3322 lck_spin_unlock(kdw_spin_lock);
3323 ml_set_interrupts_enabled(s);
3324
3325 if (!locked_wait) {
3326 /* pick the mutex back up again */
3327 ktrace_lock();
3328 }
3329
3330 /* write out whether we've exceeded the threshold */
3331 return threshold_exceeded;
3332 }
3333
3334 /*
3335 * Wakeup a thread waiting using `kdbg_wait` if there are at least
3336 * `n_storage_threshold` storage units in use.
3337 */
3338 static void
3339 kdbg_wakeup(void)
3340 {
3341 boolean_t need_kds_wakeup = FALSE;
3342
3343 /*
3344 * Try to take the lock here to synchronize with the waiter entering
3345 * the blocked state. Use the try mode to prevent deadlocks caused by
3346 * re-entering this routine due to various trace points triggered in the
3347 * lck_spin_sleep_xxxx routines used to actually enter one of our 2 wait
3348 * conditions. It's no problem if we fail; there will be lots of additional
3349 * events coming in that will eventually succeed in grabbing this lock.
3350 */
3351 boolean_t s = ml_set_interrupts_enabled(FALSE);
3352
3353 if (lck_spin_try_lock(kdw_spin_lock)) {
3354 if (kds_waiter &&
3355 (kd_ctrl_page.kds_inuse_count >= n_storage_threshold)) {
3356 kds_waiter = 0;
3357 need_kds_wakeup = TRUE;
3358 }
3359 lck_spin_unlock(kdw_spin_lock);
3360 }
3361
3362 ml_set_interrupts_enabled(s);
3363
3364 if (need_kds_wakeup == TRUE) {
3365 wakeup(&kds_waiter);
3366 }
3367 }
3368
3369 int
3370 kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep)
3371 {
3372 int ret = 0;
3373 size_t size = *sizep;
3374 unsigned int value = 0;
3375 kd_regtype kd_Reg;
3376 kbufinfo_t kd_bufinfo;
3377 proc_t p;
3378
3379 if (name[0] == KERN_KDWRITETR ||
3380 name[0] == KERN_KDWRITETR_V3 ||
3381 name[0] == KERN_KDWRITEMAP ||
3382 name[0] == KERN_KDWRITEMAP_V3 ||
3383 name[0] == KERN_KDEFLAGS ||
3384 name[0] == KERN_KDDFLAGS ||
3385 name[0] == KERN_KDENABLE ||
3386 name[0] == KERN_KDSETBUF) {
3387 if (namelen < 2) {
3388 return EINVAL;
3389 }
3390 value = name[1];
3391 }
3392
3393 kdbg_lock_init();
3394 assert(kd_ctrl_page.kdebug_flags & KDBG_LOCKINIT);
3395
3396 ktrace_lock();
3397
3398 /*
3399 * Some requests only require "read" access to kdebug trace. Regardless,
3400 * tell ktrace that a configuration or read is occurring (and see if it's
3401 * allowed).
3402 */
3403 if (name[0] != KERN_KDGETBUF &&
3404 name[0] != KERN_KDGETREG &&
3405 name[0] != KERN_KDREADCURTHRMAP) {
3406 if ((ret = ktrace_configure(KTRACE_KDEBUG))) {
3407 goto out;
3408 }
3409 } else {
3410 if ((ret = ktrace_read_check())) {
3411 goto out;
3412 }
3413 }
3414
3415 switch (name[0]) {
3416 case KERN_KDGETBUF:
3417 if (size < sizeof(kd_bufinfo.nkdbufs)) {
3418 /*
3419 * There is not enough room to return even
3420 * the first element of the info structure.
3421 */
3422 ret = EINVAL;
3423 break;
3424 }
3425
3426 memset(&kd_bufinfo, 0, sizeof(kd_bufinfo));
3427
3428 kd_bufinfo.nkdbufs = nkdbufs;
3429 kd_bufinfo.nkdthreads = kd_mapcount;
3430
3431 if ((kd_ctrl_page.kdebug_slowcheck & SLOW_NOLOG)) {
3432 kd_bufinfo.nolog = 1;
3433 } else {
3434 kd_bufinfo.nolog = 0;
3435 }
3436
3437 kd_bufinfo.flags = kd_ctrl_page.kdebug_flags;
3438 #if defined(__LP64__)
3439 kd_bufinfo.flags |= KDBG_LP64;
3440 #endif
3441 {
3442 int pid = ktrace_get_owning_pid();
3443 kd_bufinfo.bufid = (pid == 0 ? -1 : pid);
3444 }
3445
3446 if (size >= sizeof(kd_bufinfo)) {
3447 /*
3448 * Provide all the info we have
3449 */
3450 if (copyout(&kd_bufinfo, where, sizeof(kd_bufinfo))) {
3451 ret = EINVAL;
3452 }
3453 } else {
3454 /*
3455 * For backwards compatibility, only provide
3456 * as much info as there is room for.
3457 */
3458 if (copyout(&kd_bufinfo, where, size)) {
3459 ret = EINVAL;
3460 }
3461 }
3462 break;
3463
3464 case KERN_KDREADCURTHRMAP:
3465 ret = kdbg_readcurthrmap(where, sizep);
3466 break;
3467
3468 case KERN_KDEFLAGS:
3469 value &= KDBG_USERFLAGS;
3470 kd_ctrl_page.kdebug_flags |= value;
3471 break;
3472
3473 case KERN_KDDFLAGS:
3474 value &= KDBG_USERFLAGS;
3475 kd_ctrl_page.kdebug_flags &= ~value;
3476 break;
3477
3478 case KERN_KDENABLE:
3479 /*
3480 * Enable the tracing mechanism. There are two types:
3481 * KDEBUG_TRACE is the standard one,
3482 * and KDEBUG_PPT, which is a carefully
3483 * chosen subset to avoid performance impact.
3484 */
3485 if (value) {
3486 /*
3487 * enable only if buffer is initialized
3488 */
3489 if (!(kd_ctrl_page.kdebug_flags & KDBG_BUFINIT) ||
3490 !(value == KDEBUG_ENABLE_TRACE || value == KDEBUG_ENABLE_PPT)) {
3491 ret = EINVAL;
3492 break;
3493 }
3494 kdbg_thrmap_init();
3495
3496 kdbg_set_tracing_enabled(TRUE, value);
3497 } else {
3498 if (!kdebug_enable) {
3499 break;
3500 }
3501
3502 kernel_debug_disable();
3503 }
3504 break;
3505
3506 case KERN_KDSETBUF:
3507 kdbg_set_nkdbufs(value);
3508 break;
3509
3510 case KERN_KDSETUP:
3511 ret = kdbg_reinit(FALSE);
3512 break;
3513
3514 case KERN_KDREMOVE:
3515 ktrace_reset(KTRACE_KDEBUG);
3516 break;
3517
3518 case KERN_KDSETREG:
3519 if (size < sizeof(kd_regtype)) {
3520 ret = EINVAL;
3521 break;
3522 }
3523 if (copyin(where, &kd_Reg, sizeof(kd_regtype))) {
3524 ret = EINVAL;
3525 break;
3526 }
3527
3528 ret = kdbg_setreg(&kd_Reg);
3529 break;
3530
3531 case KERN_KDGETREG:
3532 ret = EINVAL;
3533 break;
3534
3535 case KERN_KDREADTR:
3536 ret = kdbg_read(where, sizep, NULL, NULL, RAW_VERSION1);
3537 break;
3538
3539 case KERN_KDWRITETR:
3540 case KERN_KDWRITETR_V3:
3541 case KERN_KDWRITEMAP:
3542 case KERN_KDWRITEMAP_V3:
3543 {
3544 struct vfs_context context;
3545 struct fileproc *fp;
3546 size_t number;
3547 vnode_t vp;
3548 int fd;
3549
3550 if (name[0] == KERN_KDWRITETR || name[0] == KERN_KDWRITETR_V3) {
3551 (void)kdbg_wait(size, TRUE);
3552 }
3553 p = current_proc();
3554 fd = value;
3555
3556 proc_fdlock(p);
3557 if ((ret = fp_lookup(p, fd, &fp, 1))) {
3558 proc_fdunlock(p);
3559 break;
3560 }
3561 context.vc_thread = current_thread();
3562 context.vc_ucred = fp->f_fglob->fg_cred;
3563
3564 if (FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_VNODE) {
3565 fp_drop(p, fd, fp, 1);
3566 proc_fdunlock(p);
3567
3568 ret = EBADF;
3569 break;
3570 }
3571 vp = (struct vnode *)fp->f_fglob->fg_data;
3572 proc_fdunlock(p);
3573
3574 if ((ret = vnode_getwithref(vp)) == 0) {
3575 RAW_file_offset = fp->f_fglob->fg_offset;
3576 if (name[0] == KERN_KDWRITETR || name[0] == KERN_KDWRITETR_V3) {
3577 number = nkdbufs * sizeof(kd_buf);
3578
3579 KDBG_RELEASE(TRACE_WRITING_EVENTS | DBG_FUNC_START);
3580 if (name[0] == KERN_KDWRITETR_V3) {
3581 ret = kdbg_read(0, &number, vp, &context, RAW_VERSION3);
3582 } else {
3583 ret = kdbg_read(0, &number, vp, &context, RAW_VERSION1);
3584 }
3585 KDBG_RELEASE(TRACE_WRITING_EVENTS | DBG_FUNC_END, number);
3586
3587 *sizep = number;
3588 } else {
3589 number = kd_mapcount * sizeof(kd_threadmap);
3590 if (name[0] == KERN_KDWRITEMAP_V3) {
3591 ret = kdbg_readthrmap_v3(0, number, fd);
3592 } else {
3593 ret = kdbg_write_thread_map(vp, &context);
3594 }
3595 }
3596 fp->f_fglob->fg_offset = RAW_file_offset;
3597 vnode_put(vp);
3598 }
3599 fp_drop(p, fd, fp, 0);
3600
3601 break;
3602 }
3603 case KERN_KDBUFWAIT:
3604 *sizep = kdbg_wait(size, FALSE);
3605 break;
3606
3607 case KERN_KDPIDTR:
3608 if (size < sizeof(kd_regtype)) {
3609 ret = EINVAL;
3610 break;
3611 }
3612 if (copyin(where, &kd_Reg, sizeof(kd_regtype))) {
3613 ret = EINVAL;
3614 break;
3615 }
3616
3617 ret = kdbg_setpid(&kd_Reg);
3618 break;
3619
3620 case KERN_KDPIDEX:
3621 if (size < sizeof(kd_regtype)) {
3622 ret = EINVAL;
3623 break;
3624 }
3625 if (copyin(where, &kd_Reg, sizeof(kd_regtype))) {
3626 ret = EINVAL;
3627 break;
3628 }
3629
3630 ret = kdbg_setpidex(&kd_Reg);
3631 break;
3632
3633 case KERN_KDCPUMAP:
3634 ret = kdbg_readcpumap(where, sizep);
3635 break;
3636
3637 case KERN_KDTHRMAP:
3638 ret = kdbg_copyout_thread_map(where, sizep);
3639 break;
3640
3641 case KERN_KDSET_TYPEFILTER: {
3642 ret = kdbg_copyin_typefilter(where, size);
3643 break;
3644 }
3645
3646 case KERN_KDTEST:
3647 ret = kdbg_test(size);
3648 break;
3649
3650 default:
3651 ret = EINVAL;
3652 break;
3653 }
3654 out:
3655 ktrace_unlock();
3656
3657 return ret;
3658 }
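/*
 * Hedged sketch of the userspace control sequence that drives this handler
 * (editor's addition; the mib layout is an assumption based on the
 * name[0]/name[1] handling above and the pattern used by tools such as
 * fs_usage):
 *
 *     int mib[4] = { CTL_KERN, KERN_KDEBUG, 0, 0 };
 *     size_t needed = 0;
 *
 *     mib[2] = KERN_KDSETBUF; mib[3] = 1000000;           // request ~1M kd_bufs
 *     sysctl(mib, 4, NULL, &needed, NULL, 0);
 *
 *     mib[2] = KERN_KDSETUP;                              // allocate the buffers
 *     sysctl(mib, 3, NULL, &needed, NULL, 0);
 *
 *     mib[2] = KERN_KDENABLE; mib[3] = KDEBUG_ENABLE_TRACE;
 *     sysctl(mib, 4, NULL, &needed, NULL, 0);             // start tracing
 *
 *     // ... later, drain with KERN_KDREADTR into a user buffer, then tear
 *     // everything down with KERN_KDREMOVE.
 */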
3659
3660
3661 /*
3662 * This code can run, for the most part, concurrently with kernel_debug_internal().
3663 * 'release_storage_unit' takes the kds_spin_lock, which may cause us to briefly
3664 * synchronize with the recording side; otherwise, we are able to
3665 * move through the lists without taking any locks.
3666 */
3667 int
3668 kdbg_read(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx, uint32_t file_version)
3669 {
3670 unsigned int count;
3671 unsigned int cpu, min_cpu;
3672 uint64_t barrier_min = 0, barrier_max = 0, t, earliest_time;
3673 int error = 0;
3674 kd_buf *tempbuf;
3675 uint32_t rcursor;
3676 kd_buf lostevent;
3677 union kds_ptr kdsp;
3678 bool traced_retrograde = false;
3679 struct kd_storage *kdsp_actual;
3680 struct kd_bufinfo *kdbp;
3681 struct kd_bufinfo *min_kdbp;
3682 uint32_t tempbuf_count;
3683 uint32_t tempbuf_number;
3684 uint32_t old_kdebug_flags;
3685 uint32_t old_kdebug_slowcheck;
3686 boolean_t out_of_events = FALSE;
3687 boolean_t wrapped = FALSE;
3688
3689 assert(number);
3690 count = *number / sizeof(kd_buf);
3691 *number = 0;
3692
3693 ktrace_assert_lock_held();
3694
3695 if (count == 0 || !(kd_ctrl_page.kdebug_flags & KDBG_BUFINIT) || kdcopybuf == 0) {
3696 return EINVAL;
3697 }
3698
3699 thread_set_eager_preempt(current_thread());
3700
3701 memset(&lostevent, 0, sizeof(lostevent));
3702 lostevent.debugid = TRACE_LOST_EVENTS;
3703
3704 /*
3705 * Capture the current time. Only sort events that have occurred
3706 * before now. Since the IOPs are being flushed here, it is possible
3707 * that events occur on the AP while running live tracing. If we are
3708 * disabled, no new events should occur on the AP.
3709 */
3710 if (kd_ctrl_page.enabled) {
3711 barrier_max = kdbg_timestamp() & KDBG_TIMESTAMP_MASK;
3712 }
3713
3714 /*
3715 * Request each IOP to provide us with up to date entries before merging
3716 * buffers together.
3717 */
3718 kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_SYNC_FLUSH, NULL);
3719
3720 /*
3721 * Disable wrap so storage units cannot be stolen out from underneath us
3722 * while merging events.
3723 *
3724 * Because we hold ktrace_lock, no other control threads can be playing
3725 * with kdebug_flags. The code that emits new events could be running,
3726 * but it grabs kds_spin_lock if it needs to acquire a new storage
3727 * chunk, which is where it examines kdebug_flags. If it is adding to
3728 * the same chunk we're reading from, check for that below.
3729 */
3730 wrapped = disable_wrap(&old_kdebug_slowcheck, &old_kdebug_flags);
3731
3732 if (count > nkdbufs) {
3733 count = nkdbufs;
3734 }
3735
3736 if ((tempbuf_count = count) > KDCOPYBUF_COUNT) {
3737 tempbuf_count = KDCOPYBUF_COUNT;
3738 }
3739
3740 /*
3741 * If the buffers have wrapped, do not emit additional lost events for the
3742 * oldest storage units.
3743 */
3744 if (wrapped) {
3745 kd_ctrl_page.kdebug_flags &= ~KDBG_WRAPPED;
3746
3747 for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_ctrl_page.kdebug_cpus; cpu++, kdbp++) {
3748 if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
3749 continue;
3750 }
3751 kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
3752 kdsp_actual->kds_lostevents = FALSE;
3753 }
3754 }
3755 /*
3756 * Capture the earliest time where there are events for all CPUs and don't
3757 * emit events with timestamps prior to it.
3758 */
3759 barrier_min = kd_ctrl_page.oldest_time;
3760
3761 while (count) {
3762 tempbuf = kdcopybuf;
3763 tempbuf_number = 0;
3764
3765 if (wrapped) {
3766 /*
3767 * Emit a lost events tracepoint to indicate that previous events
3768 * were lost -- the thread map cannot be trusted. A new one must
3769 * be taken so tools can analyze the trace in a backwards-facing
3770 * fashion.
3771 */
3772 kdbg_set_timestamp_and_cpu(&lostevent, barrier_min, 0);
3773 *tempbuf = lostevent;
3774 wrapped = FALSE;
3775 goto nextevent;
3776 }
3777
3778 /* While space left in merged events scratch buffer. */
3779 while (tempbuf_count) {
3780 bool lostevents = false;
3781 int lostcpu = 0;
3782 earliest_time = UINT64_MAX;
3783 min_kdbp = NULL;
3784 min_cpu = 0;
3785
3786 /* Check each CPU's buffers for the earliest event. */
3787 for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_ctrl_page.kdebug_cpus; cpu++, kdbp++) {
3788 /* Skip CPUs without data in their oldest storage unit. */
3789 if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
3790 next_cpu:
3791 continue;
3792 }
3793 /* From CPU data to buffer header to buffer. */
3794 kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
3795
3796 next_event:
3797 /* The next event to be read from this buffer. */
3798 rcursor = kdsp_actual->kds_readlast;
3799
3800 /* Skip this buffer if there are no events left. */
3801 if (rcursor == kdsp_actual->kds_bufindx) {
3802 continue;
3803 }
3804
3805 /*
3806 * Check that this storage unit wasn't stolen and events were
3807 * lost. This must have happened while wrapping was disabled
3808 * in this function.
3809 */
3810 if (kdsp_actual->kds_lostevents) {
3811 lostevents = true;
3812 kdsp_actual->kds_lostevents = FALSE;
3813
3814 /*
3815 * The earliest event we can trust is the first one in this
3816 * stolen storage unit.
3817 */
3818 uint64_t lost_time =
3819 kdbg_get_timestamp(&kdsp_actual->kds_records[0]);
3820 if (kd_ctrl_page.oldest_time < lost_time) {
3821 /*
3822 * If this is the first time we've seen lost events for
3823 * this gap, record its timestamp as the oldest
3824 * timestamp we're willing to merge for the lost events
3825 * tracepoint.
3826 */
3827 kd_ctrl_page.oldest_time = barrier_min = lost_time;
3828 lostcpu = cpu;
3829 }
3830 }
3831
3832 t = kdbg_get_timestamp(&kdsp_actual->kds_records[rcursor]);
3833
3834 if ((t > barrier_max) && (barrier_max > 0)) {
3835 if (kdbg_debug) {
3836 printf("kdebug: FUTURE EVENT: debugid %#8x: "
3837 "time %lld from CPU %u "
3838 "(barrier at time %lld, read %lu events)\n",
3839 kdsp_actual->kds_records[rcursor].debugid,
3840 t, cpu, barrier_max, *number + tempbuf_number);
3841 }
3842 /*
3843 * Need to flush IOPs again before we can sort any more
3844 * data from the buffers.
3845 */
3846 out_of_events = TRUE;
3847 break;
3848 }
3849 if (t < kdsp_actual->kds_timestamp) {
3850 /*
3851 * This indicates the event emitter hasn't completed
3852 * filling in the event (because we're looking at the
3853 * buffer that the record head is using). The max barrier
3854 * timestamp should have saved us from seeing these kinds
3855 * of things, but other CPUs might be slow on the uptake.
3856 *
3857 * Bail out so we don't get out-of-order events by
3858 * continuing to read events from other CPUs.
3859 */
3860 out_of_events = TRUE;
3861 break;
3862 }
3863
3864 /*
3865 * Ignore events that have aged out due to wrapping or storage
3866 * unit exhaustion while merging events.
3867 */
3868 if (t < barrier_min) {
3869 kdsp_actual->kds_readlast++;
3870
3871 if (kdsp_actual->kds_readlast >= EVENTS_PER_STORAGE_UNIT) {
3872 release_storage_unit(cpu, kdsp.raw);
3873
3874 if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
3875 goto next_cpu;
3876 }
3877 kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
3878 }
3879
3880 goto next_event;
3881 }
3882
3883 /*
3884 * Don't worry about merging any events -- just walk through
3885 * the CPUs and find the latest timestamp of lost events.
3886 */
3887 if (lostevents) {
3888 continue;
3889 }
3890
3891 if (t < earliest_time) {
3892 earliest_time = t;
3893 min_kdbp = kdbp;
3894 min_cpu = cpu;
3895 }
3896 }
3897 if (lostevents) {
3898 /*
3899 * If any lost events were hit in the buffers, emit an event
3900 * with the latest timestamp.
3901 */
3902 kdbg_set_timestamp_and_cpu(&lostevent, barrier_min, lostcpu);
3903 *tempbuf = lostevent;
3904 tempbuf->arg1 = 1;
3905 goto nextevent;
3906 }
3907 if (min_kdbp == NULL) {
3908 /* All buffers ran empty. */
3909 out_of_events = TRUE;
3910 }
3911 if (out_of_events) {
3912 break;
3913 }
3914
3915 kdsp = min_kdbp->kd_list_head;
3916 kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
3917
3918 /* Copy earliest event into merged events scratch buffer. */
3919 *tempbuf = kdsp_actual->kds_records[kdsp_actual->kds_readlast++];
3920
3921 if (kdsp_actual->kds_readlast == EVENTS_PER_STORAGE_UNIT) {
3922 release_storage_unit(min_cpu, kdsp.raw);
3923 }
3924
3925 /*
3926 * Watch for out of order timestamps (from IOPs).
3927 */
3928 if (earliest_time < min_kdbp->kd_prev_timebase) {
3929 /*
3930 * If we haven't already, emit a retrograde events event.
3931 * Otherwise, ignore this event.
3932 */
3933 if (traced_retrograde) {
3934 continue;
3935 }
3936
3937 kdbg_set_timestamp_and_cpu(tempbuf, min_kdbp->kd_prev_timebase, kdbg_get_cpu(tempbuf));
3938 tempbuf->arg1 = tempbuf->debugid;
3939 tempbuf->arg2 = earliest_time;
3940 tempbuf->arg3 = 0;
3941 tempbuf->arg4 = 0;
3942 tempbuf->debugid = TRACE_RETROGRADE_EVENTS;
3943 traced_retrograde = true;
3944 } else {
3945 min_kdbp->kd_prev_timebase = earliest_time;
3946 }
3947 nextevent:
3948 tempbuf_count--;
3949 tempbuf_number++;
3950 tempbuf++;
3951
3952 if ((RAW_file_written += sizeof(kd_buf)) >= RAW_FLUSH_SIZE) {
3953 break;
3954 }
3955 }
3956 if (tempbuf_number) {
3957 /*
3958 * Remember the latest timestamp of events that we've merged so we
3959 * don't think we've lost events later.
3960 */
3961 uint64_t latest_time = kdbg_get_timestamp(tempbuf - 1);
3962 if (kd_ctrl_page.oldest_time < latest_time) {
3963 kd_ctrl_page.oldest_time = latest_time;
3964 }
3965 if (file_version == RAW_VERSION3) {
3966 if (!(kdbg_write_v3_event_chunk_header(buffer, V3_RAW_EVENTS, (tempbuf_number * sizeof(kd_buf)), vp, ctx))) {
3967 error = EFAULT;
3968 goto check_error;
3969 }
3970 if (buffer) {
3971 buffer += (sizeof(kd_chunk_header_v3) + sizeof(uint64_t));
3972 }
3973
3974 assert(count >= (sizeof(kd_chunk_header_v3) + sizeof(uint64_t)));
3975 count -= (sizeof(kd_chunk_header_v3) + sizeof(uint64_t));
3976 *number += (sizeof(kd_chunk_header_v3) + sizeof(uint64_t));
3977 }
3978 if (vp) {
3979 size_t write_size = tempbuf_number * sizeof(kd_buf);
3980 error = kdbg_write_to_vnode((caddr_t)kdcopybuf, write_size, vp, ctx, RAW_file_offset);
3981 if (!error) {
3982 RAW_file_offset += write_size;
3983 }
3984
3985 if (RAW_file_written >= RAW_FLUSH_SIZE) {
3986 error = VNOP_FSYNC(vp, MNT_NOWAIT, ctx);
3987
3988 RAW_file_written = 0;
3989 }
3990 } else {
3991 error = copyout(kdcopybuf, buffer, tempbuf_number * sizeof(kd_buf));
3992 buffer += (tempbuf_number * sizeof(kd_buf));
3993 }
3994 check_error:
3995 if (error) {
3996 *number = 0;
3997 error = EINVAL;
3998 break;
3999 }
4000 count -= tempbuf_number;
4001 *number += tempbuf_number;
4002 }
4003 if (out_of_events == TRUE) {
4004 /*
4005 * All trace buffers are empty.
4006 */
4007 break;
4008 }
4009
4010 if ((tempbuf_count = count) > KDCOPYBUF_COUNT) {
4011 tempbuf_count = KDCOPYBUF_COUNT;
4012 }
4013 }
4014 if (!(old_kdebug_flags & KDBG_NOWRAP)) {
4015 enable_wrap(old_kdebug_slowcheck);
4016 }
4017 thread_clear_eager_preempt(current_thread());
4018 return error;
4019 }
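/*
 * Summary of the merge loop above (a reader's sketch, not normative): each
 * pass scans every CPU's storage-unit list, drops events older than
 * barrier_min, emits one synthetic lost-events record if any were detected,
 * and otherwise copies the event with the earliest timestamp into kdcopybuf,
 * reporting out-of-order IOP timestamps once via TRACE_RETROGRADE_EVENTS.
 * In rough pseudocode:
 *
 *   while (count > 0 && !out_of_events) {
 *           for each cpu: skip events with t < barrier_min;
 *           pick the (kdbp, cpu) holding the earliest remaining event;
 *           copy it into kdcopybuf, releasing exhausted storage units;
 *           flush kdcopybuf to the vnode (or copyout to user space)
 *               whenever RAW_FLUSH_SIZE bytes have accumulated;
 *   }
 */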
4020
4021 static int
4022 kdbg_test(size_t flavor)
4023 {
4024 int code = 0;
4025 int dummy_iop = 0;
4026
4027 #define KDEBUG_TEST_CODE(code) BSDDBG_CODE(DBG_BSD_KDEBUG_TEST, (code))
4028 switch (flavor) {
4029 case 1:
4030 /* try each macro */
4031 KDBG(KDEBUG_TEST_CODE(code)); code++;
4032 KDBG(KDEBUG_TEST_CODE(code), 1); code++;
4033 KDBG(KDEBUG_TEST_CODE(code), 1, 2); code++;
4034 KDBG(KDEBUG_TEST_CODE(code), 1, 2, 3); code++;
4035 KDBG(KDEBUG_TEST_CODE(code), 1, 2, 3, 4); code++;
4036
4037 KDBG_RELEASE(KDEBUG_TEST_CODE(code)); code++;
4038 KDBG_RELEASE(KDEBUG_TEST_CODE(code), 1); code++;
4039 KDBG_RELEASE(KDEBUG_TEST_CODE(code), 1, 2); code++;
4040 KDBG_RELEASE(KDEBUG_TEST_CODE(code), 1, 2, 3); code++;
4041 KDBG_RELEASE(KDEBUG_TEST_CODE(code), 1, 2, 3, 4); code++;
4042
4043 KDBG_FILTERED(KDEBUG_TEST_CODE(code)); code++;
4044 KDBG_FILTERED(KDEBUG_TEST_CODE(code), 1); code++;
4045 KDBG_FILTERED(KDEBUG_TEST_CODE(code), 1, 2); code++;
4046 KDBG_FILTERED(KDEBUG_TEST_CODE(code), 1, 2, 3); code++;
4047 KDBG_FILTERED(KDEBUG_TEST_CODE(code), 1, 2, 3, 4); code++;
4048
4049 KDBG_RELEASE_NOPROCFILT(KDEBUG_TEST_CODE(code)); code++;
4050 KDBG_RELEASE_NOPROCFILT(KDEBUG_TEST_CODE(code), 1); code++;
4051 KDBG_RELEASE_NOPROCFILT(KDEBUG_TEST_CODE(code), 1, 2); code++;
4052 KDBG_RELEASE_NOPROCFILT(KDEBUG_TEST_CODE(code), 1, 2, 3); code++;
4053 KDBG_RELEASE_NOPROCFILT(KDEBUG_TEST_CODE(code), 1, 2, 3, 4); code++;
4054
4055 KDBG_DEBUG(KDEBUG_TEST_CODE(code)); code++;
4056 KDBG_DEBUG(KDEBUG_TEST_CODE(code), 1); code++;
4057 KDBG_DEBUG(KDEBUG_TEST_CODE(code), 1, 2); code++;
4058 KDBG_DEBUG(KDEBUG_TEST_CODE(code), 1, 2, 3); code++;
4059 KDBG_DEBUG(KDEBUG_TEST_CODE(code), 1, 2, 3, 4); code++;
4060 break;
4061
4062 case 2:
4063 if (kd_ctrl_page.kdebug_iops) {
4064 /* use a registered IOP cpu_id to avoid tripping the valid-IOP assertion in kernel_debug_enter */
4065 dummy_iop = kd_ctrl_page.kdebug_iops[0].cpu_id;
4066 }
4067
4068 /* ensure old timestamps are not emitted from kernel_debug_enter */
4069 kernel_debug_enter(dummy_iop, KDEBUG_TEST_CODE(code),
4070 100 /* very old timestamp */, 0, 0, 0,
4071 0, (uintptr_t)thread_tid(current_thread()));
4072 code++;
4073 kernel_debug_enter(dummy_iop, KDEBUG_TEST_CODE(code),
4074 kdbg_timestamp(), 0, 0, 0, 0,
4075 (uintptr_t)thread_tid(current_thread()));
4076 code++;
4077 break;
4078
4079 default:
4080 return ENOTSUP;
4081 }
4082 #undef KDEBUG_TEST_CODE
4083
4084 return 0;
4085 }
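/*
 * Reader's note on the flavors above: flavor 1 emits one event per KDBG*
 * macro variant with zero through four arguments so each wrapper path is
 * exercised; flavor 2 pushes two events through kernel_debug_enter() the way
 * an IOP would, first with a deliberately stale timestamp and then with a
 * current one, to check that old timestamps are not re-emitted.
 */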
4086
4087 void
4088 kdebug_init(unsigned int n_events, char *filter_desc, boolean_t wrapping)
4089 {
4090 assert(filter_desc != NULL);
4091
4092 #if defined(__x86_64__)
4093 /* only trace MACH events when outputting kdebug to serial */
4094 if (kdebug_serial) {
4095 n_events = 1;
4096 if (filter_desc[0] == '\0') {
4097 filter_desc[0] = 'C';
4098 filter_desc[1] = '1';
4099 filter_desc[2] = '\0';
4100 }
4101 }
4102 #endif /* defined(__x86_64__) */
4103
4104 if (log_leaks && n_events == 0) {
4105 n_events = 200000;
4106 }
4107
4108 kdebug_trace_start(n_events, filter_desc, wrapping, FALSE);
4109 }
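/*
 * Illustrative call only: the n_events and filter_desc values normally come
 * from boot-args parsed by platform startup code (commonly the "trace" and
 * "trace_typefilter" arguments; treat those names as an assumption here).
 * A hypothetical direct invocation:
 *
 *   char filter[] = "C1";                  // hypothetical filter string
 *   kdebug_init(100000, filter, FALSE);    // 100000 events, wrapping off
 */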
4110
4111 static void
4112 kdbg_set_typefilter_string(const char *filter_desc)
4113 {
4114 char *end = NULL;
4115
4116 ktrace_assert_lock_held();
4117
4118 assert(filter_desc != NULL);
4119
4120 typefilter_reject_all(kdbg_typefilter);
4121 typefilter_allow_class(kdbg_typefilter, DBG_TRACE);
4122
4123 /* if the filter description starts with a number, assume it's a csc */
4124 if (filter_desc[0] >= '0' && filter_desc[0] <= '9') {
4125 unsigned long csc = strtoul(filter_desc, &end, 0);
4126 if (filter_desc != end && csc <= KDBG_CSC_MAX) {
4127 typefilter_allow_csc(kdbg_typefilter, csc);
4128 }
4129 return;
4130 }
4131
4132 while (filter_desc[0] != '\0') {
4133 unsigned long allow_value;
4134
4135 char filter_type = filter_desc[0];
4136 if (filter_type != 'C' && filter_type != 'S') {
4137 return;
4138 }
4139 filter_desc++;
4140
4141 allow_value = strtoul(filter_desc, &end, 0);
4142 if (filter_desc == end) {
4143 /* cannot parse as integer */
4144 return;
4145 }
4146
4147 switch (filter_type) {
4148 case 'C':
4149 if (allow_value <= KDBG_CLASS_MAX) {
4150 typefilter_allow_class(kdbg_typefilter, allow_value);
4151 } else {
4152 /* illegal class */
4153 return;
4154 }
4155 break;
4156 case 'S':
4157 if (allow_value <= KDBG_CSC_MAX) {
4158 typefilter_allow_csc(kdbg_typefilter, allow_value);
4159 } else {
4160 /* illegal class subclass */
4161 return;
4162 }
4163 break;
4164 default:
4165 return;
4166 }
4167
4168 /* advance to next filter entry */
4169 filter_desc = end;
4170 if (filter_desc[0] == ',') {
4171 filter_desc++;
4172 }
4173 }
4174 }
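/*
 * Filter-description format accepted above, with illustrative examples (the
 * specific numbers are arbitrary): a description that starts with a digit is
 * parsed (base 0, so "0x" prefixes work) as a single class-subclass (csc)
 * code; otherwise it is a comma-separated list of 'C'<class> and 'S'<csc>
 * entries, and parsing stops at the first entry that fails to parse.
 *
 *   "C1"            allow all of class 1 (DBG_MACH)
 *   "C1,S0x0521"    allow class 1 plus csc 0x0521
 *   "0x0521"        bare number: allow only csc 0x0521
 */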
4175
4176 /*
4177 * This function is meant to be called from the bootstrap thread or coming out
4178 * of acpi_idle_kernel.
4179 */
4180 void
4181 kdebug_trace_start(unsigned int n_events, const char *filter_desc,
4182 boolean_t wrapping, boolean_t at_wake)
4183 {
4184 if (!n_events) {
4185 kd_early_done = true;
4186 return;
4187 }
4188
4189 ktrace_start_single_threaded();
4190
4191 kdbg_lock_init();
4192
4193 ktrace_kernel_configure(KTRACE_KDEBUG);
4194
4195 kdbg_set_nkdbufs(n_events);
4196
4197 kernel_debug_string_early("start_kern_tracing");
4198
4199 if (kdbg_reinit(TRUE)) {
4200 printf("error from kdbg_reinit, kernel tracing not started\n");
4201 goto out;
4202 }
4203
4204 /*
4205 * Wrapping is disabled because boot and wake tracing is interested in
4206 * the earliest events, at the expense of later ones.
4207 */
4208 if (!wrapping) {
4209 uint32_t old1, old2;
4210 (void)disable_wrap(&old1, &old2);
4211 }
4212
4213 if (filter_desc && filter_desc[0] != '\0') {
4214 if (kdbg_initialize_typefilter(NULL) == KERN_SUCCESS) {
4215 kdbg_set_typefilter_string(filter_desc);
4216 kdbg_enable_typefilter();
4217 }
4218 }
4219
4220 /*
4221 * Keep interrupts disabled from the time the thread map is captured
4222 * until tracing is enabled and the early traces have been recorded.
4223 */
4224 boolean_t s = ml_set_interrupts_enabled(FALSE);
4225
4226 if (at_wake) {
4227 kdbg_thrmap_init();
4228 }
4229
4230 kdbg_set_tracing_enabled(TRUE, KDEBUG_ENABLE_TRACE | (kdebug_serial ?
4231 KDEBUG_ENABLE_SERIAL : 0));
4232
4233 if (!at_wake) {
4234 /*
4235 * Transfer all very early events from the static buffer into the real
4236 * buffers.
4237 */
4238 kernel_debug_early_end();
4239 }
4240
4241 ml_set_interrupts_enabled(s);
4242
4243 printf("kernel tracing started with %u events\n", n_events);
4244
4245 #if KDEBUG_MOJO_TRACE
4246 if (kdebug_serial) {
4247 printf("serial output enabled with %lu named events\n",
4248 sizeof(kd_events) / sizeof(kd_event_t));
4249 }
4250 #endif /* KDEBUG_MOJO_TRACE */
4251
4252 out:
4253 ktrace_end_single_threaded();
4254 }
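/*
 * The at_wake flag above selects between the two callers described in the
 * comment preceding this function: at boot (at_wake == FALSE) the events
 * logged into the static early buffer are transferred into the real buffers
 * once tracing is enabled, while on wake (at_wake == TRUE) the thread map is
 * re-captured instead.  A hypothetical wake-path call, with an assumed event
 * count variable shown for illustration:
 *
 *   kdebug_trace_start(wake_nkdbufs, NULL, TRUE, TRUE);
 */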
4255
4256 void
4257 kdbg_dump_trace_to_file(const char *filename)
4258 {
4259 vfs_context_t ctx;
4260 vnode_t vp;
4261 size_t write_size;
4262 int ret;
4263
4264 ktrace_lock();
4265
4266 if (!(kdebug_enable & KDEBUG_ENABLE_TRACE)) {
4267 goto out;
4268 }
4269
4270 if (ktrace_get_owning_pid() != 0) {
4271 /*
4272 * Another process owns ktrace and is still active; disable tracing to
4273 * prevent wrapping.
4274 */
4275 kdebug_enable = 0;
4276 kd_ctrl_page.enabled = 0;
4277 commpage_update_kdebug_state();
4278 goto out;
4279 }
4280
4281 KDBG_RELEASE(TRACE_WRITING_EVENTS | DBG_FUNC_START);
4282
4283 kdebug_enable = 0;
4284 kd_ctrl_page.enabled = 0;
4285 commpage_update_kdebug_state();
4286
4287 ctx = vfs_context_kernel();
4288
4289 if (vnode_open(filename, (O_CREAT | FWRITE | O_NOFOLLOW), 0600, 0, &vp, ctx)) {
4290 goto out;
4291 }
4292
4293 kdbg_write_thread_map(vp, ctx);
4294
4295 write_size = nkdbufs * sizeof(kd_buf);
4296 ret = kdbg_read(0, &write_size, vp, ctx, RAW_VERSION1);
4297 if (ret) {
4298 goto out_close;
4299 }
4300
4301 /*
4302 * Synchronize the file with MNT_WAIT so the I/O is captured within the
4303 * TRACE_WRITING_EVENTS interval.
4304 */
4305 ret = VNOP_FSYNC(vp, MNT_WAIT, ctx);
4306
4307 /*
4308 * Balance the starting TRACE_WRITING_EVENTS tracepoint manually.
4309 */
4310 kd_buf end_event = {
4311 .debugid = TRACE_WRITING_EVENTS | DBG_FUNC_END,
4312 .arg1 = write_size,
4313 .arg2 = ret,
4314 .arg5 = thread_tid(current_thread()),
4315 };
4316 kdbg_set_timestamp_and_cpu(&end_event, kdbg_timestamp(),
4317 cpu_number());
4318
4319 /* this is best effort -- ignore any errors */
4320 (void)kdbg_write_to_vnode((caddr_t)&end_event, sizeof(kd_buf), vp, ctx,
4321 RAW_file_offset);
4322
4323 out_close:
4324 vnode_close(vp, FWRITE, ctx);
4325 sync(current_proc(), (void *)NULL, (int *)NULL);
4326
4327 out:
4328 ktrace_unlock();
4329 }
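/*
 * The file produced above is a RAW_VERSION1 stream: the thread map, followed
 * by the merged events, terminated by a manually written
 * TRACE_WRITING_EVENTS end event whose arg1 holds the write size reported by
 * kdbg_read() and arg2 the fsync status.  A hypothetical caller (the path is
 * illustrative only):
 *
 *   kdbg_dump_trace_to_file("/tmp/kdebug_dump.trace");
 */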
4330
4331 static int
4332 kdbg_sysctl_continuous SYSCTL_HANDLER_ARGS
4333 {
4334 #pragma unused(oidp, arg1, arg2)
4335 int value = kdbg_continuous_time;
4336 int ret = sysctl_io_number(req, value, sizeof(value), &value, NULL);
4337
4338 if (ret || !req->newptr) {
4339 return ret;
4340 }
4341
4342 kdbg_continuous_time = value;
4343 return 0;
4344 }
4345
4346 SYSCTL_NODE(_kern, OID_AUTO, kdbg, CTLFLAG_RD | CTLFLAG_LOCKED, 0,
4347 "kdbg");
4348
4349 SYSCTL_PROC(_kern_kdbg, OID_AUTO, experimental_continuous,
4350 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0,
4351 sizeof(int), kdbg_sysctl_continuous, "I",
4352 "Set kdebug to use mach_continuous_time");
4353
4354 SYSCTL_INT(_kern_kdbg, OID_AUTO, debug,
4355 CTLFLAG_RW | CTLFLAG_LOCKED,
4356 &kdbg_debug, 0, "Set kdebug debug mode");
4357
4358 SYSCTL_QUAD(_kern_kdbg, OID_AUTO, oldest_time,
4359 CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED,
4360 &kd_ctrl_page.oldest_time,
4361 "Find the oldest timestamp still in trace");
4362
4363 #if KDEBUG_MOJO_TRACE
4364 static kd_event_t *
4365 binary_search(uint32_t id)
4366 {
4367 int low, high, mid;
4368
4369 low = 0;
4370 high = (int)(sizeof(kd_events) / sizeof(kd_event_t)) - 1;
4371
4372 while (TRUE) {
4373 mid = (low + high) / 2;
4374
4375 if (low > high) {
4376 return NULL; /* failed */
4377 } else if (low + 1 >= high) {
4378 /* narrowed to at most two candidates -- check both */
4379 if (kd_events[high].id == id) {
4380 return &kd_events[high];
4381 } else if (kd_events[low].id == id) {
4382 return &kd_events[low];
4383 } else {
4384 return NULL; /* search failed */
4385 }
4386 } else if (id < kd_events[mid].id) {
4387 high = mid;
4388 } else {
4389 low = mid;
4390 }
4391 }
4392 }
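/*
 * Note: the search above assumes kd_events[] is sorted by ascending id.  The
 * loop narrows [low, high] until at most two candidates remain and checks
 * both explicitly, so an id absent from the table yields NULL rather than a
 * nearest match.
 */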
4393
4394 /*
4395 * Look up an event id to get its name string, consulting a per-cpu
4396 * cache of a single entry before resorting to a binary search of the
4397 * full table.
4398 */
4399 #define NCACHE 1
4400 static kd_event_t *last_hit[MAX_CPUS];
4401 static kd_event_t *
4402 event_lookup_cache(uint32_t cpu, uint32_t id)
4403 {
4404 if (last_hit[cpu] == NULL || last_hit[cpu]->id != id) {
4405 last_hit[cpu] = binary_search(id);
4406 }
4407 return last_hit[cpu];
4408 }
4409
4410 static uint64_t kd_last_timestamp;
4411
4412 static void
4413 kdebug_serial_print(
4414 uint32_t cpunum,
4415 uint32_t debugid,
4416 uint64_t timestamp,
4417 uintptr_t arg1,
4418 uintptr_t arg2,
4419 uintptr_t arg3,
4420 uintptr_t arg4,
4421 uintptr_t threadid
4422 )
4423 {
4424 char kprintf_line[192];
4425 char event[40];
4426 uint64_t us = timestamp / NSEC_PER_USEC;
4427 uint64_t us_tenth = (timestamp % NSEC_PER_USEC) / 100;
4428 uint64_t delta = timestamp - kd_last_timestamp;
4429 uint64_t delta_us = delta / NSEC_PER_USEC;
4430 uint64_t delta_us_tenth = (delta % NSEC_PER_USEC) / 100;
4431 uint32_t event_id = debugid & KDBG_EVENTID_MASK;
4432 const char *command;
4433 const char *bra;
4434 const char *ket;
4435 kd_event_t *ep;
4436
4437 /* event time and delta from last */
4438 snprintf(kprintf_line, sizeof(kprintf_line),
4439 "%11llu.%1llu %8llu.%1llu ",
4440 us, us_tenth, delta_us, delta_us_tenth);
4441
4442
4443 /* event (id or name) - DBG_FUNC_START prefixed by "[", DBG_FUNC_END suffixed by "]" */
4444 bra = (debugid & DBG_FUNC_START) ? "[" : " ";
4445 ket = (debugid & DBG_FUNC_END) ? "]" : " ";
4446 ep = event_lookup_cache(cpunum, event_id);
4447 if (ep) {
4448 if (strlen(ep->name) < sizeof(event) - 3) {
4449 snprintf(event, sizeof(event), "%s%s%s",
4450 bra, ep->name, ket);
4451 } else {
4452 snprintf(event, sizeof(event), "%s%x(name too long)%s",
4453 bra, event_id, ket);
4454 }
4455 } else {
4456 snprintf(event, sizeof(event), "%s%x%s",
4457 bra, event_id, ket);
4458 }
4459 snprintf(kprintf_line + strlen(kprintf_line),
4460 sizeof(kprintf_line) - strlen(kprintf_line),
4461 "%-40s ", event);
4462
4463 /* arg1 .. arg4 with special cases for strings */
4464 switch (event_id) {
4465 case VFS_LOOKUP:
4466 case VFS_LOOKUP_DONE:
4467 if (debugid & DBG_FUNC_START) {
4468 /* arg1 hex then arg2..arg4 chars */
4469 snprintf(kprintf_line + strlen(kprintf_line),
4470 sizeof(kprintf_line) - strlen(kprintf_line),
4471 "%-16lx %-8s%-8s%-8s ",
4472 arg1, (char*)&arg2, (char*)&arg3, (char*)&arg4);
4473 break;
4474 }
4475 /* else fall through for arg1..arg4 chars */
4476 case TRACE_STRING_EXEC:
4477 case TRACE_STRING_NEWTHREAD:
4478 case TRACE_INFO_STRING:
4479 snprintf(kprintf_line + strlen(kprintf_line),
4480 sizeof(kprintf_line) - strlen(kprintf_line),
4481 "%-8s%-8s%-8s%-8s ",
4482 (char*)&arg1, (char*)&arg2, (char*)&arg3, (char*)&arg4);
4483 break;
4484 default:
4485 snprintf(kprintf_line + strlen(kprintf_line),
4486 sizeof(kprintf_line) - strlen(kprintf_line),
4487 "%-16lx %-16lx %-16lx %-16lx",
4488 arg1, arg2, arg3, arg4);
4489 }
4490
4491 /* threadid, cpu and command name */
4492 if (threadid == (uintptr_t)thread_tid(current_thread()) &&
4493 current_proc() &&
4494 current_proc()->p_comm[0]) {
4495 command = current_proc()->p_comm;
4496 } else {
4497 command = "-";
4498 }
4499 snprintf(kprintf_line + strlen(kprintf_line),
4500 sizeof(kprintf_line) - strlen(kprintf_line),
4501 " %-16lx %-2d %s\n",
4502 threadid, cpunum, command);
4503
4504 kprintf("%s", kprintf_line);
4505 kd_last_timestamp = timestamp;
4506 }
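/*
 * Shape of each kprintf()ed line above (spacing is approximate and shown
 * only for illustration):
 *
 *   <abs time us>.<tenth>  <delta us>.<tenth>  [EVENT_NAME or hex id]
 *       arg1 arg2 arg3 arg4  threadid cpu command
 *
 * VFS_LOOKUP and the TRACE_STRING_* events render their arguments as packed
 * character data rather than hex.
 */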
4507
4508 #endif