]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/kdebug.c
xnu-4903.241.1.tar.gz
[apple/xnu.git] / bsd / kern / kdebug.c
1 /*
2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3 *
4 * @Apple_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
21 */
22
23 #include <sys/errno.h>
24 #include <sys/param.h>
25 #include <sys/systm.h>
26 #include <sys/proc_internal.h>
27 #include <sys/vm.h>
28 #include <sys/sysctl.h>
29 #include <sys/kdebug.h>
30 #include <sys/kauth.h>
31 #include <sys/ktrace.h>
32 #include <sys/sysproto.h>
33 #include <sys/bsdtask_info.h>
34 #include <sys/random.h>
35
36 #include <mach/clock_types.h>
37 #include <mach/mach_types.h>
38 #include <mach/mach_time.h>
39 #include <mach/mach_vm.h>
40 #include <machine/machine_routines.h>
41
42 #include <mach/machine.h>
43 #include <mach/vm_map.h>
44
45 #if defined(__i386__) || defined(__x86_64__)
46 #include <i386/rtclock_protos.h>
47 #include <i386/mp.h>
48 #include <i386/machine_routines.h>
49 #include <i386/tsc.h>
50 #endif
51
52 #include <kern/clock.h>
53
54 #include <kern/thread.h>
55 #include <kern/task.h>
56 #include <kern/debug.h>
57 #include <kern/kalloc.h>
58 #include <kern/cpu_data.h>
59 #include <kern/assert.h>
60 #include <kern/telemetry.h>
61 #include <kern/sched_prim.h>
62 #include <vm/vm_kern.h>
63 #include <sys/lock.h>
64 #include <kperf/kperf.h>
65 #include <pexpert/device_tree.h>
66
67 #include <sys/malloc.h>
68 #include <sys/mcache.h>
69
70 #include <sys/vnode.h>
71 #include <sys/vnode_internal.h>
72 #include <sys/fcntl.h>
73 #include <sys/file_internal.h>
74 #include <sys/ubc.h>
75 #include <sys/param.h> /* for isset() */
76
77 #include <mach/mach_host.h> /* for host_info() */
78 #include <libkern/OSAtomic.h>
79
80 #include <machine/pal_routines.h>
81 #include <machine/atomic.h>
82
83 /*
84 * IOP(s)
85 *
86 * https://coreoswiki.apple.com/wiki/pages/U6z3i0q9/Consistent_Logging_Implementers_Guide.html
87 *
88 * IOP(s) are auxiliary cores that want to participate in kdebug event logging.
89 * They are registered dynamically. Each is assigned a cpu_id at registration.
90 *
91 * NOTE: IOP trace events may not use the same clock hardware as "normal"
92 * cpus. There is an effort made to synchronize the IOP timebase with the
93 * AP, but it should be understood that there may be discrepancies.
94 *
95 * Once registered, an IOP is permanent, it cannot be unloaded/unregistered.
96 * The current implementation depends on this for thread safety.
97 *
98 * New registrations occur by allocating a kd_iop struct and assigning
99 * a provisional cpu_id of list_head->cpu_id + 1. Then a CAS to claim the
100 * list_head pointer resolves any races.
101 *
102 * You may safely walk the kd_iops list at any time, without holding locks.
103 *
104 * When allocating buffers, the current kd_iops head is captured. Any operations
105 * that depend on the buffer state (such as flushing IOP traces on reads,
106 * etc.) should use the captured list head. This will allow registrations to
107 * take place while trace is in use.
108 */
109
110 typedef struct kd_iop {
111 kd_callback_t callback;
112 uint32_t cpu_id;
113 uint64_t last_timestamp; /* Prevent timer rollback */
114 struct kd_iop* next;
115 } kd_iop_t;
116
117 static kd_iop_t* kd_iops = NULL;
118
119 /*
120 * Typefilter(s)
121 *
122 * A typefilter is an 8KB bitmap that is used to selectively filter events
123 * being recorded. It is able to individually address every class & subclass.
124 *
125 * There is a shared typefilter in the kernel which is lazily allocated. Once
126 * allocated, the shared typefilter is never deallocated. The shared typefilter
127 * is also mapped on demand into userspace processes that invoke kdebug_trace
128 * API from Libsyscall. When mapped into a userspace process, the memory is
129 * read only, and does not have a fixed address.
130 *
131 * It is a requirement that the kernel's shared typefilter always pass DBG_TRACE
132 * events. This is enforced automatically, by having the needed bits set any
133 * time the shared typefilter is mutated.
134 */
135
136 typedef uint8_t* typefilter_t;
137
138 static typefilter_t kdbg_typefilter;
139 static mach_port_t kdbg_typefilter_memory_entry;
140
/*
 * There are 3 combinations of page sizes:
 *
 *  4KB /  4KB
 *  4KB / 16KB
 * 16KB / 16KB
 *
 * The typefilter is exactly 8KB. In the first two scenarios, we would like
 * to use 2 pages exactly; in the third scenario we must make certain that
 * a full page is allocated so we do not inadvertently share 8KB of random
 * data to userspace. The round_page_32 macro rounds to kernel page size.
 */
#define TYPEFILTER_ALLOC_SIZE \
	MAX(round_page_32(KDBG_TYPEFILTER_BITMAP_SIZE), KDBG_TYPEFILTER_BITMAP_SIZE)
154
155 static typefilter_t typefilter_create(void)
156 {
157 typefilter_t tf;
158 if (KERN_SUCCESS == kmem_alloc(kernel_map, (vm_offset_t*)&tf, TYPEFILTER_ALLOC_SIZE, VM_KERN_MEMORY_DIAG)) {
159 memset(&tf[KDBG_TYPEFILTER_BITMAP_SIZE], 0, TYPEFILTER_ALLOC_SIZE - KDBG_TYPEFILTER_BITMAP_SIZE);
160 return tf;
161 }
162 return NULL;
163 }
164
165 static void typefilter_deallocate(typefilter_t tf)
166 {
167 assert(tf != NULL);
168 assert(tf != kdbg_typefilter);
169 kmem_free(kernel_map, (vm_offset_t)tf, TYPEFILTER_ALLOC_SIZE);
170 }
171
172 static void typefilter_copy(typefilter_t dst, typefilter_t src)
173 {
174 assert(src != NULL);
175 assert(dst != NULL);
176 memcpy(dst, src, KDBG_TYPEFILTER_BITMAP_SIZE);
177 }
178
179 static void typefilter_reject_all(typefilter_t tf)
180 {
181 assert(tf != NULL);
182 memset(tf, 0, KDBG_TYPEFILTER_BITMAP_SIZE);
183 }
184
185 static void typefilter_allow_all(typefilter_t tf)
186 {
187 assert(tf != NULL);
188 memset(tf, ~0, KDBG_TYPEFILTER_BITMAP_SIZE);
189 }
190
191 static void typefilter_allow_class(typefilter_t tf, uint8_t class)
192 {
193 assert(tf != NULL);
194 const uint32_t BYTES_PER_CLASS = 256 / 8; // 256 subclasses, 1 bit each
195 memset(&tf[class * BYTES_PER_CLASS], 0xFF, BYTES_PER_CLASS);
196 }
197
198 static void typefilter_allow_csc(typefilter_t tf, uint16_t csc)
199 {
200 assert(tf != NULL);
201 setbit(tf, csc);
202 }
203
204 static bool typefilter_is_debugid_allowed(typefilter_t tf, uint32_t id)
205 {
206 assert(tf != NULL);
207 return isset(tf, KDBG_EXTRACT_CSC(id));
208 }
209
210 static mach_port_t typefilter_create_memory_entry(typefilter_t tf)
211 {
212 assert(tf != NULL);
213
214 mach_port_t memory_entry = MACH_PORT_NULL;
215 memory_object_size_t size = TYPEFILTER_ALLOC_SIZE;
216
217 mach_make_memory_entry_64(kernel_map,
218 &size,
219 (memory_object_offset_t)tf,
220 VM_PROT_READ,
221 &memory_entry,
222 MACH_PORT_NULL);
223
224 return memory_entry;
225 }
226
227 static int kdbg_copyin_typefilter(user_addr_t addr, size_t size);
228 static void kdbg_enable_typefilter(void);
229 static void kdbg_disable_typefilter(void);
230
231 /*
232 * External prototypes
233 */
234
235 void task_act_iterate_wth_args(task_t, void(*)(thread_t, void *), void *);
236 int cpu_number(void); /* XXX <machine/...> include path broken */
237 void commpage_update_kdebug_state(void); /* XXX sign */
238
239 extern int log_leaks;
240
241 /*
242 * This flag is for testing purposes only -- it's highly experimental and tools
243 * have not been updated to support it.
244 */
245 static bool kdbg_continuous_time = false;
246
247 static inline uint64_t
248 kdbg_timestamp(void)
249 {
250 if (kdbg_continuous_time) {
251 return mach_continuous_time();
252 } else {
253 return mach_absolute_time();
254 }
255 }
256
257 static int kdbg_debug = 0;
258
259 #if KDEBUG_MOJO_TRACE
260 #include <sys/kdebugevents.h>
261 static void kdebug_serial_print( /* forward */
262 uint32_t, uint32_t, uint64_t,
263 uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
264 #endif
265
266 int kdbg_control(int *, u_int, user_addr_t, size_t *);
267
268 static int kdbg_read(user_addr_t, size_t *, vnode_t, vfs_context_t, uint32_t);
269 static int kdbg_readcpumap(user_addr_t, size_t *);
270 static int kdbg_readthrmap_v3(user_addr_t, size_t, int);
271 static int kdbg_readcurthrmap(user_addr_t, size_t *);
272 static int kdbg_setreg(kd_regtype *);
273 static int kdbg_setpidex(kd_regtype *);
274 static int kdbg_setpid(kd_regtype *);
275 static void kdbg_thrmap_init(void);
276 static int kdbg_reinit(boolean_t);
277 static int kdbg_bootstrap(boolean_t);
278 static int kdbg_test(size_t flavor);
279
280 static int kdbg_write_v1_header(boolean_t write_thread_map, vnode_t vp, vfs_context_t ctx);
281 static int kdbg_write_thread_map(vnode_t vp, vfs_context_t ctx);
282 static int kdbg_copyout_thread_map(user_addr_t buffer, size_t *buffer_size);
283 static void kdbg_clear_thread_map(void);
284
285 static boolean_t kdbg_wait(uint64_t timeout_ms, boolean_t locked_wait);
286 static void kdbg_wakeup(void);
287
288 int kdbg_cpumap_init_internal(kd_iop_t* iops, uint32_t cpu_count,
289 uint8_t** cpumap, uint32_t* cpumap_size);
290
291 static kd_threadmap *kdbg_thrmap_init_internal(unsigned int count,
292 unsigned int *mapsize,
293 unsigned int *mapcount);
294
295 static boolean_t kdebug_current_proc_enabled(uint32_t debugid);
296 static errno_t kdebug_check_trace_string(uint32_t debugid, uint64_t str_id);
297
298 int kdbg_write_v3_header(user_addr_t, size_t *, int);
299 int kdbg_write_v3_chunk_header(user_addr_t buffer, uint32_t tag,
300 uint32_t sub_tag, uint64_t length,
301 vnode_t vp, vfs_context_t ctx);
302
303 user_addr_t kdbg_write_v3_event_chunk_header(user_addr_t buffer, uint32_t tag,
304 uint64_t length, vnode_t vp,
305 vfs_context_t ctx);
306
307 // Helper functions
308
309 static int create_buffers(boolean_t);
310 static void delete_buffers(void);
311
312 extern int tasks_count;
313 extern int threads_count;
314 extern void IOSleep(int);
315
316 /* trace enable status */
317 unsigned int kdebug_enable = 0;
318
319 /* A static buffer to record events prior to the start of regular logging */
320
321 #define KD_EARLY_BUFFER_SIZE (16 * 1024)
322 #define KD_EARLY_BUFFER_NBUFS (KD_EARLY_BUFFER_SIZE / sizeof(kd_buf))
323 #if CONFIG_EMBEDDED
324 /*
325 * On embedded, the space for this is carved out by osfmk/arm/data.s -- clang
326 * has problems aligning to greater than 4K.
327 */
328 extern kd_buf kd_early_buffer[KD_EARLY_BUFFER_NBUFS];
329 #else /* CONFIG_EMBEDDED */
330 __attribute__((aligned(KD_EARLY_BUFFER_SIZE)))
331 static kd_buf kd_early_buffer[KD_EARLY_BUFFER_NBUFS];
332 #endif /* !CONFIG_EMBEDDED */
333
334 static unsigned int kd_early_index = 0;
335 static bool kd_early_overflow = false;
336 static bool kd_early_done = false;
337
338 #define SLOW_NOLOG 0x01
339 #define SLOW_CHECKS 0x02
340
#define EVENTS_PER_STORAGE_UNIT    2048
#define MIN_STORAGE_UNITS_PER_CPU  4

/* Turn a kds_ptr into the address of the struct kd_storage it names. */
#define POINTER_FROM_KDS_PTR(x) (&kd_bufs[x.buffer_index].kdsb_addr[x.offset])

/*
 * Compact reference to a storage unit: an index into kd_bufs plus the
 * unit's position inside that buffer. `raw` views the same bits as a
 * single value and is compared against KDS_PTR_NULL.
 */
union kds_ptr {
	struct {
		uint32_t buffer_index:21;
		uint16_t offset:11;
	};
	uint32_t raw;
};
353
354 struct kd_storage {
355 union kds_ptr kds_next;
356 uint32_t kds_bufindx;
357 uint32_t kds_bufcnt;
358 uint32_t kds_readlast;
359 boolean_t kds_lostevents;
360 uint64_t kds_timestamp;
361
362 kd_buf kds_records[EVENTS_PER_STORAGE_UNIT];
363 };
364
365 #define MAX_BUFFER_SIZE (1024 * 1024 * 128)
366 #define N_STORAGE_UNITS_PER_BUFFER (MAX_BUFFER_SIZE / sizeof(struct kd_storage))
367 static_assert(N_STORAGE_UNITS_PER_BUFFER <= 0x7ff,
368 "shoudn't overflow kds_ptr.offset");
369
/* One allocation of storage units, together with its size in bytes. */
struct kd_storage_buffers {
	struct kd_storage *kdsb_addr;
	uint32_t           kdsb_size;
};

/* Value of kds_ptr.raw that means "no storage unit". */
#define KDS_PTR_NULL 0xffffffff

struct kd_storage_buffers *kd_bufs = NULL;   /* array of n_storage_buffers entries */
int          n_storage_units     = 0;
unsigned int n_storage_buffers   = 0;
int          n_storage_threshold = 0;        /* create_buffers() sets this to n_storage_units / 2 */
int          kds_waiter          = 0;
381
382 #pragma pack(0)
383 struct kd_bufinfo {
384 union kds_ptr kd_list_head;
385 union kds_ptr kd_list_tail;
386 boolean_t kd_lostevents;
387 uint32_t _pad;
388 uint64_t kd_prev_timebase;
389 uint32_t num_bufs;
390 } __attribute__(( aligned(MAX_CPU_CACHE_LINE_SIZE) ));
391
392
393 /*
394 * In principle, this control block can be shared in DRAM with other
395 * coprocessors and runtimes, for configuring what tracing is enabled.
396 */
397 struct kd_ctrl_page_t {
398 union kds_ptr kds_free_list;
399 uint32_t enabled :1;
400 uint32_t _pad0 :31;
401 int kds_inuse_count;
402 uint32_t kdebug_flags;
403 uint32_t kdebug_slowcheck;
404 uint64_t oldest_time;
405 /*
406 * The number of kd_bufinfo structs allocated may not match the current
407 * number of active cpus. We capture the iops list head at initialization
408 * which we could use to calculate the number of cpus we allocated data for,
409 * unless it happens to be null. To avoid that case, we explicitly also
410 * capture a cpu count.
411 */
412 kd_iop_t* kdebug_iops;
413 uint32_t kdebug_cpus;
414 } kd_ctrl_page = {
415 .kds_free_list = {.raw = KDS_PTR_NULL},
416 .kdebug_slowcheck = SLOW_NOLOG,
417 .oldest_time = 0
418 };
419
420 #pragma pack()
421
422 struct kd_bufinfo *kdbip = NULL;
423
424 #define KDCOPYBUF_COUNT 8192
425 #define KDCOPYBUF_SIZE (KDCOPYBUF_COUNT * sizeof(kd_buf))
426
427 #define PAGE_4KB 4096
428 #define PAGE_16KB 16384
429
430 kd_buf *kdcopybuf = NULL;
431
432 unsigned int nkdbufs = 0;
433 unsigned int kdlog_beg=0;
434 unsigned int kdlog_end=0;
435 unsigned int kdlog_value1=0;
436 unsigned int kdlog_value2=0;
437 unsigned int kdlog_value3=0;
438 unsigned int kdlog_value4=0;
439
440 static lck_spin_t * kdw_spin_lock;
441 static lck_spin_t * kds_spin_lock;
442
443 kd_threadmap *kd_mapptr = 0;
444 unsigned int kd_mapsize = 0;
445 unsigned int kd_mapcount = 0;
446
447 off_t RAW_file_offset = 0;
448 int RAW_file_written = 0;
449
450 #define RAW_FLUSH_SIZE (2 * 1024 * 1024)
451
452 /*
453 * A globally increasing counter for identifying strings in trace. Starts at
454 * 1 because 0 is a reserved return value.
455 */
456 __attribute__((aligned(MAX_CPU_CACHE_LINE_SIZE)))
457 static uint64_t g_curr_str_id = 1;
458
459 #define STR_ID_SIG_OFFSET (48)
460 #define STR_ID_MASK ((1ULL << STR_ID_SIG_OFFSET) - 1)
461 #define STR_ID_SIG_MASK (~STR_ID_MASK)
462
463 /*
464 * A bit pattern for identifying string IDs generated by
465 * kdebug_trace_string(2).
466 */
467 static uint64_t g_str_id_signature = (0x70acULL << STR_ID_SIG_OFFSET);
468
469 #define INTERRUPT 0x01050000
470 #define MACH_vmfault 0x01300008
471 #define BSC_SysCall 0x040c0000
472 #define MACH_SysCall 0x010c0000
473
474 /* task to string structure */
475 struct tts
476 {
477 task_t task; /* from procs task */
478 pid_t pid; /* from procs p_pid */
479 char task_comm[20]; /* from procs p_comm */
480 };
481
482 typedef struct tts tts_t;
483
484 struct krt
485 {
486 kd_threadmap *map; /* pointer to the map buffer */
487 int count;
488 int maxcount;
489 struct tts *atts;
490 };
491
492 typedef struct krt krt_t;
493
494 static uint32_t
495 kdbg_cpu_count(boolean_t early_trace)
496 {
497 if (early_trace) {
498 #if CONFIG_EMBEDDED
499 return ml_get_cpu_count();
500 #else
501 return max_ncpus;
502 #endif
503 }
504
505 host_basic_info_data_t hinfo;
506 mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
507 host_info((host_t)1 /* BSD_HOST */, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
508 assert(hinfo.logical_cpu_max > 0);
509 return hinfo.logical_cpu_max;
510 }
511
512 #if MACH_ASSERT
513 #if CONFIG_EMBEDDED
514 static boolean_t
515 kdbg_iop_list_is_valid(kd_iop_t* iop)
516 {
517 if (iop) {
518 /* Is list sorted by cpu_id? */
519 kd_iop_t* temp = iop;
520 do {
521 assert(!temp->next || temp->next->cpu_id == temp->cpu_id - 1);
522 assert(temp->next || (temp->cpu_id == kdbg_cpu_count(FALSE) || temp->cpu_id == kdbg_cpu_count(TRUE)));
523 } while ((temp = temp->next));
524
525 /* Does each entry have a function and a name? */
526 temp = iop;
527 do {
528 assert(temp->callback.func);
529 assert(strlen(temp->callback.iop_name) < sizeof(temp->callback.iop_name));
530 } while ((temp = temp->next));
531 }
532
533 return TRUE;
534 }
535
536 static boolean_t
537 kdbg_iop_list_contains_cpu_id(kd_iop_t* list, uint32_t cpu_id)
538 {
539 while (list) {
540 if (list->cpu_id == cpu_id)
541 return TRUE;
542 list = list->next;
543 }
544
545 return FALSE;
546 }
547 #endif /* CONFIG_EMBEDDED */
548 #endif /* MACH_ASSERT */
549
550 static void
551 kdbg_iop_list_callback(kd_iop_t* iop, kd_callback_type type, void* arg)
552 {
553 while (iop) {
554 iop->callback.func(iop->callback.context, type, arg);
555 iop = iop->next;
556 }
557 }
558
559 static void
560 kdbg_set_tracing_enabled(boolean_t enabled, uint32_t trace_type)
561 {
562 int s = ml_set_interrupts_enabled(FALSE);
563 lck_spin_lock(kds_spin_lock);
564 if (enabled) {
565 /*
566 * The oldest valid time is now; reject old events from IOPs.
567 */
568 kd_ctrl_page.oldest_time = kdbg_timestamp();
569 kdebug_enable |= trace_type;
570 kd_ctrl_page.kdebug_slowcheck &= ~SLOW_NOLOG;
571 kd_ctrl_page.enabled = 1;
572 commpage_update_kdebug_state();
573 } else {
574 kdebug_enable &= ~(KDEBUG_ENABLE_TRACE|KDEBUG_ENABLE_PPT);
575 kd_ctrl_page.kdebug_slowcheck |= SLOW_NOLOG;
576 kd_ctrl_page.enabled = 0;
577 commpage_update_kdebug_state();
578 }
579 lck_spin_unlock(kds_spin_lock);
580 ml_set_interrupts_enabled(s);
581
582 if (enabled) {
583 kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_KDEBUG_ENABLED, NULL);
584 } else {
585 /*
586 * If you do not flush the IOP trace buffers, they can linger
587 * for a considerable period; consider code which disables and
588 * deallocates without a final sync flush.
589 */
590 kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_KDEBUG_DISABLED, NULL);
591 kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_SYNC_FLUSH, NULL);
592 }
593 }
594
595 static void
596 kdbg_set_flags(int slowflag, int enableflag, boolean_t enabled)
597 {
598 int s = ml_set_interrupts_enabled(FALSE);
599 lck_spin_lock(kds_spin_lock);
600
601 if (enabled) {
602 kd_ctrl_page.kdebug_slowcheck |= slowflag;
603 kdebug_enable |= enableflag;
604 } else {
605 kd_ctrl_page.kdebug_slowcheck &= ~slowflag;
606 kdebug_enable &= ~enableflag;
607 }
608
609 lck_spin_unlock(kds_spin_lock);
610 ml_set_interrupts_enabled(s);
611 }
612
613 /*
614 * Disable wrapping and return true if trace wrapped, false otherwise.
615 */
616 static boolean_t
617 disable_wrap(uint32_t *old_slowcheck, uint32_t *old_flags)
618 {
619 boolean_t wrapped;
620 int s = ml_set_interrupts_enabled(FALSE);
621 lck_spin_lock(kds_spin_lock);
622
623 *old_slowcheck = kd_ctrl_page.kdebug_slowcheck;
624 *old_flags = kd_ctrl_page.kdebug_flags;
625
626 wrapped = kd_ctrl_page.kdebug_flags & KDBG_WRAPPED;
627 kd_ctrl_page.kdebug_flags &= ~KDBG_WRAPPED;
628 kd_ctrl_page.kdebug_flags |= KDBG_NOWRAP;
629
630 lck_spin_unlock(kds_spin_lock);
631 ml_set_interrupts_enabled(s);
632
633 return wrapped;
634 }
635
636 static void
637 enable_wrap(uint32_t old_slowcheck)
638 {
639 int s = ml_set_interrupts_enabled(FALSE);
640 lck_spin_lock(kds_spin_lock);
641
642 kd_ctrl_page.kdebug_flags &= ~KDBG_NOWRAP;
643
644 if ( !(old_slowcheck & SLOW_NOLOG))
645 kd_ctrl_page.kdebug_slowcheck &= ~SLOW_NOLOG;
646
647 lck_spin_unlock(kds_spin_lock);
648 ml_set_interrupts_enabled(s);
649 }
650
651 static int
652 create_buffers(boolean_t early_trace)
653 {
654 unsigned int i;
655 unsigned int p_buffer_size;
656 unsigned int f_buffer_size;
657 unsigned int f_buffers;
658 int error = 0;
659
660 /*
661 * For the duration of this allocation, trace code will only reference
662 * kdebug_iops. Any iops registered after this enabling will not be
663 * messaged until the buffers are reallocated.
664 *
665 * TLDR; Must read kd_iops once and only once!
666 */
667 kd_ctrl_page.kdebug_iops = kd_iops;
668
669 #if CONFIG_EMBEDDED
670 assert(kdbg_iop_list_is_valid(kd_ctrl_page.kdebug_iops));
671 #endif
672
673 /*
674 * If the list is valid, it is sorted, newest -> oldest. Each iop entry
675 * has a cpu_id of "the older entry + 1", so the highest cpu_id will
676 * be the list head + 1.
677 */
678
679 kd_ctrl_page.kdebug_cpus = kd_ctrl_page.kdebug_iops ? kd_ctrl_page.kdebug_iops->cpu_id + 1 : kdbg_cpu_count(early_trace);
680
681 if (kmem_alloc(kernel_map, (vm_offset_t *)&kdbip, sizeof(struct kd_bufinfo) * kd_ctrl_page.kdebug_cpus, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
682 error = ENOSPC;
683 goto out;
684 }
685
686 if (nkdbufs < (kd_ctrl_page.kdebug_cpus * EVENTS_PER_STORAGE_UNIT * MIN_STORAGE_UNITS_PER_CPU))
687 n_storage_units = kd_ctrl_page.kdebug_cpus * MIN_STORAGE_UNITS_PER_CPU;
688 else
689 n_storage_units = nkdbufs / EVENTS_PER_STORAGE_UNIT;
690
691 nkdbufs = n_storage_units * EVENTS_PER_STORAGE_UNIT;
692
693 f_buffers = n_storage_units / N_STORAGE_UNITS_PER_BUFFER;
694 n_storage_buffers = f_buffers;
695
696 f_buffer_size = N_STORAGE_UNITS_PER_BUFFER * sizeof(struct kd_storage);
697 p_buffer_size = (n_storage_units % N_STORAGE_UNITS_PER_BUFFER) * sizeof(struct kd_storage);
698
699 if (p_buffer_size)
700 n_storage_buffers++;
701
702 kd_bufs = NULL;
703
704 if (kdcopybuf == 0) {
705 if (kmem_alloc(kernel_map, (vm_offset_t *)&kdcopybuf, (vm_size_t)KDCOPYBUF_SIZE, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
706 error = ENOSPC;
707 goto out;
708 }
709 }
710 if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs, (vm_size_t)(n_storage_buffers * sizeof(struct kd_storage_buffers)), VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
711 error = ENOSPC;
712 goto out;
713 }
714 bzero(kd_bufs, n_storage_buffers * sizeof(struct kd_storage_buffers));
715
716 for (i = 0; i < f_buffers; i++) {
717 if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs[i].kdsb_addr, (vm_size_t)f_buffer_size, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
718 error = ENOSPC;
719 goto out;
720 }
721 bzero(kd_bufs[i].kdsb_addr, f_buffer_size);
722
723 kd_bufs[i].kdsb_size = f_buffer_size;
724 }
725 if (p_buffer_size) {
726 if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs[i].kdsb_addr, (vm_size_t)p_buffer_size, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
727 error = ENOSPC;
728 goto out;
729 }
730 bzero(kd_bufs[i].kdsb_addr, p_buffer_size);
731
732 kd_bufs[i].kdsb_size = p_buffer_size;
733 }
734 n_storage_units = 0;
735
736 for (i = 0; i < n_storage_buffers; i++) {
737 struct kd_storage *kds;
738 int n_elements;
739 int n;
740
741 n_elements = kd_bufs[i].kdsb_size / sizeof(struct kd_storage);
742 kds = kd_bufs[i].kdsb_addr;
743
744 for (n = 0; n < n_elements; n++) {
745 kds[n].kds_next.buffer_index = kd_ctrl_page.kds_free_list.buffer_index;
746 kds[n].kds_next.offset = kd_ctrl_page.kds_free_list.offset;
747
748 kd_ctrl_page.kds_free_list.buffer_index = i;
749 kd_ctrl_page.kds_free_list.offset = n;
750 }
751 n_storage_units += n_elements;
752 }
753
754 bzero((char *)kdbip, sizeof(struct kd_bufinfo) * kd_ctrl_page.kdebug_cpus);
755
756 for (i = 0; i < kd_ctrl_page.kdebug_cpus; i++) {
757 kdbip[i].kd_list_head.raw = KDS_PTR_NULL;
758 kdbip[i].kd_list_tail.raw = KDS_PTR_NULL;
759 kdbip[i].kd_lostevents = FALSE;
760 kdbip[i].num_bufs = 0;
761 }
762
763 kd_ctrl_page.kdebug_flags |= KDBG_BUFINIT;
764
765 kd_ctrl_page.kds_inuse_count = 0;
766 n_storage_threshold = n_storage_units / 2;
767 out:
768 if (error)
769 delete_buffers();
770
771 return(error);
772 }
773
774 static void
775 delete_buffers(void)
776 {
777 unsigned int i;
778
779 if (kd_bufs) {
780 for (i = 0; i < n_storage_buffers; i++) {
781 if (kd_bufs[i].kdsb_addr) {
782 kmem_free(kernel_map, (vm_offset_t)kd_bufs[i].kdsb_addr, (vm_size_t)kd_bufs[i].kdsb_size);
783 }
784 }
785 kmem_free(kernel_map, (vm_offset_t)kd_bufs, (vm_size_t)(n_storage_buffers * sizeof(struct kd_storage_buffers)));
786
787 kd_bufs = NULL;
788 n_storage_buffers = 0;
789 }
790 if (kdcopybuf) {
791 kmem_free(kernel_map, (vm_offset_t)kdcopybuf, KDCOPYBUF_SIZE);
792
793 kdcopybuf = NULL;
794 }
795 kd_ctrl_page.kds_free_list.raw = KDS_PTR_NULL;
796
797 if (kdbip) {
798 kmem_free(kernel_map, (vm_offset_t)kdbip, sizeof(struct kd_bufinfo) * kd_ctrl_page.kdebug_cpus);
799
800 kdbip = NULL;
801 }
802 kd_ctrl_page.kdebug_iops = NULL;
803 kd_ctrl_page.kdebug_cpus = 0;
804 kd_ctrl_page.kdebug_flags &= ~KDBG_BUFINIT;
805 }
806
807 void
808 release_storage_unit(int cpu, uint32_t kdsp_raw)
809 {
810 int s = 0;
811 struct kd_storage *kdsp_actual;
812 struct kd_bufinfo *kdbp;
813 union kds_ptr kdsp;
814
815 kdsp.raw = kdsp_raw;
816
817 s = ml_set_interrupts_enabled(FALSE);
818 lck_spin_lock(kds_spin_lock);
819
820 kdbp = &kdbip[cpu];
821
822 if (kdsp.raw == kdbp->kd_list_head.raw) {
823 /*
824 * it's possible for the storage unit pointed to
825 * by kdsp to have already been stolen... so
826 * check to see if it's still the head of the list
827 * now that we're behind the lock that protects
828 * adding and removing from the queue...
829 * since we only ever release and steal units from
830 * that position, if it's no longer the head
831 * we having nothing to do in this context
832 */
833 kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
834 kdbp->kd_list_head = kdsp_actual->kds_next;
835
836 kdsp_actual->kds_next = kd_ctrl_page.kds_free_list;
837 kd_ctrl_page.kds_free_list = kdsp;
838
839 kd_ctrl_page.kds_inuse_count--;
840 }
841 lck_spin_unlock(kds_spin_lock);
842 ml_set_interrupts_enabled(s);
843 }
844
845
846 boolean_t
847 allocate_storage_unit(int cpu)
848 {
849 union kds_ptr kdsp;
850 struct kd_storage *kdsp_actual, *kdsp_next_actual;
851 struct kd_bufinfo *kdbp, *kdbp_vict, *kdbp_try;
852 uint64_t oldest_ts, ts;
853 boolean_t retval = TRUE;
854 int s = 0;
855
856 s = ml_set_interrupts_enabled(FALSE);
857 lck_spin_lock(kds_spin_lock);
858
859 kdbp = &kdbip[cpu];
860
861 /* If someone beat us to the allocate, return success */
862 if (kdbp->kd_list_tail.raw != KDS_PTR_NULL) {
863 kdsp_actual = POINTER_FROM_KDS_PTR(kdbp->kd_list_tail);
864
865 if (kdsp_actual->kds_bufindx < EVENTS_PER_STORAGE_UNIT)
866 goto out;
867 }
868
869 if ((kdsp = kd_ctrl_page.kds_free_list).raw != KDS_PTR_NULL) {
870 /*
871 * If there's a free page, grab it from the free list.
872 */
873 kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
874 kd_ctrl_page.kds_free_list = kdsp_actual->kds_next;
875
876 kd_ctrl_page.kds_inuse_count++;
877 } else {
878 /*
879 * Otherwise, we're going to lose events and repurpose the oldest
880 * storage unit we can find.
881 */
882 if (kd_ctrl_page.kdebug_flags & KDBG_NOWRAP) {
883 kd_ctrl_page.kdebug_slowcheck |= SLOW_NOLOG;
884 kdbp->kd_lostevents = TRUE;
885 retval = FALSE;
886 goto out;
887 }
888 kdbp_vict = NULL;
889 oldest_ts = UINT64_MAX;
890
891 for (kdbp_try = &kdbip[0]; kdbp_try < &kdbip[kd_ctrl_page.kdebug_cpus]; kdbp_try++) {
892
893 if (kdbp_try->kd_list_head.raw == KDS_PTR_NULL) {
894 /*
895 * no storage unit to steal
896 */
897 continue;
898 }
899
900 kdsp_actual = POINTER_FROM_KDS_PTR(kdbp_try->kd_list_head);
901
902 if (kdsp_actual->kds_bufcnt < EVENTS_PER_STORAGE_UNIT) {
903 /*
904 * make sure we don't steal the storage unit
905 * being actively recorded to... need to
906 * move on because we don't want an out-of-order
907 * set of events showing up later
908 */
909 continue;
910 }
911
912 /*
913 * When wrapping, steal the storage unit with the
914 * earliest timestamp on its last event, instead of the
915 * earliest timestamp on the first event. This allows a
916 * storage unit with more recent events to be preserved,
917 * even if the storage unit contains events that are
918 * older than those found in other CPUs.
919 */
920 ts = kdbg_get_timestamp(&kdsp_actual->kds_records[EVENTS_PER_STORAGE_UNIT - 1]);
921 if (ts < oldest_ts) {
922 oldest_ts = ts;
923 kdbp_vict = kdbp_try;
924 }
925 }
926 if (kdbp_vict == NULL) {
927 kdebug_enable = 0;
928 kd_ctrl_page.enabled = 0;
929 commpage_update_kdebug_state();
930 retval = FALSE;
931 goto out;
932 }
933 kdsp = kdbp_vict->kd_list_head;
934 kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
935 kdbp_vict->kd_list_head = kdsp_actual->kds_next;
936
937 if (kdbp_vict->kd_list_head.raw != KDS_PTR_NULL) {
938 kdsp_next_actual = POINTER_FROM_KDS_PTR(kdbp_vict->kd_list_head);
939 kdsp_next_actual->kds_lostevents = TRUE;
940 } else
941 kdbp_vict->kd_lostevents = TRUE;
942
943 if (kd_ctrl_page.oldest_time < oldest_ts) {
944 kd_ctrl_page.oldest_time = oldest_ts;
945 }
946 kd_ctrl_page.kdebug_flags |= KDBG_WRAPPED;
947 }
948 kdsp_actual->kds_timestamp = kdbg_timestamp();
949 kdsp_actual->kds_next.raw = KDS_PTR_NULL;
950 kdsp_actual->kds_bufcnt = 0;
951 kdsp_actual->kds_readlast = 0;
952
953 kdsp_actual->kds_lostevents = kdbp->kd_lostevents;
954 kdbp->kd_lostevents = FALSE;
955 kdsp_actual->kds_bufindx = 0;
956
957 if (kdbp->kd_list_head.raw == KDS_PTR_NULL)
958 kdbp->kd_list_head = kdsp;
959 else
960 POINTER_FROM_KDS_PTR(kdbp->kd_list_tail)->kds_next = kdsp;
961 kdbp->kd_list_tail = kdsp;
962 out:
963 lck_spin_unlock(kds_spin_lock);
964 ml_set_interrupts_enabled(s);
965
966 return (retval);
967 }
968
969 int
970 kernel_debug_register_callback(kd_callback_t callback)
971 {
972 kd_iop_t* iop;
973 if (kmem_alloc(kernel_map, (vm_offset_t *)&iop, sizeof(kd_iop_t), VM_KERN_MEMORY_DIAG) == KERN_SUCCESS) {
974 memcpy(&iop->callback, &callback, sizeof(kd_callback_t));
975
976 /*
977 * <rdar://problem/13351477> Some IOP clients are not providing a name.
978 *
979 * Remove when fixed.
980 */
981 {
982 boolean_t is_valid_name = FALSE;
983 for (uint32_t length=0; length<sizeof(callback.iop_name); ++length) {
984 /* This is roughly isprintable(c) */
985 if (callback.iop_name[length] > 0x20 && callback.iop_name[length] < 0x7F)
986 continue;
987 if (callback.iop_name[length] == 0) {
988 if (length)
989 is_valid_name = TRUE;
990 break;
991 }
992 }
993
994 if (!is_valid_name) {
995 strlcpy(iop->callback.iop_name, "IOP-???", sizeof(iop->callback.iop_name));
996 }
997 }
998
999 iop->last_timestamp = 0;
1000
1001 do {
1002 /*
1003 * We use two pieces of state, the old list head
1004 * pointer, and the value of old_list_head->cpu_id.
1005 * If we read kd_iops more than once, it can change
1006 * between reads.
1007 *
1008 * TLDR; Must not read kd_iops more than once per loop.
1009 */
1010 iop->next = kd_iops;
1011 iop->cpu_id = iop->next ? (iop->next->cpu_id+1) : kdbg_cpu_count(FALSE);
1012
1013 /*
1014 * Header says OSCompareAndSwapPtr has a memory barrier
1015 */
1016 } while (!OSCompareAndSwapPtr(iop->next, iop, (void* volatile*)&kd_iops));
1017
1018 return iop->cpu_id;
1019 }
1020
1021 return 0;
1022 }
1023
1024 void
1025 kernel_debug_enter(
1026 uint32_t coreid,
1027 uint32_t debugid,
1028 uint64_t timestamp,
1029 uintptr_t arg1,
1030 uintptr_t arg2,
1031 uintptr_t arg3,
1032 uintptr_t arg4,
1033 uintptr_t threadid
1034 )
1035 {
1036 uint32_t bindx;
1037 kd_buf *kd;
1038 struct kd_bufinfo *kdbp;
1039 struct kd_storage *kdsp_actual;
1040 union kds_ptr kds_raw;
1041
1042 if (kd_ctrl_page.kdebug_slowcheck) {
1043
1044 if ( (kd_ctrl_page.kdebug_slowcheck & SLOW_NOLOG) || !(kdebug_enable & (KDEBUG_ENABLE_TRACE|KDEBUG_ENABLE_PPT)))
1045 goto out1;
1046
1047 if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) {
1048 if (typefilter_is_debugid_allowed(kdbg_typefilter, debugid))
1049 goto record_event;
1050 goto out1;
1051 }
1052 else if (kd_ctrl_page.kdebug_flags & KDBG_RANGECHECK) {
1053 if (debugid >= kdlog_beg && debugid <= kdlog_end)
1054 goto record_event;
1055 goto out1;
1056 }
1057 else if (kd_ctrl_page.kdebug_flags & KDBG_VALCHECK) {
1058 if ((debugid & KDBG_EVENTID_MASK) != kdlog_value1 &&
1059 (debugid & KDBG_EVENTID_MASK) != kdlog_value2 &&
1060 (debugid & KDBG_EVENTID_MASK) != kdlog_value3 &&
1061 (debugid & KDBG_EVENTID_MASK) != kdlog_value4)
1062 goto out1;
1063 }
1064 }
1065
1066 record_event:
1067 if (timestamp < kd_ctrl_page.oldest_time) {
1068 goto out1;
1069 }
1070
1071 #if CONFIG_EMBEDDED
1072 /*
1073 * When start_kern_tracing is called by the kernel to trace very
1074 * early kernel events, it saves data to a secondary buffer until
1075 * it is possible to initialize ktrace, and then dumps the events
1076 * into the ktrace buffer using this method. In this case, iops will
1077 * be NULL, and the coreid will be zero. It is not possible to have
1078 * a valid IOP coreid of zero, so pass if both iops is NULL and coreid
1079 * is zero.
1080 */
1081 assert(kdbg_iop_list_contains_cpu_id(kd_ctrl_page.kdebug_iops, coreid) || (kd_ctrl_page.kdebug_iops == NULL && coreid == 0));
1082 #endif
1083
1084 disable_preemption();
1085
1086 if (kd_ctrl_page.enabled == 0)
1087 goto out;
1088
1089 kdbp = &kdbip[coreid];
1090 timestamp &= KDBG_TIMESTAMP_MASK;
1091
1092 #if KDEBUG_MOJO_TRACE
1093 if (kdebug_enable & KDEBUG_ENABLE_SERIAL)
1094 kdebug_serial_print(coreid, debugid, timestamp,
1095 arg1, arg2, arg3, arg4, threadid);
1096 #endif
1097
1098 retry_q:
1099 kds_raw = kdbp->kd_list_tail;
1100
1101 if (kds_raw.raw != KDS_PTR_NULL) {
1102 kdsp_actual = POINTER_FROM_KDS_PTR(kds_raw);
1103 bindx = kdsp_actual->kds_bufindx;
1104 } else {
1105 kdsp_actual = NULL;
1106 bindx = EVENTS_PER_STORAGE_UNIT;
1107 }
1108
1109 if (kdsp_actual == NULL || bindx >= EVENTS_PER_STORAGE_UNIT) {
1110 if (allocate_storage_unit(coreid) == FALSE) {
1111 /*
1112 * this can only happen if wrapping
1113 * has been disabled
1114 */
1115 goto out;
1116 }
1117 goto retry_q;
1118 }
1119 if ( !OSCompareAndSwap(bindx, bindx + 1, &kdsp_actual->kds_bufindx))
1120 goto retry_q;
1121
1122 // IOP entries can be allocated before xnu allocates and inits the buffer
1123 if (timestamp < kdsp_actual->kds_timestamp)
1124 kdsp_actual->kds_timestamp = timestamp;
1125
1126 kd = &kdsp_actual->kds_records[bindx];
1127
1128 kd->debugid = debugid;
1129 kd->arg1 = arg1;
1130 kd->arg2 = arg2;
1131 kd->arg3 = arg3;
1132 kd->arg4 = arg4;
1133 kd->arg5 = threadid;
1134
1135 kdbg_set_timestamp_and_cpu(kd, timestamp, coreid);
1136
1137 OSAddAtomic(1, &kdsp_actual->kds_bufcnt);
1138 out:
1139 enable_preemption();
1140 out1:
1141 if ((kds_waiter && kd_ctrl_page.kds_inuse_count >= n_storage_threshold)) {
1142 kdbg_wakeup();
1143 }
1144 }
1145
1146 /*
1147 * Check if the given debug ID is allowed to be traced on the current process.
1148 *
1149 * Returns true if allowed and false otherwise.
1150 */
1151 static inline bool
1152 kdebug_debugid_procfilt_allowed(uint32_t debugid)
1153 {
1154 uint32_t procfilt_flags = kd_ctrl_page.kdebug_flags &
1155 (KDBG_PIDCHECK | KDBG_PIDEXCLUDE);
1156
1157 if (!procfilt_flags) {
1158 return true;
1159 }
1160
1161 /*
1162 * DBG_TRACE and MACH_SCHED tracepoints ignore the process filter.
1163 */
1164 if ((debugid & 0xffff0000) == MACHDBG_CODE(DBG_MACH_SCHED, 0) ||
1165 (debugid >> 24 == DBG_TRACE)) {
1166 return true;
1167 }
1168
1169 struct proc *curproc = current_proc();
1170 /*
1171 * If the process is missing (early in boot), allow it.
1172 */
1173 if (!curproc) {
1174 return true;
1175 }
1176
1177 if (procfilt_flags & KDBG_PIDCHECK) {
1178 /*
1179 * Allow only processes marked with the kdebug bit.
1180 */
1181 return curproc->p_kdebug;
1182 } else if (procfilt_flags & KDBG_PIDEXCLUDE) {
1183 /*
1184 * Exclude any process marked with the kdebug bit.
1185 */
1186 return !curproc->p_kdebug;
1187 } else {
1188 panic("kdebug: invalid procfilt flags %x", kd_ctrl_page.kdebug_flags);
1189 __builtin_unreachable();
1190 }
1191 }
1192
1193 static void
1194 kernel_debug_internal(
1195 uint32_t debugid,
1196 uintptr_t arg1,
1197 uintptr_t arg2,
1198 uintptr_t arg3,
1199 uintptr_t arg4,
1200 uintptr_t arg5,
1201 uint64_t flags)
1202 {
1203 uint64_t now;
1204 uint32_t bindx;
1205 kd_buf *kd;
1206 int cpu;
1207 struct kd_bufinfo *kdbp;
1208 struct kd_storage *kdsp_actual;
1209 union kds_ptr kds_raw;
1210 bool only_filter = flags & KDBG_FLAG_FILTERED;
1211 bool observe_procfilt = !(flags & KDBG_FLAG_NOPROCFILT);
1212
1213 if (kd_ctrl_page.kdebug_slowcheck) {
1214 if ((kd_ctrl_page.kdebug_slowcheck & SLOW_NOLOG) ||
1215 !(kdebug_enable & (KDEBUG_ENABLE_TRACE | KDEBUG_ENABLE_PPT)))
1216 {
1217 goto out1;
1218 }
1219
1220 if (!ml_at_interrupt_context() && observe_procfilt &&
1221 !kdebug_debugid_procfilt_allowed(debugid)) {
1222 goto out1;
1223 }
1224
1225 if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) {
1226 if (typefilter_is_debugid_allowed(kdbg_typefilter, debugid))
1227 goto record_event;
1228
1229 goto out1;
1230 } else if (only_filter) {
1231 goto out1;
1232 }
1233 else if (kd_ctrl_page.kdebug_flags & KDBG_RANGECHECK) {
1234 /* Always record trace system info */
1235 if (KDBG_EXTRACT_CLASS(debugid) == DBG_TRACE)
1236 goto record_event;
1237
1238 if (debugid < kdlog_beg || debugid > kdlog_end)
1239 goto out1;
1240 }
1241 else if (kd_ctrl_page.kdebug_flags & KDBG_VALCHECK) {
1242 /* Always record trace system info */
1243 if (KDBG_EXTRACT_CLASS(debugid) == DBG_TRACE)
1244 goto record_event;
1245
1246 if ((debugid & KDBG_EVENTID_MASK) != kdlog_value1 &&
1247 (debugid & KDBG_EVENTID_MASK) != kdlog_value2 &&
1248 (debugid & KDBG_EVENTID_MASK) != kdlog_value3 &&
1249 (debugid & KDBG_EVENTID_MASK) != kdlog_value4)
1250 goto out1;
1251 }
1252 } else if (only_filter) {
1253 goto out1;
1254 }
1255
1256 record_event:
1257 disable_preemption();
1258
1259 if (kd_ctrl_page.enabled == 0)
1260 goto out;
1261
1262 cpu = cpu_number();
1263 kdbp = &kdbip[cpu];
1264
1265 #if KDEBUG_MOJO_TRACE
1266 if (kdebug_enable & KDEBUG_ENABLE_SERIAL)
1267 kdebug_serial_print(cpu, debugid,
1268 kdbg_timestamp() & KDBG_TIMESTAMP_MASK,
1269 arg1, arg2, arg3, arg4, arg5);
1270 #endif
1271
1272 retry_q:
1273 kds_raw = kdbp->kd_list_tail;
1274
1275 if (kds_raw.raw != KDS_PTR_NULL) {
1276 kdsp_actual = POINTER_FROM_KDS_PTR(kds_raw);
1277 bindx = kdsp_actual->kds_bufindx;
1278 } else {
1279 kdsp_actual = NULL;
1280 bindx = EVENTS_PER_STORAGE_UNIT;
1281 }
1282
1283 if (kdsp_actual == NULL || bindx >= EVENTS_PER_STORAGE_UNIT) {
1284 if (allocate_storage_unit(cpu) == FALSE) {
1285 /*
1286 * this can only happen if wrapping
1287 * has been disabled
1288 */
1289 goto out;
1290 }
1291 goto retry_q;
1292 }
1293
1294 now = kdbg_timestamp() & KDBG_TIMESTAMP_MASK;
1295
1296 if ( !OSCompareAndSwap(bindx, bindx + 1, &kdsp_actual->kds_bufindx))
1297 goto retry_q;
1298
1299 kd = &kdsp_actual->kds_records[bindx];
1300
1301 kd->debugid = debugid;
1302 kd->arg1 = arg1;
1303 kd->arg2 = arg2;
1304 kd->arg3 = arg3;
1305 kd->arg4 = arg4;
1306 kd->arg5 = arg5;
1307
1308 kdbg_set_timestamp_and_cpu(kd, now, cpu);
1309
1310 OSAddAtomic(1, &kdsp_actual->kds_bufcnt);
1311
1312 #if KPERF
1313 kperf_kdebug_callback(debugid, __builtin_frame_address(0));
1314 #endif
1315 out:
1316 enable_preemption();
1317 out1:
1318 if (kds_waiter && kd_ctrl_page.kds_inuse_count >= n_storage_threshold) {
1319 uint32_t etype;
1320 uint32_t stype;
1321
1322 etype = debugid & KDBG_EVENTID_MASK;
1323 stype = debugid & KDBG_CSC_MASK;
1324
1325 if (etype == INTERRUPT || etype == MACH_vmfault ||
1326 stype == BSC_SysCall || stype == MACH_SysCall) {
1327 kdbg_wakeup();
1328 }
1329 }
1330 }
1331
1332 void
1333 kernel_debug(
1334 uint32_t debugid,
1335 uintptr_t arg1,
1336 uintptr_t arg2,
1337 uintptr_t arg3,
1338 uintptr_t arg4,
1339 __unused uintptr_t arg5)
1340 {
1341 kernel_debug_internal(debugid, arg1, arg2, arg3, arg4,
1342 (uintptr_t)thread_tid(current_thread()), 0);
1343 }
1344
/*
 * Record an event whose fifth argument is supplied by the caller rather
 * than being filled in with the current thread's ID.
 */
void
kernel_debug1(
	uint32_t	debugid,
	uintptr_t	arg1,
	uintptr_t	arg2,
	uintptr_t	arg3,
	uintptr_t	arg4,
	uintptr_t	arg5)
{
	const uint64_t no_flags = 0;

	kernel_debug_internal(debugid, arg1, arg2, arg3, arg4, arg5, no_flags);
}
1356
/*
 * Record an event for the current thread, passing the given KDBG_FLAG_*
 * flags through to kernel_debug_internal.
 */
void
kernel_debug_flags(
	uint32_t debugid,
	uintptr_t arg1,
	uintptr_t arg2,
	uintptr_t arg3,
	uintptr_t arg4,
	uint64_t flags)
{
	uintptr_t tid = (uintptr_t)thread_tid(current_thread());

	kernel_debug_internal(debugid, arg1, arg2, arg3, arg4, tid, flags);
}
1369
1370 void
1371 kernel_debug_filtered(
1372 uint32_t debugid,
1373 uintptr_t arg1,
1374 uintptr_t arg2,
1375 uintptr_t arg3,
1376 uintptr_t arg4)
1377 {
1378 kernel_debug_flags(debugid, arg1, arg2, arg3, arg4, KDBG_FLAG_FILTERED);
1379 }
1380
1381 void
1382 kernel_debug_string_early(const char *message)
1383 {
1384 uintptr_t arg[4] = {0, 0, 0, 0};
1385
1386 /* Stuff the message string in the args and log it. */
1387 strncpy((char *)arg, message, MIN(sizeof(arg), strlen(message)));
1388 KERNEL_DEBUG_EARLY(
1389 TRACE_INFO_STRING,
1390 arg[0], arg[1], arg[2], arg[3]);
1391 }
1392
1393 #define SIMPLE_STR_LEN (64)
1394 static_assert(SIMPLE_STR_LEN % sizeof(uintptr_t) == 0);
1395
1396 void
1397 kernel_debug_string_simple(uint32_t eventid, const char *str)
1398 {
1399 if (!kdebug_enable) {
1400 return;
1401 }
1402
1403 /* array of uintptr_ts simplifies emitting the string as arguments */
1404 uintptr_t str_buf[(SIMPLE_STR_LEN / sizeof(uintptr_t)) + 1] = { 0 };
1405 size_t len = strlcpy((char *)str_buf, str, SIMPLE_STR_LEN + 1);
1406
1407 uintptr_t thread_id = (uintptr_t)thread_tid(current_thread());
1408 uint32_t debugid = eventid | DBG_FUNC_START;
1409
1410 /* string can fit in a single tracepoint */
1411 if (len <= (4 * sizeof(uintptr_t))) {
1412 debugid |= DBG_FUNC_END;
1413 }
1414
1415 kernel_debug_internal(debugid, str_buf[0],
1416 str_buf[1],
1417 str_buf[2],
1418 str_buf[3], thread_id, 0);
1419
1420 debugid &= KDBG_EVENTID_MASK;
1421 int i = 4;
1422 size_t written = 4 * sizeof(uintptr_t);
1423
1424 for (; written < len; i += 4, written += 4 * sizeof(uintptr_t)) {
1425 /* if this is the last tracepoint to be emitted */
1426 if ((written + (4 * sizeof(uintptr_t))) >= len) {
1427 debugid |= DBG_FUNC_END;
1428 }
1429 kernel_debug_internal(debugid, str_buf[i],
1430 str_buf[i + 1],
1431 str_buf[i + 2],
1432 str_buf[i + 3], thread_id, 0);
1433 }
1434 }
1435
1436 extern int master_cpu; /* MACH_KERNEL_PRIVATE */
1437 /*
1438 * Used prior to start_kern_tracing() being called.
1439 * Log temporarily into a static buffer.
1440 */
1441 void
1442 kernel_debug_early(
1443 uint32_t debugid,
1444 uintptr_t arg1,
1445 uintptr_t arg2,
1446 uintptr_t arg3,
1447 uintptr_t arg4)
1448 {
1449 /* If early tracing is over, use the normal path. */
1450 if (kd_early_done) {
1451 KERNEL_DEBUG_CONSTANT(debugid, arg1, arg2, arg3, arg4, 0);
1452 return;
1453 }
1454
1455 /* Do nothing if the buffer is full or we're not on the boot cpu. */
1456 kd_early_overflow = kd_early_index >= KD_EARLY_BUFFER_NBUFS;
1457 if (kd_early_overflow || cpu_number() != master_cpu) {
1458 return;
1459 }
1460
1461 kd_early_buffer[kd_early_index].debugid = debugid;
1462 kd_early_buffer[kd_early_index].timestamp = mach_absolute_time();
1463 kd_early_buffer[kd_early_index].arg1 = arg1;
1464 kd_early_buffer[kd_early_index].arg2 = arg2;
1465 kd_early_buffer[kd_early_index].arg3 = arg3;
1466 kd_early_buffer[kd_early_index].arg4 = arg4;
1467 kd_early_buffer[kd_early_index].arg5 = 0;
1468 kd_early_index++;
1469 }
1470
1471 /*
1472 * Transfer the contents of the temporary buffer into the trace buffers.
1473 * Precede that by logging the rebase time (offset) - the TSC-based time (in ns)
1474 * when mach_absolute_time is set to 0.
1475 */
1476 static void
1477 kernel_debug_early_end(void)
1478 {
1479 if (cpu_number() != master_cpu) {
1480 panic("kernel_debug_early_end() not call on boot processor");
1481 }
1482
1483 /* reset the current oldest time to allow early events */
1484 kd_ctrl_page.oldest_time = 0;
1485
1486 #if !CONFIG_EMBEDDED
1487 /* Fake sentinel marking the start of kernel time relative to TSC */
1488 kernel_debug_enter(0,
1489 TRACE_TIMESTAMPS,
1490 0,
1491 (uint32_t)(tsc_rebase_abs_time >> 32),
1492 (uint32_t)tsc_rebase_abs_time,
1493 tsc_at_boot,
1494 0,
1495 0);
1496 #endif
1497 for (unsigned int i = 0; i < kd_early_index; i++) {
1498 kernel_debug_enter(0,
1499 kd_early_buffer[i].debugid,
1500 kd_early_buffer[i].timestamp,
1501 kd_early_buffer[i].arg1,
1502 kd_early_buffer[i].arg2,
1503 kd_early_buffer[i].arg3,
1504 kd_early_buffer[i].arg4,
1505 0);
1506 }
1507
1508 /* Cut events-lost event on overflow */
1509 if (kd_early_overflow) {
1510 KDBG_RELEASE(TRACE_LOST_EVENTS, 1);
1511 }
1512
1513 kd_early_done = true;
1514
1515 /* This trace marks the start of kernel tracing */
1516 kernel_debug_string_early("early trace done");
1517 }
1518
1519 void
1520 kernel_debug_disable(void)
1521 {
1522 if (kdebug_enable) {
1523 kdbg_set_tracing_enabled(FALSE, 0);
1524 }
1525 }
1526
1527 /*
1528 * Returns non-zero if debugid is in a reserved class.
1529 */
1530 static int
1531 kdebug_validate_debugid(uint32_t debugid)
1532 {
1533 uint8_t debugid_class;
1534
1535 debugid_class = KDBG_EXTRACT_CLASS(debugid);
1536 switch (debugid_class) {
1537 case DBG_TRACE:
1538 return EPERM;
1539 }
1540
1541 return 0;
1542 }
1543
1544 /*
1545 * Support syscall SYS_kdebug_typefilter.
1546 */
1547 int
1548 kdebug_typefilter(__unused struct proc* p,
1549 struct kdebug_typefilter_args* uap,
1550 __unused int *retval)
1551 {
1552 int ret = KERN_SUCCESS;
1553
1554 if (uap->addr == USER_ADDR_NULL ||
1555 uap->size == USER_ADDR_NULL) {
1556 return EINVAL;
1557 }
1558
1559 /*
1560 * The atomic load is to close a race window with setting the typefilter
1561 * and memory entry values. A description follows:
1562 *
1563 * Thread 1 (writer)
1564 *
1565 * Allocate Typefilter
1566 * Allocate MemoryEntry
1567 * Write Global MemoryEntry Ptr
1568 * Atomic Store (Release) Global Typefilter Ptr
1569 *
1570 * Thread 2 (reader, AKA us)
1571 *
1572 * if ((Atomic Load (Acquire) Global Typefilter Ptr) == NULL)
1573 * return;
1574 *
1575 * Without the atomic store, it isn't guaranteed that the write of
1576 * Global MemoryEntry Ptr is visible before we can see the write of
1577 * Global Typefilter Ptr.
1578 *
1579 * Without the atomic load, it isn't guaranteed that the loads of
1580 * Global MemoryEntry Ptr aren't speculated.
1581 *
1582 * The global pointers transition from NULL -> valid once and only once,
1583 * and never change after becoming valid. This means that having passed
1584 * the first atomic load test of Global Typefilter Ptr, this function
1585 * can then safely use the remaining global state without atomic checks.
1586 */
1587 if (!__c11_atomic_load((_Atomic typefilter_t *)&kdbg_typefilter, memory_order_acquire)) {
1588 return EINVAL;
1589 }
1590
1591 assert(kdbg_typefilter_memory_entry);
1592
1593 mach_vm_offset_t user_addr = 0;
1594 vm_map_t user_map = current_map();
1595
1596 ret = mach_to_bsd_errno(
1597 mach_vm_map_kernel(user_map, // target map
1598 &user_addr, // [in, out] target address
1599 TYPEFILTER_ALLOC_SIZE, // initial size
1600 0, // mask (alignment?)
1601 VM_FLAGS_ANYWHERE, // flags
1602 VM_MAP_KERNEL_FLAGS_NONE,
1603 VM_KERN_MEMORY_NONE,
1604 kdbg_typefilter_memory_entry, // port (memory entry!)
1605 0, // offset (in memory entry)
1606 FALSE, // should copy
1607 VM_PROT_READ, // cur_prot
1608 VM_PROT_READ, // max_prot
1609 VM_INHERIT_SHARE)); // inherit behavior on fork
1610
1611 if (ret == KERN_SUCCESS) {
1612 vm_size_t user_ptr_size = vm_map_is_64bit(user_map) ? 8 : 4;
1613 ret = copyout(CAST_DOWN(void *, &user_addr), uap->addr, user_ptr_size );
1614
1615 if (ret != KERN_SUCCESS) {
1616 mach_vm_deallocate(user_map, user_addr, TYPEFILTER_ALLOC_SIZE);
1617 }
1618 }
1619
1620 return ret;
1621 }
1622
1623 /*
1624 * Support syscall SYS_kdebug_trace. U64->K32 args may get truncated in kdebug_trace64
1625 */
1626 int
1627 kdebug_trace(struct proc *p, struct kdebug_trace_args *uap, int32_t *retval)
1628 {
1629 struct kdebug_trace64_args uap64;
1630
1631 uap64.code = uap->code;
1632 uap64.arg1 = uap->arg1;
1633 uap64.arg2 = uap->arg2;
1634 uap64.arg3 = uap->arg3;
1635 uap64.arg4 = uap->arg4;
1636
1637 return kdebug_trace64(p, &uap64, retval);
1638 }
1639
1640 /*
1641 * Support syscall SYS_kdebug_trace64. 64-bit args on K32 will get truncated
1642 * to fit in 32-bit record format.
1643 *
1644 * It is intentional that error conditions are not checked until kdebug is
1645 * enabled. This is to match the userspace wrapper behavior, which is optimizing
1646 * for non-error case performance.
1647 */
1648 int kdebug_trace64(__unused struct proc *p, struct kdebug_trace64_args *uap, __unused int32_t *retval)
1649 {
1650 int err;
1651
1652 if ( __probable(kdebug_enable == 0) )
1653 return(0);
1654
1655 if ((err = kdebug_validate_debugid(uap->code)) != 0) {
1656 return err;
1657 }
1658
1659 kernel_debug_internal(uap->code, (uintptr_t)uap->arg1,
1660 (uintptr_t)uap->arg2, (uintptr_t)uap->arg3, (uintptr_t)uap->arg4,
1661 (uintptr_t)thread_tid(current_thread()), 0);
1662
1663 return(0);
1664 }
1665
/*
 * The string buffer carries enough padding to hold a full tracepoint for
 * the last portion of the string.  This greatly simplifies splitting the
 * string between tracepoints: full tracepoints can be generated straight
 * from the buffer, without having to manually add zeros to pad the
 * arguments.
 */

/* 2 string args in the first tracepoint, then 9 tracepoints of 4 args each */
#define STR_BUF_ARGS (2 + (4 * 9))
/* string bytes available when each arg is as wide as on K64 */
#define MAX_STR_LEN (sizeof(uint64_t) * STR_BUF_ARGS)
/* on K32, ending straddles a tracepoint, so reserve blanks */
#define STR_BUF_SIZE ((2 * sizeof(uint32_t)) + MAX_STR_LEN)
1680
1681 /*
1682 * This function does no error checking and assumes that it is called with
1683 * the correct arguments, including that the buffer pointed to by str is at
1684 * least STR_BUF_SIZE bytes. However, str must be aligned to word-size and
1685 * be NUL-terminated. In cases where a string can fit evenly into a final
1686 * tracepoint without its NUL-terminator, this function will not end those
1687 * strings with a NUL in trace. It's up to clients to look at the function
1688 * qualifier for DBG_FUNC_END in this case, to end the string.
1689 */
1690 static uint64_t
1691 kernel_debug_string_internal(uint32_t debugid, uint64_t str_id, void *vstr,
1692 size_t str_len)
1693 {
1694 /* str must be word-aligned */
1695 uintptr_t *str = vstr;
1696 size_t written = 0;
1697 uintptr_t thread_id;
1698 int i;
1699 uint32_t trace_debugid = TRACEDBG_CODE(DBG_TRACE_STRING,
1700 TRACE_STRING_GLOBAL);
1701
1702 thread_id = (uintptr_t)thread_tid(current_thread());
1703
1704 /* if the ID is being invalidated, just emit that */
1705 if (str_id != 0 && str_len == 0) {
1706 kernel_debug_internal(trace_debugid | DBG_FUNC_START | DBG_FUNC_END,
1707 (uintptr_t)debugid, (uintptr_t)str_id, 0, 0, thread_id, 0);
1708 return str_id;
1709 }
1710
1711 /* generate an ID, if necessary */
1712 if (str_id == 0) {
1713 str_id = OSIncrementAtomic64((SInt64 *)&g_curr_str_id);
1714 str_id = (str_id & STR_ID_MASK) | g_str_id_signature;
1715 }
1716
1717 trace_debugid |= DBG_FUNC_START;
1718 /* string can fit in a single tracepoint */
1719 if (str_len <= (2 * sizeof(uintptr_t))) {
1720 trace_debugid |= DBG_FUNC_END;
1721 }
1722
1723 kernel_debug_internal(trace_debugid, (uintptr_t)debugid, (uintptr_t)str_id,
1724 str[0], str[1], thread_id, 0);
1725
1726 trace_debugid &= KDBG_EVENTID_MASK;
1727 i = 2;
1728 written += 2 * sizeof(uintptr_t);
1729
1730 for (; written < str_len; i += 4, written += 4 * sizeof(uintptr_t)) {
1731 if ((written + (4 * sizeof(uintptr_t))) >= str_len) {
1732 trace_debugid |= DBG_FUNC_END;
1733 }
1734 kernel_debug_internal(trace_debugid, str[i],
1735 str[i + 1],
1736 str[i + 2],
1737 str[i + 3], thread_id, 0);
1738 }
1739
1740 return str_id;
1741 }
1742
1743 /*
1744 * Returns true if the current process can emit events, and false otherwise.
1745 * Trace system and scheduling events circumvent this check, as do events
1746 * emitted in interrupt context.
1747 */
1748 static boolean_t
1749 kdebug_current_proc_enabled(uint32_t debugid)
1750 {
1751 /* can't determine current process in interrupt context */
1752 if (ml_at_interrupt_context()) {
1753 return TRUE;
1754 }
1755
1756 /* always emit trace system and scheduling events */
1757 if ((KDBG_EXTRACT_CLASS(debugid) == DBG_TRACE ||
1758 (debugid & KDBG_CSC_MASK) == MACHDBG_CODE(DBG_MACH_SCHED, 0)))
1759 {
1760 return TRUE;
1761 }
1762
1763 if (kd_ctrl_page.kdebug_flags & KDBG_PIDCHECK) {
1764 proc_t cur_proc = current_proc();
1765
1766 /* only the process with the kdebug bit set is allowed */
1767 if (cur_proc && !(cur_proc->p_kdebug)) {
1768 return FALSE;
1769 }
1770 } else if (kd_ctrl_page.kdebug_flags & KDBG_PIDEXCLUDE) {
1771 proc_t cur_proc = current_proc();
1772
1773 /* every process except the one with the kdebug bit set is allowed */
1774 if (cur_proc && cur_proc->p_kdebug) {
1775 return FALSE;
1776 }
1777 }
1778
1779 return TRUE;
1780 }
1781
1782 boolean_t
1783 kdebug_debugid_enabled(uint32_t debugid)
1784 {
1785 /* if no filtering is enabled */
1786 if (!kd_ctrl_page.kdebug_slowcheck) {
1787 return TRUE;
1788 }
1789
1790 return kdebug_debugid_explicitly_enabled(debugid);
1791 }
1792
1793 boolean_t
1794 kdebug_debugid_explicitly_enabled(uint32_t debugid)
1795 {
1796 if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) {
1797 return typefilter_is_debugid_allowed(kdbg_typefilter, debugid);
1798 } else if (KDBG_EXTRACT_CLASS(debugid) == DBG_TRACE) {
1799 return TRUE;
1800 } else if (kd_ctrl_page.kdebug_flags & KDBG_RANGECHECK) {
1801 if (debugid < kdlog_beg || debugid > kdlog_end) {
1802 return FALSE;
1803 }
1804 } else if (kd_ctrl_page.kdebug_flags & KDBG_VALCHECK) {
1805 if ((debugid & KDBG_EVENTID_MASK) != kdlog_value1 &&
1806 (debugid & KDBG_EVENTID_MASK) != kdlog_value2 &&
1807 (debugid & KDBG_EVENTID_MASK) != kdlog_value3 &&
1808 (debugid & KDBG_EVENTID_MASK) != kdlog_value4)
1809 {
1810 return FALSE;
1811 }
1812 }
1813
1814 return TRUE;
1815 }
1816
1817 /*
1818 * Returns 0 if a string can be traced with these arguments. Returns errno
1819 * value if error occurred.
1820 */
1821 static errno_t
1822 kdebug_check_trace_string(uint32_t debugid, uint64_t str_id)
1823 {
1824 /* if there are function qualifiers on the debugid */
1825 if (debugid & ~KDBG_EVENTID_MASK) {
1826 return EINVAL;
1827 }
1828
1829 if (kdebug_validate_debugid(debugid)) {
1830 return EPERM;
1831 }
1832
1833 if (str_id != 0 && (str_id & STR_ID_SIG_MASK) != g_str_id_signature) {
1834 return EINVAL;
1835 }
1836
1837 return 0;
1838 }
1839
1840 /*
1841 * Implementation of KPI kernel_debug_string.
1842 */
1843 int
1844 kernel_debug_string(uint32_t debugid, uint64_t *str_id, const char *str)
1845 {
1846 /* arguments to tracepoints must be word-aligned */
1847 __attribute__((aligned(sizeof(uintptr_t)))) char str_buf[STR_BUF_SIZE];
1848 static_assert(sizeof(str_buf) > MAX_STR_LEN);
1849 vm_size_t len_copied;
1850 int err;
1851
1852 assert(str_id);
1853
1854 if (__probable(kdebug_enable == 0)) {
1855 return 0;
1856 }
1857
1858 if (!kdebug_current_proc_enabled(debugid)) {
1859 return 0;
1860 }
1861
1862 if (!kdebug_debugid_enabled(debugid)) {
1863 return 0;
1864 }
1865
1866 if ((err = kdebug_check_trace_string(debugid, *str_id)) != 0) {
1867 return err;
1868 }
1869
1870 if (str == NULL) {
1871 if (str_id == 0) {
1872 return EINVAL;
1873 }
1874
1875 *str_id = kernel_debug_string_internal(debugid, *str_id, NULL, 0);
1876 return 0;
1877 }
1878
1879 memset(str_buf, 0, sizeof(str_buf));
1880 len_copied = strlcpy(str_buf, str, MAX_STR_LEN + 1);
1881 *str_id = kernel_debug_string_internal(debugid, *str_id, str_buf,
1882 len_copied);
1883 return 0;
1884 }
1885
1886 /*
1887 * Support syscall kdebug_trace_string.
1888 */
1889 int
1890 kdebug_trace_string(__unused struct proc *p,
1891 struct kdebug_trace_string_args *uap,
1892 uint64_t *retval)
1893 {
1894 __attribute__((aligned(sizeof(uintptr_t)))) char str_buf[STR_BUF_SIZE];
1895 static_assert(sizeof(str_buf) > MAX_STR_LEN);
1896 size_t len_copied;
1897 int err;
1898
1899 if (__probable(kdebug_enable == 0)) {
1900 return 0;
1901 }
1902
1903 if (!kdebug_current_proc_enabled(uap->debugid)) {
1904 return 0;
1905 }
1906
1907 if (!kdebug_debugid_enabled(uap->debugid)) {
1908 return 0;
1909 }
1910
1911 if ((err = kdebug_check_trace_string(uap->debugid, uap->str_id)) != 0) {
1912 return err;
1913 }
1914
1915 if (uap->str == USER_ADDR_NULL) {
1916 if (uap->str_id == 0) {
1917 return EINVAL;
1918 }
1919
1920 *retval = kernel_debug_string_internal(uap->debugid, uap->str_id,
1921 NULL, 0);
1922 return 0;
1923 }
1924
1925 memset(str_buf, 0, sizeof(str_buf));
1926 err = copyinstr(uap->str, str_buf, MAX_STR_LEN + 1, &len_copied);
1927
1928 /* it's alright to truncate the string, so allow ENAMETOOLONG */
1929 if (err == ENAMETOOLONG) {
1930 str_buf[MAX_STR_LEN] = '\0';
1931 } else if (err) {
1932 return err;
1933 }
1934
1935 if (len_copied <= 1) {
1936 return EINVAL;
1937 }
1938
1939 /* convert back to a length */
1940 len_copied--;
1941
1942 *retval = kernel_debug_string_internal(uap->debugid, uap->str_id, str_buf,
1943 len_copied);
1944 return 0;
1945 }
1946
1947 static void
1948 kdbg_lock_init(void)
1949 {
1950 static lck_grp_attr_t *kdebug_lck_grp_attr = NULL;
1951 static lck_grp_t *kdebug_lck_grp = NULL;
1952 static lck_attr_t *kdebug_lck_attr = NULL;
1953
1954 if (kd_ctrl_page.kdebug_flags & KDBG_LOCKINIT) {
1955 return;
1956 }
1957
1958 assert(kdebug_lck_grp_attr == NULL);
1959 kdebug_lck_grp_attr = lck_grp_attr_alloc_init();
1960 kdebug_lck_grp = lck_grp_alloc_init("kdebug", kdebug_lck_grp_attr);
1961 kdebug_lck_attr = lck_attr_alloc_init();
1962
1963 kds_spin_lock = lck_spin_alloc_init(kdebug_lck_grp, kdebug_lck_attr);
1964 kdw_spin_lock = lck_spin_alloc_init(kdebug_lck_grp, kdebug_lck_attr);
1965
1966 kd_ctrl_page.kdebug_flags |= KDBG_LOCKINIT;
1967 }
1968
1969 int
1970 kdbg_bootstrap(boolean_t early_trace)
1971 {
1972 kd_ctrl_page.kdebug_flags &= ~KDBG_WRAPPED;
1973
1974 return (create_buffers(early_trace));
1975 }
1976
1977 int
1978 kdbg_reinit(boolean_t early_trace)
1979 {
1980 int ret = 0;
1981
1982 /*
1983 * Disable trace collecting
1984 * First make sure we're not in
1985 * the middle of cutting a trace
1986 */
1987 kernel_debug_disable();
1988
1989 /*
1990 * make sure the SLOW_NOLOG is seen
1991 * by everyone that might be trying
1992 * to cut a trace..
1993 */
1994 IOSleep(100);
1995
1996 delete_buffers();
1997
1998 kdbg_clear_thread_map();
1999 ret = kdbg_bootstrap(early_trace);
2000
2001 RAW_file_offset = 0;
2002 RAW_file_written = 0;
2003
2004 return(ret);
2005 }
2006
2007 void
2008 kdbg_trace_data(struct proc *proc, long *arg_pid, long *arg_uniqueid)
2009 {
2010 if (!proc) {
2011 *arg_pid = 0;
2012 *arg_uniqueid = 0;
2013 } else {
2014 *arg_pid = proc->p_pid;
2015 *arg_uniqueid = proc->p_uniqueid;
2016 if ((uint64_t) *arg_uniqueid != proc->p_uniqueid) {
2017 *arg_uniqueid = 0;
2018 }
2019 }
2020 }
2021
2022
2023 void
2024 kdbg_trace_string(struct proc *proc, long *arg1, long *arg2, long *arg3, long *arg4)
2025 {
2026 char *dbg_nameptr;
2027 int dbg_namelen;
2028 long dbg_parms[4];
2029
2030 if (!proc) {
2031 *arg1 = 0;
2032 *arg2 = 0;
2033 *arg3 = 0;
2034 *arg4 = 0;
2035 return;
2036 }
2037 /*
2038 * Collect the pathname for tracing
2039 */
2040 dbg_nameptr = proc->p_comm;
2041 dbg_namelen = (int)strlen(proc->p_comm);
2042 dbg_parms[0]=0L;
2043 dbg_parms[1]=0L;
2044 dbg_parms[2]=0L;
2045 dbg_parms[3]=0L;
2046
2047 if(dbg_namelen > (int)sizeof(dbg_parms))
2048 dbg_namelen = (int)sizeof(dbg_parms);
2049
2050 strncpy((char *)dbg_parms, dbg_nameptr, dbg_namelen);
2051
2052 *arg1=dbg_parms[0];
2053 *arg2=dbg_parms[1];
2054 *arg3=dbg_parms[2];
2055 *arg4=dbg_parms[3];
2056 }
2057
2058 static void
2059 kdbg_resolve_map(thread_t th_act, void *opaque)
2060 {
2061 kd_threadmap *mapptr;
2062 krt_t *t = (krt_t *)opaque;
2063
2064 if (t->count < t->maxcount) {
2065 mapptr = &t->map[t->count];
2066 mapptr->thread = (uintptr_t)thread_tid(th_act);
2067
2068 (void) strlcpy (mapptr->command, t->atts->task_comm,
2069 sizeof(t->atts->task_comm));
2070 /*
2071 * Some kernel threads have no associated pid.
2072 * We still need to mark the entry as valid.
2073 */
2074 if (t->atts->pid)
2075 mapptr->valid = t->atts->pid;
2076 else
2077 mapptr->valid = 1;
2078
2079 t->count++;
2080 }
2081 }
2082
2083 /*
2084 *
2085 * Writes a cpumap for the given iops_list/cpu_count to the provided buffer.
2086 *
2087 * You may provide a buffer and size, or if you set the buffer to NULL, a
2088 * buffer of sufficient size will be allocated.
2089 *
2090 * If you provide a buffer and it is too small, sets cpumap_size to the number
2091 * of bytes required and returns EINVAL.
2092 *
2093 * On success, if you provided a buffer, cpumap_size is set to the number of
2094 * bytes written. If you did not provide a buffer, cpumap is set to the newly
2095 * allocated buffer and cpumap_size is set to the number of bytes allocated.
2096 *
2097 * NOTE: It may seem redundant to pass both iops and a cpu_count.
2098 *
2099 * We may be reporting data from "now", or from the "past".
2100 *
2101 * The "past" data would be for kdbg_readcpumap().
2102 *
2103 * If we do not pass both iops and cpu_count, and iops is NULL, this function
2104 * will need to read "now" state to get the number of cpus, which would be in
2105 * error if we were reporting "past" state.
2106 */
2107
2108 int
2109 kdbg_cpumap_init_internal(kd_iop_t* iops, uint32_t cpu_count, uint8_t** cpumap, uint32_t* cpumap_size)
2110 {
2111 assert(cpumap);
2112 assert(cpumap_size);
2113 assert(cpu_count);
2114 assert(!iops || iops->cpu_id + 1 == cpu_count);
2115
2116 uint32_t bytes_needed = sizeof(kd_cpumap_header) + cpu_count * sizeof(kd_cpumap);
2117 uint32_t bytes_available = *cpumap_size;
2118 *cpumap_size = bytes_needed;
2119
2120 if (*cpumap == NULL) {
2121 if (kmem_alloc(kernel_map, (vm_offset_t*)cpumap, (vm_size_t)*cpumap_size, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
2122 return ENOMEM;
2123 }
2124 bzero(*cpumap, *cpumap_size);
2125 } else if (bytes_available < bytes_needed) {
2126 return EINVAL;
2127 }
2128
2129 kd_cpumap_header* header = (kd_cpumap_header*)(uintptr_t)*cpumap;
2130
2131 header->version_no = RAW_VERSION1;
2132 header->cpu_count = cpu_count;
2133
2134 kd_cpumap* cpus = (kd_cpumap*)&header[1];
2135
2136 int32_t index = cpu_count - 1;
2137 while (iops) {
2138 cpus[index].cpu_id = iops->cpu_id;
2139 cpus[index].flags = KDBG_CPUMAP_IS_IOP;
2140 strlcpy(cpus[index].name, iops->callback.iop_name, sizeof(cpus->name));
2141
2142 iops = iops->next;
2143 index--;
2144 }
2145
2146 while (index >= 0) {
2147 cpus[index].cpu_id = index;
2148 cpus[index].flags = 0;
2149 strlcpy(cpus[index].name, "AP", sizeof(cpus->name));
2150
2151 index--;
2152 }
2153
2154 return KERN_SUCCESS;
2155 }
2156
2157 void
2158 kdbg_thrmap_init(void)
2159 {
2160 ktrace_assert_lock_held();
2161
2162 if (kd_ctrl_page.kdebug_flags & KDBG_MAPINIT) {
2163 return;
2164 }
2165
2166 kd_mapptr = kdbg_thrmap_init_internal(0, &kd_mapsize, &kd_mapcount);
2167
2168 if (kd_mapptr) {
2169 kd_ctrl_page.kdebug_flags |= KDBG_MAPINIT;
2170 }
2171 }
2172
2173 static kd_threadmap *
2174 kdbg_thrmap_init_internal(unsigned int count, unsigned int *mapsize, unsigned int *mapcount)
2175 {
2176 kd_threadmap *mapptr;
2177 proc_t p;
2178 struct krt akrt;
2179 int tts_count = 0; /* number of task-to-string structures */
2180 struct tts *tts_mapptr;
2181 unsigned int tts_mapsize = 0;
2182 vm_offset_t kaddr;
2183
2184 assert(mapsize != NULL);
2185 assert(mapcount != NULL);
2186
2187 *mapcount = threads_count;
2188 tts_count = tasks_count;
2189
2190 /*
2191 * The proc count could change during buffer allocation,
2192 * so introduce a small fudge factor to bump up the
2193 * buffer sizes. This gives new tasks some chance of
2194 * making into the tables. Bump up by 25%.
2195 */
2196 *mapcount += *mapcount / 4;
2197 tts_count += tts_count / 4;
2198
2199 *mapsize = *mapcount * sizeof(kd_threadmap);
2200
2201 if (count && count < *mapcount) {
2202 return 0;
2203 }
2204
2205 if ((kmem_alloc(kernel_map, &kaddr, (vm_size_t)*mapsize, VM_KERN_MEMORY_DIAG) == KERN_SUCCESS)) {
2206 bzero((void *)kaddr, *mapsize);
2207 mapptr = (kd_threadmap *)kaddr;
2208 } else {
2209 return 0;
2210 }
2211
2212 tts_mapsize = tts_count * sizeof(struct tts);
2213
2214 if ((kmem_alloc(kernel_map, &kaddr, (vm_size_t)tts_mapsize, VM_KERN_MEMORY_DIAG) == KERN_SUCCESS)) {
2215 bzero((void *)kaddr, tts_mapsize);
2216 tts_mapptr = (struct tts *)kaddr;
2217 } else {
2218 kmem_free(kernel_map, (vm_offset_t)mapptr, *mapsize);
2219
2220 return 0;
2221 }
2222
2223 /*
2224 * Save the proc's name and take a reference for each task associated
2225 * with a valid process.
2226 */
2227 proc_list_lock();
2228
2229 int i = 0;
2230 ALLPROC_FOREACH(p) {
2231 if (i >= tts_count) {
2232 break;
2233 }
2234 if (p->p_lflag & P_LEXIT) {
2235 continue;
2236 }
2237 if (p->task) {
2238 task_reference(p->task);
2239 tts_mapptr[i].task = p->task;
2240 tts_mapptr[i].pid = p->p_pid;
2241 (void)strlcpy(tts_mapptr[i].task_comm, proc_best_name(p), sizeof(tts_mapptr[i].task_comm));
2242 i++;
2243 }
2244 }
2245 tts_count = i;
2246
2247 proc_list_unlock();
2248
2249 /*
2250 * Initialize thread map data
2251 */
2252 akrt.map = mapptr;
2253 akrt.count = 0;
2254 akrt.maxcount = *mapcount;
2255
2256 for (i = 0; i < tts_count; i++) {
2257 akrt.atts = &tts_mapptr[i];
2258 task_act_iterate_wth_args(tts_mapptr[i].task, kdbg_resolve_map, &akrt);
2259 task_deallocate((task_t)tts_mapptr[i].task);
2260 }
2261 kmem_free(kernel_map, (vm_offset_t)tts_mapptr, tts_mapsize);
2262
2263 *mapcount = akrt.count;
2264
2265 return mapptr;
2266 }
2267
2268 static void
2269 kdbg_clear(void)
2270 {
2271 /*
2272 * Clean up the trace buffer
2273 * First make sure we're not in
2274 * the middle of cutting a trace
2275 */
2276 kernel_debug_disable();
2277 kdbg_disable_typefilter();
2278
2279 /*
2280 * make sure the SLOW_NOLOG is seen
2281 * by everyone that might be trying
2282 * to cut a trace..
2283 */
2284 IOSleep(100);
2285
2286 /* reset kdebug state for each process */
2287 if (kd_ctrl_page.kdebug_flags & (KDBG_PIDCHECK | KDBG_PIDEXCLUDE)) {
2288 proc_list_lock();
2289 proc_t p;
2290 ALLPROC_FOREACH(p) {
2291 p->p_kdebug = 0;
2292 }
2293 proc_list_unlock();
2294 }
2295
2296 kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
2297 kd_ctrl_page.kdebug_flags &= ~(KDBG_NOWRAP | KDBG_RANGECHECK | KDBG_VALCHECK);
2298 kd_ctrl_page.kdebug_flags &= ~(KDBG_PIDCHECK | KDBG_PIDEXCLUDE);
2299
2300 kd_ctrl_page.oldest_time = 0;
2301
2302 delete_buffers();
2303 nkdbufs = 0;
2304
2305 /* Clean up the thread map buffer */
2306 kdbg_clear_thread_map();
2307
2308 RAW_file_offset = 0;
2309 RAW_file_written = 0;
2310 }
2311
2312 void
2313 kdebug_reset(void)
2314 {
2315 ktrace_assert_lock_held();
2316
2317 kdbg_lock_init();
2318
2319 kdbg_clear();
2320 if (kdbg_typefilter) {
2321 typefilter_reject_all(kdbg_typefilter);
2322 typefilter_allow_class(kdbg_typefilter, DBG_TRACE);
2323 }
2324 }
2325
2326 void
2327 kdebug_free_early_buf(void)
2328 {
2329 #if !CONFIG_EMBEDDED
2330 /* Must be done with the buffer, so release it back to the VM.
2331 * On embedded targets this buffer is freed when the BOOTDATA segment is freed. */
2332 ml_static_mfree((vm_offset_t)&kd_early_buffer, sizeof(kd_early_buffer));
2333 #endif
2334 }
2335
2336 int
2337 kdbg_setpid(kd_regtype *kdr)
2338 {
2339 pid_t pid;
2340 int flag, ret=0;
2341 struct proc *p;
2342
2343 pid = (pid_t)kdr->value1;
2344 flag = (int)kdr->value2;
2345
2346 if (pid >= 0) {
2347 if ((p = proc_find(pid)) == NULL)
2348 ret = ESRCH;
2349 else {
2350 if (flag == 1) {
2351 /*
2352 * turn on pid check for this and all pids
2353 */
2354 kd_ctrl_page.kdebug_flags |= KDBG_PIDCHECK;
2355 kd_ctrl_page.kdebug_flags &= ~KDBG_PIDEXCLUDE;
2356 kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
2357
2358 p->p_kdebug = 1;
2359 } else {
2360 /*
2361 * turn off pid check for this pid value
2362 * Don't turn off all pid checking though
2363 *
2364 * kd_ctrl_page.kdebug_flags &= ~KDBG_PIDCHECK;
2365 */
2366 p->p_kdebug = 0;
2367 }
2368 proc_rele(p);
2369 }
2370 }
2371 else
2372 ret = EINVAL;
2373
2374 return(ret);
2375 }
2376
2377 /* This is for pid exclusion in the trace buffer */
2378 int
2379 kdbg_setpidex(kd_regtype *kdr)
2380 {
2381 pid_t pid;
2382 int flag, ret=0;
2383 struct proc *p;
2384
2385 pid = (pid_t)kdr->value1;
2386 flag = (int)kdr->value2;
2387
2388 if (pid >= 0) {
2389 if ((p = proc_find(pid)) == NULL)
2390 ret = ESRCH;
2391 else {
2392 if (flag == 1) {
2393 /*
2394 * turn on pid exclusion
2395 */
2396 kd_ctrl_page.kdebug_flags |= KDBG_PIDEXCLUDE;
2397 kd_ctrl_page.kdebug_flags &= ~KDBG_PIDCHECK;
2398 kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
2399
2400 p->p_kdebug = 1;
2401 }
2402 else {
2403 /*
2404 * turn off pid exclusion for this pid value
2405 * Don't turn off all pid exclusion though
2406 *
2407 * kd_ctrl_page.kdebug_flags &= ~KDBG_PIDEXCLUDE;
2408 */
2409 p->p_kdebug = 0;
2410 }
2411 proc_rele(p);
2412 }
2413 } else
2414 ret = EINVAL;
2415
2416 return(ret);
2417 }
2418
2419 /*
2420 * The following functions all operate on the "global" typefilter singleton.
2421 */
2422
2423 /*
2424 * The tf param is optional, you may pass either a valid typefilter or NULL.
2425 * If you pass a valid typefilter, you release ownership of that typefilter.
2426 */
2427 static int
2428 kdbg_initialize_typefilter(typefilter_t tf)
2429 {
2430 ktrace_assert_lock_held();
2431 assert(!kdbg_typefilter);
2432 assert(!kdbg_typefilter_memory_entry);
2433 typefilter_t deallocate_tf = NULL;
2434
2435 if (!tf && ((tf = deallocate_tf = typefilter_create()) == NULL)) {
2436 return ENOMEM;
2437 }
2438
2439 if ((kdbg_typefilter_memory_entry = typefilter_create_memory_entry(tf)) == MACH_PORT_NULL) {
2440 if (deallocate_tf) {
2441 typefilter_deallocate(deallocate_tf);
2442 }
2443 return ENOMEM;
2444 }
2445
2446 /*
2447 * The atomic store closes a race window with
2448 * the kdebug_typefilter syscall, which assumes
2449 * that any non-null kdbg_typefilter means a
2450 * valid memory_entry is available.
2451 */
2452 __c11_atomic_store(((_Atomic typefilter_t*)&kdbg_typefilter), tf, memory_order_release);
2453
2454 return KERN_SUCCESS;
2455 }
2456
2457 static int
2458 kdbg_copyin_typefilter(user_addr_t addr, size_t size)
2459 {
2460 int ret = ENOMEM;
2461 typefilter_t tf;
2462
2463 ktrace_assert_lock_held();
2464
2465 if (size != KDBG_TYPEFILTER_BITMAP_SIZE) {
2466 return EINVAL;
2467 }
2468
2469 if ((tf = typefilter_create())) {
2470 if ((ret = copyin(addr, tf, KDBG_TYPEFILTER_BITMAP_SIZE)) == 0) {
2471 /* The kernel typefilter must always allow DBG_TRACE */
2472 typefilter_allow_class(tf, DBG_TRACE);
2473
2474 /*
2475 * If this is the first typefilter; claim it.
2476 * Otherwise copy and deallocate.
2477 *
2478 * Allocating a typefilter for the copyin allows
2479 * the kernel to hold the invariant that DBG_TRACE
2480 * must always be allowed.
2481 */
2482 if (!kdbg_typefilter) {
2483 if ((ret = kdbg_initialize_typefilter(tf))) {
2484 return ret;
2485 }
2486 tf = NULL;
2487 } else {
2488 typefilter_copy(kdbg_typefilter, tf);
2489 }
2490
2491 kdbg_enable_typefilter();
2492 kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_TYPEFILTER_CHANGED, kdbg_typefilter);
2493 }
2494
2495 if (tf)
2496 typefilter_deallocate(tf);
2497 }
2498
2499 return ret;
2500 }
2501
2502 /*
2503 * Enable the flags in the control page for the typefilter. Assumes that
2504 * kdbg_typefilter has already been allocated, so events being written
2505 * don't see a bad typefilter.
2506 */
2507 static void
2508 kdbg_enable_typefilter(void)
2509 {
2510 assert(kdbg_typefilter);
2511 kd_ctrl_page.kdebug_flags &= ~(KDBG_RANGECHECK | KDBG_VALCHECK);
2512 kd_ctrl_page.kdebug_flags |= KDBG_TYPEFILTER_CHECK;
2513 kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
2514 commpage_update_kdebug_state();
2515 }
2516
2517 /*
2518 * Disable the flags in the control page for the typefilter. The typefilter
2519 * may be safely deallocated shortly after this function returns.
2520 */
2521 static void
2522 kdbg_disable_typefilter(void)
2523 {
2524 bool notify_iops = kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK;
2525 kd_ctrl_page.kdebug_flags &= ~KDBG_TYPEFILTER_CHECK;
2526
2527 if ((kd_ctrl_page.kdebug_flags & (KDBG_PIDCHECK | KDBG_PIDEXCLUDE))) {
2528 kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
2529 } else {
2530 kdbg_set_flags(SLOW_CHECKS, 0, FALSE);
2531 }
2532 commpage_update_kdebug_state();
2533
2534 if (notify_iops) {
2535 /*
2536 * Notify IOPs that the typefilter will now allow everything.
2537 * Otherwise, they won't know a typefilter is no longer in
2538 * effect.
2539 */
2540 typefilter_allow_all(kdbg_typefilter);
2541 kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops,
2542 KD_CALLBACK_TYPEFILTER_CHANGED, kdbg_typefilter);
2543 }
2544 }
2545
2546 uint32_t
2547 kdebug_commpage_state(void)
2548 {
2549 if (kdebug_enable) {
2550 if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) {
2551 return KDEBUG_COMMPAGE_ENABLE_TYPEFILTER | KDEBUG_COMMPAGE_ENABLE_TRACE;
2552 }
2553
2554 return KDEBUG_COMMPAGE_ENABLE_TRACE;
2555 }
2556
2557 return 0;
2558 }
2559
2560 int
2561 kdbg_setreg(kd_regtype * kdr)
2562 {
2563 int ret=0;
2564 unsigned int val_1, val_2, val;
2565 switch (kdr->type) {
2566
2567 case KDBG_CLASSTYPE :
2568 val_1 = (kdr->value1 & 0xff);
2569 val_2 = (kdr->value2 & 0xff);
2570 kdlog_beg = (val_1<<24);
2571 kdlog_end = (val_2<<24);
2572 kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
2573 kd_ctrl_page.kdebug_flags &= ~KDBG_VALCHECK; /* Turn off specific value check */
2574 kd_ctrl_page.kdebug_flags |= (KDBG_RANGECHECK | KDBG_CLASSTYPE);
2575 kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
2576 break;
2577 case KDBG_SUBCLSTYPE :
2578 val_1 = (kdr->value1 & 0xff);
2579 val_2 = (kdr->value2 & 0xff);
2580 val = val_2 + 1;
2581 kdlog_beg = ((val_1<<24) | (val_2 << 16));
2582 kdlog_end = ((val_1<<24) | (val << 16));
2583 kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
2584 kd_ctrl_page.kdebug_flags &= ~KDBG_VALCHECK; /* Turn off specific value check */
2585 kd_ctrl_page.kdebug_flags |= (KDBG_RANGECHECK | KDBG_SUBCLSTYPE);
2586 kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
2587 break;
2588 case KDBG_RANGETYPE :
2589 kdlog_beg = (kdr->value1);
2590 kdlog_end = (kdr->value2);
2591 kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
2592 kd_ctrl_page.kdebug_flags &= ~KDBG_VALCHECK; /* Turn off specific value check */
2593 kd_ctrl_page.kdebug_flags |= (KDBG_RANGECHECK | KDBG_RANGETYPE);
2594 kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
2595 break;
2596 case KDBG_VALCHECK:
2597 kdlog_value1 = (kdr->value1);
2598 kdlog_value2 = (kdr->value2);
2599 kdlog_value3 = (kdr->value3);
2600 kdlog_value4 = (kdr->value4);
2601 kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
2602 kd_ctrl_page.kdebug_flags &= ~KDBG_RANGECHECK; /* Turn off range check */
2603 kd_ctrl_page.kdebug_flags |= KDBG_VALCHECK; /* Turn on specific value check */
2604 kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
2605 break;
2606 case KDBG_TYPENONE :
2607 kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
2608
2609 if ( (kd_ctrl_page.kdebug_flags & (KDBG_RANGECHECK | KDBG_VALCHECK |
2610 KDBG_PIDCHECK | KDBG_PIDEXCLUDE |
2611 KDBG_TYPEFILTER_CHECK)) )
2612 kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
2613 else
2614 kdbg_set_flags(SLOW_CHECKS, 0, FALSE);
2615
2616 kdlog_beg = 0;
2617 kdlog_end = 0;
2618 break;
2619 default :
2620 ret = EINVAL;
2621 break;
2622 }
2623 return(ret);
2624 }
2625
2626 static int
2627 kdbg_write_to_vnode(caddr_t buffer, size_t size, vnode_t vp, vfs_context_t ctx, off_t file_offset)
2628 {
2629 return vn_rdwr(UIO_WRITE, vp, buffer, size, file_offset, UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT,
2630 vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
2631 }
2632
2633 int
2634 kdbg_write_v3_chunk_header(user_addr_t buffer, uint32_t tag, uint32_t sub_tag, uint64_t length, vnode_t vp, vfs_context_t ctx)
2635 {
2636 int ret = KERN_SUCCESS;
2637 kd_chunk_header_v3 header = {
2638 .tag = tag,
2639 .sub_tag = sub_tag,
2640 .length = length,
2641 };
2642
2643 // Check that only one of them is valid
2644 assert(!buffer ^ !vp);
2645 assert((vp == NULL) || (ctx != NULL));
2646
2647 // Write the 8-byte future_chunk_timestamp field in the payload
2648 if (buffer || vp) {
2649 if (vp) {
2650 ret = kdbg_write_to_vnode((caddr_t)&header, sizeof(kd_chunk_header_v3), vp, ctx, RAW_file_offset);
2651 if (ret) {
2652 goto write_error;
2653 }
2654 RAW_file_offset += (sizeof(kd_chunk_header_v3));
2655 }
2656 else {
2657 ret = copyout(&header, buffer, sizeof(kd_chunk_header_v3));
2658 if (ret) {
2659 goto write_error;
2660 }
2661 }
2662 }
2663 write_error:
2664 return ret;
2665 }
2666
2667 int
2668 kdbg_write_v3_chunk_header_to_buffer(void * buffer, uint32_t tag, uint32_t sub_tag, uint64_t length)
2669 {
2670 kd_chunk_header_v3 header = {
2671 .tag = tag,
2672 .sub_tag = sub_tag,
2673 .length = length,
2674 };
2675
2676 if (!buffer) {
2677 return 0;
2678 }
2679
2680 memcpy(buffer, &header, sizeof(kd_chunk_header_v3));
2681
2682 return (sizeof(kd_chunk_header_v3));
2683 }
2684
2685 int
2686 kdbg_write_v3_chunk_to_fd(uint32_t tag, uint32_t sub_tag, uint64_t length, void *payload, uint64_t payload_size, int fd)
2687 {
2688 proc_t p;
2689 struct vfs_context context;
2690 struct fileproc *fp;
2691 vnode_t vp;
2692 p = current_proc();
2693
2694 proc_fdlock(p);
2695 if ( (fp_lookup(p, fd, &fp, 1)) ) {
2696 proc_fdunlock(p);
2697 return EFAULT;
2698 }
2699
2700 context.vc_thread = current_thread();
2701 context.vc_ucred = fp->f_fglob->fg_cred;
2702
2703 if (FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_VNODE) {
2704 fp_drop(p, fd, fp, 1);
2705 proc_fdunlock(p);
2706 return EBADF;
2707 }
2708 vp = (struct vnode *) fp->f_fglob->fg_data;
2709 proc_fdunlock(p);
2710
2711 if ( (vnode_getwithref(vp)) == 0 ) {
2712 RAW_file_offset = fp->f_fglob->fg_offset;
2713
2714 kd_chunk_header_v3 chunk_header = {
2715 .tag = tag,
2716 .sub_tag = sub_tag,
2717 .length = length,
2718 };
2719
2720 int ret = kdbg_write_to_vnode((caddr_t) &chunk_header, sizeof(kd_chunk_header_v3), vp, &context, RAW_file_offset);
2721 if (!ret) {
2722 RAW_file_offset += sizeof(kd_chunk_header_v3);
2723 }
2724
2725 ret = kdbg_write_to_vnode((caddr_t) payload, (size_t) payload_size, vp, &context, RAW_file_offset);
2726 if (!ret) {
2727 RAW_file_offset += payload_size;
2728 }
2729
2730 fp->f_fglob->fg_offset = RAW_file_offset;
2731 vnode_put(vp);
2732 }
2733
2734 fp_drop(p, fd, fp, 0);
2735 return KERN_SUCCESS;
2736 }
2737
2738 user_addr_t
2739 kdbg_write_v3_event_chunk_header(user_addr_t buffer, uint32_t tag, uint64_t length, vnode_t vp, vfs_context_t ctx)
2740 {
2741 uint64_t future_chunk_timestamp = 0;
2742 length += sizeof(uint64_t);
2743
2744 if (kdbg_write_v3_chunk_header(buffer, tag, V3_EVENT_DATA_VERSION, length, vp, ctx)) {
2745 return 0;
2746 }
2747 if (buffer) {
2748 buffer += sizeof(kd_chunk_header_v3);
2749 }
2750
2751 // Check that only one of them is valid
2752 assert(!buffer ^ !vp);
2753 assert((vp == NULL) || (ctx != NULL));
2754
2755 // Write the 8-byte future_chunk_timestamp field in the payload
2756 if (buffer || vp) {
2757 if (vp) {
2758 int ret = kdbg_write_to_vnode((caddr_t)&future_chunk_timestamp, sizeof(uint64_t), vp, ctx, RAW_file_offset);
2759 if (!ret) {
2760 RAW_file_offset += (sizeof(uint64_t));
2761 }
2762 }
2763 else {
2764 if (copyout(&future_chunk_timestamp, buffer, sizeof(uint64_t))) {
2765 return 0;
2766 }
2767 }
2768 }
2769
2770 return (buffer + sizeof(uint64_t));
2771 }
2772
2773 int
2774 kdbg_write_v3_header(user_addr_t user_header, size_t *user_header_size, int fd)
2775 {
2776 int ret = KERN_SUCCESS;
2777
2778 uint8_t* cpumap = 0;
2779 uint32_t cpumap_size = 0;
2780 uint32_t thrmap_size = 0;
2781
2782 size_t bytes_needed = 0;
2783
2784 // Check that only one of them is valid
2785 assert(!user_header ^ !fd);
2786 assert(user_header_size);
2787
2788 if ( !(kd_ctrl_page.kdebug_flags & KDBG_BUFINIT) ) {
2789 ret = EINVAL;
2790 goto bail;
2791 }
2792
2793 if ( !(user_header || fd) ) {
2794 ret = EINVAL;
2795 goto bail;
2796 }
2797
2798 // Initialize the cpu map
2799 ret = kdbg_cpumap_init_internal(kd_ctrl_page.kdebug_iops, kd_ctrl_page.kdebug_cpus, &cpumap, &cpumap_size);
2800 if (ret != KERN_SUCCESS) {
2801 goto bail;
2802 }
2803
2804 // Check if a thread map is initialized
2805 if ( !kd_mapptr ) {
2806 ret = EINVAL;
2807 goto bail;
2808 }
2809 thrmap_size = kd_mapcount * sizeof(kd_threadmap);
2810
2811 mach_timebase_info_data_t timebase = {0, 0};
2812 clock_timebase_info(&timebase);
2813
2814 // Setup the header.
2815 // See v3 header description in sys/kdebug.h for more inforamtion.
2816 kd_header_v3 header = {
2817 .tag = RAW_VERSION3,
2818 .sub_tag = V3_HEADER_VERSION,
2819 .length = (sizeof(kd_header_v3) + cpumap_size - sizeof(kd_cpumap_header)),
2820 .timebase_numer = timebase.numer,
2821 .timebase_denom = timebase.denom,
2822 .timestamp = 0, /* FIXME rdar://problem/22053009 */
2823 .walltime_secs = 0,
2824 .walltime_usecs = 0,
2825 .timezone_minuteswest = 0,
2826 .timezone_dst = 0,
2827 #if defined(__LP64__)
2828 .flags = 1,
2829 #else
2830 .flags = 0,
2831 #endif
2832 };
2833
2834 // If its a buffer, check if we have enough space to copy the header and the maps.
2835 if (user_header) {
2836 bytes_needed = header.length + thrmap_size + (2 * sizeof(kd_chunk_header_v3));
2837 if (*user_header_size < bytes_needed) {
2838 ret = EINVAL;
2839 goto bail;
2840 }
2841 }
2842
2843 // Start writing the header
2844 if (fd) {
2845 void *hdr_ptr = (void *)(((uintptr_t) &header) + sizeof(kd_chunk_header_v3));
2846 size_t payload_size = (sizeof(kd_header_v3) - sizeof(kd_chunk_header_v3));
2847
2848 ret = kdbg_write_v3_chunk_to_fd(RAW_VERSION3, V3_HEADER_VERSION, header.length, hdr_ptr, payload_size, fd);
2849 if (ret) {
2850 goto bail;
2851 }
2852 }
2853 else {
2854 if (copyout(&header, user_header, sizeof(kd_header_v3))) {
2855 ret = EFAULT;
2856 goto bail;
2857 }
2858 // Update the user pointer
2859 user_header += sizeof(kd_header_v3);
2860 }
2861
2862 // Write a cpu map. This is a sub chunk of the header
2863 cpumap = (uint8_t*)((uintptr_t) cpumap + sizeof(kd_cpumap_header));
2864 size_t payload_size = (size_t)(cpumap_size - sizeof(kd_cpumap_header));
2865 if (fd) {
2866 ret = kdbg_write_v3_chunk_to_fd(V3_CPU_MAP, V3_CPUMAP_VERSION, payload_size, (void *)cpumap, payload_size, fd);
2867 if (ret) {
2868 goto bail;
2869 }
2870 }
2871 else {
2872 ret = kdbg_write_v3_chunk_header(user_header, V3_CPU_MAP, V3_CPUMAP_VERSION, payload_size, NULL, NULL);
2873 if (ret) {
2874 goto bail;
2875 }
2876 user_header += sizeof(kd_chunk_header_v3);
2877 if (copyout(cpumap, user_header, payload_size)) {
2878 ret = EFAULT;
2879 goto bail;
2880 }
2881 // Update the user pointer
2882 user_header += payload_size;
2883 }
2884
2885 // Write a thread map
2886 if (fd) {
2887 ret = kdbg_write_v3_chunk_to_fd(V3_THREAD_MAP, V3_THRMAP_VERSION, thrmap_size, (void *)kd_mapptr, thrmap_size, fd);
2888 if (ret) {
2889 goto bail;
2890 }
2891 }
2892 else {
2893 ret = kdbg_write_v3_chunk_header(user_header, V3_THREAD_MAP, V3_THRMAP_VERSION, thrmap_size, NULL, NULL);
2894 if (ret) {
2895 goto bail;
2896 }
2897 user_header += sizeof(kd_chunk_header_v3);
2898 if (copyout(kd_mapptr, user_header, thrmap_size)) {
2899 ret = EFAULT;
2900 goto bail;
2901 }
2902 user_header += thrmap_size;
2903 }
2904
2905 if (fd) {
2906 RAW_file_written += bytes_needed;
2907 }
2908
2909 *user_header_size = bytes_needed;
2910 bail:
2911 if (cpumap) {
2912 kmem_free(kernel_map, (vm_offset_t)cpumap, cpumap_size);
2913 }
2914 return (ret);
2915 }
2916
2917 int
2918 kdbg_readcpumap(user_addr_t user_cpumap, size_t *user_cpumap_size)
2919 {
2920 uint8_t* cpumap = NULL;
2921 uint32_t cpumap_size = 0;
2922 int ret = KERN_SUCCESS;
2923
2924 if (kd_ctrl_page.kdebug_flags & KDBG_BUFINIT) {
2925 if (kdbg_cpumap_init_internal(kd_ctrl_page.kdebug_iops, kd_ctrl_page.kdebug_cpus, &cpumap, &cpumap_size) == KERN_SUCCESS) {
2926 if (user_cpumap) {
2927 size_t bytes_to_copy = (*user_cpumap_size >= cpumap_size) ? cpumap_size : *user_cpumap_size;
2928 if (copyout(cpumap, user_cpumap, (size_t)bytes_to_copy)) {
2929 ret = EFAULT;
2930 }
2931 }
2932 *user_cpumap_size = cpumap_size;
2933 kmem_free(kernel_map, (vm_offset_t)cpumap, cpumap_size);
2934 } else
2935 ret = EINVAL;
2936 } else
2937 ret = EINVAL;
2938
2939 return (ret);
2940 }
2941
2942 int
2943 kdbg_readcurthrmap(user_addr_t buffer, size_t *bufsize)
2944 {
2945 kd_threadmap *mapptr;
2946 unsigned int mapsize;
2947 unsigned int mapcount;
2948 unsigned int count = 0;
2949 int ret = 0;
2950
2951 count = *bufsize/sizeof(kd_threadmap);
2952 *bufsize = 0;
2953
2954 if ( (mapptr = kdbg_thrmap_init_internal(count, &mapsize, &mapcount)) ) {
2955 if (copyout(mapptr, buffer, mapcount * sizeof(kd_threadmap)))
2956 ret = EFAULT;
2957 else
2958 *bufsize = (mapcount * sizeof(kd_threadmap));
2959
2960 kmem_free(kernel_map, (vm_offset_t)mapptr, mapsize);
2961 } else
2962 ret = EINVAL;
2963
2964 return (ret);
2965 }
2966
2967 static int
2968 kdbg_write_v1_header(boolean_t write_thread_map, vnode_t vp, vfs_context_t ctx)
2969 {
2970 int ret = 0;
2971 RAW_header header;
2972 clock_sec_t secs;
2973 clock_usec_t usecs;
2974 char *pad_buf;
2975 uint32_t pad_size;
2976 uint32_t extra_thread_count = 0;
2977 uint32_t cpumap_size;
2978 size_t map_size = 0;
2979 size_t map_count = 0;
2980
2981 if (write_thread_map) {
2982 assert(kd_ctrl_page.kdebug_flags & KDBG_MAPINIT);
2983 map_count = kd_mapcount;
2984 map_size = map_count * sizeof(kd_threadmap);
2985 }
2986
2987 /*
2988 * Without the buffers initialized, we cannot construct a CPU map or a
2989 * thread map, and cannot write a header.
2990 */
2991 if (!(kd_ctrl_page.kdebug_flags & KDBG_BUFINIT)) {
2992 return EINVAL;
2993 }
2994
2995 /*
2996 * To write a RAW_VERSION1+ file, we must embed a cpumap in the
2997 * "padding" used to page align the events following the threadmap. If
2998 * the threadmap happens to not require enough padding, we artificially
2999 * increase its footprint until it needs enough padding.
3000 */
3001
3002 assert(vp);
3003 assert(ctx);
3004
3005 pad_size = PAGE_16KB - ((sizeof(RAW_header) + map_size) & PAGE_MASK_64);
3006 cpumap_size = sizeof(kd_cpumap_header) + kd_ctrl_page.kdebug_cpus * sizeof(kd_cpumap);
3007
3008 if (cpumap_size > pad_size) {
3009 /* If the cpu map doesn't fit in the current available pad_size,
3010 * we increase the pad_size by 16K. We do this so that the event
3011 * data is always available on a page aligned boundary for both
3012 * 4k and 16k systems. We enforce this alignment for the event
3013 * data so that we can take advantage of optimized file/disk writes.
3014 */
3015 pad_size += PAGE_16KB;
3016 }
3017
3018 /* The way we are silently embedding a cpumap in the "padding" is by artificially
3019 * increasing the number of thread entries. However, we'll also need to ensure that
3020 * the cpumap is embedded in the last 4K page before when the event data is expected.
3021 * This way the tools can read the data starting the next page boundary on both
3022 * 4K and 16K systems preserving compatibility with older versions of the tools
3023 */
3024 if (pad_size > PAGE_4KB) {
3025 pad_size -= PAGE_4KB;
3026 extra_thread_count = (pad_size / sizeof(kd_threadmap)) + 1;
3027 }
3028
3029 memset(&header, 0, sizeof(header));
3030 header.version_no = RAW_VERSION1;
3031 header.thread_count = map_count + extra_thread_count;
3032
3033 clock_get_calendar_microtime(&secs, &usecs);
3034 header.TOD_secs = secs;
3035 header.TOD_usecs = usecs;
3036
3037 ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)&header, sizeof(RAW_header), RAW_file_offset,
3038 UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
3039 if (ret) {
3040 goto write_error;
3041 }
3042 RAW_file_offset += sizeof(RAW_header);
3043 RAW_file_written += sizeof(RAW_header);
3044
3045 if (write_thread_map) {
3046 ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)kd_mapptr, map_size, RAW_file_offset,
3047 UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
3048 if (ret) {
3049 goto write_error;
3050 }
3051
3052 RAW_file_offset += map_size;
3053 RAW_file_written += map_size;
3054 }
3055
3056 if (extra_thread_count) {
3057 pad_size = extra_thread_count * sizeof(kd_threadmap);
3058 pad_buf = kalloc(pad_size);
3059 if (!pad_buf) {
3060 ret = ENOMEM;
3061 goto write_error;
3062 }
3063 memset(pad_buf, 0, pad_size);
3064
3065 ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)pad_buf, pad_size, RAW_file_offset,
3066 UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
3067 kfree(pad_buf, pad_size);
3068 if (ret) {
3069 goto write_error;
3070 }
3071
3072 RAW_file_offset += pad_size;
3073 RAW_file_written += pad_size;
3074 }
3075
3076 pad_size = PAGE_SIZE - (RAW_file_offset & PAGE_MASK_64);
3077 if (pad_size) {
3078 pad_buf = (char *)kalloc(pad_size);
3079 if (!pad_buf) {
3080 ret = ENOMEM;
3081 goto write_error;
3082 }
3083 memset(pad_buf, 0, pad_size);
3084
3085 /*
3086 * embed a cpumap in the padding bytes.
3087 * older code will skip this.
3088 * newer code will know how to read it.
3089 */
3090 uint32_t temp = pad_size;
3091 if (kdbg_cpumap_init_internal(kd_ctrl_page.kdebug_iops, kd_ctrl_page.kdebug_cpus, (uint8_t**)&pad_buf, &temp) != KERN_SUCCESS) {
3092 memset(pad_buf, 0, pad_size);
3093 }
3094
3095 ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)pad_buf, pad_size, RAW_file_offset,
3096 UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
3097 kfree(pad_buf, pad_size);
3098 if (ret) {
3099 goto write_error;
3100 }
3101
3102 RAW_file_offset += pad_size;
3103 RAW_file_written += pad_size;
3104 }
3105
3106 write_error:
3107 return ret;
3108 }
3109
3110 static void
3111 kdbg_clear_thread_map(void)
3112 {
3113 ktrace_assert_lock_held();
3114
3115 if (kd_ctrl_page.kdebug_flags & KDBG_MAPINIT) {
3116 assert(kd_mapptr != NULL);
3117 kmem_free(kernel_map, (vm_offset_t)kd_mapptr, kd_mapsize);
3118 kd_mapptr = NULL;
3119 kd_mapsize = 0;
3120 kd_mapcount = 0;
3121 kd_ctrl_page.kdebug_flags &= ~KDBG_MAPINIT;
3122 }
3123 }
3124
3125 /*
3126 * Write out a version 1 header and the thread map, if it is initialized, to a
3127 * vnode. Used by KDWRITEMAP and kdbg_dump_trace_to_file.
3128 *
3129 * Returns write errors from vn_rdwr if a write fails. Returns ENODATA if the
3130 * thread map has not been initialized, but the header will still be written.
3131 * Returns ENOMEM if padding could not be allocated. Returns 0 otherwise.
3132 */
3133 static int
3134 kdbg_write_thread_map(vnode_t vp, vfs_context_t ctx)
3135 {
3136 int ret = 0;
3137 boolean_t map_initialized;
3138
3139 ktrace_assert_lock_held();
3140 assert(ctx != NULL);
3141
3142 map_initialized = (kd_ctrl_page.kdebug_flags & KDBG_MAPINIT);
3143
3144 ret = kdbg_write_v1_header(map_initialized, vp, ctx);
3145 if (ret == 0) {
3146 if (map_initialized) {
3147 kdbg_clear_thread_map();
3148 } else {
3149 ret = ENODATA;
3150 }
3151 }
3152
3153 return ret;
3154 }
3155
3156 /*
3157 * Copy out the thread map to a user space buffer. Used by KDTHRMAP.
3158 *
3159 * Returns copyout errors if the copyout fails. Returns ENODATA if the thread
3160 * map has not been initialized. Returns EINVAL if the buffer provided is not
3161 * large enough for the entire thread map. Returns 0 otherwise.
3162 */
3163 static int
3164 kdbg_copyout_thread_map(user_addr_t buffer, size_t *buffer_size)
3165 {
3166 boolean_t map_initialized;
3167 size_t map_size;
3168 int ret = 0;
3169
3170 ktrace_assert_lock_held();
3171 assert(buffer_size != NULL);
3172
3173 map_initialized = (kd_ctrl_page.kdebug_flags & KDBG_MAPINIT);
3174 if (!map_initialized) {
3175 return ENODATA;
3176 }
3177
3178 map_size = kd_mapcount * sizeof(kd_threadmap);
3179 if (*buffer_size < map_size) {
3180 return EINVAL;
3181 }
3182
3183 ret = copyout(kd_mapptr, buffer, map_size);
3184 if (ret == 0) {
3185 kdbg_clear_thread_map();
3186 }
3187
3188 return ret;
3189 }
3190
3191 int
3192 kdbg_readthrmap_v3(user_addr_t buffer, size_t buffer_size, int fd)
3193 {
3194 int ret = 0;
3195 boolean_t map_initialized;
3196 size_t map_size;
3197
3198 ktrace_assert_lock_held();
3199
3200 if ((!fd && !buffer) || (fd && buffer)) {
3201 return EINVAL;
3202 }
3203
3204 map_initialized = (kd_ctrl_page.kdebug_flags & KDBG_MAPINIT);
3205 map_size = kd_mapcount * sizeof(kd_threadmap);
3206
3207 if (map_initialized && (buffer_size >= map_size))
3208 {
3209 ret = kdbg_write_v3_header(buffer, &buffer_size, fd);
3210
3211 if (ret == 0) {
3212 kdbg_clear_thread_map();
3213 }
3214 } else {
3215 ret = EINVAL;
3216 }
3217
3218 return ret;
3219 }
3220
3221 static void
3222 kdbg_set_nkdbufs(unsigned int value)
3223 {
3224 /*
3225 * We allow a maximum buffer size of 50% of either ram or max mapped
3226 * address, whichever is smaller 'value' is the desired number of trace
3227 * entries
3228 */
3229 unsigned int max_entries = (sane_size / 2) / sizeof(kd_buf);
3230
3231 if (value <= max_entries) {
3232 nkdbufs = value;
3233 } else {
3234 nkdbufs = max_entries;
3235 }
3236 }
3237
3238 /*
3239 * Block until there are `n_storage_threshold` storage units filled with
3240 * events or `timeout_ms` milliseconds have passed. If `locked_wait` is true,
3241 * `ktrace_lock` is held while waiting. This is necessary while waiting to
3242 * write events out of the buffers.
3243 *
3244 * Returns true if the threshold was reached and false otherwise.
3245 *
3246 * Called with `ktrace_lock` locked and interrupts enabled.
3247 */
3248 static boolean_t
3249 kdbg_wait(uint64_t timeout_ms, boolean_t locked_wait)
3250 {
3251 int wait_result = THREAD_AWAKENED;
3252 uint64_t abstime = 0;
3253
3254 ktrace_assert_lock_held();
3255
3256 if (timeout_ms != 0) {
3257 uint64_t ns = timeout_ms * NSEC_PER_MSEC;
3258 nanoseconds_to_absolutetime(ns, &abstime);
3259 clock_absolutetime_interval_to_deadline(abstime, &abstime);
3260 }
3261
3262 boolean_t s = ml_set_interrupts_enabled(FALSE);
3263 if (!s) {
3264 panic("kdbg_wait() called with interrupts disabled");
3265 }
3266 lck_spin_lock(kdw_spin_lock);
3267
3268 if (!locked_wait) {
3269 /* drop the mutex to allow others to access trace */
3270 ktrace_unlock();
3271 }
3272
3273 while (wait_result == THREAD_AWAKENED &&
3274 kd_ctrl_page.kds_inuse_count < n_storage_threshold)
3275 {
3276 kds_waiter = 1;
3277
3278 if (abstime) {
3279 wait_result = lck_spin_sleep_deadline(kdw_spin_lock, 0, &kds_waiter, THREAD_ABORTSAFE, abstime);
3280 } else {
3281 wait_result = lck_spin_sleep(kdw_spin_lock, 0, &kds_waiter, THREAD_ABORTSAFE);
3282 }
3283
3284 kds_waiter = 0;
3285 }
3286
3287 /* check the count under the spinlock */
3288 boolean_t threshold_exceeded = (kd_ctrl_page.kds_inuse_count >= n_storage_threshold);
3289
3290 lck_spin_unlock(kdw_spin_lock);
3291 ml_set_interrupts_enabled(s);
3292
3293 if (!locked_wait) {
3294 /* pick the mutex back up again */
3295 ktrace_lock();
3296 }
3297
3298 /* write out whether we've exceeded the threshold */
3299 return threshold_exceeded;
3300 }
3301
3302 /*
3303 * Wakeup a thread waiting using `kdbg_wait` if there are at least
3304 * `n_storage_threshold` storage units in use.
3305 */
3306 static void
3307 kdbg_wakeup(void)
3308 {
3309 boolean_t need_kds_wakeup = FALSE;
3310
3311 /*
3312 * Try to take the lock here to synchronize with the waiter entering
3313 * the blocked state. Use the try mode to prevent deadlocks caused by
3314 * re-entering this routine due to various trace points triggered in the
3315 * lck_spin_sleep_xxxx routines used to actually enter one of our 2 wait
3316 * conditions. No problem if we fail, there will be lots of additional
3317 * events coming in that will eventually succeed in grabbing this lock.
3318 */
3319 boolean_t s = ml_set_interrupts_enabled(FALSE);
3320
3321 if (lck_spin_try_lock(kdw_spin_lock)) {
3322 if (kds_waiter &&
3323 (kd_ctrl_page.kds_inuse_count >= n_storage_threshold))
3324 {
3325 kds_waiter = 0;
3326 need_kds_wakeup = TRUE;
3327 }
3328 lck_spin_unlock(kdw_spin_lock);
3329 }
3330
3331 ml_set_interrupts_enabled(s);
3332
3333 if (need_kds_wakeup == TRUE) {
3334 wakeup(&kds_waiter);
3335 }
3336 }
3337
3338 int
3339 kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep)
3340 {
3341 int ret = 0;
3342 size_t size = *sizep;
3343 unsigned int value = 0;
3344 kd_regtype kd_Reg;
3345 kbufinfo_t kd_bufinfo;
3346 proc_t p;
3347
3348 if (name[0] == KERN_KDWRITETR ||
3349 name[0] == KERN_KDWRITETR_V3 ||
3350 name[0] == KERN_KDWRITEMAP ||
3351 name[0] == KERN_KDWRITEMAP_V3 ||
3352 name[0] == KERN_KDEFLAGS ||
3353 name[0] == KERN_KDDFLAGS ||
3354 name[0] == KERN_KDENABLE ||
3355 name[0] == KERN_KDSETBUF)
3356 {
3357 if (namelen < 2) {
3358 return EINVAL;
3359 }
3360 value = name[1];
3361 }
3362
3363 kdbg_lock_init();
3364 assert(kd_ctrl_page.kdebug_flags & KDBG_LOCKINIT);
3365
3366 ktrace_lock();
3367
3368 /*
3369 * Some requests only require "read" access to kdebug trace. Regardless,
3370 * tell ktrace that a configuration or read is occurring (and see if it's
3371 * allowed).
3372 */
3373 if (name[0] != KERN_KDGETBUF &&
3374 name[0] != KERN_KDGETREG &&
3375 name[0] != KERN_KDREADCURTHRMAP)
3376 {
3377 if ((ret = ktrace_configure(KTRACE_KDEBUG))) {
3378 goto out;
3379 }
3380 } else {
3381 if ((ret = ktrace_read_check())) {
3382 goto out;
3383 }
3384 }
3385
3386 switch(name[0]) {
3387 case KERN_KDGETBUF:
3388 if (size < sizeof(kd_bufinfo.nkdbufs)) {
3389 /*
3390 * There is not enough room to return even
3391 * the first element of the info structure.
3392 */
3393 ret = EINVAL;
3394 break;
3395 }
3396
3397 memset(&kd_bufinfo, 0, sizeof(kd_bufinfo));
3398
3399 kd_bufinfo.nkdbufs = nkdbufs;
3400 kd_bufinfo.nkdthreads = kd_mapcount;
3401
3402 if ( (kd_ctrl_page.kdebug_slowcheck & SLOW_NOLOG) )
3403 kd_bufinfo.nolog = 1;
3404 else
3405 kd_bufinfo.nolog = 0;
3406
3407 kd_bufinfo.flags = kd_ctrl_page.kdebug_flags;
3408 #if defined(__LP64__)
3409 kd_bufinfo.flags |= KDBG_LP64;
3410 #endif
3411 {
3412 int pid = ktrace_get_owning_pid();
3413 kd_bufinfo.bufid = (pid == 0 ? -1 : pid);
3414 }
3415
3416 if (size >= sizeof(kd_bufinfo)) {
3417 /*
3418 * Provide all the info we have
3419 */
3420 if (copyout(&kd_bufinfo, where, sizeof(kd_bufinfo)))
3421 ret = EINVAL;
3422 } else {
3423 /*
3424 * For backwards compatibility, only provide
3425 * as much info as there is room for.
3426 */
3427 if (copyout(&kd_bufinfo, where, size))
3428 ret = EINVAL;
3429 }
3430 break;
3431
3432 case KERN_KDREADCURTHRMAP:
3433 ret = kdbg_readcurthrmap(where, sizep);
3434 break;
3435
3436 case KERN_KDEFLAGS:
3437 value &= KDBG_USERFLAGS;
3438 kd_ctrl_page.kdebug_flags |= value;
3439 break;
3440
3441 case KERN_KDDFLAGS:
3442 value &= KDBG_USERFLAGS;
3443 kd_ctrl_page.kdebug_flags &= ~value;
3444 break;
3445
3446 case KERN_KDENABLE:
3447 /*
3448 * Enable tracing mechanism. Two types:
3449 * KDEBUG_TRACE is the standard one,
3450 * and KDEBUG_PPT which is a carefully
3451 * chosen subset to avoid performance impact.
3452 */
3453 if (value) {
3454 /*
3455 * enable only if buffer is initialized
3456 */
3457 if (!(kd_ctrl_page.kdebug_flags & KDBG_BUFINIT) ||
3458 !(value == KDEBUG_ENABLE_TRACE || value == KDEBUG_ENABLE_PPT)) {
3459 ret = EINVAL;
3460 break;
3461 }
3462 kdbg_thrmap_init();
3463
3464 kdbg_set_tracing_enabled(TRUE, value);
3465 }
3466 else
3467 {
3468 if (!kdebug_enable) {
3469 break;
3470 }
3471
3472 kernel_debug_disable();
3473 }
3474 break;
3475
3476 case KERN_KDSETBUF:
3477 kdbg_set_nkdbufs(value);
3478 break;
3479
3480 case KERN_KDSETUP:
3481 ret = kdbg_reinit(FALSE);
3482 break;
3483
3484 case KERN_KDREMOVE:
3485 ktrace_reset(KTRACE_KDEBUG);
3486 break;
3487
3488 case KERN_KDSETREG:
3489 if(size < sizeof(kd_regtype)) {
3490 ret = EINVAL;
3491 break;
3492 }
3493 if (copyin(where, &kd_Reg, sizeof(kd_regtype))) {
3494 ret = EINVAL;
3495 break;
3496 }
3497
3498 ret = kdbg_setreg(&kd_Reg);
3499 break;
3500
3501 case KERN_KDGETREG:
3502 ret = EINVAL;
3503 break;
3504
3505 case KERN_KDREADTR:
3506 ret = kdbg_read(where, sizep, NULL, NULL, RAW_VERSION1);
3507 break;
3508
3509 case KERN_KDWRITETR:
3510 case KERN_KDWRITETR_V3:
3511 case KERN_KDWRITEMAP:
3512 case KERN_KDWRITEMAP_V3:
3513 {
3514 struct vfs_context context;
3515 struct fileproc *fp;
3516 size_t number;
3517 vnode_t vp;
3518 int fd;
3519
3520 if (name[0] == KERN_KDWRITETR || name[0] == KERN_KDWRITETR_V3) {
3521 (void)kdbg_wait(size, TRUE);
3522 }
3523 p = current_proc();
3524 fd = value;
3525
3526 proc_fdlock(p);
3527 if ( (ret = fp_lookup(p, fd, &fp, 1)) ) {
3528 proc_fdunlock(p);
3529 break;
3530 }
3531 context.vc_thread = current_thread();
3532 context.vc_ucred = fp->f_fglob->fg_cred;
3533
3534 if (FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_VNODE) {
3535 fp_drop(p, fd, fp, 1);
3536 proc_fdunlock(p);
3537
3538 ret = EBADF;
3539 break;
3540 }
3541 vp = (struct vnode *)fp->f_fglob->fg_data;
3542 proc_fdunlock(p);
3543
3544 if ((ret = vnode_getwithref(vp)) == 0) {
3545 RAW_file_offset = fp->f_fglob->fg_offset;
3546 if (name[0] == KERN_KDWRITETR || name[0] == KERN_KDWRITETR_V3) {
3547 number = nkdbufs * sizeof(kd_buf);
3548
3549 KDBG_RELEASE(TRACE_WRITING_EVENTS | DBG_FUNC_START);
3550 if (name[0] == KERN_KDWRITETR_V3)
3551 ret = kdbg_read(0, &number, vp, &context, RAW_VERSION3);
3552 else
3553 ret = kdbg_read(0, &number, vp, &context, RAW_VERSION1);
3554 KDBG_RELEASE(TRACE_WRITING_EVENTS | DBG_FUNC_END, number);
3555
3556 *sizep = number;
3557 } else {
3558 number = kd_mapcount * sizeof(kd_threadmap);
3559 if (name[0] == KERN_KDWRITEMAP_V3) {
3560 ret = kdbg_readthrmap_v3(0, number, fd);
3561 } else {
3562 ret = kdbg_write_thread_map(vp, &context);
3563 }
3564 }
3565 fp->f_fglob->fg_offset = RAW_file_offset;
3566 vnode_put(vp);
3567 }
3568 fp_drop(p, fd, fp, 0);
3569
3570 break;
3571 }
3572 case KERN_KDBUFWAIT:
3573 *sizep = kdbg_wait(size, FALSE);
3574 break;
3575
3576 case KERN_KDPIDTR:
3577 if (size < sizeof(kd_regtype)) {
3578 ret = EINVAL;
3579 break;
3580 }
3581 if (copyin(where, &kd_Reg, sizeof(kd_regtype))) {
3582 ret = EINVAL;
3583 break;
3584 }
3585
3586 ret = kdbg_setpid(&kd_Reg);
3587 break;
3588
3589 case KERN_KDPIDEX:
3590 if (size < sizeof(kd_regtype)) {
3591 ret = EINVAL;
3592 break;
3593 }
3594 if (copyin(where, &kd_Reg, sizeof(kd_regtype))) {
3595 ret = EINVAL;
3596 break;
3597 }
3598
3599 ret = kdbg_setpidex(&kd_Reg);
3600 break;
3601
3602 case KERN_KDCPUMAP:
3603 ret = kdbg_readcpumap(where, sizep);
3604 break;
3605
3606 case KERN_KDTHRMAP:
3607 ret = kdbg_copyout_thread_map(where, sizep);
3608 break;
3609
3610 case KERN_KDSET_TYPEFILTER: {
3611 ret = kdbg_copyin_typefilter(where, size);
3612 break;
3613 }
3614
3615 case KERN_KDTEST:
3616 ret = kdbg_test(size);
3617 break;
3618
3619 default:
3620 ret = EINVAL;
3621 break;
3622 }
3623 out:
3624 ktrace_unlock();
3625
3626 return ret;
3627 }
3628
3629
3630 /*
3631 * This code can run for the most part concurrently with kernel_debug_internal()...
3632 * 'release_storage_unit' will take the kds_spin_lock which may cause us to briefly
3633 * synchronize with the recording side of this puzzle... otherwise, we are able to
3634 * move through the lists w/o use of any locks
3635 */
3636 int
3637 kdbg_read(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx, uint32_t file_version)
3638 {
3639 unsigned int count;
3640 unsigned int cpu, min_cpu;
3641 uint64_t barrier_min = 0, barrier_max = 0, t, earliest_time;
3642 int error = 0;
3643 kd_buf *tempbuf;
3644 uint32_t rcursor;
3645 kd_buf lostevent;
3646 union kds_ptr kdsp;
3647 bool traced_retrograde = false;
3648 struct kd_storage *kdsp_actual;
3649 struct kd_bufinfo *kdbp;
3650 struct kd_bufinfo *min_kdbp;
3651 uint32_t tempbuf_count;
3652 uint32_t tempbuf_number;
3653 uint32_t old_kdebug_flags;
3654 uint32_t old_kdebug_slowcheck;
3655 boolean_t out_of_events = FALSE;
3656 boolean_t wrapped = FALSE;
3657
3658 assert(number);
3659 count = *number/sizeof(kd_buf);
3660 *number = 0;
3661
3662 ktrace_assert_lock_held();
3663
3664 if (count == 0 || !(kd_ctrl_page.kdebug_flags & KDBG_BUFINIT) || kdcopybuf == 0)
3665 return EINVAL;
3666
3667 thread_set_eager_preempt(current_thread());
3668
3669 memset(&lostevent, 0, sizeof(lostevent));
3670 lostevent.debugid = TRACE_LOST_EVENTS;
3671
3672 /*
3673 * Capture the current time. Only sort events that have occured
3674 * before now. Since the IOPs are being flushed here, it is possible
3675 * that events occur on the AP while running live tracing. If we are
3676 * disabled, no new events should occur on the AP.
3677 */
3678 if (kd_ctrl_page.enabled) {
3679 barrier_max = kdbg_timestamp() & KDBG_TIMESTAMP_MASK;
3680 }
3681
3682 /*
3683 * Request each IOP to provide us with up to date entries before merging
3684 * buffers together.
3685 */
3686 kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_SYNC_FLUSH, NULL);
3687
3688 /*
3689 * Disable wrap so storage units cannot be stolen out from underneath us
3690 * while merging events.
3691 *
3692 * Because we hold ktrace_lock, no other control threads can be playing
3693 * with kdebug_flags. The code that emits new events could be running,
3694 * but it grabs kds_spin_lock if it needs to acquire a new storage
3695 * chunk, which is where it examines kdebug_flags. If it is adding to
3696 * the same chunk we're reading from, check for that below.
3697 */
3698 wrapped = disable_wrap(&old_kdebug_slowcheck, &old_kdebug_flags);
3699
3700 if (count > nkdbufs)
3701 count = nkdbufs;
3702
3703 if ((tempbuf_count = count) > KDCOPYBUF_COUNT) {
3704 tempbuf_count = KDCOPYBUF_COUNT;
3705 }
3706
3707 /*
3708 * If the buffers have wrapped, do not emit additional lost events for the
3709 * oldest storage units.
3710 */
3711 if (wrapped) {
3712 kd_ctrl_page.kdebug_flags &= ~KDBG_WRAPPED;
3713
3714 for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_ctrl_page.kdebug_cpus; cpu++, kdbp++) {
3715 if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
3716 continue;
3717 }
3718 kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
3719 kdsp_actual->kds_lostevents = FALSE;
3720 }
3721 }
3722 /*
3723 * Capture the earliest time where there are events for all CPUs and don't
3724 * emit events with timestamps prior.
3725 */
3726 barrier_min = kd_ctrl_page.oldest_time;
3727
3728 while (count) {
3729 tempbuf = kdcopybuf;
3730 tempbuf_number = 0;
3731
3732 if (wrapped) {
3733 /*
3734 * Emit a lost events tracepoint to indicate that previous events
3735 * were lost -- the thread map cannot be trusted. A new one must
3736 * be taken so tools can analyze the trace in a backwards-facing
3737 * fashion.
3738 */
3739 kdbg_set_timestamp_and_cpu(&lostevent, barrier_min, 0);
3740 *tempbuf = lostevent;
3741 wrapped = FALSE;
3742 goto nextevent;
3743 }
3744
3745 /* While space left in merged events scratch buffer. */
3746 while (tempbuf_count) {
3747 bool lostevents = false;
3748 int lostcpu = 0;
3749 earliest_time = UINT64_MAX;
3750 min_kdbp = NULL;
3751 min_cpu = 0;
3752
3753 /* Check each CPU's buffers for the earliest event. */
3754 for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_ctrl_page.kdebug_cpus; cpu++, kdbp++) {
3755 /* Skip CPUs without data in their oldest storage unit. */
3756 if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
3757 next_cpu:
3758 continue;
3759 }
3760 /* From CPU data to buffer header to buffer. */
3761 kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
3762
3763 next_event:
3764 /* The next event to be read from this buffer. */
3765 rcursor = kdsp_actual->kds_readlast;
3766
3767 /* Skip this buffer if there are no events left. */
3768 if (rcursor == kdsp_actual->kds_bufindx) {
3769 continue;
3770 }
3771
3772 /*
3773 * Check that this storage unit wasn't stolen and events were
3774 * lost. This must have happened while wrapping was disabled
3775 * in this function.
3776 */
3777 if (kdsp_actual->kds_lostevents) {
3778 lostevents = true;
3779 kdsp_actual->kds_lostevents = FALSE;
3780
3781 /*
3782 * The earliest event we can trust is the first one in this
3783 * stolen storage unit.
3784 */
3785 uint64_t lost_time =
3786 kdbg_get_timestamp(&kdsp_actual->kds_records[0]);
3787 if (kd_ctrl_page.oldest_time < lost_time) {
3788 /*
3789 * If this is the first time we've seen lost events for
3790 * this gap, record its timestamp as the oldest
3791 * timestamp we're willing to merge for the lost events
3792 * tracepoint.
3793 */
3794 kd_ctrl_page.oldest_time = barrier_min = lost_time;
3795 lostcpu = cpu;
3796 }
3797 }
3798
3799 t = kdbg_get_timestamp(&kdsp_actual->kds_records[rcursor]);
3800
3801 if ((t > barrier_max) && (barrier_max > 0)) {
3802 if (kdbg_debug) {
3803 printf("kdebug: FUTURE EVENT: debugid %#8x: "
3804 "time %lld from CPU %u "
3805 "(barrier at time %lld, read %lu events)\n",
3806 kdsp_actual->kds_records[rcursor].debugid,
3807 t, cpu, barrier_max, *number + tempbuf_number);
3808 }
3809 /*
3810 * Need to flush IOPs again before we can sort any more
3811 * data from the buffers.
3812 */
3813 out_of_events = TRUE;
3814 break;
3815 }
3816 if (t < kdsp_actual->kds_timestamp) {
3817 /*
3818 * This indicates the event emitter hasn't completed
3819 * filling in the event (becuase we're looking at the
3820 * buffer that the record head is using). The max barrier
3821 * timestamp should have saved us from seeing these kinds
3822 * of things, but other CPUs might be slow on the up-take.
3823 *
3824 * Bail out so we don't get out-of-order events by
3825 * continuing to read events from other CPUs' events.
3826 */
3827 out_of_events = TRUE;
3828 break;
3829 }
3830
3831 /*
3832 * Ignore events that have aged out due to wrapping or storage
3833 * unit exhaustion while merging events.
3834 */
3835 if (t < barrier_min) {
3836 kdsp_actual->kds_readlast++;
3837
3838 if (kdsp_actual->kds_readlast >= EVENTS_PER_STORAGE_UNIT) {
3839 release_storage_unit(cpu, kdsp.raw);
3840
3841 if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
3842 goto next_cpu;
3843 }
3844 kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
3845 }
3846
3847 goto next_event;
3848 }
3849
3850 /*
3851 * Don't worry about merging any events -- just walk through
3852 * the CPUs and find the latest timestamp of lost events.
3853 */
3854 if (lostevents) {
3855 continue;
3856 }
3857
3858 if (t < earliest_time) {
3859 earliest_time = t;
3860 min_kdbp = kdbp;
3861 min_cpu = cpu;
3862 }
3863 }
3864 if (lostevents) {
3865 /*
3866 * If any lost events were hit in the buffers, emit an event
3867 * with the latest timestamp.
3868 */
3869 kdbg_set_timestamp_and_cpu(&lostevent, barrier_min, lostcpu);
3870 *tempbuf = lostevent;
3871 tempbuf->arg1 = 1;
3872 goto nextevent;
3873 }
3874 if (min_kdbp == NULL) {
3875 /* All buffers ran empty. */
3876 out_of_events = TRUE;
3877 }
3878 if (out_of_events) {
3879 break;
3880 }
3881
3882 kdsp = min_kdbp->kd_list_head;
3883 kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
3884
3885 /* Copy earliest event into merged events scratch buffer. */
3886 *tempbuf = kdsp_actual->kds_records[kdsp_actual->kds_readlast++];
3887
3888 if (kdsp_actual->kds_readlast == EVENTS_PER_STORAGE_UNIT)
3889 release_storage_unit(min_cpu, kdsp.raw);
3890
3891 /*
3892 * Watch for out of order timestamps (from IOPs).
3893 */
3894 if (earliest_time < min_kdbp->kd_prev_timebase) {
3895 /*
3896 * If we haven't already, emit a retrograde events event.
3897 * Otherwise, ignore this event.
3898 */
3899 if (traced_retrograde) {
3900 continue;
3901 }
3902
3903 kdbg_set_timestamp_and_cpu(tempbuf, min_kdbp->kd_prev_timebase, kdbg_get_cpu(tempbuf));
3904 tempbuf->arg1 = tempbuf->debugid;
3905 tempbuf->arg2 = earliest_time;
3906 tempbuf->arg3 = 0;
3907 tempbuf->arg4 = 0;
3908 tempbuf->debugid = TRACE_RETROGRADE_EVENTS;
3909 traced_retrograde = true;
3910 } else {
3911 min_kdbp->kd_prev_timebase = earliest_time;
3912 }
3913 nextevent:
3914 tempbuf_count--;
3915 tempbuf_number++;
3916 tempbuf++;
3917
3918 if ((RAW_file_written += sizeof(kd_buf)) >= RAW_FLUSH_SIZE)
3919 break;
3920 }
3921 if (tempbuf_number) {
3922 /*
3923 * Remember the latest timestamp of events that we've merged so we
3924 * don't think we've lost events later.
3925 */
3926 uint64_t latest_time = kdbg_get_timestamp(tempbuf - 1);
3927 if (kd_ctrl_page.oldest_time < latest_time) {
3928 kd_ctrl_page.oldest_time = latest_time;
3929 }
3930 if (file_version == RAW_VERSION3) {
3931 if ( !(kdbg_write_v3_event_chunk_header(buffer, V3_RAW_EVENTS, (tempbuf_number * sizeof(kd_buf)), vp, ctx))) {
3932 error = EFAULT;
3933 goto check_error;
3934 }
3935 if (buffer)
3936 buffer += (sizeof(kd_chunk_header_v3) + sizeof(uint64_t));
3937
3938 assert(count >= (sizeof(kd_chunk_header_v3) + sizeof(uint64_t)));
3939 count -= (sizeof(kd_chunk_header_v3) + sizeof(uint64_t));
3940 *number += (sizeof(kd_chunk_header_v3) + sizeof(uint64_t));
3941 }
3942 if (vp) {
3943 size_t write_size = tempbuf_number * sizeof(kd_buf);
3944 error = kdbg_write_to_vnode((caddr_t)kdcopybuf, write_size, vp, ctx, RAW_file_offset);
3945 if (!error)
3946 RAW_file_offset += write_size;
3947
3948 if (RAW_file_written >= RAW_FLUSH_SIZE) {
3949 error = VNOP_FSYNC(vp, MNT_NOWAIT, ctx);
3950
3951 RAW_file_written = 0;
3952 }
3953 } else {
3954 error = copyout(kdcopybuf, buffer, tempbuf_number * sizeof(kd_buf));
3955 buffer += (tempbuf_number * sizeof(kd_buf));
3956 }
3957 check_error:
3958 if (error) {
3959 *number = 0;
3960 error = EINVAL;
3961 break;
3962 }
3963 count -= tempbuf_number;
3964 *number += tempbuf_number;
3965 }
3966 if (out_of_events == TRUE)
3967 /*
3968 * all trace buffers are empty
3969 */
3970 break;
3971
3972 if ((tempbuf_count = count) > KDCOPYBUF_COUNT)
3973 tempbuf_count = KDCOPYBUF_COUNT;
3974 }
3975 if ( !(old_kdebug_flags & KDBG_NOWRAP)) {
3976 enable_wrap(old_kdebug_slowcheck);
3977 }
3978 thread_clear_eager_preempt(current_thread());
3979 return (error);
3980 }
3981
3982 static int
3983 kdbg_test(size_t flavor)
3984 {
3985 int code = 0;
3986 int dummy_iop = 0;
3987
3988 #define KDEBUG_TEST_CODE(code) BSDDBG_CODE(DBG_BSD_KDEBUG_TEST, (code))
3989 switch (flavor) {
3990 case 1:
3991 /* try each macro */
3992 KDBG(KDEBUG_TEST_CODE(code)); code++;
3993 KDBG(KDEBUG_TEST_CODE(code), 1); code++;
3994 KDBG(KDEBUG_TEST_CODE(code), 1, 2); code++;
3995 KDBG(KDEBUG_TEST_CODE(code), 1, 2, 3); code++;
3996 KDBG(KDEBUG_TEST_CODE(code), 1, 2, 3, 4); code++;
3997
3998 KDBG_RELEASE(KDEBUG_TEST_CODE(code)); code++;
3999 KDBG_RELEASE(KDEBUG_TEST_CODE(code), 1); code++;
4000 KDBG_RELEASE(KDEBUG_TEST_CODE(code), 1, 2); code++;
4001 KDBG_RELEASE(KDEBUG_TEST_CODE(code), 1, 2, 3); code++;
4002 KDBG_RELEASE(KDEBUG_TEST_CODE(code), 1, 2, 3, 4); code++;
4003
4004 KDBG_FILTERED(KDEBUG_TEST_CODE(code)); code++;
4005 KDBG_FILTERED(KDEBUG_TEST_CODE(code), 1); code++;
4006 KDBG_FILTERED(KDEBUG_TEST_CODE(code), 1, 2); code++;
4007 KDBG_FILTERED(KDEBUG_TEST_CODE(code), 1, 2, 3); code++;
4008 KDBG_FILTERED(KDEBUG_TEST_CODE(code), 1, 2, 3, 4); code++;
4009
4010 KDBG_RELEASE_NOPROCFILT(KDEBUG_TEST_CODE(code)); code++;
4011 KDBG_RELEASE_NOPROCFILT(KDEBUG_TEST_CODE(code), 1); code++;
4012 KDBG_RELEASE_NOPROCFILT(KDEBUG_TEST_CODE(code), 1, 2); code++;
4013 KDBG_RELEASE_NOPROCFILT(KDEBUG_TEST_CODE(code), 1, 2, 3); code++;
4014 KDBG_RELEASE_NOPROCFILT(KDEBUG_TEST_CODE(code), 1, 2, 3, 4); code++;
4015
4016 KDBG_DEBUG(KDEBUG_TEST_CODE(code)); code++;
4017 KDBG_DEBUG(KDEBUG_TEST_CODE(code), 1); code++;
4018 KDBG_DEBUG(KDEBUG_TEST_CODE(code), 1, 2); code++;
4019 KDBG_DEBUG(KDEBUG_TEST_CODE(code), 1, 2, 3); code++;
4020 KDBG_DEBUG(KDEBUG_TEST_CODE(code), 1, 2, 3, 4); code++;
4021 break;
4022
4023 case 2:
4024 if (kd_ctrl_page.kdebug_iops) {
4025 /* avoid the assertion in kernel_debug_enter for a valid IOP */
4026 dummy_iop = kd_ctrl_page.kdebug_iops[0].cpu_id;
4027 }
4028
4029 /* ensure old timestamps are not emitted from kernel_debug_enter */
4030 kernel_debug_enter(dummy_iop, KDEBUG_TEST_CODE(code),
4031 100 /* very old timestamp */, 0, 0, 0,
4032 0, (uintptr_t)thread_tid(current_thread()));
4033 code++;
4034 kernel_debug_enter(dummy_iop, KDEBUG_TEST_CODE(code),
4035 kdbg_timestamp(), 0, 0, 0, 0,
4036 (uintptr_t)thread_tid(current_thread()));
4037 code++;
4038 break;
4039
4040 default:
4041 return ENOTSUP;
4042 }
4043 #undef KDEBUG_TEST_CODE
4044
4045 return 0;
4046 }
4047
4048 void
4049 kdebug_init(unsigned int n_events, char *filter_desc, boolean_t wrapping)
4050 {
4051 assert(filter_desc != NULL);
4052
4053 #if defined(__x86_64__)
4054 /* only trace MACH events when outputting kdebug to serial */
4055 if (kdebug_serial) {
4056 n_events = 1;
4057 if (filter_desc[0] == '\0') {
4058 filter_desc[0] = 'C';
4059 filter_desc[1] = '1';
4060 filter_desc[2] = '\0';
4061 }
4062 }
4063 #endif /* defined(__x86_64__) */
4064
4065 if (log_leaks && n_events == 0) {
4066 n_events = 200000;
4067 }
4068
4069 kdebug_trace_start(n_events, filter_desc, wrapping, FALSE);
4070 }
4071
4072 static void
4073 kdbg_set_typefilter_string(const char *filter_desc)
4074 {
4075 char *end = NULL;
4076
4077 ktrace_assert_lock_held();
4078
4079 assert(filter_desc != NULL);
4080
4081 typefilter_reject_all(kdbg_typefilter);
4082 typefilter_allow_class(kdbg_typefilter, DBG_TRACE);
4083
4084 /* if the filter description starts with a number, assume it's a csc */
4085 if (filter_desc[0] >= '0' && filter_desc[0] <= '9'){
4086 unsigned long csc = strtoul(filter_desc, NULL, 0);
4087 if (filter_desc != end && csc <= KDBG_CSC_MAX) {
4088 typefilter_allow_csc(kdbg_typefilter, csc);
4089 }
4090 return;
4091 }
4092
4093 while (filter_desc[0] != '\0') {
4094 unsigned long allow_value;
4095
4096 char filter_type = filter_desc[0];
4097 if (filter_type != 'C' && filter_type != 'S') {
4098 return;
4099 }
4100 filter_desc++;
4101
4102 allow_value = strtoul(filter_desc, &end, 0);
4103 if (filter_desc == end) {
4104 /* cannot parse as integer */
4105 return;
4106 }
4107
4108 switch (filter_type) {
4109 case 'C':
4110 if (allow_value <= KDBG_CLASS_MAX) {
4111 typefilter_allow_class(kdbg_typefilter, allow_value);
4112 } else {
4113 /* illegal class */
4114 return;
4115 }
4116 break;
4117 case 'S':
4118 if (allow_value <= KDBG_CSC_MAX) {
4119 typefilter_allow_csc(kdbg_typefilter, allow_value);
4120 } else {
4121 /* illegal class subclass */
4122 return;
4123 }
4124 break;
4125 default:
4126 return;
4127 }
4128
4129 /* advance to next filter entry */
4130 filter_desc = end;
4131 if (filter_desc[0] == ',') {
4132 filter_desc++;
4133 }
4134 }
4135 }
4136
4137 /*
4138 * This function is meant to be called from the bootstrap thread or coming out
4139 * of acpi_idle_kernel.
4140 */
4141 void
4142 kdebug_trace_start(unsigned int n_events, const char *filter_desc,
4143 boolean_t wrapping, boolean_t at_wake)
4144 {
4145 if (!n_events) {
4146 kd_early_done = true;
4147 return;
4148 }
4149
4150 ktrace_start_single_threaded();
4151
4152 kdbg_lock_init();
4153
4154 ktrace_kernel_configure(KTRACE_KDEBUG);
4155
4156 kdbg_set_nkdbufs(n_events);
4157
4158 kernel_debug_string_early("start_kern_tracing");
4159
4160 if (kdbg_reinit(TRUE)) {
4161 printf("error from kdbg_reinit, kernel tracing not started\n");
4162 goto out;
4163 }
4164
4165 /*
4166 * Wrapping is disabled because boot and wake tracing is interested in
4167 * the earliest events, at the expense of later ones.
4168 */
4169 if (!wrapping) {
4170 uint32_t old1, old2;
4171 (void)disable_wrap(&old1, &old2);
4172 }
4173
4174 if (filter_desc && filter_desc[0] != '\0') {
4175 if (kdbg_initialize_typefilter(NULL) == KERN_SUCCESS) {
4176 kdbg_set_typefilter_string(filter_desc);
4177 kdbg_enable_typefilter();
4178 }
4179 }
4180
4181 /*
4182 * Hold off interrupts between getting a thread map and enabling trace
4183 * and until the early traces are recorded.
4184 */
4185 boolean_t s = ml_set_interrupts_enabled(FALSE);
4186
4187 if (at_wake) {
4188 kdbg_thrmap_init();
4189 }
4190
4191 kdbg_set_tracing_enabled(TRUE, KDEBUG_ENABLE_TRACE | (kdebug_serial ?
4192 KDEBUG_ENABLE_SERIAL : 0));
4193
4194 if (!at_wake) {
4195 /*
4196 * Transfer all very early events from the static buffer into the real
4197 * buffers.
4198 */
4199 kernel_debug_early_end();
4200 }
4201
4202 ml_set_interrupts_enabled(s);
4203
4204 printf("kernel tracing started with %u events\n", n_events);
4205
4206 #if KDEBUG_MOJO_TRACE
4207 if (kdebug_serial) {
4208 printf("serial output enabled with %lu named events\n",
4209 sizeof(kd_events)/sizeof(kd_event_t));
4210 }
4211 #endif /* KDEBUG_MOJO_TRACE */
4212
4213 out:
4214 ktrace_end_single_threaded();
4215 }
4216
4217 void
4218 kdbg_dump_trace_to_file(const char *filename)
4219 {
4220 vfs_context_t ctx;
4221 vnode_t vp;
4222 size_t write_size;
4223 int ret;
4224
4225 ktrace_lock();
4226
4227 if (!(kdebug_enable & KDEBUG_ENABLE_TRACE)) {
4228 goto out;
4229 }
4230
4231 if (ktrace_get_owning_pid() != 0) {
4232 /*
4233 * Another process owns ktrace and is still active, disable tracing to
4234 * prevent wrapping.
4235 */
4236 kdebug_enable = 0;
4237 kd_ctrl_page.enabled = 0;
4238 commpage_update_kdebug_state();
4239 goto out;
4240 }
4241
4242 KDBG_RELEASE(TRACE_WRITING_EVENTS | DBG_FUNC_START);
4243
4244 kdebug_enable = 0;
4245 kd_ctrl_page.enabled = 0;
4246 commpage_update_kdebug_state();
4247
4248 ctx = vfs_context_kernel();
4249
4250 if (vnode_open(filename, (O_CREAT | FWRITE | O_NOFOLLOW), 0600, 0, &vp, ctx)) {
4251 goto out;
4252 }
4253
4254 kdbg_write_thread_map(vp, ctx);
4255
4256 write_size = nkdbufs * sizeof(kd_buf);
4257 ret = kdbg_read(0, &write_size, vp, ctx, RAW_VERSION1);
4258 if (ret) {
4259 goto out_close;
4260 }
4261
4262 /*
4263 * Wait to synchronize the file to capture the I/O in the
4264 * TRACE_WRITING_EVENTS interval.
4265 */
4266 ret = VNOP_FSYNC(vp, MNT_WAIT, ctx);
4267
4268 /*
4269 * Balance the starting TRACE_WRITING_EVENTS tracepoint manually.
4270 */
4271 kd_buf end_event = {
4272 .debugid = TRACE_WRITING_EVENTS | DBG_FUNC_END,
4273 .arg1 = write_size,
4274 .arg2 = ret,
4275 .arg5 = thread_tid(current_thread()),
4276 };
4277 kdbg_set_timestamp_and_cpu(&end_event, kdbg_timestamp(),
4278 cpu_number());
4279
4280 /* this is best effort -- ignore any errors */
4281 (void)kdbg_write_to_vnode((caddr_t)&end_event, sizeof(kd_buf), vp, ctx,
4282 RAW_file_offset);
4283
4284 out_close:
4285 vnode_close(vp, FWRITE, ctx);
4286 sync(current_proc(), (void *)NULL, (int *)NULL);
4287
4288 out:
4289 ktrace_unlock();
4290 }
4291
4292 static int
4293 kdbg_sysctl_continuous SYSCTL_HANDLER_ARGS
4294 {
4295 #pragma unused(oidp, arg1, arg2)
4296 int value = kdbg_continuous_time;
4297 int ret = sysctl_io_number(req, value, sizeof(value), &value, NULL);
4298
4299 if (ret || !req->newptr) {
4300 return ret;
4301 }
4302
4303 kdbg_continuous_time = value;
4304 return 0;
4305 }
4306
4307 SYSCTL_NODE(_kern, OID_AUTO, kdbg, CTLFLAG_RD | CTLFLAG_LOCKED, 0,
4308 "kdbg");
4309
4310 SYSCTL_PROC(_kern_kdbg, OID_AUTO, experimental_continuous,
4311 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0,
4312 sizeof(int), kdbg_sysctl_continuous, "I",
4313 "Set kdebug to use mach_continuous_time");
4314
4315 SYSCTL_INT(_kern_kdbg, OID_AUTO, debug,
4316 CTLFLAG_RW | CTLFLAG_LOCKED,
4317 &kdbg_debug, 0, "Set kdebug debug mode");
4318
4319 SYSCTL_QUAD(_kern_kdbg, OID_AUTO, oldest_time,
4320 CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED,
4321 &kd_ctrl_page.oldest_time,
4322 "Find the oldest timestamp still in trace");
4323
4324 #if KDEBUG_MOJO_TRACE
4325 static kd_event_t *
4326 binary_search(uint32_t id)
4327 {
4328 int low, high, mid;
4329
4330 low = 0;
4331 high = sizeof(kd_events)/sizeof(kd_event_t) - 1;
4332
4333 while (TRUE)
4334 {
4335 mid = (low + high) / 2;
4336
4337 if (low > high)
4338 return NULL; /* failed */
4339 else if ( low + 1 >= high) {
4340 /* We have a match */
4341 if (kd_events[high].id == id)
4342 return &kd_events[high];
4343 else if (kd_events[low].id == id)
4344 return &kd_events[low];
4345 else
4346 return NULL; /* search failed */
4347 }
4348 else if (id < kd_events[mid].id)
4349 high = mid;
4350 else
4351 low = mid;
4352 }
4353 }
4354
4355 /*
4356 * Look up event id to get name string.
4357 * Using a per-cpu cache of a single entry
4358 * before resorting to a binary search of the full table.
4359 */
4360 #define NCACHE 1
4361 static kd_event_t *last_hit[MAX_CPUS];
4362 static kd_event_t *
4363 event_lookup_cache(uint32_t cpu, uint32_t id)
4364 {
4365 if (last_hit[cpu] == NULL || last_hit[cpu]->id != id)
4366 last_hit[cpu] = binary_search(id);
4367 return last_hit[cpu];
4368 }
4369
4370 static uint64_t kd_last_timstamp;
4371
4372 static void
4373 kdebug_serial_print(
4374 uint32_t cpunum,
4375 uint32_t debugid,
4376 uint64_t timestamp,
4377 uintptr_t arg1,
4378 uintptr_t arg2,
4379 uintptr_t arg3,
4380 uintptr_t arg4,
4381 uintptr_t threadid
4382 )
4383 {
4384 char kprintf_line[192];
4385 char event[40];
4386 uint64_t us = timestamp / NSEC_PER_USEC;
4387 uint64_t us_tenth = (timestamp % NSEC_PER_USEC) / 100;
4388 uint64_t delta = timestamp - kd_last_timstamp;
4389 uint64_t delta_us = delta / NSEC_PER_USEC;
4390 uint64_t delta_us_tenth = (delta % NSEC_PER_USEC) / 100;
4391 uint32_t event_id = debugid & KDBG_EVENTID_MASK;
4392 const char *command;
4393 const char *bra;
4394 const char *ket;
4395 kd_event_t *ep;
4396
4397 /* event time and delta from last */
4398 snprintf(kprintf_line, sizeof(kprintf_line),
4399 "%11llu.%1llu %8llu.%1llu ",
4400 us, us_tenth, delta_us, delta_us_tenth);
4401
4402
4403 /* event (id or name) - start prefixed by "[", end postfixed by "]" */
4404 bra = (debugid & DBG_FUNC_START) ? "[" : " ";
4405 ket = (debugid & DBG_FUNC_END) ? "]" : " ";
4406 ep = event_lookup_cache(cpunum, event_id);
4407 if (ep) {
4408 if (strlen(ep->name) < sizeof(event) - 3)
4409 snprintf(event, sizeof(event), "%s%s%s",
4410 bra, ep->name, ket);
4411 else
4412 snprintf(event, sizeof(event), "%s%x(name too long)%s",
4413 bra, event_id, ket);
4414 } else {
4415 snprintf(event, sizeof(event), "%s%x%s",
4416 bra, event_id, ket);
4417 }
4418 snprintf(kprintf_line + strlen(kprintf_line),
4419 sizeof(kprintf_line) - strlen(kprintf_line),
4420 "%-40s ", event);
4421
4422 /* arg1 .. arg4 with special cases for strings */
4423 switch (event_id) {
4424 case VFS_LOOKUP:
4425 case VFS_LOOKUP_DONE:
4426 if (debugid & DBG_FUNC_START) {
4427 /* arg1 hex then arg2..arg4 chars */
4428 snprintf(kprintf_line + strlen(kprintf_line),
4429 sizeof(kprintf_line) - strlen(kprintf_line),
4430 "%-16lx %-8s%-8s%-8s ",
4431 arg1, (char*)&arg2, (char*)&arg3, (char*)&arg4);
4432 break;
4433 }
4434 /* else fall through for arg1..arg4 chars */
4435 case TRACE_STRING_EXEC:
4436 case TRACE_STRING_NEWTHREAD:
4437 case TRACE_INFO_STRING:
4438 snprintf(kprintf_line + strlen(kprintf_line),
4439 sizeof(kprintf_line) - strlen(kprintf_line),
4440 "%-8s%-8s%-8s%-8s ",
4441 (char*)&arg1, (char*)&arg2, (char*)&arg3, (char*)&arg4);
4442 break;
4443 default:
4444 snprintf(kprintf_line + strlen(kprintf_line),
4445 sizeof(kprintf_line) - strlen(kprintf_line),
4446 "%-16lx %-16lx %-16lx %-16lx",
4447 arg1, arg2, arg3, arg4);
4448 }
4449
4450 /* threadid, cpu and command name */
4451 if (threadid == (uintptr_t)thread_tid(current_thread()) &&
4452 current_proc() &&
4453 current_proc()->p_comm[0])
4454 command = current_proc()->p_comm;
4455 else
4456 command = "-";
4457 snprintf(kprintf_line + strlen(kprintf_line),
4458 sizeof(kprintf_line) - strlen(kprintf_line),
4459 " %-16lx %-2d %s\n",
4460 threadid, cpunum, command);
4461
4462 kprintf("%s", kprintf_line);
4463 kd_last_timstamp = timestamp;
4464 }
4465
4466 #endif