1 /*
2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3 *
4 * @Apple_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
21 */
22
23 #include <machine/spl.h>
24
25 #include <sys/errno.h>
26 #include <sys/param.h>
27 #include <sys/systm.h>
28 #include <sys/proc_internal.h>
29 #include <sys/vm.h>
30 #include <sys/sysctl.h>
31 #include <sys/kdebug.h>
32 #include <sys/kauth.h>
33 #include <sys/ktrace.h>
34 #include <sys/sysproto.h>
35 #include <sys/bsdtask_info.h>
36 #include <sys/random.h>
37
38 #include <mach/clock_types.h>
39 #include <mach/mach_types.h>
40 #include <mach/mach_time.h>
41 #include <mach/mach_vm.h>
42 #include <machine/machine_routines.h>
43
44 #include <mach/machine.h>
45 #include <mach/vm_map.h>
46
47 #if defined(__i386__) || defined(__x86_64__)
48 #include <i386/rtclock_protos.h>
49 #include <i386/mp.h>
50 #include <i386/machine_routines.h>
51 #endif
52
53 #include <kern/clock.h>
54
55 #include <kern/thread.h>
56 #include <kern/task.h>
57 #include <kern/debug.h>
58 #include <kern/kalloc.h>
59 #include <kern/cpu_data.h>
60 #include <kern/assert.h>
61 #include <kern/telemetry.h>
62 #include <kern/sched_prim.h>
63 #include <vm/vm_kern.h>
64 #include <sys/lock.h>
65 #include <kperf/kperf.h>
66 #include <pexpert/device_tree.h>
67
68 #include <sys/malloc.h>
69 #include <sys/mcache.h>
70
71 #include <sys/vnode.h>
72 #include <sys/vnode_internal.h>
73 #include <sys/fcntl.h>
74 #include <sys/file_internal.h>
75 #include <sys/ubc.h>
76 #include <sys/param.h> /* for isset() */
77
78 #include <mach/mach_host.h> /* for host_info() */
79 #include <libkern/OSAtomic.h>
80
81 #include <machine/pal_routines.h>
82 #include <machine/atomic.h>
83
84 /*
85 * IOP(s)
86 *
87 * https://coreoswiki.apple.com/wiki/pages/U6z3i0q9/Consistent_Logging_Implementers_Guide.html
88 *
89 * IOP(s) are auxiliary cores that want to participate in kdebug event logging.
90 * They are registered dynamically. Each is assigned a cpu_id at registration.
91 *
92 * NOTE: IOP trace events may not use the same clock hardware as "normal"
93 * cpus. There is an effort made to synchronize the IOP timebase with the
94 * AP, but it should be understood that there may be discrepancies.
95 *
96 * Once registered, an IOP is permanent; it cannot be unloaded or unregistered.
97 * The current implementation depends on this for thread safety.
98 *
99 * New registrations occur by allocating a kd_iop struct and assigning
100 * a provisional cpu_id of list_head->cpu_id + 1. Then a CAS to claim the
101 * list_head pointer resolves any races.
102 *
103 * You may safely walk the kd_iops list at any time, without holding locks.
104 *
105 * When allocating buffers, the current kd_iops head is captured. Any operations
106 * that depend on the buffer state (such as flushing IOP traces on reads,
107 * etc.) should use the captured list head. This will allow registrations to
108 * take place while trace is in use.
109 */
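/*
 * Illustrative sketch (not part of xnu): roughly how a hypothetical IOP
 * driver would use the interfaces described above. Only
 * kernel_debug_register_callback() and kernel_debug_enter() are real; the
 * example_iop_* names and the callback body are invented for illustration
 * and are never compiled.
 */
#if 0
static uint32_t example_iop_cpu_id;

static void
example_iop_callback(void *context, kd_callback_type type, void *arg)
{
	(void)context;
	(void)arg;
	if (type == KD_CALLBACK_SYNC_FLUSH) {
		/* push any events still buffered on the IOP into kdebug now */
	}
}

static void
example_iop_register(void)
{
	kd_callback_t cb = {
		.func = example_iop_callback,
		.context = NULL,
		.iop_name = "ExIOP",
	};
	/* the returned cpu_id tags every event this IOP emits */
	example_iop_cpu_id = kernel_debug_register_callback(cb);
}

static void
example_iop_emit(uint32_t debugid, uint64_t timestamp)
{
	kernel_debug_enter(example_iop_cpu_id, debugid, timestamp,
	    0, 0, 0, 0, 0 /* threadid */);
}
#endif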
110
111 typedef struct kd_iop {
112 kd_callback_t callback;
113 uint32_t cpu_id;
114 uint64_t last_timestamp; /* Prevent timer rollback */
115 struct kd_iop* next;
116 } kd_iop_t;
117
118 static kd_iop_t* kd_iops = NULL;
119
120 /*
121 * Typefilter(s)
122 *
123 * A typefilter is an 8KB bitmap that is used to selectively filter events
124 * being recorded. It is able to individually address every class & subclass.
125 *
126 * There is a shared typefilter in the kernel which is lazily allocated. Once
127 * allocated, the shared typefilter is never deallocated. The shared typefilter
128 * is also mapped on demand into userspace processes that invoke kdebug_trace
129 * API from Libsyscall. When mapped into a userspace process, the memory is
130 * read only, and does not have a fixed address.
131 *
132 * It is a requirement that the kernel's shared typefilter always pass DBG_TRACE
133 * events. This is enforced automatically, by having the needed bits set any
134 * time the shared typefilter is mutated.
135 */
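/*
 * Worked example of the bitmap layout (inferred from the helpers below): the
 * filter is indexed by the 16-bit class/subclass code of an event, so
 * 256 classes * 256 subclasses = 65536 bits = 8KB. For instance,
 * MACH_vmfault (0x01300008) has class 0x01 and subclass 0x30, so it is
 * governed by bit (0x01 << 8) | 0x30 = 0x130; typefilter_allow_class() sets
 * all 256 bits (32 bytes) belonging to a single class at once.
 */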
136
137 typedef uint8_t* typefilter_t;
138
139 static typefilter_t kdbg_typefilter;
140 static mach_port_t kdbg_typefilter_memory_entry;
141
142 /*
143 * There are 3 combinations of page sizes:
144 *
145 * 4KB / 4KB
146 * 4KB / 16KB
147 * 16KB / 16KB
148 *
149 * The typefilter is exactly 8KB. In the first two scenarios, we would like
150 * to use 2 pages exactly; in the third scenario we must make certain that
151 * a full page is allocated so we do not inadvertently expose 8KB of random
152 * data to userspace. The round_page_32 macro rounds to the kernel page size.
153 */
154 #define TYPEFILTER_ALLOC_SIZE MAX(round_page_32(KDBG_TYPEFILTER_BITMAP_SIZE), KDBG_TYPEFILTER_BITMAP_SIZE)
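/*
 * Worked example of the sizing above: with 4KB kernel pages,
 * round_page_32(8KB) == 8KB and TYPEFILTER_ALLOC_SIZE is exactly two pages;
 * with 16KB kernel pages it rounds up to 16KB, and typefilter_create()
 * zeroes the 8KB tail so no uninitialized kernel memory is ever mapped
 * read-only into userspace.
 */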
155
156 static typefilter_t typefilter_create(void)
157 {
158 typefilter_t tf;
159 if (KERN_SUCCESS == kmem_alloc(kernel_map, (vm_offset_t*)&tf, TYPEFILTER_ALLOC_SIZE, VM_KERN_MEMORY_DIAG)) {
160 memset(&tf[KDBG_TYPEFILTER_BITMAP_SIZE], 0, TYPEFILTER_ALLOC_SIZE - KDBG_TYPEFILTER_BITMAP_SIZE);
161 return tf;
162 }
163 return NULL;
164 }
165
166 static void typefilter_deallocate(typefilter_t tf)
167 {
168 assert(tf);
169 assert(tf != kdbg_typefilter);
170 kmem_free(kernel_map, (vm_offset_t)tf, TYPEFILTER_ALLOC_SIZE);
171 }
172
173 static void typefilter_copy(typefilter_t dst, typefilter_t src)
174 {
175 assert(src);
176 assert(dst);
177 memcpy(dst, src, KDBG_TYPEFILTER_BITMAP_SIZE);
178 }
179
180 static void typefilter_reject_all(typefilter_t tf)
181 {
182 assert(tf);
183 memset(tf, 0, KDBG_TYPEFILTER_BITMAP_SIZE);
184 }
185
186 static void typefilter_allow_class(typefilter_t tf, uint8_t class)
187 {
188 assert(tf);
189 const uint32_t BYTES_PER_CLASS = 256 / 8; // 256 subclasses, 1 bit each
190 memset(&tf[class * BYTES_PER_CLASS], 0xFF, BYTES_PER_CLASS);
191 }
192
193 static void typefilter_allow_csc(typefilter_t tf, uint16_t csc)
194 {
195 assert(tf);
196 setbit(tf, csc);
197 }
198
199 static boolean_t typefilter_is_debugid_allowed(typefilter_t tf, uint32_t id)
200 {
201 assert(tf);
202 return isset(tf, KDBG_EXTRACT_CSC(id));
203 }
204
205 static mach_port_t typefilter_create_memory_entry(typefilter_t tf)
206 {
207 assert(tf);
208
209 mach_port_t memory_entry = MACH_PORT_NULL;
210 memory_object_size_t size = TYPEFILTER_ALLOC_SIZE;
211
212 mach_make_memory_entry_64(kernel_map,
213 &size,
214 (memory_object_offset_t)tf,
215 VM_PROT_READ,
216 &memory_entry,
217 MACH_PORT_NULL);
218
219 return memory_entry;
220 }
221
222 static int kdbg_copyin_typefilter(user_addr_t addr, size_t size);
223 static void kdbg_enable_typefilter(void);
224 static void kdbg_disable_typefilter(void);
225
226 /*
227 * External prototypes
228 */
229
230 void task_act_iterate_wth_args(task_t, void(*)(thread_t, void *), void *);
231 int cpu_number(void); /* XXX <machine/...> include path broken */
232 void commpage_update_kdebug_state(void); /* XXX sign */
233
234 extern int log_leaks;
235 extern boolean_t kdebug_serial;
236
237 #if KDEBUG_MOJO_TRACE
238 #include <sys/kdebugevents.h>
239 static void kdebug_serial_print( /* forward */
240 uint32_t, uint32_t, uint64_t,
241 uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
242 #endif
243
244 int kdbg_control(int *, u_int, user_addr_t, size_t *);
245
246 static int kdbg_read(user_addr_t, size_t *, vnode_t, vfs_context_t, uint32_t);
247 static int kdbg_readcpumap(user_addr_t, size_t *);
248 static int kdbg_readthrmap_v3(user_addr_t, size_t, int);
249 static int kdbg_readcurthrmap(user_addr_t, size_t *);
250 static int kdbg_setreg(kd_regtype *);
251 static int kdbg_setpidex(kd_regtype *);
252 static int kdbg_setpid(kd_regtype *);
253 static void kdbg_thrmap_init(void);
254 static int kdbg_reinit(boolean_t);
255 static int kdbg_bootstrap(boolean_t);
256 static int kdbg_test(void);
257
258 static int kdbg_write_v1_header(boolean_t write_thread_map, vnode_t vp, vfs_context_t ctx);
259 static int kdbg_write_thread_map(vnode_t vp, vfs_context_t ctx);
260 static int kdbg_copyout_thread_map(user_addr_t buffer, size_t *buffer_size);
261 static void kdbg_clear_thread_map(void);
262
263 static boolean_t kdbg_wait(uint64_t timeout_ms, boolean_t locked_wait);
264 static void kdbg_wakeup(void);
265
266 int kdbg_cpumap_init_internal(kd_iop_t* iops, uint32_t cpu_count,
267 uint8_t** cpumap, uint32_t* cpumap_size);
268
269 static kd_threadmap *kdbg_thrmap_init_internal(unsigned int count,
270 unsigned int *mapsize,
271 unsigned int *mapcount);
272
273 static boolean_t kdebug_current_proc_enabled(uint32_t debugid);
274 boolean_t kdebug_debugid_enabled(uint32_t debugid);
275 static errno_t kdebug_check_trace_string(uint32_t debugid, uint64_t str_id);
276
277 int kdbg_write_v3_header(user_addr_t, size_t *, int);
278 int kdbg_write_v3_chunk_header(user_addr_t buffer, uint32_t tag,
279 uint32_t sub_tag, uint64_t length,
280 vnode_t vp, vfs_context_t ctx);
281
282 user_addr_t kdbg_write_v3_event_chunk_header(user_addr_t buffer, uint32_t tag,
283 uint64_t length, vnode_t vp,
284 vfs_context_t ctx);
285
286 // Helper functions
287
288 static int create_buffers(boolean_t);
289 static void delete_buffers(void);
290
291 extern int tasks_count;
292 extern int threads_count;
293 extern char *proc_best_name(proc_t p);
294 extern void IOSleep(int);
295
296 /* trace enable status */
297 unsigned int kdebug_enable = 0;
298
299 /* A static buffer to record events prior to the start of regular logging */
300 #define KD_EARLY_BUFFER_MAX 64
301 static kd_buf kd_early_buffer[KD_EARLY_BUFFER_MAX];
302 static int kd_early_index = 0;
303 static boolean_t kd_early_overflow = FALSE;
304
305 #define SLOW_NOLOG 0x01
306 #define SLOW_CHECKS 0x02
307
308 #define EVENTS_PER_STORAGE_UNIT 2048
309 #define MIN_STORAGE_UNITS_PER_CPU 4
310
311 #define POINTER_FROM_KDS_PTR(x) (&kd_bufs[x.buffer_index].kdsb_addr[x.offset])
312
313 union kds_ptr {
314 struct {
315 uint32_t buffer_index:21;
316 uint16_t offset:11;
317 };
318 uint32_t raw;
319 };
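/*
 * Worked example: a kds_ptr with buffer_index == 2 and offset == 5 names the
 * sixth kd_storage element of the third storage buffer, i.e.
 * POINTER_FROM_KDS_PTR() resolves it to &kd_bufs[2].kdsb_addr[5]. The
 * all-ones raw value (KDS_PTR_NULL, below) is reserved to mean "no storage
 * unit".
 */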
320
321 struct kd_storage {
322 union kds_ptr kds_next;
323 uint32_t kds_bufindx;
324 uint32_t kds_bufcnt;
325 uint32_t kds_readlast;
326 boolean_t kds_lostevents;
327 uint64_t kds_timestamp;
328
329 kd_buf kds_records[EVENTS_PER_STORAGE_UNIT];
330 };
331
332 #define MAX_BUFFER_SIZE (1024 * 1024 * 128)
333 #define N_STORAGE_UNITS_PER_BUFFER (MAX_BUFFER_SIZE / sizeof(struct kd_storage))
334
335 struct kd_storage_buffers {
336 struct kd_storage *kdsb_addr;
337 uint32_t kdsb_size;
338 };
339
340 #define KDS_PTR_NULL 0xffffffff
341 struct kd_storage_buffers *kd_bufs = NULL;
342 int n_storage_units = 0;
343 int n_storage_buffers = 0;
344 int n_storage_threshold = 0;
345 int kds_waiter = 0;
346
347 #pragma pack(0)
348 struct kd_bufinfo {
349 union kds_ptr kd_list_head;
350 union kds_ptr kd_list_tail;
351 boolean_t kd_lostevents;
352 uint32_t _pad;
353 uint64_t kd_prev_timebase;
354 uint32_t num_bufs;
355 } __attribute__(( aligned(MAX_CPU_CACHE_LINE_SIZE) ));
356
357
358 /*
359 * In principle, this control block can be shared in DRAM with other
360 * coprocessors and runtimes, for configuring what tracing is enabled.
361 */
362 struct kd_ctrl_page_t {
363 union kds_ptr kds_free_list;
364 uint32_t enabled :1;
365 uint32_t _pad0 :31;
366 int kds_inuse_count;
367 uint32_t kdebug_flags;
368 uint32_t kdebug_slowcheck;
369 uint64_t oldest_time;
370 /*
371 * The number of kd_bufinfo structs allocated may not match the current
372 * number of active cpus. We capture the iops list head at initialization
373 * which we could use to calculate the number of cpus we allocated data for,
374 * unless it happens to be null. To avoid that case, we explicitly also
375 * capture a cpu count.
376 */
377 kd_iop_t* kdebug_iops;
378 uint32_t kdebug_cpus;
379 } kd_ctrl_page = {
380 .kds_free_list = {.raw = KDS_PTR_NULL},
381 .kdebug_slowcheck = SLOW_NOLOG,
382 .oldest_time = 0
383 };
384
385 #pragma pack()
386
387 struct kd_bufinfo *kdbip = NULL;
388
389 #define KDCOPYBUF_COUNT 8192
390 #define KDCOPYBUF_SIZE (KDCOPYBUF_COUNT * sizeof(kd_buf))
391
392 #define PAGE_4KB 4096
393 #define PAGE_16KB 16384
394
395 kd_buf *kdcopybuf = NULL;
396
397 unsigned int nkdbufs = 0;
398 unsigned int kdlog_beg=0;
399 unsigned int kdlog_end=0;
400 unsigned int kdlog_value1=0;
401 unsigned int kdlog_value2=0;
402 unsigned int kdlog_value3=0;
403 unsigned int kdlog_value4=0;
404
405 static lck_spin_t * kdw_spin_lock;
406 static lck_spin_t * kds_spin_lock;
407
408 kd_threadmap *kd_mapptr = 0;
409 unsigned int kd_mapsize = 0;
410 unsigned int kd_mapcount = 0;
411
412 off_t RAW_file_offset = 0;
413 int RAW_file_written = 0;
414
415 #define RAW_FLUSH_SIZE (2 * 1024 * 1024)
416
417 /*
418 * A globally increasing counter for identifying strings in trace. Starts at
419 * 1 because 0 is a reserved return value.
420 */
421 __attribute__((aligned(MAX_CPU_CACHE_LINE_SIZE)))
422 static uint64_t g_curr_str_id = 1;
423
424 #define STR_ID_SIG_OFFSET (48)
425 #define STR_ID_MASK ((1ULL << STR_ID_SIG_OFFSET) - 1)
426 #define STR_ID_SIG_MASK (~STR_ID_MASK)
427
428 /*
429 * A bit pattern for identifying string IDs generated by
430 * kdebug_trace_string(2).
431 */
432 static uint64_t g_str_id_signature = (0x70acULL << STR_ID_SIG_OFFSET);
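/*
 * Worked example of the ID layout: kernel_debug_string_internal() builds a
 * new ID as
 *
 *	str_id = (OSIncrementAtomic64((SInt64 *)&g_curr_str_id) & STR_ID_MASK)
 *	    | g_str_id_signature;
 *
 * so the low 48 bits carry the counter and the top 16 bits carry the 0x70ac
 * signature that kdebug_check_trace_string() later verifies.
 */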
433
434 #define INTERRUPT 0x01050000
435 #define MACH_vmfault 0x01300008
436 #define BSC_SysCall 0x040c0000
437 #define MACH_SysCall 0x010c0000
438
439 /* task to string structure */
440 struct tts
441 {
442 task_t task; /* from procs task */
443 pid_t pid; /* from procs p_pid */
444 char task_comm[20]; /* from procs p_comm */
445 };
446
447 typedef struct tts tts_t;
448
449 struct krt
450 {
451 kd_threadmap *map; /* pointer to the map buffer */
452 int count;
453 int maxcount;
454 struct tts *atts;
455 };
456
457 typedef struct krt krt_t;
458
459 static uint32_t
460 kdbg_cpu_count(boolean_t early_trace)
461 {
462 if (early_trace) {
463 return max_ncpus;
464 }
465
466 host_basic_info_data_t hinfo;
467 mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
468 host_info((host_t)1 /* BSD_HOST */, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
469 assert(hinfo.logical_cpu_max > 0);
470 return hinfo.logical_cpu_max;
471 }
472
473 #if MACH_ASSERT
474 #endif /* MACH_ASSERT */
475
476 static void
477 kdbg_iop_list_callback(kd_iop_t* iop, kd_callback_type type, void* arg)
478 {
479 while (iop) {
480 iop->callback.func(iop->callback.context, type, arg);
481 iop = iop->next;
482 }
483 }
484
485 static void
486 kdbg_set_tracing_enabled(boolean_t enabled, uint32_t trace_type)
487 {
488 int s = ml_set_interrupts_enabled(FALSE);
489 lck_spin_lock(kds_spin_lock);
490 if (enabled) {
491 kdebug_enable |= trace_type;
492 kd_ctrl_page.kdebug_slowcheck &= ~SLOW_NOLOG;
493 kd_ctrl_page.enabled = 1;
494 commpage_update_kdebug_state();
495 } else {
496 kdebug_enable &= ~(KDEBUG_ENABLE_TRACE|KDEBUG_ENABLE_PPT);
497 kd_ctrl_page.kdebug_slowcheck |= SLOW_NOLOG;
498 kd_ctrl_page.enabled = 0;
499 commpage_update_kdebug_state();
500 }
501 lck_spin_unlock(kds_spin_lock);
502 ml_set_interrupts_enabled(s);
503
504 if (enabled) {
505 kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_KDEBUG_ENABLED, NULL);
506 } else {
507 /*
508 * If you do not flush the IOP trace buffers, they can linger
509 * for a considerable period; consider code which disables and
510 * deallocates without a final sync flush.
511 */
512 kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_KDEBUG_DISABLED, NULL);
513 kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_SYNC_FLUSH, NULL);
514 }
515 }
516
517 static void
518 kdbg_set_flags(int slowflag, int enableflag, boolean_t enabled)
519 {
520 int s = ml_set_interrupts_enabled(FALSE);
521 lck_spin_lock(kds_spin_lock);
522
523 if (enabled) {
524 kd_ctrl_page.kdebug_slowcheck |= slowflag;
525 kdebug_enable |= enableflag;
526 } else {
527 kd_ctrl_page.kdebug_slowcheck &= ~slowflag;
528 kdebug_enable &= ~enableflag;
529 }
530
531 lck_spin_unlock(kds_spin_lock);
532 ml_set_interrupts_enabled(s);
533 }
534
535 /*
536 * Disable wrapping and return true if trace wrapped, false otherwise.
537 */
538 boolean_t
539 disable_wrap(uint32_t *old_slowcheck, uint32_t *old_flags)
540 {
541 boolean_t wrapped;
542 int s = ml_set_interrupts_enabled(FALSE);
543 lck_spin_lock(kds_spin_lock);
544
545 *old_slowcheck = kd_ctrl_page.kdebug_slowcheck;
546 *old_flags = kd_ctrl_page.kdebug_flags;
547
548 wrapped = kd_ctrl_page.kdebug_flags & KDBG_WRAPPED;
549 kd_ctrl_page.kdebug_flags &= ~KDBG_WRAPPED;
550 kd_ctrl_page.kdebug_flags |= KDBG_NOWRAP;
551
552 lck_spin_unlock(kds_spin_lock);
553 ml_set_interrupts_enabled(s);
554
555 return wrapped;
556 }
557
558 void
559 enable_wrap(uint32_t old_slowcheck, boolean_t lostevents)
560 {
561 int s = ml_set_interrupts_enabled(FALSE);
562 lck_spin_lock(kds_spin_lock);
563
564 kd_ctrl_page.kdebug_flags &= ~KDBG_NOWRAP;
565
566 if ( !(old_slowcheck & SLOW_NOLOG))
567 kd_ctrl_page.kdebug_slowcheck &= ~SLOW_NOLOG;
568
569 if (lostevents == TRUE)
570 kd_ctrl_page.kdebug_flags |= KDBG_WRAPPED;
571
572 lck_spin_unlock(kds_spin_lock);
573 ml_set_interrupts_enabled(s);
574 }
575
576 static int
577 create_buffers(boolean_t early_trace)
578 {
579 int i;
580 int p_buffer_size;
581 int f_buffer_size;
582 int f_buffers;
583 int error = 0;
584
585 /*
586 * For the duration of this allocation, trace code will only reference
587 * kdebug_iops. Any iops registered after this enabling will not be
588 * messaged until the buffers are reallocated.
589 *
590 * TLDR; Must read kd_iops once and only once!
591 */
592 kd_ctrl_page.kdebug_iops = kd_iops;
593
594
595 /*
596 * If the list is valid, it is sorted, newest -> oldest. Each iop entry
597 * has a cpu_id of "the older entry + 1", so the highest cpu_id will
598 * be the list head + 1.
599 */
600
601 kd_ctrl_page.kdebug_cpus = kd_ctrl_page.kdebug_iops ? kd_ctrl_page.kdebug_iops->cpu_id + 1 : kdbg_cpu_count(early_trace);
602
603 if (kmem_alloc(kernel_map, (vm_offset_t *)&kdbip, sizeof(struct kd_bufinfo) * kd_ctrl_page.kdebug_cpus, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
604 error = ENOSPC;
605 goto out;
606 }
607
608 if (nkdbufs < (kd_ctrl_page.kdebug_cpus * EVENTS_PER_STORAGE_UNIT * MIN_STORAGE_UNITS_PER_CPU))
609 n_storage_units = kd_ctrl_page.kdebug_cpus * MIN_STORAGE_UNITS_PER_CPU;
610 else
611 n_storage_units = nkdbufs / EVENTS_PER_STORAGE_UNIT;
612
613 nkdbufs = n_storage_units * EVENTS_PER_STORAGE_UNIT;
614
615 f_buffers = n_storage_units / N_STORAGE_UNITS_PER_BUFFER;
616 n_storage_buffers = f_buffers;
617
618 f_buffer_size = N_STORAGE_UNITS_PER_BUFFER * sizeof(struct kd_storage);
619 p_buffer_size = (n_storage_units % N_STORAGE_UNITS_PER_BUFFER) * sizeof(struct kd_storage);
620
621 if (p_buffer_size)
622 n_storage_buffers++;
623
624 kd_bufs = NULL;
625
626 if (kdcopybuf == 0) {
627 if (kmem_alloc(kernel_map, (vm_offset_t *)&kdcopybuf, (vm_size_t)KDCOPYBUF_SIZE, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
628 error = ENOSPC;
629 goto out;
630 }
631 }
632 if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs, (vm_size_t)(n_storage_buffers * sizeof(struct kd_storage_buffers)), VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
633 error = ENOSPC;
634 goto out;
635 }
636 bzero(kd_bufs, n_storage_buffers * sizeof(struct kd_storage_buffers));
637
638 for (i = 0; i < f_buffers; i++) {
639 if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs[i].kdsb_addr, (vm_size_t)f_buffer_size, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
640 error = ENOSPC;
641 goto out;
642 }
643 bzero(kd_bufs[i].kdsb_addr, f_buffer_size);
644
645 kd_bufs[i].kdsb_size = f_buffer_size;
646 }
647 if (p_buffer_size) {
648 if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs[i].kdsb_addr, (vm_size_t)p_buffer_size, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
649 error = ENOSPC;
650 goto out;
651 }
652 bzero(kd_bufs[i].kdsb_addr, p_buffer_size);
653
654 kd_bufs[i].kdsb_size = p_buffer_size;
655 }
656 n_storage_units = 0;
657
658 for (i = 0; i < n_storage_buffers; i++) {
659 struct kd_storage *kds;
660 int n_elements;
661 int n;
662
663 n_elements = kd_bufs[i].kdsb_size / sizeof(struct kd_storage);
664 kds = kd_bufs[i].kdsb_addr;
665
666 for (n = 0; n < n_elements; n++) {
667 kds[n].kds_next.buffer_index = kd_ctrl_page.kds_free_list.buffer_index;
668 kds[n].kds_next.offset = kd_ctrl_page.kds_free_list.offset;
669
670 kd_ctrl_page.kds_free_list.buffer_index = i;
671 kd_ctrl_page.kds_free_list.offset = n;
672 }
673 n_storage_units += n_elements;
674 }
675
676 bzero((char *)kdbip, sizeof(struct kd_bufinfo) * kd_ctrl_page.kdebug_cpus);
677
678 for (i = 0; i < (int)kd_ctrl_page.kdebug_cpus; i++) {
679 kdbip[i].kd_list_head.raw = KDS_PTR_NULL;
680 kdbip[i].kd_list_tail.raw = KDS_PTR_NULL;
681 kdbip[i].kd_lostevents = FALSE;
682 kdbip[i].num_bufs = 0;
683 }
684
685 kd_ctrl_page.kdebug_flags |= KDBG_BUFINIT;
686
687 kd_ctrl_page.kds_inuse_count = 0;
688 n_storage_threshold = n_storage_units / 2;
689 out:
690 if (error)
691 delete_buffers();
692
693 return(error);
694 }
695
696 static void
697 delete_buffers(void)
698 {
699 int i;
700
701 if (kd_bufs) {
702 for (i = 0; i < n_storage_buffers; i++) {
703 if (kd_bufs[i].kdsb_addr) {
704 kmem_free(kernel_map, (vm_offset_t)kd_bufs[i].kdsb_addr, (vm_size_t)kd_bufs[i].kdsb_size);
705 }
706 }
707 kmem_free(kernel_map, (vm_offset_t)kd_bufs, (vm_size_t)(n_storage_buffers * sizeof(struct kd_storage_buffers)));
708
709 kd_bufs = NULL;
710 n_storage_buffers = 0;
711 }
712 if (kdcopybuf) {
713 kmem_free(kernel_map, (vm_offset_t)kdcopybuf, KDCOPYBUF_SIZE);
714
715 kdcopybuf = NULL;
716 }
717 kd_ctrl_page.kds_free_list.raw = KDS_PTR_NULL;
718
719 if (kdbip) {
720 kmem_free(kernel_map, (vm_offset_t)kdbip, sizeof(struct kd_bufinfo) * kd_ctrl_page.kdebug_cpus);
721
722 kdbip = NULL;
723 }
724 kd_ctrl_page.kdebug_iops = NULL;
725 kd_ctrl_page.kdebug_cpus = 0;
726 kd_ctrl_page.kdebug_flags &= ~KDBG_BUFINIT;
727 }
728
729 void
730 release_storage_unit(int cpu, uint32_t kdsp_raw)
731 {
732 int s = 0;
733 struct kd_storage *kdsp_actual;
734 struct kd_bufinfo *kdbp;
735 union kds_ptr kdsp;
736
737 kdsp.raw = kdsp_raw;
738
739 s = ml_set_interrupts_enabled(FALSE);
740 lck_spin_lock(kds_spin_lock);
741
742 kdbp = &kdbip[cpu];
743
744 if (kdsp.raw == kdbp->kd_list_head.raw) {
745 /*
746 * it's possible for the storage unit pointed to
747 * by kdsp to have already been stolen... so
748 * check to see if it's still the head of the list
749 * now that we're behind the lock that protects
750 * adding and removing from the queue...
751 * since we only ever release and steal units from
752 * that position, if it's no longer the head
753 * we have nothing to do in this context
754 */
755 kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
756 kdbp->kd_list_head = kdsp_actual->kds_next;
757
758 kdsp_actual->kds_next = kd_ctrl_page.kds_free_list;
759 kd_ctrl_page.kds_free_list = kdsp;
760
761 kd_ctrl_page.kds_inuse_count--;
762 }
763 lck_spin_unlock(kds_spin_lock);
764 ml_set_interrupts_enabled(s);
765 }
766
767
768 boolean_t
769 allocate_storage_unit(int cpu)
770 {
771 union kds_ptr kdsp;
772 struct kd_storage *kdsp_actual, *kdsp_next_actual;
773 struct kd_bufinfo *kdbp, *kdbp_vict, *kdbp_try;
774 uint64_t oldest_ts, ts;
775 boolean_t retval = TRUE;
776 int s = 0;
777
778 s = ml_set_interrupts_enabled(FALSE);
779 lck_spin_lock(kds_spin_lock);
780
781 kdbp = &kdbip[cpu];
782
783 /* If someone beat us to the allocation, return success */
784 if (kdbp->kd_list_tail.raw != KDS_PTR_NULL) {
785 kdsp_actual = POINTER_FROM_KDS_PTR(kdbp->kd_list_tail);
786
787 if (kdsp_actual->kds_bufindx < EVENTS_PER_STORAGE_UNIT)
788 goto out;
789 }
790
791 if ((kdsp = kd_ctrl_page.kds_free_list).raw != KDS_PTR_NULL) {
792 kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
793 kd_ctrl_page.kds_free_list = kdsp_actual->kds_next;
794
795 kd_ctrl_page.kds_inuse_count++;
796 } else {
797 if (kd_ctrl_page.kdebug_flags & KDBG_NOWRAP) {
798 kd_ctrl_page.kdebug_slowcheck |= SLOW_NOLOG;
799 kdbp->kd_lostevents = TRUE;
800 retval = FALSE;
801 goto out;
802 }
803 kdbp_vict = NULL;
804 oldest_ts = UINT64_MAX;
805
806 for (kdbp_try = &kdbip[0]; kdbp_try < &kdbip[kd_ctrl_page.kdebug_cpus]; kdbp_try++) {
807
808 if (kdbp_try->kd_list_head.raw == KDS_PTR_NULL) {
809 /*
810 * no storage unit to steal
811 */
812 continue;
813 }
814
815 kdsp_actual = POINTER_FROM_KDS_PTR(kdbp_try->kd_list_head);
816
817 if (kdsp_actual->kds_bufcnt < EVENTS_PER_STORAGE_UNIT) {
818 /*
819 * make sure we don't steal the storage unit
820 * being actively recorded to... need to
821 * move on because we don't want an out-of-order
822 * set of events showing up later
823 */
824 continue;
825 }
826
827 /*
828 * When wrapping, steal the storage unit with the
829 * earliest timestamp on its last event, instead of the
830 * earliest timestamp on the first event. This allows a
831 * storage unit with more recent events to be preserved,
832 * even if the storage unit contains events that are
833 * older than those found in other CPUs.
834 */
835 ts = kdbg_get_timestamp(&kdsp_actual->kds_records[EVENTS_PER_STORAGE_UNIT - 1]);
836 if (ts < oldest_ts) {
837 oldest_ts = ts;
838 kdbp_vict = kdbp_try;
839 }
840 }
841 if (kdbp_vict == NULL) {
842 kdebug_enable = 0;
843 kd_ctrl_page.enabled = 0;
844 commpage_update_kdebug_state();
845 retval = FALSE;
846 goto out;
847 }
848 kdsp = kdbp_vict->kd_list_head;
849 kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
850 kdbp_vict->kd_list_head = kdsp_actual->kds_next;
851
852 if (kdbp_vict->kd_list_head.raw != KDS_PTR_NULL) {
853 kdsp_next_actual = POINTER_FROM_KDS_PTR(kdbp_vict->kd_list_head);
854 kdsp_next_actual->kds_lostevents = TRUE;
855 } else
856 kdbp_vict->kd_lostevents = TRUE;
857
858 kd_ctrl_page.oldest_time = oldest_ts;
859 kd_ctrl_page.kdebug_flags |= KDBG_WRAPPED;
860 }
861 kdsp_actual->kds_timestamp = mach_absolute_time();
862 kdsp_actual->kds_next.raw = KDS_PTR_NULL;
863 kdsp_actual->kds_bufcnt = 0;
864 kdsp_actual->kds_readlast = 0;
865
866 kdsp_actual->kds_lostevents = kdbp->kd_lostevents;
867 kdbp->kd_lostevents = FALSE;
868 kdsp_actual->kds_bufindx = 0;
869
870 if (kdbp->kd_list_head.raw == KDS_PTR_NULL)
871 kdbp->kd_list_head = kdsp;
872 else
873 POINTER_FROM_KDS_PTR(kdbp->kd_list_tail)->kds_next = kdsp;
874 kdbp->kd_list_tail = kdsp;
875 out:
876 lck_spin_unlock(kds_spin_lock);
877 ml_set_interrupts_enabled(s);
878
879 return (retval);
880 }
881
882 int
883 kernel_debug_register_callback(kd_callback_t callback)
884 {
885 kd_iop_t* iop;
886 if (kmem_alloc(kernel_map, (vm_offset_t *)&iop, sizeof(kd_iop_t), VM_KERN_MEMORY_DIAG) == KERN_SUCCESS) {
887 memcpy(&iop->callback, &callback, sizeof(kd_callback_t));
888
889 /*
890 * <rdar://problem/13351477> Some IOP clients are not providing a name.
891 *
892 * Remove when fixed.
893 */
894 {
895 boolean_t is_valid_name = FALSE;
896 for (uint32_t length=0; length<sizeof(callback.iop_name); ++length) {
897 /* This is roughly isprintable(c) */
898 if (callback.iop_name[length] > 0x20 && callback.iop_name[length] < 0x7F)
899 continue;
900 if (callback.iop_name[length] == 0) {
901 if (length)
902 is_valid_name = TRUE;
903 break;
904 }
905 }
906
907 if (!is_valid_name) {
908 strlcpy(iop->callback.iop_name, "IOP-???", sizeof(iop->callback.iop_name));
909 }
910 }
911
912 iop->last_timestamp = 0;
913
914 do {
915 /*
916 * We use two pieces of state, the old list head
917 * pointer, and the value of old_list_head->cpu_id.
918 * If we read kd_iops more than once, it can change
919 * between reads.
920 *
921 * TLDR; Must not read kd_iops more than once per loop.
922 */
923 iop->next = kd_iops;
924 iop->cpu_id = iop->next ? (iop->next->cpu_id+1) : kdbg_cpu_count(FALSE);
925
926 /*
927 * Header says OSCompareAndSwapPtr has a memory barrier
928 */
929 } while (!OSCompareAndSwapPtr(iop->next, iop, (void* volatile*)&kd_iops));
930
931 return iop->cpu_id;
932 }
933
934 return 0;
935 }
936
937 void
938 kernel_debug_enter(
939 uint32_t coreid,
940 uint32_t debugid,
941 uint64_t timestamp,
942 uintptr_t arg1,
943 uintptr_t arg2,
944 uintptr_t arg3,
945 uintptr_t arg4,
946 uintptr_t threadid
947 )
948 {
949 uint32_t bindx;
950 kd_buf *kd;
951 struct kd_bufinfo *kdbp;
952 struct kd_storage *kdsp_actual;
953 union kds_ptr kds_raw;
954
955 if (kd_ctrl_page.kdebug_slowcheck) {
956
957 if ( (kd_ctrl_page.kdebug_slowcheck & SLOW_NOLOG) || !(kdebug_enable & (KDEBUG_ENABLE_TRACE|KDEBUG_ENABLE_PPT)))
958 goto out1;
959
960 if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) {
961 if (typefilter_is_debugid_allowed(kdbg_typefilter, debugid))
962 goto record_event;
963 goto out1;
964 }
965 else if (kd_ctrl_page.kdebug_flags & KDBG_RANGECHECK) {
966 if (debugid >= kdlog_beg && debugid <= kdlog_end)
967 goto record_event;
968 goto out1;
969 }
970 else if (kd_ctrl_page.kdebug_flags & KDBG_VALCHECK) {
971 if ((debugid & KDBG_EVENTID_MASK) != kdlog_value1 &&
972 (debugid & KDBG_EVENTID_MASK) != kdlog_value2 &&
973 (debugid & KDBG_EVENTID_MASK) != kdlog_value3 &&
974 (debugid & KDBG_EVENTID_MASK) != kdlog_value4)
975 goto out1;
976 }
977 }
978
979 if (kd_ctrl_page.kdebug_flags & KDBG_WRAPPED) {
980 if (timestamp < kd_ctrl_page.oldest_time) {
981 goto out1;
982 }
983 }
984
985 record_event:
986
987 disable_preemption();
988
989 if (kd_ctrl_page.enabled == 0)
990 goto out;
991
992 kdbp = &kdbip[coreid];
993 timestamp &= KDBG_TIMESTAMP_MASK;
994
995 #if KDEBUG_MOJO_TRACE
996 if (kdebug_enable & KDEBUG_ENABLE_SERIAL)
997 kdebug_serial_print(coreid, debugid, timestamp,
998 arg1, arg2, arg3, arg4, threadid);
999 #endif
1000
1001 retry_q:
1002 kds_raw = kdbp->kd_list_tail;
1003
1004 if (kds_raw.raw != KDS_PTR_NULL) {
1005 kdsp_actual = POINTER_FROM_KDS_PTR(kds_raw);
1006 bindx = kdsp_actual->kds_bufindx;
1007 } else
1008 kdsp_actual = NULL;
1009
1010 if (kdsp_actual == NULL || bindx >= EVENTS_PER_STORAGE_UNIT) {
1011 if (allocate_storage_unit(coreid) == FALSE) {
1012 /*
1013 * this can only happen if wrapping
1014 * has been disabled
1015 */
1016 goto out;
1017 }
1018 goto retry_q;
1019 }
1020 if ( !OSCompareAndSwap(bindx, bindx + 1, &kdsp_actual->kds_bufindx))
1021 goto retry_q;
1022
1023 // IOP entries can be allocated before xnu allocates and inits the buffer
1024 if (timestamp < kdsp_actual->kds_timestamp)
1025 kdsp_actual->kds_timestamp = timestamp;
1026
1027 kd = &kdsp_actual->kds_records[bindx];
1028
1029 kd->debugid = debugid;
1030 kd->arg1 = arg1;
1031 kd->arg2 = arg2;
1032 kd->arg3 = arg3;
1033 kd->arg4 = arg4;
1034 kd->arg5 = threadid;
1035
1036 kdbg_set_timestamp_and_cpu(kd, timestamp, coreid);
1037
1038 OSAddAtomic(1, &kdsp_actual->kds_bufcnt);
1039 out:
1040 enable_preemption();
1041 out1:
1042 if ((kds_waiter && kd_ctrl_page.kds_inuse_count >= n_storage_threshold)) {
1043 kdbg_wakeup();
1044 }
1045 }
1046
1047 static void
1048 kernel_debug_internal(
1049 boolean_t only_filter,
1050 uint32_t debugid,
1051 uintptr_t arg1,
1052 uintptr_t arg2,
1053 uintptr_t arg3,
1054 uintptr_t arg4,
1055 uintptr_t arg5)
1056 {
1057 struct proc *curproc;
1058 uint64_t now;
1059 uint32_t bindx;
1060 kd_buf *kd;
1061 int cpu;
1062 struct kd_bufinfo *kdbp;
1063 struct kd_storage *kdsp_actual;
1064 union kds_ptr kds_raw;
1065
1066 if (kd_ctrl_page.kdebug_slowcheck) {
1067 if ((kd_ctrl_page.kdebug_slowcheck & SLOW_NOLOG) ||
1068 !(kdebug_enable & (KDEBUG_ENABLE_TRACE | KDEBUG_ENABLE_PPT)))
1069 {
1070 goto out1;
1071 }
1072
1073 if ( !ml_at_interrupt_context()) {
1074 if (kd_ctrl_page.kdebug_flags & KDBG_PIDCHECK) {
1075 /*
1076 * If kdebug flag is not set for current proc, return
1077 */
1078 curproc = current_proc();
1079
1080 if ((curproc && !(curproc->p_kdebug)) &&
1081 ((debugid & 0xffff0000) != (MACHDBG_CODE(DBG_MACH_SCHED, 0) | DBG_FUNC_NONE)) &&
1082 (debugid >> 24 != DBG_TRACE))
1083 goto out1;
1084 }
1085 else if (kd_ctrl_page.kdebug_flags & KDBG_PIDEXCLUDE) {
1086 /*
1087 * If kdebug flag is set for current proc, return
1088 */
1089 curproc = current_proc();
1090
1091 if ((curproc && curproc->p_kdebug) &&
1092 ((debugid & 0xffff0000) != (MACHDBG_CODE(DBG_MACH_SCHED, 0) | DBG_FUNC_NONE)) &&
1093 (debugid >> 24 != DBG_TRACE))
1094 goto out1;
1095 }
1096 }
1097
1098 if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) {
1099 if (typefilter_is_debugid_allowed(kdbg_typefilter, debugid))
1100 goto record_event;
1101
1102 goto out1;
1103 } else if (only_filter == TRUE) {
1104 goto out1;
1105 }
1106 else if (kd_ctrl_page.kdebug_flags & KDBG_RANGECHECK) {
1107 /* Always record trace system info */
1108 if (KDBG_EXTRACT_CLASS(debugid) == DBG_TRACE)
1109 goto record_event;
1110
1111 if (debugid < kdlog_beg || debugid > kdlog_end)
1112 goto out1;
1113 }
1114 else if (kd_ctrl_page.kdebug_flags & KDBG_VALCHECK) {
1115 /* Always record trace system info */
1116 if (KDBG_EXTRACT_CLASS(debugid) == DBG_TRACE)
1117 goto record_event;
1118
1119 if ((debugid & KDBG_EVENTID_MASK) != kdlog_value1 &&
1120 (debugid & KDBG_EVENTID_MASK) != kdlog_value2 &&
1121 (debugid & KDBG_EVENTID_MASK) != kdlog_value3 &&
1122 (debugid & KDBG_EVENTID_MASK) != kdlog_value4)
1123 goto out1;
1124 }
1125 } else if (only_filter == TRUE) {
1126 goto out1;
1127 }
1128
1129 record_event:
1130 disable_preemption();
1131
1132 if (kd_ctrl_page.enabled == 0)
1133 goto out;
1134
1135 cpu = cpu_number();
1136 kdbp = &kdbip[cpu];
1137
1138 #if KDEBUG_MOJO_TRACE
1139 if (kdebug_enable & KDEBUG_ENABLE_SERIAL)
1140 kdebug_serial_print(cpu, debugid,
1141 mach_absolute_time() & KDBG_TIMESTAMP_MASK,
1142 arg1, arg2, arg3, arg4, arg5);
1143 #endif
1144
1145 retry_q:
1146 kds_raw = kdbp->kd_list_tail;
1147
1148 if (kds_raw.raw != KDS_PTR_NULL) {
1149 kdsp_actual = POINTER_FROM_KDS_PTR(kds_raw);
1150 bindx = kdsp_actual->kds_bufindx;
1151 } else
1152 kdsp_actual = NULL;
1153
1154 if (kdsp_actual == NULL || bindx >= EVENTS_PER_STORAGE_UNIT) {
1155 if (allocate_storage_unit(cpu) == FALSE) {
1156 /*
1157 * this can only happen if wrapping
1158 * has been disabled
1159 */
1160 goto out;
1161 }
1162 goto retry_q;
1163 }
1164 now = mach_absolute_time() & KDBG_TIMESTAMP_MASK;
1165
1166 if ( !OSCompareAndSwap(bindx, bindx + 1, &kdsp_actual->kds_bufindx))
1167 goto retry_q;
1168
1169 kd = &kdsp_actual->kds_records[bindx];
1170
1171 kd->debugid = debugid;
1172 kd->arg1 = arg1;
1173 kd->arg2 = arg2;
1174 kd->arg3 = arg3;
1175 kd->arg4 = arg4;
1176 kd->arg5 = arg5;
1177
1178 kdbg_set_timestamp_and_cpu(kd, now, cpu);
1179
1180 OSAddAtomic(1, &kdsp_actual->kds_bufcnt);
1181
1182 #if KPERF
1183 kperf_kdebug_callback(debugid, __builtin_frame_address(0));
1184 #endif
1185 out:
1186 enable_preemption();
1187 out1:
1188 if (kds_waiter && kd_ctrl_page.kds_inuse_count >= n_storage_threshold) {
1189 uint32_t etype;
1190 uint32_t stype;
1191
1192 etype = debugid & KDBG_EVENTID_MASK;
1193 stype = debugid & KDBG_CSC_MASK;
1194
1195 if (etype == INTERRUPT || etype == MACH_vmfault ||
1196 stype == BSC_SysCall || stype == MACH_SysCall) {
1197 kdbg_wakeup();
1198 }
1199 }
1200 }
1201
1202 void
1203 kernel_debug(
1204 uint32_t debugid,
1205 uintptr_t arg1,
1206 uintptr_t arg2,
1207 uintptr_t arg3,
1208 uintptr_t arg4,
1209 __unused uintptr_t arg5)
1210 {
1211 kernel_debug_internal(FALSE, debugid, arg1, arg2, arg3, arg4,
1212 (uintptr_t)thread_tid(current_thread()));
1213 }
1214
1215 void
1216 kernel_debug1(
1217 uint32_t debugid,
1218 uintptr_t arg1,
1219 uintptr_t arg2,
1220 uintptr_t arg3,
1221 uintptr_t arg4,
1222 uintptr_t arg5)
1223 {
1224 kernel_debug_internal(FALSE, debugid, arg1, arg2, arg3, arg4, arg5);
1225 }
1226
1227 void
1228 kernel_debug_filtered(
1229 uint32_t debugid,
1230 uintptr_t arg1,
1231 uintptr_t arg2,
1232 uintptr_t arg3,
1233 uintptr_t arg4)
1234 {
1235 kernel_debug_internal(TRUE, debugid, arg1, arg2, arg3, arg4,
1236 (uintptr_t)thread_tid(current_thread()));
1237 }
1238
1239 void
1240 kernel_debug_string_early(const char *message)
1241 {
1242 uintptr_t arg[4] = {0, 0, 0, 0};
1243
1244 /* Stuff the message string in the args and log it. */
1245 strncpy((char *)arg, message, MIN(sizeof(arg), strlen(message)));
1246 KERNEL_DEBUG_EARLY(
1247 TRACE_INFO_STRING,
1248 arg[0], arg[1], arg[2], arg[3]);
1249 }
1250
1251 #define SIMPLE_STR_LEN (64)
1252 static_assert(SIMPLE_STR_LEN % sizeof(uintptr_t) == 0);
1253
1254 void
1255 kernel_debug_string_simple(uint32_t eventid, const char *str)
1256 {
1257 /* array of uintptr_ts simplifies emitting the string as arguments */
1258 uintptr_t str_buf[(SIMPLE_STR_LEN / sizeof(uintptr_t)) + 1] = { 0 };
1259 size_t len = strlcpy((char *)str_buf, str, SIMPLE_STR_LEN + 1);
1260
1261 uintptr_t thread_id = (uintptr_t)thread_tid(current_thread());
1262 uint32_t debugid = eventid | DBG_FUNC_START;
1263
1264 /* string can fit in a single tracepoint */
1265 if (len <= (4 * sizeof(uintptr_t))) {
1266 debugid |= DBG_FUNC_END;
1267 }
1268
1269 kernel_debug_internal(FALSE, debugid, str_buf[0],
1270 str_buf[1],
1271 str_buf[2],
1272 str_buf[3], thread_id);
1273
1274 debugid &= KDBG_EVENTID_MASK;
1275 int i = 4;
1276 size_t written = 4 * sizeof(uintptr_t);
1277
1278 for (; written < len; i += 4, written += 4 * sizeof(uintptr_t)) {
1279 /* if this is the last tracepoint to be emitted */
1280 if ((written + (4 * sizeof(uintptr_t))) >= len) {
1281 debugid |= DBG_FUNC_END;
1282 }
1283 kernel_debug_internal(FALSE, debugid, str_buf[i],
1284 str_buf[i + 1],
1285 str_buf[i + 2],
1286 str_buf[i + 3], thread_id);
1287 }
1288 }
1289
1290 extern int master_cpu; /* MACH_KERNEL_PRIVATE */
1291 /*
1292 * Used prior to start_kern_tracing() being called.
1293 * Log temporarily into a static buffer.
1294 */
1295 void
1296 kernel_debug_early(
1297 uint32_t debugid,
1298 uintptr_t arg1,
1299 uintptr_t arg2,
1300 uintptr_t arg3,
1301 uintptr_t arg4)
1302 {
1303 /* If tracing is already initialized, use it */
1304 if (nkdbufs) {
1305 KERNEL_DEBUG_CONSTANT(debugid, arg1, arg2, arg3, arg4, 0);
1306 return;
1307 }
1308
1309 /* Do nothing if the buffer is full or we're not on the boot cpu */
1310 kd_early_overflow = kd_early_index >= KD_EARLY_BUFFER_MAX;
1311 if (kd_early_overflow ||
1312 cpu_number() != master_cpu)
1313 return;
1314
1315 kd_early_buffer[kd_early_index].debugid = debugid;
1316 kd_early_buffer[kd_early_index].timestamp = mach_absolute_time();
1317 kd_early_buffer[kd_early_index].arg1 = arg1;
1318 kd_early_buffer[kd_early_index].arg2 = arg2;
1319 kd_early_buffer[kd_early_index].arg3 = arg3;
1320 kd_early_buffer[kd_early_index].arg4 = arg4;
1321 kd_early_buffer[kd_early_index].arg5 = 0;
1322 kd_early_index++;
1323 }
1324
1325 /*
1326 * Transfer the contents of the temporary buffer into the trace buffers.
1327 * Precede that by logging the rebase time (offset) - the TSC-based time (in ns)
1328 * when mach_absolute_time is set to 0.
1329 */
1330 static void
1331 kernel_debug_early_end(void)
1332 {
1333 int i;
1334
1335 if (cpu_number() != master_cpu)
1336 panic("kernel_debug_early_end() not call on boot processor");
1337
1338 /* Fake sentinel marking the start of kernel time relative to TSC */
1339 kernel_debug_enter(
1340 0,
1341 TRACE_TIMESTAMPS,
1342 0,
1343 (uint32_t)(tsc_rebase_abs_time >> 32),
1344 (uint32_t)tsc_rebase_abs_time,
1345 0,
1346 0,
1347 0);
1348 for (i = 0; i < kd_early_index; i++) {
1349 kernel_debug_enter(
1350 0,
1351 kd_early_buffer[i].debugid,
1352 kd_early_buffer[i].timestamp,
1353 kd_early_buffer[i].arg1,
1354 kd_early_buffer[i].arg2,
1355 kd_early_buffer[i].arg3,
1356 kd_early_buffer[i].arg4,
1357 0);
1358 }
1359
1360 /* Cut events-lost event on overflow */
1361 if (kd_early_overflow)
1362 KERNEL_DEBUG_CONSTANT(
1363 TRACE_LOST_EVENTS, 0, 0, 0, 0, 0);
1364
1365 /* This trace marks the start of kernel tracing */
1366 kernel_debug_string_early("early trace done");
1367 }
1368
1369 void
1370 kernel_debug_disable(void)
1371 {
1372 if (kdebug_enable) {
1373 kdbg_set_tracing_enabled(FALSE, 0);
1374 }
1375 }
1376
1377 /*
1378 * Returns non-zero if debugid is in a reserved class.
1379 */
1380 static int
1381 kdebug_validate_debugid(uint32_t debugid)
1382 {
1383 uint8_t debugid_class;
1384
1385 debugid_class = KDBG_EXTRACT_CLASS(debugid);
1386 switch (debugid_class) {
1387 case DBG_TRACE:
1388 return EPERM;
1389 }
1390
1391 return 0;
1392 }
1393
1394 /*
1395 * Support syscall SYS_kdebug_typefilter.
1396 */
1397 int
1398 kdebug_typefilter(__unused struct proc* p,
1399 struct kdebug_typefilter_args* uap,
1400 __unused int *retval)
1401 {
1402 int ret = KERN_SUCCESS;
1403
1404 if (uap->addr == USER_ADDR_NULL ||
1405 uap->size == USER_ADDR_NULL) {
1406 return EINVAL;
1407 }
1408
1409 /*
1410 * The atomic load is to close a race window with setting the typefilter
1411 * and memory entry values. A description follows:
1412 *
1413 * Thread 1 (writer)
1414 *
1415 * Allocate Typefilter
1416 * Allocate MemoryEntry
1417 * Write Global MemoryEntry Ptr
1418 * Atomic Store (Release) Global Typefilter Ptr
1419 *
1420 * Thread 2 (reader, AKA us)
1421 *
1422 * if ((Atomic Load (Acquire) Global Typefilter Ptr) == NULL)
1423 * return;
1424 *
1425 * Without the atomic store, it isn't guaranteed that the write of
1426 * Global MemoryEntry Ptr is visible before we can see the write of
1427 * Global Typefilter Ptr.
1428 *
1429 * Without the atomic load, it isn't guaranteed that the loads of
1430 * Global MemoryEntry Ptr aren't speculated.
1431 *
1432 * The global pointers transition from NULL -> valid once and only once,
1433 * and never change after becoming valid. This means that having passed
1434 * the first atomic load test of Global Typefilter Ptr, this function
1435 * can then safely use the remaining global state without atomic checks.
1436 */
1437 if (!__c11_atomic_load((_Atomic typefilter_t *)&kdbg_typefilter, memory_order_acquire)) {
1438 return EINVAL;
1439 }
1440
1441 assert(kdbg_typefilter_memory_entry);
1442
1443 mach_vm_offset_t user_addr = 0;
1444 vm_map_t user_map = current_map();
1445
1446 ret = mach_to_bsd_errno(
1447 mach_vm_map(user_map, // target map
1448 &user_addr, // [in, out] target address
1449 TYPEFILTER_ALLOC_SIZE, // initial size
1450 0, // mask (alignment?)
1451 VM_FLAGS_ANYWHERE, // flags
1452 kdbg_typefilter_memory_entry, // port (memory entry!)
1453 0, // offset (in memory entry)
1454 FALSE, // should copy
1455 VM_PROT_READ, // cur_prot
1456 VM_PROT_READ, // max_prot
1457 VM_INHERIT_SHARE)); // inherit behavior on fork
1458
1459 if (ret == KERN_SUCCESS) {
1460 vm_size_t user_ptr_size = vm_map_is_64bit(user_map) ? 8 : 4;
1461 ret = copyout(CAST_DOWN(void *, &user_addr), uap->addr, user_ptr_size );
1462
1463 if (ret != KERN_SUCCESS) {
1464 mach_vm_deallocate(user_map, user_addr, TYPEFILTER_ALLOC_SIZE);
1465 }
1466 }
1467
1468 return ret;
1469 }
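/*
 * For reference, a minimal sketch of the writer-side ordering that the
 * comment in kdebug_typefilter() relies on. The actual writer is not shown
 * in this excerpt; this is illustrative only:
 *
 *	kdbg_typefilter_memory_entry = typefilter_create_memory_entry(tf);
 *	__c11_atomic_store((_Atomic typefilter_t *)&kdbg_typefilter, tf,
 *	    memory_order_release);
 *
 * The release store guarantees that a reader which observes a non-NULL
 * kdbg_typefilter via the acquire load also observes the memory entry.
 */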
1470
1471 /*
1472 * Support syscall SYS_kdebug_trace. U64->K32 args may get truncated in kdebug_trace64
1473 */
1474 int
1475 kdebug_trace(struct proc *p, struct kdebug_trace_args *uap, int32_t *retval)
1476 {
1477 struct kdebug_trace64_args uap64;
1478
1479 uap64.code = uap->code;
1480 uap64.arg1 = uap->arg1;
1481 uap64.arg2 = uap->arg2;
1482 uap64.arg3 = uap->arg3;
1483 uap64.arg4 = uap->arg4;
1484
1485 return kdebug_trace64(p, &uap64, retval);
1486 }
1487
1488 /*
1489 * Support syscall SYS_kdebug_trace64. 64-bit args on K32 will get truncated
1490 * to fit in 32-bit record format.
1491 *
1492 * It is intentional that error conditions are not checked until kdebug is
1493 * enabled. This is to match the userspace wrapper behavior, which is optimizing
1494 * for non-error case performance.
1495 */
1496 int kdebug_trace64(__unused struct proc *p, struct kdebug_trace64_args *uap, __unused int32_t *retval)
1497 {
1498 int err;
1499
1500 if ( __probable(kdebug_enable == 0) )
1501 return(0);
1502
1503 if ((err = kdebug_validate_debugid(uap->code)) != 0) {
1504 return err;
1505 }
1506
1507 kernel_debug_internal(FALSE, uap->code,
1508 (uintptr_t)uap->arg1,
1509 (uintptr_t)uap->arg2,
1510 (uintptr_t)uap->arg3,
1511 (uintptr_t)uap->arg4,
1512 (uintptr_t)thread_tid(current_thread()));
1513
1514 return(0);
1515 }
1516
1517 /*
1518 * Adding enough padding to contain a full tracepoint for the last
1519 * portion of the string greatly simplifies the logic of splitting the
1520 * string between tracepoints. Full tracepoints can be generated using
1521 * the buffer itself, without having to manually add zeros to pad the
1522 * arguments.
1523 */
1524
1525 /* 2 string args in first tracepoint and 9 string data tracepoints */
1526 #define STR_BUF_ARGS (2 + (9 * 4))
1527 /* times the size of each arg on K64 */
1528 #define MAX_STR_LEN (STR_BUF_ARGS * sizeof(uint64_t))
1529 /* on K32, ending straddles a tracepoint, so reserve blanks */
1530 #define STR_BUF_SIZE (MAX_STR_LEN + (2 * sizeof(uint32_t)))
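/*
 * Worked sizes for the definitions above, assuming 8-byte uintptr_t on K64:
 * STR_BUF_ARGS = 2 + 9 * 4 = 38 arguments, MAX_STR_LEN = 38 * 8 = 304 bytes
 * of string data, and STR_BUF_SIZE = 304 + 8 = 312 bytes once the two
 * 4-byte K32 blanks are added.
 */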
1531
1532 /*
1533 * This function does no error checking and assumes that it is called with
1534 * the correct arguments, including that the buffer pointed to by str is at
1535 * least STR_BUF_SIZE bytes. However, str must be aligned to word-size and
1536 * be NUL-terminated. In cases where a string can fit evenly into a final
1537 * tracepoint without its NUL-terminator, this function will not end those
1538 * strings with a NUL in trace. It's up to clients to look at the function
1539 * qualifier for DBG_FUNC_END in this case, to end the string.
1540 */
1541 static uint64_t
1542 kernel_debug_string_internal(uint32_t debugid, uint64_t str_id, void *vstr,
1543 size_t str_len)
1544 {
1545 /* str must be word-aligned */
1546 uintptr_t *str = vstr;
1547 size_t written = 0;
1548 uintptr_t thread_id;
1549 int i;
1550 uint32_t trace_debugid = TRACEDBG_CODE(DBG_TRACE_STRING,
1551 TRACE_STRING_GLOBAL);
1552
1553 thread_id = (uintptr_t)thread_tid(current_thread());
1554
1555 /* if the ID is being invalidated, just emit that */
1556 if (str_id != 0 && str_len == 0) {
1557 kernel_debug_internal(FALSE, trace_debugid | DBG_FUNC_START | DBG_FUNC_END,
1558 (uintptr_t)debugid, (uintptr_t)str_id, 0, 0,
1559 thread_id);
1560 return str_id;
1561 }
1562
1563 /* generate an ID, if necessary */
1564 if (str_id == 0) {
1565 str_id = OSIncrementAtomic64((SInt64 *)&g_curr_str_id);
1566 str_id = (str_id & STR_ID_MASK) | g_str_id_signature;
1567 }
1568
1569 trace_debugid |= DBG_FUNC_START;
1570 /* string can fit in a single tracepoint */
1571 if (str_len <= (2 * sizeof(uintptr_t))) {
1572 trace_debugid |= DBG_FUNC_END;
1573 }
1574
1575 kernel_debug_internal(FALSE, trace_debugid, (uintptr_t)debugid,
1576 (uintptr_t)str_id, str[0],
1577 str[1], thread_id);
1578
1579 trace_debugid &= KDBG_EVENTID_MASK;
1580 i = 2;
1581 written += 2 * sizeof(uintptr_t);
1582
1583 for (; written < str_len; i += 4, written += 4 * sizeof(uintptr_t)) {
1584 if ((written + (4 * sizeof(uintptr_t))) >= str_len) {
1585 trace_debugid |= DBG_FUNC_END;
1586 }
1587 kernel_debug_internal(FALSE, trace_debugid, str[i],
1588 str[i + 1],
1589 str[i + 2],
1590 str[i + 3], thread_id);
1591 }
1592
1593 return str_id;
1594 }
1595
1596 /*
1597 * Returns true if the current process can emit events, and false otherwise.
1598 * Trace system and scheduling events circumvent this check, as do events
1599 * emitted in interrupt context.
1600 */
1601 static boolean_t
1602 kdebug_current_proc_enabled(uint32_t debugid)
1603 {
1604 /* can't determine current process in interrupt context */
1605 if (ml_at_interrupt_context()) {
1606 return TRUE;
1607 }
1608
1609 /* always emit trace system and scheduling events */
1610 if ((KDBG_EXTRACT_CLASS(debugid) == DBG_TRACE ||
1611 (debugid & KDBG_CSC_MASK) == MACHDBG_CODE(DBG_MACH_SCHED, 0)))
1612 {
1613 return TRUE;
1614 }
1615
1616 if (kd_ctrl_page.kdebug_flags & KDBG_PIDCHECK) {
1617 proc_t cur_proc = current_proc();
1618
1619 /* only the process with the kdebug bit set is allowed */
1620 if (cur_proc && !(cur_proc->p_kdebug)) {
1621 return FALSE;
1622 }
1623 } else if (kd_ctrl_page.kdebug_flags & KDBG_PIDEXCLUDE) {
1624 proc_t cur_proc = current_proc();
1625
1626 /* every process except the one with the kdebug bit set is allowed */
1627 if (cur_proc && cur_proc->p_kdebug) {
1628 return FALSE;
1629 }
1630 }
1631
1632 return TRUE;
1633 }
1634
1635 /*
1636 * Returns false if the debugid is disabled by filters, and true if the
1637 * debugid is allowed to be traced. A debugid may not be traced if the
1638 * typefilter disables its class and subclass, it's outside a range
1639 * check, or if it's not an allowed debugid in a value check. Trace
1640 * system events bypass this check.
1641 */
1642 boolean_t
1643 kdebug_debugid_enabled(uint32_t debugid)
1644 {
1645 /* if no filtering is enabled */
1646 if (!kd_ctrl_page.kdebug_slowcheck) {
1647 return TRUE;
1648 }
1649
1650 if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) {
1651 return typefilter_is_debugid_allowed(kdbg_typefilter, debugid);
1652 } else if (KDBG_EXTRACT_CLASS(debugid) == DBG_TRACE) {
1653 return TRUE;
1654 }
1655
1656 if (kd_ctrl_page.kdebug_flags & KDBG_RANGECHECK) {
1657 if (debugid < kdlog_beg || debugid > kdlog_end) {
1658 return FALSE;
1659 }
1660 } else if (kd_ctrl_page.kdebug_flags & KDBG_VALCHECK) {
1661 if ((debugid & KDBG_EVENTID_MASK) != kdlog_value1 &&
1662 (debugid & KDBG_EVENTID_MASK) != kdlog_value2 &&
1663 (debugid & KDBG_EVENTID_MASK) != kdlog_value3 &&
1664 (debugid & KDBG_EVENTID_MASK) != kdlog_value4)
1665 {
1666 return FALSE;
1667 }
1668 }
1669
1670 return TRUE;
1671 }
1672
1673 /*
1674 * Returns 0 if a string can be traced with these arguments. Returns errno
1675 * value if error occurred.
1676 */
1677 static errno_t
1678 kdebug_check_trace_string(uint32_t debugid, uint64_t str_id)
1679 {
1680 /* if there are function qualifiers on the debugid */
1681 if (debugid & ~KDBG_EVENTID_MASK) {
1682 return EINVAL;
1683 }
1684
1685 if (kdebug_validate_debugid(debugid)) {
1686 return EPERM;
1687 }
1688
1689 if (str_id != 0 && (str_id & STR_ID_SIG_MASK) != g_str_id_signature) {
1690 return EINVAL;
1691 }
1692
1693 return 0;
1694 }
1695
1696 /*
1697 * Implementation of KPI kernel_debug_string.
1698 */
1699 int
1700 kernel_debug_string(uint32_t debugid, uint64_t *str_id, const char *str)
1701 {
1702 /* arguments to tracepoints must be word-aligned */
1703 __attribute__((aligned(sizeof(uintptr_t)))) char str_buf[STR_BUF_SIZE];
1704 static_assert(sizeof(str_buf) > MAX_STR_LEN);
1705 vm_size_t len_copied;
1706 int err;
1707
1708 assert(str_id);
1709
1710 if (__probable(kdebug_enable == 0)) {
1711 return 0;
1712 }
1713
1714 if (!kdebug_current_proc_enabled(debugid)) {
1715 return 0;
1716 }
1717
1718 if (!kdebug_debugid_enabled(debugid)) {
1719 return 0;
1720 }
1721
1722 if ((err = kdebug_check_trace_string(debugid, *str_id)) != 0) {
1723 return err;
1724 }
1725
1726 if (str == NULL) {
1727 if (str_id == 0) {
1728 return EINVAL;
1729 }
1730
1731 *str_id = kernel_debug_string_internal(debugid, *str_id, NULL, 0);
1732 return 0;
1733 }
1734
1735 memset(str_buf, 0, sizeof(str_buf));
1736 len_copied = strlcpy(str_buf, str, MAX_STR_LEN + 1);
1737 *str_id = kernel_debug_string_internal(debugid, *str_id, str_buf,
1738 len_copied);
1739 return 0;
1740 }
1741
1742 /*
1743 * Support syscall kdebug_trace_string.
1744 */
1745 int
1746 kdebug_trace_string(__unused struct proc *p,
1747 struct kdebug_trace_string_args *uap,
1748 uint64_t *retval)
1749 {
1750 __attribute__((aligned(sizeof(uintptr_t)))) char str_buf[STR_BUF_SIZE];
1751 static_assert(sizeof(str_buf) > MAX_STR_LEN);
1752 size_t len_copied;
1753 int err;
1754
1755 if (__probable(kdebug_enable == 0)) {
1756 return 0;
1757 }
1758
1759 if (!kdebug_current_proc_enabled(uap->debugid)) {
1760 return 0;
1761 }
1762
1763 if (!kdebug_debugid_enabled(uap->debugid)) {
1764 return 0;
1765 }
1766
1767 if ((err = kdebug_check_trace_string(uap->debugid, uap->str_id)) != 0) {
1768 return err;
1769 }
1770
1771 if (uap->str == USER_ADDR_NULL) {
1772 if (uap->str_id == 0) {
1773 return EINVAL;
1774 }
1775
1776 *retval = kernel_debug_string_internal(uap->debugid, uap->str_id,
1777 NULL, 0);
1778 return 0;
1779 }
1780
1781 memset(str_buf, 0, sizeof(str_buf));
1782 err = copyinstr(uap->str, str_buf, MAX_STR_LEN + 1, &len_copied);
1783
1784 /* it's alright to truncate the string, so allow ENAMETOOLONG */
1785 if (err == ENAMETOOLONG) {
1786 str_buf[MAX_STR_LEN] = '\0';
1787 } else if (err) {
1788 return err;
1789 }
1790
1791 if (len_copied <= 1) {
1792 return EINVAL;
1793 }
1794
1795 /* convert back to a length */
1796 len_copied--;
1797
1798 *retval = kernel_debug_string_internal(uap->debugid, uap->str_id, str_buf,
1799 len_copied);
1800 return 0;
1801 }
1802
1803 static void
1804 kdbg_lock_init(void)
1805 {
1806 static lck_grp_attr_t *kdebug_lck_grp_attr = NULL;
1807 static lck_grp_t *kdebug_lck_grp = NULL;
1808 static lck_attr_t *kdebug_lck_attr = NULL;
1809
1810 if (kd_ctrl_page.kdebug_flags & KDBG_LOCKINIT) {
1811 return;
1812 }
1813
1814 assert(kdebug_lck_grp_attr == NULL);
1815 kdebug_lck_grp_attr = lck_grp_attr_alloc_init();
1816 kdebug_lck_grp = lck_grp_alloc_init("kdebug", kdebug_lck_grp_attr);
1817 kdebug_lck_attr = lck_attr_alloc_init();
1818
1819 kds_spin_lock = lck_spin_alloc_init(kdebug_lck_grp, kdebug_lck_attr);
1820 kdw_spin_lock = lck_spin_alloc_init(kdebug_lck_grp, kdebug_lck_attr);
1821
1822 kd_ctrl_page.kdebug_flags |= KDBG_LOCKINIT;
1823 }
1824
1825 int
1826 kdbg_bootstrap(boolean_t early_trace)
1827 {
1828 kd_ctrl_page.kdebug_flags &= ~KDBG_WRAPPED;
1829
1830 return (create_buffers(early_trace));
1831 }
1832
1833 int
1834 kdbg_reinit(boolean_t early_trace)
1835 {
1836 int ret = 0;
1837
1838 /*
1839 * Disable trace collecting
1840 * First make sure we're not in
1841 * the middle of cutting a trace
1842 */
1843 kernel_debug_disable();
1844
1845 /*
1846 * make sure the SLOW_NOLOG is seen
1847 * by everyone that might be trying
1848 * to cut a trace..
1849 */
1850 IOSleep(100);
1851
1852 delete_buffers();
1853
1854 kdbg_clear_thread_map();
1855 ret = kdbg_bootstrap(early_trace);
1856
1857 RAW_file_offset = 0;
1858 RAW_file_written = 0;
1859
1860 return(ret);
1861 }
1862
1863 void
1864 kdbg_trace_data(struct proc *proc, long *arg_pid)
1865 {
1866 if (!proc)
1867 *arg_pid = 0;
1868 else
1869 *arg_pid = proc->p_pid;
1870 }
1871
1872
1873 void
1874 kdbg_trace_string(struct proc *proc, long *arg1, long *arg2, long *arg3, long *arg4)
1875 {
1876 char *dbg_nameptr;
1877 int dbg_namelen;
1878 long dbg_parms[4];
1879
1880 if (!proc) {
1881 *arg1 = 0;
1882 *arg2 = 0;
1883 *arg3 = 0;
1884 *arg4 = 0;
1885 return;
1886 }
1887 /*
1888 * Collect the pathname for tracing
1889 */
1890 dbg_nameptr = proc->p_comm;
1891 dbg_namelen = (int)strlen(proc->p_comm);
1892 dbg_parms[0]=0L;
1893 dbg_parms[1]=0L;
1894 dbg_parms[2]=0L;
1895 dbg_parms[3]=0L;
1896
1897 if(dbg_namelen > (int)sizeof(dbg_parms))
1898 dbg_namelen = (int)sizeof(dbg_parms);
1899
1900 strncpy((char *)dbg_parms, dbg_nameptr, dbg_namelen);
1901
1902 *arg1=dbg_parms[0];
1903 *arg2=dbg_parms[1];
1904 *arg3=dbg_parms[2];
1905 *arg4=dbg_parms[3];
1906 }
1907
1908 static void
1909 kdbg_resolve_map(thread_t th_act, void *opaque)
1910 {
1911 kd_threadmap *mapptr;
1912 krt_t *t = (krt_t *)opaque;
1913
1914 if (t->count < t->maxcount) {
1915 mapptr = &t->map[t->count];
1916 mapptr->thread = (uintptr_t)thread_tid(th_act);
1917
1918 (void) strlcpy (mapptr->command, t->atts->task_comm,
1919 sizeof(t->atts->task_comm));
1920 /*
1921 * Some kernel threads have no associated pid.
1922 * We still need to mark the entry as valid.
1923 */
1924 if (t->atts->pid)
1925 mapptr->valid = t->atts->pid;
1926 else
1927 mapptr->valid = 1;
1928
1929 t->count++;
1930 }
1931 }
1932
1933 /*
1934 *
1935 * Writes a cpumap for the given iops_list/cpu_count to the provided buffer.
1936 *
1937 * You may provide a buffer and size, or if you set the buffer to NULL, a
1938 * buffer of sufficient size will be allocated.
1939 *
1940 * If you provide a buffer and it is too small, sets cpumap_size to the number
1941 * of bytes required and returns EINVAL.
1942 *
1943 * On success, if you provided a buffer, cpumap_size is set to the number of
1944 * bytes written. If you did not provide a buffer, cpumap is set to the newly
1945 * allocated buffer and cpumap_size is set to the number of bytes allocated.
1946 *
1947 * NOTE: It may seem redundant to pass both iops and a cpu_count.
1948 *
1949 * We may be reporting data from "now", or from the "past".
1950 *
1951 * The "past" data would be for kdbg_readcpumap().
1952 *
1953 * If we passed only iops and it happened to be NULL, this function would
1954 * need to read the "now" state to get the number of cpus, which would be
1955 * wrong if we were reporting "past" state.
1956 */
1957
1958 int
1959 kdbg_cpumap_init_internal(kd_iop_t* iops, uint32_t cpu_count, uint8_t** cpumap, uint32_t* cpumap_size)
1960 {
1961 assert(cpumap);
1962 assert(cpumap_size);
1963 assert(cpu_count);
1964 assert(!iops || iops->cpu_id + 1 == cpu_count);
1965
1966 uint32_t bytes_needed = sizeof(kd_cpumap_header) + cpu_count * sizeof(kd_cpumap);
1967 uint32_t bytes_available = *cpumap_size;
1968 *cpumap_size = bytes_needed;
1969
1970 if (*cpumap == NULL) {
1971 if (kmem_alloc(kernel_map, (vm_offset_t*)cpumap, (vm_size_t)*cpumap_size, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
1972 return ENOMEM;
1973 }
1974 bzero(*cpumap, *cpumap_size);
1975 } else if (bytes_available < bytes_needed) {
1976 return EINVAL;
1977 }
1978
1979 kd_cpumap_header* header = (kd_cpumap_header*)(uintptr_t)*cpumap;
1980
1981 header->version_no = RAW_VERSION1;
1982 header->cpu_count = cpu_count;
1983
1984 kd_cpumap* cpus = (kd_cpumap*)&header[1];
1985
1986 int32_t index = cpu_count - 1;
1987 while (iops) {
1988 cpus[index].cpu_id = iops->cpu_id;
1989 cpus[index].flags = KDBG_CPUMAP_IS_IOP;
1990 strlcpy(cpus[index].name, iops->callback.iop_name, sizeof(cpus->name));
1991
1992 iops = iops->next;
1993 index--;
1994 }
1995
1996 while (index >= 0) {
1997 cpus[index].cpu_id = index;
1998 cpus[index].flags = 0;
1999 strlcpy(cpus[index].name, "AP", sizeof(cpus->name));
2000
2001 index--;
2002 }
2003
2004 return KERN_SUCCESS;
2005 }
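
/*
 * Illustrative sketch (not part of the original source): the allocate-on-NULL
 * pattern described above. Passing a NULL buffer asks
 * kdbg_cpumap_init_internal() to allocate one, which the caller must later
 * release with kmem_free() -- the same pattern kdbg_readcpumap() uses further
 * below. Guarded out so it has no effect on the build.
 */
#if 0
static int
kdbg_cpumap_example(void)
{
	uint8_t *cpumap = NULL;
	uint32_t cpumap_size = 0;

	/* With a NULL buffer, the routine allocates and reports the size used. */
	if (kdbg_cpumap_init_internal(kd_ctrl_page.kdebug_iops,
	    kd_ctrl_page.kdebug_cpus, &cpumap, &cpumap_size) != KERN_SUCCESS) {
		return ENOMEM;
	}

	/* ... consume the cpumap (header followed by one kd_cpumap per cpu) ... */

	kmem_free(kernel_map, (vm_offset_t)cpumap, cpumap_size);
	return 0;
}
#endif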
2006
2007 void
2008 kdbg_thrmap_init(void)
2009 {
2010 lck_mtx_assert(ktrace_lock, LCK_MTX_ASSERT_OWNED);
2011
2012 if (kd_ctrl_page.kdebug_flags & KDBG_MAPINIT) {
2013 return;
2014 }
2015
2016 kd_mapptr = kdbg_thrmap_init_internal(0, &kd_mapsize, &kd_mapcount);
2017
2018 if (kd_mapptr) {
2019 kd_ctrl_page.kdebug_flags |= KDBG_MAPINIT;
2020 }
2021 }
2022
2023 static kd_threadmap *
2024 kdbg_thrmap_init_internal(unsigned int count, unsigned int *mapsize, unsigned int *mapcount)
2025 {
2026 kd_threadmap *mapptr;
2027 proc_t p;
2028 struct krt akrt;
2029 int tts_count = 0; /* number of task-to-string structures */
2030 struct tts *tts_mapptr;
2031 unsigned int tts_mapsize = 0;
2032 vm_offset_t kaddr;
2033
2034 assert(mapsize != NULL);
2035 assert(mapcount != NULL);
2036
2037 *mapcount = threads_count;
2038 tts_count = tasks_count;
2039
2040 /*
2041 * The proc count could change during buffer allocation,
2042 * so introduce a small fudge factor to bump up the
2043 * buffer sizes. This gives new tasks some chance of
2044 * making it into the tables. Bump up by 25%.
2045 */
2046 *mapcount += *mapcount / 4;
2047 tts_count += tts_count / 4;
2048
2049 *mapsize = *mapcount * sizeof(kd_threadmap);
2050
2051 if (count && count < *mapcount) {
2052 return 0;
2053 }
2054
2055 if ((kmem_alloc(kernel_map, &kaddr, (vm_size_t)*mapsize, VM_KERN_MEMORY_DIAG) == KERN_SUCCESS)) {
2056 bzero((void *)kaddr, *mapsize);
2057 mapptr = (kd_threadmap *)kaddr;
2058 } else {
2059 return 0;
2060 }
2061
2062 tts_mapsize = tts_count * sizeof(struct tts);
2063
2064 if ((kmem_alloc(kernel_map, &kaddr, (vm_size_t)tts_mapsize, VM_KERN_MEMORY_DIAG) == KERN_SUCCESS)) {
2065 bzero((void *)kaddr, tts_mapsize);
2066 tts_mapptr = (struct tts *)kaddr;
2067 } else {
2068 kmem_free(kernel_map, (vm_offset_t)mapptr, *mapsize);
2069
2070 return 0;
2071 }
2072
2073 /*
2074 * Save the proc's name and take a reference for each task associated
2075 * with a valid process.
2076 */
2077 proc_list_lock();
2078
2079 int i = 0;
2080 ALLPROC_FOREACH(p) {
2081 if (i >= tts_count) {
2082 break;
2083 }
2084 if (p->p_lflag & P_LEXIT) {
2085 continue;
2086 }
2087 if (p->task) {
2088 task_reference(p->task);
2089 tts_mapptr[i].task = p->task;
2090 tts_mapptr[i].pid = p->p_pid;
2091 (void)strlcpy(tts_mapptr[i].task_comm, proc_best_name(p), sizeof(tts_mapptr[i].task_comm));
2092 i++;
2093 }
2094 }
2095 tts_count = i;
2096
2097 proc_list_unlock();
2098
2099 /*
2100 * Initialize thread map data
2101 */
2102 akrt.map = mapptr;
2103 akrt.count = 0;
2104 akrt.maxcount = *mapcount;
2105
2106 for (i = 0; i < tts_count; i++) {
2107 akrt.atts = &tts_mapptr[i];
2108 task_act_iterate_wth_args(tts_mapptr[i].task, kdbg_resolve_map, &akrt);
2109 task_deallocate((task_t)tts_mapptr[i].task);
2110 }
2111 kmem_free(kernel_map, (vm_offset_t)tts_mapptr, tts_mapsize);
2112
2113 *mapcount = akrt.count;
2114
2115 return mapptr;
2116 }
2117
2118 static void
2119 kdbg_clear(void)
2120 {
2121 /*
2122 * Clean up the trace buffer
2123 * First make sure we're not in
2124 * the middle of cutting a trace
2125 */
2126 kernel_debug_disable();
2127 kdbg_disable_typefilter();
2128
2129 /*
2130 * make sure the SLOW_NOLOG is seen
2131 * by everyone that might be trying
2132 * to cut a trace..
2133 */
2134 IOSleep(100);
2135
2136 /* reset kdebug state for each process */
2137 if (kd_ctrl_page.kdebug_flags & (KDBG_PIDCHECK | KDBG_PIDEXCLUDE)) {
2138 proc_list_lock();
2139 proc_t p;
2140 ALLPROC_FOREACH(p) {
2141 p->p_kdebug = 0;
2142 }
2143 proc_list_unlock();
2144 }
2145
2146 kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
2147 kd_ctrl_page.kdebug_flags &= ~(KDBG_NOWRAP | KDBG_RANGECHECK | KDBG_VALCHECK);
2148 kd_ctrl_page.kdebug_flags &= ~(KDBG_PIDCHECK | KDBG_PIDEXCLUDE);
2149
2150 kd_ctrl_page.oldest_time = 0;
2151
2152 delete_buffers();
2153 nkdbufs = 0;
2154
2155 /* Clean up the thread map buffer */
2156 kdbg_clear_thread_map();
2157
2158 RAW_file_offset = 0;
2159 RAW_file_written = 0;
2160 }
2161
2162 void
2163 kdebug_reset(void)
2164 {
2165 lck_mtx_assert(ktrace_lock, LCK_MTX_ASSERT_OWNED);
2166
2167 kdbg_lock_init();
2168
2169 kdbg_clear();
2170 if (kdbg_typefilter) {
2171 typefilter_reject_all(kdbg_typefilter);
2172 typefilter_allow_class(kdbg_typefilter, DBG_TRACE);
2173 }
2174 }
2175
2176 int
2177 kdbg_setpid(kd_regtype *kdr)
2178 {
2179 pid_t pid;
2180 int flag, ret=0;
2181 struct proc *p;
2182
2183 pid = (pid_t)kdr->value1;
2184 flag = (int)kdr->value2;
2185
2186 if (pid >= 0) {
2187 if ((p = proc_find(pid)) == NULL)
2188 ret = ESRCH;
2189 else {
2190 if (flag == 1) {
2191 /*
2192 * turn on pid check for this and all pids
2193 */
2194 kd_ctrl_page.kdebug_flags |= KDBG_PIDCHECK;
2195 kd_ctrl_page.kdebug_flags &= ~KDBG_PIDEXCLUDE;
2196 kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
2197
2198 p->p_kdebug = 1;
2199 } else {
2200 /*
2201 * turn off pid check for this pid value
2202 * Don't turn off all pid checking though
2203 *
2204 * kd_ctrl_page.kdebug_flags &= ~KDBG_PIDCHECK;
2205 */
2206 p->p_kdebug = 0;
2207 }
2208 proc_rele(p);
2209 }
2210 }
2211 else
2212 ret = EINVAL;
2213
2214 return(ret);
2215 }
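
/*
 * Illustrative sketch (not in the original source): enabling the pid filter
 * for a single process via kdbg_setpid(). value1 carries the pid and value2
 * the on/off flag, exactly as the routine above decodes them.
 */
#if 0
static int
kdbg_setpid_example(pid_t pid)
{
	kd_regtype kdr = {
		.value1 = pid,	/* process whose events should be kept */
		.value2 = 1,	/* 1 = enable the pid check for this pid */
	};

	return kdbg_setpid(&kdr);
}
#endif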
2216
2217 /* This is for pid exclusion in the trace buffer */
2218 int
2219 kdbg_setpidex(kd_regtype *kdr)
2220 {
2221 pid_t pid;
2222 int flag, ret=0;
2223 struct proc *p;
2224
2225 pid = (pid_t)kdr->value1;
2226 flag = (int)kdr->value2;
2227
2228 if (pid >= 0) {
2229 if ((p = proc_find(pid)) == NULL)
2230 ret = ESRCH;
2231 else {
2232 if (flag == 1) {
2233 /*
2234 * turn on pid exclusion
2235 */
2236 kd_ctrl_page.kdebug_flags |= KDBG_PIDEXCLUDE;
2237 kd_ctrl_page.kdebug_flags &= ~KDBG_PIDCHECK;
2238 kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
2239
2240 p->p_kdebug = 1;
2241 }
2242 else {
2243 /*
2244 * turn off pid exclusion for this pid value
2245 * Don't turn off all pid exclusion though
2246 *
2247 * kd_ctrl_page.kdebug_flags &= ~KDBG_PIDEXCLUDE;
2248 */
2249 p->p_kdebug = 0;
2250 }
2251 proc_rele(p);
2252 }
2253 } else
2254 ret = EINVAL;
2255
2256 return(ret);
2257 }
2258
2259 /*
2260 * The following functions all operate on the "global" typefilter singleton.
2261 */
2262
2263 /*
2264 * The tf param is optional, you may pass either a valid typefilter or NULL.
2265 * If you pass a valid typefilter, you release ownership of that typefilter.
2266 */
2267 static int
2268 kdbg_initialize_typefilter(typefilter_t tf)
2269 {
2270 lck_mtx_assert(ktrace_lock, LCK_MTX_ASSERT_OWNED);
2271 assert(!kdbg_typefilter);
2272 assert(!kdbg_typefilter_memory_entry);
2273 typefilter_t deallocate_tf = NULL;
2274
2275 if (!tf && ((tf = deallocate_tf = typefilter_create()) == NULL)) {
2276 return ENOMEM;
2277 }
2278
2279 if ((kdbg_typefilter_memory_entry = typefilter_create_memory_entry(tf)) == MACH_PORT_NULL) {
2280 if (deallocate_tf) {
2281 typefilter_deallocate(deallocate_tf);
2282 }
2283 return ENOMEM;
2284 }
2285
2286 /*
2287 * The atomic store closes a race window with
2288 * the kdebug_typefilter syscall, which assumes
2289 * that any non-null kdbg_typefilter means a
2290 * valid memory_entry is available.
2291 */
2292 __c11_atomic_store(((_Atomic typefilter_t*)&kdbg_typefilter), tf, memory_order_release);
2293
2294 return KERN_SUCCESS;
2295 }
2296
2297 static int
2298 kdbg_copyin_typefilter(user_addr_t addr, size_t size)
2299 {
2300 int ret = ENOMEM;
2301 typefilter_t tf;
2302
2303 lck_mtx_assert(ktrace_lock, LCK_MTX_ASSERT_OWNED);
2304
2305 if (size != KDBG_TYPEFILTER_BITMAP_SIZE) {
2306 return EINVAL;
2307 }
2308
2309 if ((tf = typefilter_create())) {
2310 if ((ret = copyin(addr, tf, KDBG_TYPEFILTER_BITMAP_SIZE)) == 0) {
2311 /* The kernel typefilter must always allow DBG_TRACE */
2312 typefilter_allow_class(tf, DBG_TRACE);
2313
2314 /*
2315 * If this is the first typefilter, claim it.
2316 * Otherwise copy and deallocate.
2317 *
2318 * Allocating a typefilter for the copyin allows
2319 * the kernel to hold the invariant that DBG_TRACE
2320 * must always be allowed.
2321 */
2322 if (!kdbg_typefilter) {
2323 if ((ret = kdbg_initialize_typefilter(tf))) {
2324 return ret;
2325 }
2326 tf = NULL;
2327 } else {
2328 typefilter_copy(kdbg_typefilter, tf);
2329 }
2330
2331 kdbg_enable_typefilter();
2332 kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_TYPEFILTER_CHANGED, kdbg_typefilter);
2333 }
2334
2335 if (tf)
2336 typefilter_deallocate(tf);
2337 }
2338
2339 return ret;
2340 }
2341
2342 /*
2343 * Enable the flags in the control page for the typefilter. Assumes that
2344 * kdbg_typefilter has already been allocated, so events being written
2345 * don't see a bad typefilter.
2346 */
2347 static void
2348 kdbg_enable_typefilter(void)
2349 {
2350 assert(kdbg_typefilter);
2351 kd_ctrl_page.kdebug_flags &= ~(KDBG_RANGECHECK | KDBG_VALCHECK);
2352 kd_ctrl_page.kdebug_flags |= KDBG_TYPEFILTER_CHECK;
2353 kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
2354 commpage_update_kdebug_state();
2355 }
2356
2357 /*
2358 * Disable the flags in the control page for the typefilter. The typefilter
2359 * may be safely deallocated shortly after this function returns.
2360 */
2361 static void
2362 kdbg_disable_typefilter(void)
2363 {
2364 kd_ctrl_page.kdebug_flags &= ~KDBG_TYPEFILTER_CHECK;
2365
2366 if ((kd_ctrl_page.kdebug_flags & (KDBG_PIDCHECK | KDBG_PIDEXCLUDE))) {
2367 kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
2368 } else {
2369 kdbg_set_flags(SLOW_CHECKS, 0, FALSE);
2370 }
2371 commpage_update_kdebug_state();
2372 }
2373
2374 uint32_t
2375 kdebug_commpage_state(void)
2376 {
2377 if (kdebug_enable) {
2378 if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) {
2379 return KDEBUG_COMMPAGE_ENABLE_TYPEFILTER | KDEBUG_COMMPAGE_ENABLE_TRACE;
2380 }
2381
2382 return KDEBUG_COMMPAGE_ENABLE_TRACE;
2383 }
2384
2385 return 0;
2386 }
2387
2388 int
2389 kdbg_setreg(kd_regtype * kdr)
2390 {
2391 int ret=0;
2392 unsigned int val_1, val_2, val;
2393 switch (kdr->type) {
2394
2395 case KDBG_CLASSTYPE :
2396 val_1 = (kdr->value1 & 0xff);
2397 val_2 = (kdr->value2 & 0xff);
2398 kdlog_beg = (val_1<<24);
2399 kdlog_end = (val_2<<24);
2400 kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
2401 kd_ctrl_page.kdebug_flags &= ~KDBG_VALCHECK; /* Turn off specific value check */
2402 kd_ctrl_page.kdebug_flags |= (KDBG_RANGECHECK | KDBG_CLASSTYPE);
2403 kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
2404 break;
2405 case KDBG_SUBCLSTYPE :
2406 val_1 = (kdr->value1 & 0xff);
2407 val_2 = (kdr->value2 & 0xff);
2408 val = val_2 + 1;
2409 kdlog_beg = ((val_1<<24) | (val_2 << 16));
2410 kdlog_end = ((val_1<<24) | (val << 16));
2411 kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
2412 kd_ctrl_page.kdebug_flags &= ~KDBG_VALCHECK; /* Turn off specific value check */
2413 kd_ctrl_page.kdebug_flags |= (KDBG_RANGECHECK | KDBG_SUBCLSTYPE);
2414 kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
2415 break;
2416 case KDBG_RANGETYPE :
2417 kdlog_beg = (kdr->value1);
2418 kdlog_end = (kdr->value2);
2419 kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
2420 kd_ctrl_page.kdebug_flags &= ~KDBG_VALCHECK; /* Turn off specific value check */
2421 kd_ctrl_page.kdebug_flags |= (KDBG_RANGECHECK | KDBG_RANGETYPE);
2422 kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
2423 break;
2424 case KDBG_VALCHECK:
2425 kdlog_value1 = (kdr->value1);
2426 kdlog_value2 = (kdr->value2);
2427 kdlog_value3 = (kdr->value3);
2428 kdlog_value4 = (kdr->value4);
2429 kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
2430 kd_ctrl_page.kdebug_flags &= ~KDBG_RANGECHECK; /* Turn off range check */
2431 kd_ctrl_page.kdebug_flags |= KDBG_VALCHECK; /* Turn on specific value check */
2432 kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
2433 break;
2434 case KDBG_TYPENONE :
2435 kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
2436
2437 if ( (kd_ctrl_page.kdebug_flags & (KDBG_RANGECHECK | KDBG_VALCHECK |
2438 KDBG_PIDCHECK | KDBG_PIDEXCLUDE |
2439 KDBG_TYPEFILTER_CHECK)) )
2440 kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
2441 else
2442 kdbg_set_flags(SLOW_CHECKS, 0, FALSE);
2443
2444 kdlog_beg = 0;
2445 kdlog_end = 0;
2446 break;
2447 default :
2448 ret = EINVAL;
2449 break;
2450 }
2451 return(ret);
2452 }
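
/*
 * Illustrative sketch (not in the original source): using kdbg_setreg() with
 * KDBG_VALCHECK to keep only a few specific debugids. The ids passed in are
 * placeholders; the actual comparison against kdlog_value1..4 happens in the
 * event-recording path, not shown here.
 */
#if 0
static int
kdbg_setreg_example(uint32_t id1, uint32_t id2)
{
	kd_regtype kdr = {
		.type   = KDBG_VALCHECK,
		.value1 = id1,
		.value2 = id2,
		.value3 = id2,	/* unused slots may repeat an existing id */
		.value4 = id2,
	};

	return kdbg_setreg(&kdr);
}
#endif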
2453
2454 static int
2455 kdbg_write_to_vnode(caddr_t buffer, size_t size, vnode_t vp, vfs_context_t ctx, off_t file_offset)
2456 {
2457 return vn_rdwr(UIO_WRITE, vp, buffer, size, file_offset, UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT,
2458 vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
2459 }
2460
2461 int
2462 kdbg_write_v3_chunk_header(user_addr_t buffer, uint32_t tag, uint32_t sub_tag, uint64_t length, vnode_t vp, vfs_context_t ctx)
2463 {
2464 int ret = KERN_SUCCESS;
2465 kd_chunk_header_v3 header = {
2466 .tag = tag,
2467 .sub_tag = sub_tag,
2468 .length = length,
2469 };
2470
2471 // Check that only one of them is valid
2472 assert(!buffer ^ !vp);
2473 assert((vp == NULL) || (ctx != NULL));
2474
2475 // Write the chunk header to the file or copy it out to the user buffer
2476 if (buffer || vp) {
2477 if (vp) {
2478 ret = kdbg_write_to_vnode((caddr_t)&header, sizeof(kd_chunk_header_v3), vp, ctx, RAW_file_offset);
2479 if (ret) {
2480 goto write_error;
2481 }
2482 RAW_file_offset += (sizeof(kd_chunk_header_v3));
2483 }
2484 else {
2485 ret = copyout(&header, buffer, sizeof(kd_chunk_header_v3));
2486 if (ret) {
2487 goto write_error;
2488 }
2489 }
2490 }
2491 write_error:
2492 return ret;
2493 }
2494
2495 int
2496 kdbg_write_v3_chunk_header_to_buffer(void * buffer, uint32_t tag, uint32_t sub_tag, uint64_t length)
2497 {
2498 kd_chunk_header_v3 header = {
2499 .tag = tag,
2500 .sub_tag = sub_tag,
2501 .length = length,
2502 };
2503
2504 if (!buffer) {
2505 return 0;
2506 }
2507
2508 memcpy(buffer, &header, sizeof(kd_chunk_header_v3));
2509
2510 return (sizeof(kd_chunk_header_v3));
2511 }
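
/*
 * Illustrative sketch (not in the original source): laying out a v3 chunk in
 * a caller-provided buffer -- header first, payload immediately after -- using
 * the helper above. The tag/sub_tag values are only examples.
 */
#if 0
static size_t
kdbg_v3_chunk_example(uint8_t *buf, const void *payload, size_t payload_size)
{
	size_t off;

	off = kdbg_write_v3_chunk_header_to_buffer(buf, V3_RAW_EVENTS, 0, payload_size);
	if (off == 0) {
		return 0;
	}
	memcpy(buf + off, payload, payload_size);

	return off + payload_size;
}
#endif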
2512
2513 int
2514 kdbg_write_v3_chunk_to_fd(uint32_t tag, uint32_t sub_tag, uint64_t length, void *payload, uint64_t payload_size, int fd)
2515 {
2516 proc_t p;
2517 struct vfs_context context;
2518 struct fileproc *fp;
2519 vnode_t vp;
2520 p = current_proc();
2521
2522 proc_fdlock(p);
2523 if ( (fp_lookup(p, fd, &fp, 1)) ) {
2524 proc_fdunlock(p);
2525 return EFAULT;
2526 }
2527
2528 context.vc_thread = current_thread();
2529 context.vc_ucred = fp->f_fglob->fg_cred;
2530
2531 if (FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_VNODE) {
2532 fp_drop(p, fd, fp, 1);
2533 proc_fdunlock(p);
2534 return EBADF;
2535 }
2536 vp = (struct vnode *) fp->f_fglob->fg_data;
2537 proc_fdunlock(p);
2538
2539 if ( (vnode_getwithref(vp)) == 0 ) {
2540 RAW_file_offset = fp->f_fglob->fg_offset;
2541
2542 kd_chunk_header_v3 chunk_header = {
2543 .tag = tag,
2544 .sub_tag = sub_tag,
2545 .length = length,
2546 };
2547
2548 int ret = kdbg_write_to_vnode((caddr_t) &chunk_header, sizeof(kd_chunk_header_v3), vp, &context, RAW_file_offset);
2549 if (!ret) {
2550 RAW_file_offset += sizeof(kd_chunk_header_v3);
2551 }
2552
2553 ret = kdbg_write_to_vnode((caddr_t) payload, (size_t) payload_size, vp, &context, RAW_file_offset);
2554 if (!ret) {
2555 RAW_file_offset += payload_size;
2556 }
2557
2558 fp->f_fglob->fg_offset = RAW_file_offset;
2559 vnode_put(vp);
2560 }
2561
2562 fp_drop(p, fd, fp, 0);
2563 return KERN_SUCCESS;
2564 }
2565
2566 user_addr_t
2567 kdbg_write_v3_event_chunk_header(user_addr_t buffer, uint32_t tag, uint64_t length, vnode_t vp, vfs_context_t ctx)
2568 {
2569 uint64_t future_chunk_timestamp = 0;
2570 length += sizeof(uint64_t);
2571
2572 if (kdbg_write_v3_chunk_header(buffer, tag, V3_EVENT_DATA_VERSION, length, vp, ctx)) {
2573 return 0;
2574 }
2575 if (buffer) {
2576 buffer += sizeof(kd_chunk_header_v3);
2577 }
2578
2579 // Check that only one of them is valid
2580 assert(!buffer ^ !vp);
2581 assert((vp == NULL) || (ctx != NULL));
2582
2583 // Write the 8-byte future_chunk_timestamp field in the payload
2584 if (buffer || vp) {
2585 if (vp) {
2586 int ret = kdbg_write_to_vnode((caddr_t)&future_chunk_timestamp, sizeof(uint64_t), vp, ctx, RAW_file_offset);
2587 if (!ret) {
2588 RAW_file_offset += (sizeof(uint64_t));
2589 }
2590 }
2591 else {
2592 if (copyout(&future_chunk_timestamp, buffer, sizeof(uint64_t))) {
2593 return 0;
2594 }
2595 }
2596 }
2597
2598 return (buffer + sizeof(uint64_t));
2599 }
2600
2601 int
2602 kdbg_write_v3_header(user_addr_t user_header, size_t *user_header_size, int fd)
2603 {
2604 int ret = KERN_SUCCESS;
2605
2606 uint8_t* cpumap = 0;
2607 uint32_t cpumap_size = 0;
2608 uint32_t thrmap_size = 0;
2609
2610 size_t bytes_needed = 0;
2611
2612 // Check that only one of them is valid
2613 assert(!user_header ^ !fd);
2614 assert(user_header_size);
2615
2616 if ( !(kd_ctrl_page.kdebug_flags & KDBG_BUFINIT) ) {
2617 ret = EINVAL;
2618 goto bail;
2619 }
2620
2621 if ( !(user_header || fd) ) {
2622 ret = EINVAL;
2623 goto bail;
2624 }
2625
2626 // Initialize the cpu map
2627 ret = kdbg_cpumap_init_internal(kd_ctrl_page.kdebug_iops, kd_ctrl_page.kdebug_cpus, &cpumap, &cpumap_size);
2628 if (ret != KERN_SUCCESS) {
2629 goto bail;
2630 }
2631
2632 // Check if a thread map is initialized
2633 if ( !kd_mapptr ) {
2634 ret = EINVAL;
2635 goto bail;
2636 }
2637 thrmap_size = kd_mapcount * sizeof(kd_threadmap);
2638
2639 mach_timebase_info_data_t timebase = {0, 0};
2640 clock_timebase_info(&timebase);
2641
2642 // Set up the header.
2643 // See the v3 header description in sys/kdebug.h for more information.
2644 kd_header_v3 header = {
2645 .tag = RAW_VERSION3,
2646 .sub_tag = V3_HEADER_VERSION,
2647 .length = (sizeof(kd_header_v3) + cpumap_size - sizeof(kd_cpumap_header)),
2648 .timebase_numer = timebase.numer,
2649 .timebase_denom = timebase.denom,
2650 .timestamp = 0, /* FIXME rdar://problem/22053009 */
2651 .walltime_secs = 0,
2652 .walltime_usecs = 0,
2653 .timezone_minuteswest = 0,
2654 .timezone_dst = 0,
2655 #if defined(__LP64__)
2656 .flags = 1,
2657 #else
2658 .flags = 0,
2659 #endif
2660 };
2661
2662 // If it's a buffer, check that we have enough space to copy out the header and the maps.
2663 if (user_header) {
2664 bytes_needed = header.length + thrmap_size + (2 * sizeof(kd_chunk_header_v3));
2665 if (*user_header_size < bytes_needed) {
2666 ret = EINVAL;
2667 goto bail;
2668 }
2669 }
2670
2671 // Start writing the header
2672 if (fd) {
2673 void *hdr_ptr = (void *)(((uintptr_t) &header) + sizeof(kd_chunk_header_v3));
2674 size_t payload_size = (sizeof(kd_header_v3) - sizeof(kd_chunk_header_v3));
2675
2676 ret = kdbg_write_v3_chunk_to_fd(RAW_VERSION3, V3_HEADER_VERSION, header.length, hdr_ptr, payload_size, fd);
2677 if (ret) {
2678 goto bail;
2679 }
2680 }
2681 else {
2682 if (copyout(&header, user_header, sizeof(kd_header_v3))) {
2683 ret = EFAULT;
2684 goto bail;
2685 }
2686 // Update the user pointer
2687 user_header += sizeof(kd_header_v3);
2688 }
2689
2690 // Write a cpu map. This is a sub-chunk of the header.
2691 cpumap = (uint8_t*)((uintptr_t) cpumap + sizeof(kd_cpumap_header));
2692 size_t payload_size = (size_t)(cpumap_size - sizeof(kd_cpumap_header));
2693 if (fd) {
2694 ret = kdbg_write_v3_chunk_to_fd(V3_CPU_MAP, V3_CPUMAP_VERSION, payload_size, (void *)cpumap, payload_size, fd);
2695 if (ret) {
2696 goto bail;
2697 }
2698 }
2699 else {
2700 ret = kdbg_write_v3_chunk_header(user_header, V3_CPU_MAP, V3_CPUMAP_VERSION, payload_size, NULL, NULL);
2701 if (ret) {
2702 goto bail;
2703 }
2704 user_header += sizeof(kd_chunk_header_v3);
2705 if (copyout(cpumap, user_header, payload_size)) {
2706 ret = EFAULT;
2707 goto bail;
2708 }
2709 // Update the user pointer
2710 user_header += payload_size;
2711 }
2712
2713 // Write a thread map
2714 if (fd) {
2715 ret = kdbg_write_v3_chunk_to_fd(V3_THREAD_MAP, V3_THRMAP_VERSION, thrmap_size, (void *)kd_mapptr, thrmap_size, fd);
2716 if (ret) {
2717 goto bail;
2718 }
2719 }
2720 else {
2721 ret = kdbg_write_v3_chunk_header(user_header, V3_THREAD_MAP, V3_THRMAP_VERSION, thrmap_size, NULL, NULL);
2722 if (ret) {
2723 goto bail;
2724 }
2725 user_header += sizeof(kd_chunk_header_v3);
2726 if (copyout(kd_mapptr, user_header, thrmap_size)) {
2727 ret = EFAULT;
2728 goto bail;
2729 }
2730 user_header += thrmap_size;
2731 }
2732
2733 if (fd) {
2734 RAW_file_written += bytes_needed;
2735 }
2736
2737 *user_header_size = bytes_needed;
2738 bail:
2739 if (cpumap) {
2740 kmem_free(kernel_map, (vm_offset_t)cpumap, cpumap_size);
2741 }
2742 return (ret);
2743 }
2744
2745 int
2746 kdbg_readcpumap(user_addr_t user_cpumap, size_t *user_cpumap_size)
2747 {
2748 uint8_t* cpumap = NULL;
2749 uint32_t cpumap_size = 0;
2750 int ret = KERN_SUCCESS;
2751
2752 if (kd_ctrl_page.kdebug_flags & KDBG_BUFINIT) {
2753 if (kdbg_cpumap_init_internal(kd_ctrl_page.kdebug_iops, kd_ctrl_page.kdebug_cpus, &cpumap, &cpumap_size) == KERN_SUCCESS) {
2754 if (user_cpumap) {
2755 size_t bytes_to_copy = (*user_cpumap_size >= cpumap_size) ? cpumap_size : *user_cpumap_size;
2756 if (copyout(cpumap, user_cpumap, (size_t)bytes_to_copy)) {
2757 ret = EFAULT;
2758 }
2759 }
2760 *user_cpumap_size = cpumap_size;
2761 kmem_free(kernel_map, (vm_offset_t)cpumap, cpumap_size);
2762 } else
2763 ret = EINVAL;
2764 } else
2765 ret = EINVAL;
2766
2767 return (ret);
2768 }
2769
2770 int
2771 kdbg_readcurthrmap(user_addr_t buffer, size_t *bufsize)
2772 {
2773 kd_threadmap *mapptr;
2774 unsigned int mapsize;
2775 unsigned int mapcount;
2776 unsigned int count = 0;
2777 int ret = 0;
2778
2779 count = *bufsize/sizeof(kd_threadmap);
2780 *bufsize = 0;
2781
2782 if ( (mapptr = kdbg_thrmap_init_internal(count, &mapsize, &mapcount)) ) {
2783 if (copyout(mapptr, buffer, mapcount * sizeof(kd_threadmap)))
2784 ret = EFAULT;
2785 else
2786 *bufsize = (mapcount * sizeof(kd_threadmap));
2787
2788 kmem_free(kernel_map, (vm_offset_t)mapptr, mapsize);
2789 } else
2790 ret = EINVAL;
2791
2792 return (ret);
2793 }
2794
2795 static int
2796 kdbg_write_v1_header(boolean_t write_thread_map, vnode_t vp, vfs_context_t ctx)
2797 {
2798 int ret = 0;
2799 RAW_header header;
2800 clock_sec_t secs;
2801 clock_usec_t usecs;
2802 char *pad_buf;
2803 uint32_t pad_size;
2804 uint32_t extra_thread_count = 0;
2805 uint32_t cpumap_size;
2806 size_t map_size = 0;
2807 size_t map_count = 0;
2808
2809 if (write_thread_map) {
2810 assert(kd_ctrl_page.kdebug_flags & KDBG_MAPINIT);
2811 map_count = kd_mapcount;
2812 map_size = map_count * sizeof(kd_threadmap);
2813 }
2814
2815 /*
2816 * Without the buffers initialized, we cannot construct a CPU map or a
2817 * thread map, and cannot write a header.
2818 */
2819 if (!(kd_ctrl_page.kdebug_flags & KDBG_BUFINIT)) {
2820 return EINVAL;
2821 }
2822
2823 /*
2824 * To write a RAW_VERSION1+ file, we must embed a cpumap in the
2825 * "padding" used to page align the events following the threadmap. If
2826 * the threadmap happens to not require enough padding, we artificially
2827 * increase its footprint until it needs enough padding.
2828 */
2829
2830 assert(vp);
2831 assert(ctx);
2832
2833 pad_size = PAGE_16KB - ((sizeof(RAW_header) + map_size) & PAGE_MASK_64);
2834 cpumap_size = sizeof(kd_cpumap_header) + kd_ctrl_page.kdebug_cpus * sizeof(kd_cpumap);
2835
2836 if (cpumap_size > pad_size) {
2837 /* If the cpu map doesn't fit in the current available pad_size,
2838 * we increase the pad_size by 16K. We do this so that the event
2839 * data is always available on a page aligned boundary for both
2840 * 4k and 16k systems. We enforce this alignment for the event
2841 * data so that we can take advantage of optimized file/disk writes.
2842 */
2843 pad_size += PAGE_16KB;
2844 }
2845
2846 /* The way we silently embed a cpumap in the "padding" is by artificially
2847 * increasing the number of thread entries. However, we also need to ensure that
2848 * the cpumap lands in the last 4K page before the event data is expected to begin.
2849 * This way the tools can read the data starting at the next page boundary on both
2850 * 4K and 16K systems, preserving compatibility with older versions of the tools.
2851 */
2852 if (pad_size > PAGE_4KB) {
2853 pad_size -= PAGE_4KB;
2854 extra_thread_count = (pad_size / sizeof(kd_threadmap)) + 1;
2855 }
2856
2857 memset(&header, 0, sizeof(header));
2858 header.version_no = RAW_VERSION1;
2859 header.thread_count = map_count + extra_thread_count;
2860
2861 clock_get_calendar_microtime(&secs, &usecs);
2862 header.TOD_secs = secs;
2863 header.TOD_usecs = usecs;
2864
2865 ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)&header, sizeof(RAW_header), RAW_file_offset,
2866 UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
2867 if (ret) {
2868 goto write_error;
2869 }
2870 RAW_file_offset += sizeof(RAW_header);
2871 RAW_file_written += sizeof(RAW_header);
2872
2873 if (write_thread_map) {
2874 ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)kd_mapptr, map_size, RAW_file_offset,
2875 UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
2876 if (ret) {
2877 goto write_error;
2878 }
2879
2880 RAW_file_offset += map_size;
2881 RAW_file_written += map_size;
2882 }
2883
2884 if (extra_thread_count) {
2885 pad_size = extra_thread_count * sizeof(kd_threadmap);
2886 pad_buf = kalloc(pad_size);
2887 if (!pad_buf) {
2888 ret = ENOMEM;
2889 goto write_error;
2890 }
2891 memset(pad_buf, 0, pad_size);
2892
2893 ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)pad_buf, pad_size, RAW_file_offset,
2894 UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
2895 kfree(pad_buf, pad_size);
2896 if (ret) {
2897 goto write_error;
2898 }
2899
2900 RAW_file_offset += pad_size;
2901 RAW_file_written += pad_size;
2902 }
2903
2904 pad_size = PAGE_SIZE - (RAW_file_offset & PAGE_MASK_64);
2905 if (pad_size) {
2906 pad_buf = (char *)kalloc(pad_size);
2907 if (!pad_buf) {
2908 ret = ENOMEM;
2909 goto write_error;
2910 }
2911 memset(pad_buf, 0, pad_size);
2912
2913 /*
2914 * embed a cpumap in the padding bytes.
2915 * older code will skip this.
2916 * newer code will know how to read it.
2917 */
2918 uint32_t temp = pad_size;
2919 if (kdbg_cpumap_init_internal(kd_ctrl_page.kdebug_iops, kd_ctrl_page.kdebug_cpus, (uint8_t**)&pad_buf, &temp) != KERN_SUCCESS) {
2920 memset(pad_buf, 0, pad_size);
2921 }
2922
2923 ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)pad_buf, pad_size, RAW_file_offset,
2924 UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
2925 kfree(pad_buf, pad_size);
2926 if (ret) {
2927 goto write_error;
2928 }
2929
2930 RAW_file_offset += pad_size;
2931 RAW_file_written += pad_size;
2932 }
2933
2934 write_error:
2935 return ret;
2936 }
2937
2938 static void
2939 kdbg_clear_thread_map(void)
2940 {
2941 lck_mtx_assert(ktrace_lock, LCK_MTX_ASSERT_OWNED);
2942
2943 if (kd_ctrl_page.kdebug_flags & KDBG_MAPINIT) {
2944 assert(kd_mapptr != NULL);
2945 kmem_free(kernel_map, (vm_offset_t)kd_mapptr, kd_mapsize);
2946 kd_mapptr = NULL;
2947 kd_mapsize = 0;
2948 kd_mapcount = 0;
2949 kd_ctrl_page.kdebug_flags &= ~KDBG_MAPINIT;
2950 }
2951 }
2952
2953 /*
2954 * Write out a version 1 header and the thread map, if it is initialized, to a
2955 * vnode. Used by KDWRITEMAP and kdbg_dump_trace_to_file.
2956 *
2957 * Returns write errors from vn_rdwr if a write fails. Returns ENODATA if the
2958 * thread map has not been initialized, but the header will still be written.
2959 * Returns ENOMEM if padding could not be allocated. Returns 0 otherwise.
2960 */
2961 static int
2962 kdbg_write_thread_map(vnode_t vp, vfs_context_t ctx)
2963 {
2964 int ret = 0;
2965 boolean_t map_initialized;
2966
2967 lck_mtx_assert(ktrace_lock, LCK_MTX_ASSERT_OWNED);
2968 assert(ctx != NULL);
2969
2970 map_initialized = (kd_ctrl_page.kdebug_flags & KDBG_MAPINIT);
2971
2972 ret = kdbg_write_v1_header(map_initialized, vp, ctx);
2973 if (ret == 0) {
2974 if (map_initialized) {
2975 kdbg_clear_thread_map();
2976 } else {
2977 ret = ENODATA;
2978 }
2979 }
2980
2981 return ret;
2982 }
2983
2984 /*
2985 * Copy out the thread map to a user space buffer. Used by KDTHRMAP.
2986 *
2987 * Returns copyout errors if the copyout fails. Returns ENODATA if the thread
2988 * map has not been initialized. Returns EINVAL if the buffer provided is not
2989 * large enough for the entire thread map. Returns 0 otherwise.
2990 */
2991 static int
2992 kdbg_copyout_thread_map(user_addr_t buffer, size_t *buffer_size)
2993 {
2994 boolean_t map_initialized;
2995 size_t map_size;
2996 int ret = 0;
2997
2998 lck_mtx_assert(ktrace_lock, LCK_MTX_ASSERT_OWNED);
2999 assert(buffer_size != NULL);
3000
3001 map_initialized = (kd_ctrl_page.kdebug_flags & KDBG_MAPINIT);
3002 if (!map_initialized) {
3003 return ENODATA;
3004 }
3005
3006 map_size = kd_mapcount * sizeof(kd_threadmap);
3007 if (*buffer_size < map_size) {
3008 return EINVAL;
3009 }
3010
3011 ret = copyout(kd_mapptr, buffer, map_size);
3012 if (ret == 0) {
3013 kdbg_clear_thread_map();
3014 }
3015
3016 return ret;
3017 }
3018
3019 int
3020 kdbg_readthrmap_v3(user_addr_t buffer, size_t buffer_size, int fd)
3021 {
3022 int ret = 0;
3023 boolean_t map_initialized;
3024 size_t map_size;
3025
3026 lck_mtx_assert(ktrace_lock, LCK_MTX_ASSERT_OWNED);
3027
3028 if ((!fd && !buffer) || (fd && buffer)) {
3029 return EINVAL;
3030 }
3031
3032 map_initialized = (kd_ctrl_page.kdebug_flags & KDBG_MAPINIT);
3033 map_size = kd_mapcount * sizeof(kd_threadmap);
3034
3035 if (map_initialized && (buffer_size >= map_size))
3036 {
3037 ret = kdbg_write_v3_header(buffer, &buffer_size, fd);
3038
3039 if (ret == 0) {
3040 kdbg_clear_thread_map();
3041 }
3042 } else {
3043 ret = EINVAL;
3044 }
3045
3046 return ret;
3047 }
3048
3049 static void
3050 kdbg_set_nkdbufs(unsigned int value)
3051 {
3052 /*
3053 * We allow a maximum buffer size of 50% of either RAM or the maximum
3054 * mapped address, whichever is smaller. 'value' is the desired number
3055 * of trace entries.
3056 */
3057 unsigned int max_entries = (sane_size / 2) / sizeof(kd_buf);
3058
3059 if (value <= max_entries) {
3060 nkdbufs = value;
3061 } else {
3062 nkdbufs = max_entries;
3063 }
3064 }
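
/*
 * Worked example (illustrative, assuming the 64-byte kd_buf layout used on
 * LP64): with 8 GB of usable memory, sane_size / 2 is 4 GB, so max_entries
 * works out to roughly 4 GB / 64 bytes = 67 million trace entries; any larger
 * request is clamped to that value.
 */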
3065
3066 /*
3067 * Block until there are `n_storage_threshold` storage units filled with
3068 * events or `timeout_ms` milliseconds have passed. If `locked_wait` is true,
3069 * `ktrace_lock` is held while waiting. This is necessary while waiting to
3070 * write events out of the buffers.
3071 *
3072 * Returns true if the threshold was reached and false otherwise.
3073 *
3074 * Called with `ktrace_lock` locked and interrupts enabled.
3075 */
3076 static boolean_t
3077 kdbg_wait(uint64_t timeout_ms, boolean_t locked_wait)
3078 {
3079 int wait_result = THREAD_AWAKENED;
3080 uint64_t abstime = 0;
3081
3082 if (timeout_ms != 0) {
3083 uint64_t ns = timeout_ms * NSEC_PER_MSEC;
3084 nanoseconds_to_absolutetime(ns, &abstime);
3085 clock_absolutetime_interval_to_deadline(abstime, &abstime);
3086 }
3087
3088 boolean_t s = ml_set_interrupts_enabled(FALSE);
3089 if (!s) {
3090 panic("kdbg_wait() called with interrupts disabled");
3091 }
3092 lck_spin_lock(kdw_spin_lock);
3093
3094 if (!locked_wait) {
3095 /* drop the mutex to allow others to access trace */
3096 lck_mtx_unlock(ktrace_lock);
3097 }
3098
3099 while (wait_result == THREAD_AWAKENED &&
3100 kd_ctrl_page.kds_inuse_count < n_storage_threshold)
3101 {
3102 kds_waiter = 1;
3103
3104 if (abstime) {
3105 wait_result = lck_spin_sleep_deadline(kdw_spin_lock, 0, &kds_waiter, THREAD_ABORTSAFE, abstime);
3106 } else {
3107 wait_result = lck_spin_sleep(kdw_spin_lock, 0, &kds_waiter, THREAD_ABORTSAFE);
3108 }
3109
3110 kds_waiter = 0;
3111 }
3112
3113 /* check the count under the spinlock */
3114 boolean_t threshold_exceeded = (kd_ctrl_page.kds_inuse_count >= n_storage_threshold);
3115
3116 lck_spin_unlock(kdw_spin_lock);
3117 ml_set_interrupts_enabled(s);
3118
3119 if (!locked_wait) {
3120 /* pick the mutex back up again */
3121 lck_mtx_lock(ktrace_lock);
3122 }
3123
3124 /* write out whether we've exceeded the threshold */
3125 return threshold_exceeded;
3126 }
3127
3128 /*
3129 * Wakeup a thread waiting using `kdbg_wait` if there are at least
3130 * `n_storage_threshold` storage units in use.
3131 */
3132 static void
3133 kdbg_wakeup(void)
3134 {
3135 boolean_t need_kds_wakeup = FALSE;
3136
3137 /*
3138 * Try to take the lock here to synchronize with the waiter entering
3139 * the blocked state. Use the try mode to prevent deadlocks caused by
3140 * re-entering this routine due to various trace points triggered in the
3141 * lck_spin_sleep_xxxx routines used to actually enter one of our 2 wait
3142 * conditions. No problem if we fail, there will be lots of additional
3143 * events coming in that will eventually succeed in grabbing this lock.
3144 */
3145 boolean_t s = ml_set_interrupts_enabled(FALSE);
3146
3147 if (lck_spin_try_lock(kdw_spin_lock)) {
3148 if (kds_waiter &&
3149 (kd_ctrl_page.kds_inuse_count >= n_storage_threshold))
3150 {
3151 kds_waiter = 0;
3152 need_kds_wakeup = TRUE;
3153 }
3154 lck_spin_unlock(kdw_spin_lock);
3155 }
3156
3157 ml_set_interrupts_enabled(s);
3158
3159 if (need_kds_wakeup == TRUE) {
3160 wakeup(&kds_waiter);
3161 }
3162 }
3163
3164 int
3165 kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep)
3166 {
3167 int ret = 0;
3168 size_t size = *sizep;
3169 unsigned int value = 0;
3170 kd_regtype kd_Reg;
3171 kbufinfo_t kd_bufinfo;
3172 proc_t p;
3173
3174 if (name[0] == KERN_KDWRITETR ||
3175 name[0] == KERN_KDWRITETR_V3 ||
3176 name[0] == KERN_KDWRITEMAP ||
3177 name[0] == KERN_KDWRITEMAP_V3 ||
3178 name[0] == KERN_KDEFLAGS ||
3179 name[0] == KERN_KDDFLAGS ||
3180 name[0] == KERN_KDENABLE ||
3181 name[0] == KERN_KDSETBUF)
3182 {
3183 if (namelen < 2) {
3184 return EINVAL;
3185 }
3186 value = name[1];
3187 }
3188
3189 kdbg_lock_init();
3190 assert(kd_ctrl_page.kdebug_flags & KDBG_LOCKINIT);
3191
3192 lck_mtx_lock(ktrace_lock);
3193
3194 /*
3195 * Some requests only require "read" access to kdebug trace. Regardless,
3196 * tell ktrace that a configuration or read is occurring (and see if it's
3197 * allowed).
3198 */
3199 if (name[0] != KERN_KDGETBUF &&
3200 name[0] != KERN_KDGETREG &&
3201 name[0] != KERN_KDREADCURTHRMAP)
3202 {
3203 if ((ret = ktrace_configure(KTRACE_KDEBUG))) {
3204 goto out;
3205 }
3206 } else {
3207 if ((ret = ktrace_read_check())) {
3208 goto out;
3209 }
3210 }
3211
3212 switch(name[0]) {
3213 case KERN_KDGETBUF:
3214 if (size < sizeof(kd_bufinfo.nkdbufs)) {
3215 /*
3216 * There is not enough room to return even
3217 * the first element of the info structure.
3218 */
3219 ret = EINVAL;
3220 break;
3221 }
3222
3223 memset(&kd_bufinfo, 0, sizeof(kd_bufinfo));
3224
3225 kd_bufinfo.nkdbufs = nkdbufs;
3226 kd_bufinfo.nkdthreads = kd_mapcount;
3227
3228 if ( (kd_ctrl_page.kdebug_slowcheck & SLOW_NOLOG) )
3229 kd_bufinfo.nolog = 1;
3230 else
3231 kd_bufinfo.nolog = 0;
3232
3233 kd_bufinfo.flags = kd_ctrl_page.kdebug_flags;
3234 #if defined(__LP64__)
3235 kd_bufinfo.flags |= KDBG_LP64;
3236 #endif
3237 {
3238 int pid = ktrace_get_owning_pid();
3239 kd_bufinfo.bufid = (pid == 0 ? -1 : pid);
3240 }
3241
3242 if (size >= sizeof(kd_bufinfo)) {
3243 /*
3244 * Provide all the info we have
3245 */
3246 if (copyout(&kd_bufinfo, where, sizeof(kd_bufinfo)))
3247 ret = EINVAL;
3248 } else {
3249 /*
3250 * For backwards compatibility, only provide
3251 * as much info as there is room for.
3252 */
3253 if (copyout(&kd_bufinfo, where, size))
3254 ret = EINVAL;
3255 }
3256 break;
3257
3258 case KERN_KDREADCURTHRMAP:
3259 ret = kdbg_readcurthrmap(where, sizep);
3260 break;
3261
3262 case KERN_KDEFLAGS:
3263 value &= KDBG_USERFLAGS;
3264 kd_ctrl_page.kdebug_flags |= value;
3265 break;
3266
3267 case KERN_KDDFLAGS:
3268 value &= KDBG_USERFLAGS;
3269 kd_ctrl_page.kdebug_flags &= ~value;
3270 break;
3271
3272 case KERN_KDENABLE:
3273 /*
3274 * Enable the tracing mechanism. Two types:
3275 * KDEBUG_TRACE is the standard one,
3276 * and KDEBUG_PPT is a carefully
3277 * chosen subset to avoid performance impact.
3278 */
3279 if (value) {
3280 /*
3281 * enable only if buffer is initialized
3282 */
3283 if (!(kd_ctrl_page.kdebug_flags & KDBG_BUFINIT) ||
3284 !(value == KDEBUG_ENABLE_TRACE || value == KDEBUG_ENABLE_PPT)) {
3285 ret = EINVAL;
3286 break;
3287 }
3288 kdbg_thrmap_init();
3289
3290 kdbg_set_tracing_enabled(TRUE, value);
3291 }
3292 else
3293 {
3294 if (!kdebug_enable) {
3295 break;
3296 }
3297
3298 kernel_debug_disable();
3299 }
3300 break;
3301
3302 case KERN_KDSETBUF:
3303 kdbg_set_nkdbufs(value);
3304 break;
3305
3306 case KERN_KDSETUP:
3307 ret = kdbg_reinit(FALSE);
3308 break;
3309
3310 case KERN_KDREMOVE:
3311 ktrace_reset(KTRACE_KDEBUG);
3312 break;
3313
3314 case KERN_KDSETREG:
3315 if(size < sizeof(kd_regtype)) {
3316 ret = EINVAL;
3317 break;
3318 }
3319 if (copyin(where, &kd_Reg, sizeof(kd_regtype))) {
3320 ret = EINVAL;
3321 break;
3322 }
3323
3324 ret = kdbg_setreg(&kd_Reg);
3325 break;
3326
3327 case KERN_KDGETREG:
3328 ret = EINVAL;
3329 break;
3330
3331 case KERN_KDREADTR:
3332 ret = kdbg_read(where, sizep, NULL, NULL, RAW_VERSION1);
3333 break;
3334
3335 case KERN_KDWRITETR:
3336 case KERN_KDWRITETR_V3:
3337 case KERN_KDWRITEMAP:
3338 case KERN_KDWRITEMAP_V3:
3339 {
3340 struct vfs_context context;
3341 struct fileproc *fp;
3342 size_t number;
3343 vnode_t vp;
3344 int fd;
3345
3346 if (name[0] == KERN_KDWRITETR || name[0] == KERN_KDWRITETR_V3) {
3347 (void)kdbg_wait(size, TRUE);
3348 }
3349 p = current_proc();
3350 fd = value;
3351
3352 proc_fdlock(p);
3353 if ( (ret = fp_lookup(p, fd, &fp, 1)) ) {
3354 proc_fdunlock(p);
3355 break;
3356 }
3357 context.vc_thread = current_thread();
3358 context.vc_ucred = fp->f_fglob->fg_cred;
3359
3360 if (FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_VNODE) {
3361 fp_drop(p, fd, fp, 1);
3362 proc_fdunlock(p);
3363
3364 ret = EBADF;
3365 break;
3366 }
3367 vp = (struct vnode *)fp->f_fglob->fg_data;
3368 proc_fdunlock(p);
3369
3370 if ((ret = vnode_getwithref(vp)) == 0) {
3371 RAW_file_offset = fp->f_fglob->fg_offset;
3372 if (name[0] == KERN_KDWRITETR || name[0] == KERN_KDWRITETR_V3) {
3373 number = nkdbufs * sizeof(kd_buf);
3374
3375 KERNEL_DEBUG_CONSTANT(TRACE_WRITING_EVENTS | DBG_FUNC_START, 0, 0, 0, 0, 0);
3376 if (name[0] == KERN_KDWRITETR_V3)
3377 ret = kdbg_read(0, &number, vp, &context, RAW_VERSION3);
3378 else
3379 ret = kdbg_read(0, &number, vp, &context, RAW_VERSION1);
3380 KERNEL_DEBUG_CONSTANT(TRACE_WRITING_EVENTS | DBG_FUNC_END, number, 0, 0, 0, 0);
3381
3382 *sizep = number;
3383 } else {
3384 number = kd_mapcount * sizeof(kd_threadmap);
3385 if (name[0] == KERN_KDWRITEMAP_V3) {
3386 ret = kdbg_readthrmap_v3(0, number, fd);
3387 } else {
3388 ret = kdbg_write_thread_map(vp, &context);
3389 }
3390 }
3391 fp->f_fglob->fg_offset = RAW_file_offset;
3392 vnode_put(vp);
3393 }
3394 fp_drop(p, fd, fp, 0);
3395
3396 break;
3397 }
3398 case KERN_KDBUFWAIT:
3399 *sizep = kdbg_wait(size, FALSE);
3400 break;
3401
3402 case KERN_KDPIDTR:
3403 if (size < sizeof(kd_regtype)) {
3404 ret = EINVAL;
3405 break;
3406 }
3407 if (copyin(where, &kd_Reg, sizeof(kd_regtype))) {
3408 ret = EINVAL;
3409 break;
3410 }
3411
3412 ret = kdbg_setpid(&kd_Reg);
3413 break;
3414
3415 case KERN_KDPIDEX:
3416 if (size < sizeof(kd_regtype)) {
3417 ret = EINVAL;
3418 break;
3419 }
3420 if (copyin(where, &kd_Reg, sizeof(kd_regtype))) {
3421 ret = EINVAL;
3422 break;
3423 }
3424
3425 ret = kdbg_setpidex(&kd_Reg);
3426 break;
3427
3428 case KERN_KDCPUMAP:
3429 ret = kdbg_readcpumap(where, sizep);
3430 break;
3431
3432 case KERN_KDTHRMAP:
3433 ret = kdbg_copyout_thread_map(where, sizep);
3434 break;
3435
3436 case KERN_KDSET_TYPEFILTER: {
3437 ret = kdbg_copyin_typefilter(where, size);
3438 break;
3439 }
3440
3441 case KERN_KDTEST:
3442 ret = kdbg_test();
3443 break;
3444
3445 default:
3446 ret = EINVAL;
3447 break;
3448 }
3449 out:
3450 lck_mtx_unlock(ktrace_lock);
3451
3452 return(ret);
3453 }
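
/*
 * Illustrative userspace sketch (not part of this file): the classic
 * configure/enable/read sequence that ends up in kdbg_control() above. It
 * assumes the usual { CTL_KERN, KERN_KDEBUG, <op>[, <value>] } MIB layout
 * used by tools such as fs_usage; error handling is omitted for brevity.
 */
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/kdebug.h>
#include <stdlib.h>

static int
kdebug_capture_example(unsigned int n_events)
{
	int mib[4] = { CTL_KERN, KERN_KDEBUG, 0, 0 };
	size_t needed = 0;
	kd_buf *events;

	/* KERN_KDSETBUF: ask for n_events trace entries (the kernel may clamp). */
	mib[2] = KERN_KDSETBUF;
	mib[3] = (int)n_events;
	sysctl(mib, 4, NULL, &needed, NULL, 0);

	/* KERN_KDSETUP: allocate the kernel buffers (kdbg_reinit()). */
	mib[2] = KERN_KDSETUP;
	sysctl(mib, 3, NULL, &needed, NULL, 0);

	/* KERN_KDENABLE with value 1 (KDEBUG_ENABLE_TRACE): start tracing. */
	mib[2] = KERN_KDENABLE;
	mib[3] = 1;
	sysctl(mib, 4, NULL, &needed, NULL, 0);

	/* ... let events accumulate ... */

	/* KERN_KDENABLE with value 0: stop tracing. */
	mib[3] = 0;
	sysctl(mib, 4, NULL, &needed, NULL, 0);

	/*
	 * KERN_KDREADTR: copy events out. 'needed' is the buffer size in bytes
	 * on input and the number of events returned on output.
	 */
	events = malloc(n_events * sizeof(kd_buf));
	needed = n_events * sizeof(kd_buf);
	mib[2] = KERN_KDREADTR;
	sysctl(mib, 3, events, &needed, NULL, 0);

	free(events);
	return 0;
}
#endif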
3454
3455
3456 /*
3457 * This code can run for the most part concurrently with kernel_debug_internal()...
3458 * 'release_storage_unit' will take the kds_spin_lock which may cause us to briefly
3459 * synchronize with the recording side of this puzzle... otherwise, we are able to
3460 * move through the lists w/o use of any locks
3461 */
3462 int
3463 kdbg_read(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx, uint32_t file_version)
3464 {
3465 unsigned int count;
3466 unsigned int cpu, min_cpu;
3467 uint64_t barrier_min = 0, barrier_max = 0, t, earliest_time;
3468 int error = 0;
3469 kd_buf *tempbuf;
3470 uint32_t rcursor;
3471 kd_buf lostevent;
3472 union kds_ptr kdsp;
3473 struct kd_storage *kdsp_actual;
3474 struct kd_bufinfo *kdbp;
3475 struct kd_bufinfo *min_kdbp;
3476 uint32_t tempbuf_count;
3477 uint32_t tempbuf_number;
3478 uint32_t old_kdebug_flags;
3479 uint32_t old_kdebug_slowcheck;
3480 boolean_t lostevents = FALSE;
3481 boolean_t out_of_events = FALSE;
3482 boolean_t wrapped = FALSE;
3483
3484 assert(number);
3485 count = *number/sizeof(kd_buf);
3486 *number = 0;
3487
3488 if (count == 0 || !(kd_ctrl_page.kdebug_flags & KDBG_BUFINIT) || kdcopybuf == 0)
3489 return EINVAL;
3490
3491 thread_set_eager_preempt(current_thread());
3492
3493 memset(&lostevent, 0, sizeof(lostevent));
3494 lostevent.debugid = TRACE_LOST_EVENTS;
3495
3496 /*
3497 * Capture the current time. Only sort events that have occurred
3498 * before now. Since the IOPs are being flushed here, it is possible
3499 * that events occur on the AP while running live tracing. If we are
3500 * disabled, no new events should occur on the AP.
3501 */
3502 if (kd_ctrl_page.enabled) {
3503 barrier_max = mach_absolute_time() & KDBG_TIMESTAMP_MASK;
3504 }
3505
3506 /*
3507 * Request each IOP to provide us with up to date entries before merging
3508 * buffers together.
3509 */
3510 kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_SYNC_FLUSH, NULL);
3511
3512 /*
3513 * Disable wrap so storage units cannot be stolen out from underneath us
3514 * while merging events.
3515 *
3516 * Because we hold ktrace_lock, no other control threads can be playing
3517 * with kdebug_flags. The code that emits new events could be running,
3518 * but it grabs kds_spin_lock if it needs to acquire a new storage
3519 * chunk, which is where it examines kdebug_flags. If it is adding to
3520 * the same chunk we're reading from, check for that below.
3521 */
3522 wrapped = disable_wrap(&old_kdebug_slowcheck, &old_kdebug_flags);
3523
3524 if (count > nkdbufs)
3525 count = nkdbufs;
3526
3527 if ((tempbuf_count = count) > KDCOPYBUF_COUNT) {
3528 tempbuf_count = KDCOPYBUF_COUNT;
3529 }
3530
3531 /*
3532 * If the buffers have wrapped, capture the earliest time where there
3533 * are events for all CPUs and do not emit additional lost events for
3534 * oldest storage units.
3535 */
3536 if (wrapped) {
3537 barrier_min = kd_ctrl_page.oldest_time;
3538 kd_ctrl_page.kdebug_flags &= ~KDBG_WRAPPED;
3539 kd_ctrl_page.oldest_time = 0;
3540
3541 for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_ctrl_page.kdebug_cpus; cpu++, kdbp++) {
3542 if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
3543 continue;
3544 }
3545 kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
3546 kdsp_actual->kds_lostevents = FALSE;
3547 }
3548 }
3549
3550 while (count) {
3551 tempbuf = kdcopybuf;
3552 tempbuf_number = 0;
3553
3554 if (wrapped) {
3555 /* Trace a single lost events event for wrapping. */
3556 kdbg_set_timestamp_and_cpu(&lostevent, barrier_min, 0);
3557 *tempbuf = lostevent;
3558 wrapped = FALSE;
3559 goto nextevent;
3560 }
3561
3562 /* While space left in merged events scratch buffer. */
3563 while (tempbuf_count) {
3564 earliest_time = UINT64_MAX;
3565 min_kdbp = NULL;
3566 min_cpu = 0;
3567
3568 /* Check each CPU's buffers. */
3569 for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_ctrl_page.kdebug_cpus; cpu++, kdbp++) {
3570 /* Skip CPUs without data. */
3571 if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
3572 next_cpu:
3573 continue;
3574 }
3575 /* Debugging aid: maintain a copy of the "kdsp"
3576 * index.
3577 */
3578 volatile union kds_ptr kdsp_shadow;
3579
3580 kdsp_shadow = kdsp;
3581
3582 /* From CPU data to buffer header to buffer. */
3583 kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
3584
3585 volatile struct kd_storage *kdsp_actual_shadow;
3586
3587 kdsp_actual_shadow = kdsp_actual;
3588
3589 /* Skip buffer if there are no events left. */
3590 rcursor = kdsp_actual->kds_readlast;
3591
3592 if (rcursor == kdsp_actual->kds_bufindx) {
3593 continue;
3594 }
3595
3596 t = kdbg_get_timestamp(&kdsp_actual->kds_records[rcursor]);
3597
3598 /* Ignore events that have aged out due to wrapping. */
3599 while (t < barrier_min) {
3600 rcursor = ++kdsp_actual->kds_readlast;
3601
3602 if (rcursor >= EVENTS_PER_STORAGE_UNIT) {
3603 release_storage_unit(cpu, kdsp.raw);
3604
3605 if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
3606 goto next_cpu;
3607 }
3608 kdsp_shadow = kdsp;
3609 kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
3610 kdsp_actual_shadow = kdsp_actual;
3611 rcursor = kdsp_actual->kds_readlast;
3612 }
3613
3614 t = kdbg_get_timestamp(&kdsp_actual->kds_records[rcursor]);
3615 }
3616
3617 if ((t > barrier_max) && (barrier_max > 0)) {
3618 /*
3619 * Need to flush IOPs again before we
3620 * can sort any more data from the
3621 * buffers.
3622 */
3623 out_of_events = TRUE;
3624 break;
3625 }
3626 if (t < kdsp_actual->kds_timestamp) {
3627 /*
3628 * Indicates we've not yet completed filling
3629 * in this event.
3630 * This should only occur when we're looking
3631 * at the buffer that the record head is using;
3632 * we'll pick these events up on the next
3633 * call to kdbg_read.
3634 * We bail at this point so that we don't
3635 * get an out-of-order timestream by continuing
3636 * to read events from the other CPUs' timestreams.
3637 */
3638 out_of_events = TRUE;
3639 break;
3640 }
3641 if (t < earliest_time) {
3642 earliest_time = t;
3643 min_kdbp = kdbp;
3644 min_cpu = cpu;
3645 }
3646 }
3647 if (min_kdbp == NULL || out_of_events == TRUE) {
3648 /*
3649 * all buffers ran empty
3650 */
3651 out_of_events = TRUE;
3652 break;
3653 }
3654
3655 kdsp = min_kdbp->kd_list_head;
3656 kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
3657
3658 /* Copy earliest event into merged events scratch buffer. */
3659 *tempbuf = kdsp_actual->kds_records[kdsp_actual->kds_readlast++];
3660
3661 if (kdsp_actual->kds_readlast == EVENTS_PER_STORAGE_UNIT)
3662 release_storage_unit(min_cpu, kdsp.raw);
3663
3664 /*
3665 * Watch for out of order timestamps
3666 */
3667 if (earliest_time < min_kdbp->kd_prev_timebase) {
3668 /*
3669 * if so, use the previous timestamp + 1 cycle
3670 */
3671 min_kdbp->kd_prev_timebase++;
3672 kdbg_set_timestamp_and_cpu(tempbuf, min_kdbp->kd_prev_timebase, kdbg_get_cpu(tempbuf));
3673 } else
3674 min_kdbp->kd_prev_timebase = earliest_time;
3675 nextevent:
3676 tempbuf_count--;
3677 tempbuf_number++;
3678 tempbuf++;
3679
3680 if ((RAW_file_written += sizeof(kd_buf)) >= RAW_FLUSH_SIZE)
3681 break;
3682 }
3683 if (tempbuf_number) {
3684 if (file_version == RAW_VERSION3) {
3685 if ( !(kdbg_write_v3_event_chunk_header(buffer, V3_RAW_EVENTS, (tempbuf_number * sizeof(kd_buf)), vp, ctx))) {
3686 error = EFAULT;
3687 goto check_error;
3688 }
3689 if (buffer)
3690 buffer += (sizeof(kd_chunk_header_v3) + sizeof(uint64_t));
3691
3692 assert(count >= (sizeof(kd_chunk_header_v3) + sizeof(uint64_t)));
3693 count -= (sizeof(kd_chunk_header_v3) + sizeof(uint64_t));
3694 *number += (sizeof(kd_chunk_header_v3) + sizeof(uint64_t));
3695 }
3696 if (vp) {
3697 size_t write_size = tempbuf_number * sizeof(kd_buf);
3698 error = kdbg_write_to_vnode((caddr_t)kdcopybuf, write_size, vp, ctx, RAW_file_offset);
3699 if (!error)
3700 RAW_file_offset += write_size;
3701
3702 if (RAW_file_written >= RAW_FLUSH_SIZE) {
3703 error = VNOP_FSYNC(vp, MNT_NOWAIT, ctx);
3704
3705 RAW_file_written = 0;
3706 }
3707 } else {
3708 error = copyout(kdcopybuf, buffer, tempbuf_number * sizeof(kd_buf));
3709 buffer += (tempbuf_number * sizeof(kd_buf));
3710 }
3711 check_error:
3712 if (error) {
3713 *number = 0;
3714 error = EINVAL;
3715 break;
3716 }
3717 count -= tempbuf_number;
3718 *number += tempbuf_number;
3719 }
3720 if (out_of_events == TRUE)
3721 /*
3722 * all trace buffers are empty
3723 */
3724 break;
3725
3726 if ((tempbuf_count = count) > KDCOPYBUF_COUNT)
3727 tempbuf_count = KDCOPYBUF_COUNT;
3728 }
3729 if ( !(old_kdebug_flags & KDBG_NOWRAP)) {
3730 enable_wrap(old_kdebug_slowcheck, lostevents);
3731 }
3732 thread_clear_eager_preempt(current_thread());
3733 return (error);
3734 }
3735
3736 static int
3737 kdbg_test(void)
3738 {
3739 #define KDEBUG_TEST_CODE(code) BSDDBG_CODE(DBG_BSD_KDEBUG_TEST, (code))
3740 int code = 0;
3741
3742 KDBG(KDEBUG_TEST_CODE(code)); code++;
3743 KDBG(KDEBUG_TEST_CODE(code), 1); code++;
3744 KDBG(KDEBUG_TEST_CODE(code), 1, 2); code++;
3745 KDBG(KDEBUG_TEST_CODE(code), 1, 2, 3); code++;
3746 KDBG(KDEBUG_TEST_CODE(code), 1, 2, 3, 4); code++;
3747
3748 KDBG_RELEASE(KDEBUG_TEST_CODE(code)); code++;
3749 KDBG_RELEASE(KDEBUG_TEST_CODE(code), 1); code++;
3750 KDBG_RELEASE(KDEBUG_TEST_CODE(code), 1, 2); code++;
3751 KDBG_RELEASE(KDEBUG_TEST_CODE(code), 1, 2, 3); code++;
3752 KDBG_RELEASE(KDEBUG_TEST_CODE(code), 1, 2, 3, 4); code++;
3753
3754 KDBG_FILTERED(KDEBUG_TEST_CODE(code)); code++;
3755 KDBG_FILTERED(KDEBUG_TEST_CODE(code), 1); code++;
3756 KDBG_FILTERED(KDEBUG_TEST_CODE(code), 1, 2); code++;
3757 KDBG_FILTERED(KDEBUG_TEST_CODE(code), 1, 2, 3); code++;
3758 KDBG_FILTERED(KDEBUG_TEST_CODE(code), 1, 2, 3, 4); code++;
3759
3760 KDBG_DEBUG(KDEBUG_TEST_CODE(code)); code++;
3761 KDBG_DEBUG(KDEBUG_TEST_CODE(code), 1); code++;
3762 KDBG_DEBUG(KDEBUG_TEST_CODE(code), 1, 2); code++;
3763 KDBG_DEBUG(KDEBUG_TEST_CODE(code), 1, 2, 3); code++;
3764 KDBG_DEBUG(KDEBUG_TEST_CODE(code), 1, 2, 3, 4); code++;
3765
3766 return 0;
3767 #undef KDEBUG_TEST_CODE
3768 }
3769
3770 void
3771 kdebug_boot_trace(unsigned int n_events, char *filter_desc)
3772 {
3773 assert(filter_desc != NULL);
3774
3775 #if (defined(__i386__) || defined(__x86_64__))
3776 /* only trace MACH events when outputting kdebug to serial */
3777 if (kdebug_serial) {
3778 n_events = 1;
3779 if (filter_desc[0] == '\0') {
3780 filter_desc[0] = 'C';
3781 filter_desc[1] = '1';
3782 filter_desc[2] = '\0';
3783 }
3784 }
3785 #endif
3786
3787 if (log_leaks && n_events == 0) {
3788 n_events = 200000;
3789 }
3790
3791 kdebug_trace_start(n_events, filter_desc, FALSE);
3792 }
3793
3794 static void
3795 kdbg_set_typefilter_string(const char *filter_desc)
3796 {
3797 char *end = NULL;
3798
3799 lck_mtx_assert(ktrace_lock, LCK_MTX_ASSERT_OWNED);
3800
3801 assert(filter_desc != NULL);
3802
3803 typefilter_reject_all(kdbg_typefilter);
3804 typefilter_allow_class(kdbg_typefilter, DBG_TRACE);
3805
3806 /* if the filter description starts with a number, assume it's a csc */
3807 if (filter_desc[0] >= '0' && filter_desc[0] <= '9'){
3808 unsigned long csc = strtoul(filter_desc, &end, 0);
3809 if (filter_desc != end && csc <= KDBG_CSC_MAX) {
3810 typefilter_allow_csc(kdbg_typefilter, csc);
3811 }
3812 return;
3813 }
3814
3815 while (filter_desc[0] != '\0') {
3816 unsigned long allow_value;
3817
3818 char filter_type = filter_desc[0];
3819 if (filter_type != 'C' && filter_type != 'S') {
3820 return;
3821 }
3822 filter_desc++;
3823
3824 allow_value = strtoul(filter_desc, &end, 0);
3825 if (filter_desc == end) {
3826 /* cannot parse as integer */
3827 return;
3828 }
3829
3830 switch (filter_type) {
3831 case 'C':
3832 if (allow_value <= KDBG_CLASS_MAX) {
3833 typefilter_allow_class(kdbg_typefilter, allow_value);
3834 } else {
3835 /* illegal class */
3836 return;
3837 }
3838 break;
3839 case 'S':
3840 if (allow_value <= KDBG_CSC_MAX) {
3841 typefilter_allow_csc(kdbg_typefilter, allow_value);
3842 } else {
3843 /* illegal class subclass */
3844 return;
3845 }
3846 break;
3847 default:
3848 return;
3849 }
3850
3851 /* advance to next filter entry */
3852 filter_desc = end;
3853 if (filter_desc[0] == ',') {
3854 filter_desc++;
3855 }
3856 }
3857 }
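
/*
 * Examples of filter strings accepted by the parser above (illustrative):
 * "C1" keeps everything in class 1 (DBG_MACH), "C1,C3" keeps classes 1 and 3,
 * "S0x0107" keeps the single class/subclass pair 0x0107, and a bare leading
 * number such as "0x0107" is treated as a csc. DBG_TRACE is always allowed
 * regardless of the string supplied.
 */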
3858
3859 /*
3860 * This function is meant to be called from the bootstrap thread or coming out
3861 * of acpi_idle_kernel.
3862 */
3863 void
3864 kdebug_trace_start(unsigned int n_events, const char *filter_desc,
3865 boolean_t need_map)
3866 {
3867 uint32_t old1, old2;
3868
3869 if (!n_events) {
3870 return;
3871 }
3872
3873 lck_mtx_lock(ktrace_lock);
3874
3875 kdbg_lock_init();
3876
3877 ktrace_kernel_configure(KTRACE_KDEBUG);
3878
3879 kdbg_set_nkdbufs(n_events);
3880
3881 kernel_debug_string_early("start_kern_tracing");
3882
3883 if (kdbg_reinit(TRUE)) {
3884 printf("error from kdbg_reinit, kernel tracing not started\n");
3885 goto out;
3886 }
3887
3888 /*
3889 * Wrapping is disabled because boot and wake tracing is interested in
3890 * the earliest events, at the expense of later ones.
3891 */
3892 (void)disable_wrap(&old1, &old2);
3893
3894 if (filter_desc && filter_desc[0] != '\0') {
3895 if (kdbg_initialize_typefilter(NULL) == KERN_SUCCESS) {
3896 kdbg_set_typefilter_string(filter_desc);
3897 kdbg_enable_typefilter();
3898 }
3899 }
3900
3901 /*
3902 * Hold off interrupts between getting a thread map and enabling trace
3903 * and until the early traces are recorded.
3904 */
3905 boolean_t s = ml_set_interrupts_enabled(FALSE);
3906
3907 if (need_map == TRUE) {
3908 kdbg_thrmap_init();
3909 }
3910
3911 kdbg_set_tracing_enabled(TRUE, kdebug_serial ?
3912 (KDEBUG_ENABLE_TRACE | KDEBUG_ENABLE_SERIAL) :
3913 KDEBUG_ENABLE_TRACE);
3914
3915 /*
3916 * Transfer all very early events from the static buffer into the real
3917 * buffers.
3918 */
3919 kernel_debug_early_end();
3920
3921 ml_set_interrupts_enabled(s);
3922
3923 printf("kernel tracing started with %u events\n", n_events);
3924
3925 #if KDEBUG_MOJO_TRACE
3926 if (kdebug_serial) {
3927 printf("serial output enabled with %lu named events\n",
3928 sizeof(kd_events)/sizeof(kd_event_t));
3929 }
3930 #endif
3931
3932 out:
3933 lck_mtx_unlock(ktrace_lock);
3934 }
3935
3936 void
3937 kdbg_dump_trace_to_file(const char *filename)
3938 {
3939 vfs_context_t ctx;
3940 vnode_t vp;
3941 size_t write_size;
3942
3943 lck_mtx_lock(ktrace_lock);
3944
3945 if (!(kdebug_enable & KDEBUG_ENABLE_TRACE)) {
3946 goto out;
3947 }
3948
3949 if (ktrace_get_owning_pid() != 0) {
3950 /*
3951 * Another process owns ktrace and is still active, disable tracing to
3952 * capture whatever was being recorded.
3953 */
3954 kdebug_enable = 0;
3955 kd_ctrl_page.enabled = 0;
3956 commpage_update_kdebug_state();
3957 goto out;
3958 }
3959
3960 KERNEL_DEBUG_CONSTANT(TRACE_PANIC | DBG_FUNC_NONE, 0, 0, 0, 0, 0);
3961
3962 kdebug_enable = 0;
3963 kd_ctrl_page.enabled = 0;
3964 commpage_update_kdebug_state();
3965
3966 ctx = vfs_context_kernel();
3967
3968 if (vnode_open(filename, (O_CREAT | FWRITE | O_NOFOLLOW), 0600, 0, &vp, ctx)) {
3969 goto out;
3970 }
3971
3972 kdbg_write_thread_map(vp, ctx);
3973
3974 write_size = nkdbufs * sizeof(kd_buf);
3975 kdbg_read(0, &write_size, vp, ctx, RAW_VERSION1);
3976
3977 vnode_close(vp, FWRITE, ctx);
3978 sync(current_proc(), (void *)NULL, (int *)NULL);
3979
3980 out:
3981 lck_mtx_unlock(ktrace_lock);
3982 }
3983
3984 /* Helper function for filling in the BSD name for an address space
3985 * Defined here because the machine bindings know only Mach threads
3986 * and nothing about BSD processes.
3987 *
3988 * FIXME: need to grab a lock during this?
3989 */
3990 void kdbg_get_task_name(char* name_buf, int len, task_t task)
3991 {
3992 proc_t proc;
3993
3994 /* Note: we can't use thread->task (and functions that rely on it) here
3995 * because it hasn't been initialized yet when this function is called.
3996 * We use the explicitly-passed task parameter instead.
3997 */
3998 proc = get_bsdtask_info(task);
3999 if (proc != PROC_NULL)
4000 snprintf(name_buf, len, "%s/%d", proc->p_comm, proc->p_pid);
4001 else
4002 snprintf(name_buf, len, "%p [!bsd]", task);
4003 }
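
/*
* Illustrative usage sketch (not part of this file): callers pass a small
* buffer and get back either "<p_comm>/<pid>" or "<task pointer> [!bsd]"
* when the task has no BSD proc attached.
*
*	char name[64];
*	kdbg_get_task_name(name, sizeof(name), task);
*/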
4004
4005 #if KDEBUG_MOJO_TRACE
4006 static kd_event_t *
4007 binary_search(uint32_t id)
4008 {
4009 int low, high, mid;
4010
4011 low = 0;
4012 high = sizeof(kd_events)/sizeof(kd_event_t) - 1;
4013
4014 while (TRUE)
4015 {
4016 mid = (low + high) / 2;
4017
4018 if (low > high)
4019 return NULL; /* failed */
4020 else if (low + 1 >= high) {
4021 /* We have a match */
4022 if (kd_events[high].id == id)
4023 return &kd_events[high];
4024 else if (kd_events[low].id == id)
4025 return &kd_events[low];
4026 else
4027 return NULL; /* search failed */
4028 }
4029 else if (id < kd_events[mid].id)
4030 high = mid;
4031 else
4032 low = mid;
4033 }
4034 }
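
/*
* Note: the search above assumes kd_events[] is sorted by ascending id.
* The loop narrows [low, high] until the two indices are adjacent and then
* checks both endpoints, so it terminates even when the id is absent.
*/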
4035
4036 /*
4037 * Look up event id to get name string.
4038 * Using a per-cpu cache of a single entry
4039 * before resorting to a binary search of the full table.
4040 */
4041 #define NCACHE 1
4042 static kd_event_t *last_hit[MAX_CPUS];
4043 static kd_event_t *
4044 event_lookup_cache(uint32_t cpu, uint32_t id)
4045 {
4046 if (last_hit[cpu] == NULL || last_hit[cpu]->id != id)
4047 last_hit[cpu] = binary_search(id);
4048 return last_hit[cpu];
4049 }
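
/*
* The cache above holds a single entry per cpu (NCACHE == 1): a repeated
* lookup of the same id on a cpu skips the binary search, while a miss or
* a stale entry simply falls back to binary_search().
*/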
4050
4051 static uint64_t kd_last_timestamp;
4052
4053 static void
4054 kdebug_serial_print(
4055 uint32_t cpunum,
4056 uint32_t debugid,
4057 uint64_t timestamp,
4058 uintptr_t arg1,
4059 uintptr_t arg2,
4060 uintptr_t arg3,
4061 uintptr_t arg4,
4062 uintptr_t threadid
4063 )
4064 {
4065 char kprintf_line[192];
4066 char event[40];
4067 uint64_t us = timestamp / NSEC_PER_USEC;
4068 uint64_t us_tenth = (timestamp % NSEC_PER_USEC) / 100;
4069 uint64_t delta = timestamp - kd_last_timestamp;
4070 uint64_t delta_us = delta / NSEC_PER_USEC;
4071 uint64_t delta_us_tenth = (delta % NSEC_PER_USEC) / 100;
4072 uint32_t event_id = debugid & KDBG_EVENTID_MASK;
4073 const char *command;
4074 const char *bra;
4075 const char *ket;
4076 kd_event_t *ep;
4077
4078 /* event time and delta from last */
4079 snprintf(kprintf_line, sizeof(kprintf_line),
4080 "%11llu.%1llu %8llu.%1llu ",
4081 us, us_tenth, delta_us, delta_us_tenth);
4082
4083
4084 /* event (id or name) - start prefixed by "[", end suffixed by "]" */
4085 bra = (debugid & DBG_FUNC_START) ? "[" : " ";
4086 ket = (debugid & DBG_FUNC_END) ? "]" : " ";
4087 ep = event_lookup_cache(cpunum, event_id);
4088 if (ep) {
4089 if (strlen(ep->name) < sizeof(event) - 3)
4090 snprintf(event, sizeof(event), "%s%s%s",
4091 bra, ep->name, ket);
4092 else
4093 snprintf(event, sizeof(event), "%s%x(name too long)%s",
4094 bra, event_id, ket);
4095 } else {
4096 snprintf(event, sizeof(event), "%s%x%s",
4097 bra, event_id, ket);
4098 }
4099 snprintf(kprintf_line + strlen(kprintf_line),
4100 sizeof(kprintf_line) - strlen(kprintf_line),
4101 "%-40s ", event);
4102
4103 /* arg1 .. arg4 with special cases for strings */
4104 switch (event_id) {
4105 case VFS_LOOKUP:
4106 case VFS_LOOKUP_DONE:
4107 if (debugid & DBG_FUNC_START) {
4108 /* arg1 hex then arg2..arg4 chars */
4109 snprintf(kprintf_line + strlen(kprintf_line),
4110 sizeof(kprintf_line) - strlen(kprintf_line),
4111 "%-16lx %-8s%-8s%-8s ",
4112 arg1, (char*)&arg2, (char*)&arg3, (char*)&arg4);
4113 break;
4114 }
4115 /* else fall through for arg1..arg4 chars */
4116 case TRACE_STRING_EXEC:
4117 case TRACE_STRING_NEWTHREAD:
4118 case TRACE_INFO_STRING:
4119 snprintf(kprintf_line + strlen(kprintf_line),
4120 sizeof(kprintf_line) - strlen(kprintf_line),
4121 "%-8s%-8s%-8s%-8s ",
4122 (char*)&arg1, (char*)&arg2, (char*)&arg3, (char*)&arg4);
4123 break;
4124 default:
4125 snprintf(kprintf_line + strlen(kprintf_line),
4126 sizeof(kprintf_line) - strlen(kprintf_line),
4127 "%-16lx %-16lx %-16lx %-16lx",
4128 arg1, arg2, arg3, arg4);
4129 }
4130
4131 /* threadid, cpu and command name */
4132 if (threadid == (uintptr_t)thread_tid(current_thread()) &&
4133 current_proc() &&
4134 current_proc()->p_comm[0])
4135 command = current_proc()->p_comm;
4136 else
4137 command = "-";
4138 snprintf(kprintf_line + strlen(kprintf_line),
4139 sizeof(kprintf_line) - strlen(kprintf_line),
4140 " %-16lx %-2d %s\n",
4141 threadid, cpunum, command);
4142
4143 kprintf("%s", kprintf_line);
4144 kd_last_timestamp = timestamp;
4145 }
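
/*
* The line built above has a fixed-width layout: absolute and delta
* timestamps in microseconds with a tenths digit, the event id or name
* bracketed with "[" / "]" for DBG_FUNC_START / DBG_FUNC_END events, the
* four arguments (printed as packed characters for the VFS_LOOKUP and
* TRACE_STRING events), then the thread id, cpu number and command name.
*/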
4146
4147 #endif