apple/xnu.git: bsd/kern/kdebug.c (commit f0ca4b75c881484ed2ab0605c3970eee207ed69a)
1 /*
2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3 *
4 * @Apple_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
21 */
22
23 #include <sys/errno.h>
24 #include <sys/param.h>
25 #include <sys/systm.h>
26 #include <sys/proc_internal.h>
27 #include <sys/vm.h>
28 #include <sys/sysctl.h>
29 #include <sys/kdebug.h>
30 #include <sys/kauth.h>
31 #include <sys/ktrace.h>
32 #include <sys/sysproto.h>
33 #include <sys/bsdtask_info.h>
34 #include <sys/random.h>
35
36 #include <mach/clock_types.h>
37 #include <mach/mach_types.h>
38 #include <mach/mach_time.h>
39 #include <mach/mach_vm.h>
40 #include <machine/atomic.h>
41 #include <machine/machine_routines.h>
42
43 #include <mach/machine.h>
44 #include <mach/vm_map.h>
45
46 #if defined(__i386__) || defined(__x86_64__)
47 #include <i386/rtclock_protos.h>
48 #include <i386/mp.h>
49 #include <i386/machine_routines.h>
50 #include <i386/tsc.h>
51 #endif
52
53 #include <kern/clock.h>
54
55 #include <kern/thread.h>
56 #include <kern/task.h>
57 #include <kern/debug.h>
58 #include <kern/kalloc.h>
59 #include <kern/cpu_number.h>
60 #include <kern/cpu_data.h>
61 #include <kern/assert.h>
62 #include <kern/telemetry.h>
63 #include <kern/sched_prim.h>
64 #include <vm/vm_kern.h>
65 #include <sys/lock.h>
66 #include <kperf/kperf.h>
67 #include <pexpert/device_tree.h>
68
69 #include <sys/malloc.h>
70 #include <sys/mcache.h>
71
72 #include <sys/vnode.h>
73 #include <sys/vnode_internal.h>
74 #include <sys/fcntl.h>
75 #include <sys/file_internal.h>
76 #include <sys/ubc.h>
77 #include <sys/param.h> /* for isset() */
78
79 #include <mach/mach_host.h> /* for host_info() */
80 #include <libkern/OSAtomic.h>
81
82 #include <machine/pal_routines.h>
83 #include <machine/atomic.h>
84
85 extern unsigned int wake_nkdbufs;
86 extern unsigned int trace_wrap;
87
88 /*
89 * IOP(s)
90 *
91 * IOP(s) are auxiliary cores that want to participate in kdebug event logging.
92 * They are registered dynamically. Each is assigned a cpu_id at registration.
93 *
94 * NOTE: IOP trace events may not use the same clock hardware as "normal"
95 * cpus. There is an effort made to synchronize the IOP timebase with the
96 * AP, but it should be understood that there may be discrepancies.
97 *
98 * Once registered, an IOP is permanent; it cannot be unloaded or unregistered.
99 * The current implementation depends on this for thread safety.
100 *
101 * New registrations occur by allocating a kd_iop struct and assigning
102 * a provisional cpu_id of list_head->cpu_id + 1. Then a CAS to claim the
103 * list_head pointer resolves any races.
104 *
105 * You may safely walk the kd_iops list at any time, without holding locks.
106 *
107 * When allocating buffers, the current kd_iops head is captured. Any operations
108 * that depend on the buffer state (such as flushing IOP traces on reads,
109 * etc.) should use the captured list head. This will allow registrations to
110 * take place while trace is in use.
111 */
112
113 typedef struct kd_iop {
114 kd_callback_t callback;
115 uint32_t cpu_id;
116 uint64_t last_timestamp; /* Prevent timer rollback */
117 struct kd_iop* next;
118 } kd_iop_t;
119
120 static kd_iop_t* kd_iops = NULL;
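
/*
 * Illustrative sketch (not compiled here) of how an IOP driver might
 * register itself; the driver-side names are hypothetical, but the
 * callback fields and entry points match how this file uses them:
 *
 *	static void
 *	example_iop_callback(void *context, kd_callback_type type, void *arg)
 *	{
 *		switch (type) {
 *		case KD_CALLBACK_KDEBUG_ENABLED:   // start forwarding events
 *		case KD_CALLBACK_KDEBUG_DISABLED:  // stop forwarding events
 *		case KD_CALLBACK_SYNC_FLUSH:       // push any buffered events now
 *		default:
 *			break;
 *		}
 *	}
 *
 *	kd_callback_t cb = {
 *		.func = example_iop_callback,
 *		.context = NULL,
 *		.iop_name = "EXIOP",
 *	};
 *	int cpu_id = kernel_debug_register_callback(cb);
 *	// Events are then injected with kernel_debug_enter(cpu_id, ...).
 */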
121
122 /*
123 * Typefilter(s)
124 *
125 * A typefilter is an 8KB bitmap used to selectively filter events
126 * being recorded. It is able to individually address every class & subclass.
127 *
128 * There is a shared typefilter in the kernel which is lazily allocated. Once
129 * allocated, the shared typefilter is never deallocated. The shared typefilter
130 * is also mapped on demand into userspace processes that invoke kdebug_trace
131 * API from Libsyscall. When mapped into a userspace process, the memory is
132 * read only, and does not have a fixed address.
133 *
134 * It is a requirement that the kernel's shared typefilter always pass DBG_TRACE
135 * events. This is enforced automatically by setting the needed bits any
136 * time the shared typefilter is mutated.
137 */
138
139 typedef uint8_t* typefilter_t;
140
141 static typefilter_t kdbg_typefilter;
142 static mach_port_t kdbg_typefilter_memory_entry;
143
144 /*
145 * There are 3 combinations of page sizes:
146 *
147 * 4KB / 4KB
148 * 4KB / 16KB
149 * 16KB / 16KB
150 *
151 * The typefilter is exactly 8KB. In the first two scenarios, we would like
152 * to use 2 pages exactly; in the third scenario we must make certain that
153 * a full page is allocated so we do not inadvertently share 8KB of random
154 * data with userspace. The round_page_32 macro rounds up to the kernel page size.
155 */
156 #define TYPEFILTER_ALLOC_SIZE MAX(round_page_32(KDBG_TYPEFILTER_BITMAP_SIZE), KDBG_TYPEFILTER_BITMAP_SIZE)
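
/*
 * For example (illustrative): with a 4KB kernel page size,
 * round_page_32(KDBG_TYPEFILTER_BITMAP_SIZE) is 8KB and exactly two pages
 * back the filter; with a 16KB kernel page size it rounds up to 16KB, and
 * typefilter_create() below zeroes the trailing 8KB so no stale kernel
 * data is shared read-only with userspace.
 */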
157
158 static typefilter_t
159 typefilter_create(void)
160 {
161 typefilter_t tf;
162 if (KERN_SUCCESS == kmem_alloc(kernel_map, (vm_offset_t*)&tf, TYPEFILTER_ALLOC_SIZE, VM_KERN_MEMORY_DIAG)) {
163 memset(&tf[KDBG_TYPEFILTER_BITMAP_SIZE], 0, TYPEFILTER_ALLOC_SIZE - KDBG_TYPEFILTER_BITMAP_SIZE);
164 return tf;
165 }
166 return NULL;
167 }
168
169 static void
170 typefilter_deallocate(typefilter_t tf)
171 {
172 assert(tf != NULL);
173 assert(tf != kdbg_typefilter);
174 kmem_free(kernel_map, (vm_offset_t)tf, TYPEFILTER_ALLOC_SIZE);
175 }
176
177 static void
178 typefilter_copy(typefilter_t dst, typefilter_t src)
179 {
180 assert(src != NULL);
181 assert(dst != NULL);
182 memcpy(dst, src, KDBG_TYPEFILTER_BITMAP_SIZE);
183 }
184
185 static void
186 typefilter_reject_all(typefilter_t tf)
187 {
188 assert(tf != NULL);
189 memset(tf, 0, KDBG_TYPEFILTER_BITMAP_SIZE);
190 }
191
192 static void
193 typefilter_allow_all(typefilter_t tf)
194 {
195 assert(tf != NULL);
196 memset(tf, ~0, KDBG_TYPEFILTER_BITMAP_SIZE);
197 }
198
199 static void
200 typefilter_allow_class(typefilter_t tf, uint8_t class)
201 {
202 assert(tf != NULL);
203 const uint32_t BYTES_PER_CLASS = 256 / 8; // 256 subclasses, 1 bit each
204 memset(&tf[class * BYTES_PER_CLASS], 0xFF, BYTES_PER_CLASS);
205 }
206
207 static void
208 typefilter_allow_csc(typefilter_t tf, uint16_t csc)
209 {
210 assert(tf != NULL);
211 setbit(tf, csc);
212 }
213
214 static bool
215 typefilter_is_debugid_allowed(typefilter_t tf, uint32_t id)
216 {
217 assert(tf != NULL);
218 return isset(tf, KDBG_EXTRACT_CSC(id));
219 }
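
/*
 * Worked example (illustrative): the filter is indexed by the 16-bit
 * class/subclass (CSC) pair, one bit per pair -- 256 * 256 bits = 8KB.
 * For MACH_vmfault (0x01300008):
 *
 *	KDBG_EXTRACT_CSC(0x01300008) == 0x0130 (class 0x01, subclass 0x30)
 *	isset(tf, 0x0130) tests bit 304, i.e. bit 0 of byte 38
 *
 * typefilter_allow_class(tf, 0x01) sets all 256 bits for class 0x01, so
 * every DBG_MACH event would then pass typefilter_is_debugid_allowed().
 */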
220
221 static mach_port_t
222 typefilter_create_memory_entry(typefilter_t tf)
223 {
224 assert(tf != NULL);
225
226 mach_port_t memory_entry = MACH_PORT_NULL;
227 memory_object_size_t size = TYPEFILTER_ALLOC_SIZE;
228
229 mach_make_memory_entry_64(kernel_map,
230 &size,
231 (memory_object_offset_t)tf,
232 VM_PROT_READ,
233 &memory_entry,
234 MACH_PORT_NULL);
235
236 return memory_entry;
237 }
238
239 static int kdbg_copyin_typefilter(user_addr_t addr, size_t size);
240 static void kdbg_enable_typefilter(void);
241 static void kdbg_disable_typefilter(void);
242
243 /*
244 * External prototypes
245 */
246
247 void task_act_iterate_wth_args(task_t, void (*)(thread_t, void *), void *);
248 void commpage_update_kdebug_state(void); /* XXX sign */
249
250 extern int log_leaks;
251
252 /*
253 * This flag is for testing purposes only -- it's highly experimental and tools
254 * have not been updated to support it.
255 */
256 static bool kdbg_continuous_time = false;
257
258 static inline uint64_t
259 kdbg_timestamp(void)
260 {
261 if (kdbg_continuous_time) {
262 return mach_continuous_time();
263 } else {
264 return mach_absolute_time();
265 }
266 }
267
268 static int kdbg_debug = 0;
269
270 int kdbg_control(int *, u_int, user_addr_t, size_t *);
271
272 static int kdbg_read(user_addr_t, size_t *, vnode_t, vfs_context_t, uint32_t);
273 static int kdbg_readcpumap(user_addr_t, size_t *);
274 static int kdbg_readthrmap_v3(user_addr_t, size_t, int);
275 static int kdbg_readcurthrmap(user_addr_t, size_t *);
276 static int kdbg_setreg(kd_regtype *);
277 static int kdbg_setpidex(kd_regtype *);
278 static int kdbg_setpid(kd_regtype *);
279 static void kdbg_thrmap_init(void);
280 static int kdbg_reinit(bool);
281 static int kdbg_bootstrap(bool);
282 static int kdbg_test(size_t flavor);
283
284 static int kdbg_write_v1_header(bool write_thread_map, vnode_t vp, vfs_context_t ctx);
285 static int kdbg_write_thread_map(vnode_t vp, vfs_context_t ctx);
286 static int kdbg_copyout_thread_map(user_addr_t buffer, size_t *buffer_size);
287 static void kdbg_clear_thread_map(void);
288
289 static bool kdbg_wait(uint64_t timeout_ms, bool locked_wait);
290 static void kdbg_wakeup(void);
291
292 int kdbg_cpumap_init_internal(kd_iop_t* iops, uint32_t cpu_count,
293 uint8_t** cpumap, uint32_t* cpumap_size);
294
295 static kd_threadmap *kdbg_thrmap_init_internal(size_t max_count,
296 vm_size_t *map_size, vm_size_t *map_count);
297
298 static bool kdebug_current_proc_enabled(uint32_t debugid);
299 static errno_t kdebug_check_trace_string(uint32_t debugid, uint64_t str_id);
300
301 int kdbg_write_v3_header(user_addr_t, size_t *, int);
302 int kdbg_write_v3_chunk_header(user_addr_t buffer, uint32_t tag,
303 uint32_t sub_tag, uint64_t length,
304 vnode_t vp, vfs_context_t ctx);
305
306 user_addr_t kdbg_write_v3_event_chunk_header(user_addr_t buffer, uint32_t tag,
307 uint64_t length, vnode_t vp,
308 vfs_context_t ctx);
309
310 // Helper functions
311
312 static int create_buffers(bool);
313 static void delete_buffers(void);
314
315 extern int tasks_count;
316 extern int threads_count;
317 extern void IOSleep(int);
318
319 /* trace enable status */
320 unsigned int kdebug_enable = 0;
321
322 /* A static buffer to record events prior to the start of regular logging */
323
324 #define KD_EARLY_BUFFER_SIZE (16 * 1024)
325 #define KD_EARLY_BUFFER_NBUFS (KD_EARLY_BUFFER_SIZE / sizeof(kd_buf))
326 #if defined(__x86_64__)
327 __attribute__((aligned(KD_EARLY_BUFFER_SIZE)))
328 static kd_buf kd_early_buffer[KD_EARLY_BUFFER_NBUFS];
329 #else /* defined(__x86_64__) */
330 /*
331 * On ARM, the space for this is carved out by osfmk/arm/data.s -- clang
332 * has problems aligning to greater than 4K.
333 */
334 extern kd_buf kd_early_buffer[KD_EARLY_BUFFER_NBUFS];
335 #endif /* !defined(__x86_64__) */
336
337 static unsigned int kd_early_index = 0;
338 static bool kd_early_overflow = false;
339 static bool kd_early_done = false;
340
341 #define SLOW_NOLOG 0x01
342 #define SLOW_CHECKS 0x02
343
344 #define EVENTS_PER_STORAGE_UNIT 2048
345 #define MIN_STORAGE_UNITS_PER_CPU 4
346
347 #define POINTER_FROM_KDS_PTR(x) (&kd_bufs[x.buffer_index].kdsb_addr[x.offset])
348
349 union kds_ptr {
350 struct {
351 uint32_t buffer_index:21;
352 uint16_t offset:11;
353 };
354 uint32_t raw;
355 };
356
357 struct kd_storage {
358 union kds_ptr kds_next;
359 uint32_t kds_bufindx;
360 uint32_t kds_bufcnt;
361 uint32_t kds_readlast;
362 bool kds_lostevents;
363 uint64_t kds_timestamp;
364
365 kd_buf kds_records[EVENTS_PER_STORAGE_UNIT];
366 };
367
368 #define MAX_BUFFER_SIZE (1024 * 1024 * 128)
369 #define N_STORAGE_UNITS_PER_BUFFER (MAX_BUFFER_SIZE / sizeof(struct kd_storage))
370 static_assert(N_STORAGE_UNITS_PER_BUFFER <= 0x7ff,
371 "shoudn't overflow kds_ptr.offset");
372
373 struct kd_storage_buffers {
374 struct kd_storage *kdsb_addr;
375 uint32_t kdsb_size;
376 };
377
378 #define KDS_PTR_NULL 0xffffffff
379 struct kd_storage_buffers *kd_bufs = NULL;
380 int n_storage_units = 0;
381 unsigned int n_storage_buffers = 0;
382 int n_storage_threshold = 0;
383 int kds_waiter = 0;
384
385 #pragma pack(0)
386 struct kd_bufinfo {
387 union kds_ptr kd_list_head;
388 union kds_ptr kd_list_tail;
389 bool kd_lostevents;
390 uint32_t _pad;
391 uint64_t kd_prev_timebase;
392 uint32_t num_bufs;
393 } __attribute__((aligned(MAX_CPU_CACHE_LINE_SIZE)));
394
395
396 /*
397 * In principle, this control block can be shared in DRAM with other
398 * coprocessors and runtimes, for configuring what tracing is enabled.
399 */
400 struct kd_ctrl_page_t {
401 union kds_ptr kds_free_list;
402 uint32_t enabled :1;
403 uint32_t _pad0 :31;
404 int kds_inuse_count;
405 uint32_t kdebug_flags;
406 uint32_t kdebug_slowcheck;
407 uint64_t oldest_time;
408 /*
409 * The number of kd_bufinfo structs allocated may not match the current
410 * number of active cpus. We capture the iops list head at initialization,
411 * which could be used to calculate the number of cpus we allocated data
412 * for, unless it happens to be NULL. To avoid that case, we also capture
413 * an explicit cpu count.
414 */
415 kd_iop_t* kdebug_iops;
416 uint32_t kdebug_cpus;
417 } kd_ctrl_page = {
418 .kds_free_list = {.raw = KDS_PTR_NULL},
419 .kdebug_slowcheck = SLOW_NOLOG,
420 .oldest_time = 0
421 };
422
423 #pragma pack()
424
425 struct kd_bufinfo *kdbip = NULL;
426
427 #define KDCOPYBUF_COUNT 8192
428 #define KDCOPYBUF_SIZE (KDCOPYBUF_COUNT * sizeof(kd_buf))
429
430 #define PAGE_4KB 4096
431 #define PAGE_16KB 16384
432
433 kd_buf *kdcopybuf = NULL;
434
435 unsigned int nkdbufs = 0;
436 unsigned int kdlog_beg = 0;
437 unsigned int kdlog_end = 0;
438 unsigned int kdlog_value1 = 0;
439 unsigned int kdlog_value2 = 0;
440 unsigned int kdlog_value3 = 0;
441 unsigned int kdlog_value4 = 0;
442
443 static lck_spin_t * kdw_spin_lock;
444 static lck_spin_t * kds_spin_lock;
445
446 kd_threadmap *kd_mapptr = 0;
447 vm_size_t kd_mapsize = 0;
448 vm_size_t kd_mapcount = 0;
449
450 off_t RAW_file_offset = 0;
451 int RAW_file_written = 0;
452
453 #define RAW_FLUSH_SIZE (2 * 1024 * 1024)
454
455 /*
456 * A globally increasing counter for identifying strings in trace. Starts at
457 * 1 because 0 is a reserved return value.
458 */
459 __attribute__((aligned(MAX_CPU_CACHE_LINE_SIZE)))
460 static uint64_t g_curr_str_id = 1;
461
462 #define STR_ID_SIG_OFFSET (48)
463 #define STR_ID_MASK ((1ULL << STR_ID_SIG_OFFSET) - 1)
464 #define STR_ID_SIG_MASK (~STR_ID_MASK)
465
466 /*
467 * A bit pattern for identifying string IDs generated by
468 * kdebug_trace_string(2).
469 */
470 static uint64_t g_str_id_signature = (0x70acULL << STR_ID_SIG_OFFSET);
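
/*
 * For example (illustrative): a counter value of 7 yields the string ID
 *
 *	(7 & STR_ID_MASK) | g_str_id_signature == 0x70ac000000000007
 *
 * and kdebug_check_trace_string() recognizes it as kernel-generated by
 * testing (str_id & STR_ID_SIG_MASK) == g_str_id_signature.
 */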
471
472 #define INTERRUPT 0x01050000
473 #define MACH_vmfault 0x01300008
474 #define BSC_SysCall 0x040c0000
475 #define MACH_SysCall 0x010c0000
476
477 struct kd_task_name {
478 task_t ktn_task;
479 pid_t ktn_pid;
480 char ktn_name[20];
481 };
482
483 struct kd_resolver {
484 kd_threadmap *krs_map;
485 vm_size_t krs_count;
486 vm_size_t krs_maxcount;
487 struct kd_task_name *krs_task;
488 };
489
490 /*
491 * TRACE file formats...
492 *
493 * RAW_VERSION0
494 *
495 * uint32_t #threadmaps
496 * kd_threadmap[]
497 * kd_buf[]
498 *
499 * RAW_VERSION1
500 *
501 * RAW_header, with version_no set to RAW_VERSION1
502 * kd_threadmap[]
503 * Empty space to pad alignment to the nearest page boundary.
504 * kd_buf[]
505 *
506 * RAW_VERSION1+
507 *
508 * RAW_header, with version_no set to RAW_VERSION1
509 * kd_threadmap[]
510 * kd_cpumap_header, with version_no set to RAW_VERSION1
511 * kd_cpumap[]
512 * Empty space to pad alignment to the nearest page boundary.
513 * kd_buf[]
514 *
515 * V1+ implementation details...
516 *
517 * It would have been nice to add the cpumap data "correctly", but there were
518 * several obstacles. Existing code attempts to parse both V1 and V0 files.
519 * Because V0 has no versioning or header, the test looks like
520 * this:
521 *
522 * // Read header
523 * if (header.version_no != RAW_VERSION1) { // Assume V0 }
524 *
525 * If we add a VERSION2 file format, all existing code is going to treat that
526 * as a VERSION0 file when reading it, and crash terribly when trying to read
527 * RAW_VERSION2 threadmap entries.
528 *
529 * To differentiate between a V1 and V1+ file, read as V1 until you reach
530 * the padding bytes. Then:
531 *
532 * boolean_t is_v1plus = FALSE;
533 * if (padding_bytes >= sizeof(kd_cpumap_header)) {
534 * kd_cpumap_header header = // read header;
535 * if (header.version_no == RAW_VERSION1) {
536 * is_v1plus = TRUE;
537 * }
538 * }
539 *
540 */
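
/*
 * Putting the above together, a reader (illustrative sketch; RAW_header is
 * defined in kdebug.h) can classify a file as:
 *
 *	RAW_header header = // read header;
 *	if (header.version_no != RAW_VERSION1) {
 *		// V0: rewind; the first word is the threadmap count
 *	} else if (the padding after the threadmaps holds a kd_cpumap_header
 *	    whose version_no is RAW_VERSION1) {
 *		// V1+: a cpumap precedes the events
 *	} else {
 *		// plain V1
 *	}
 */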
541
542 #define RAW_VERSION3 0x00001000
543
544 // Version 3 header
545 // The header chunk has the tag 0x00001000 which also serves as a magic word
546 // that identifies the file as a version 3 trace file. The header payload is
547 // a set of fixed fields followed by a variable number of sub-chunks:
548 /*
549 * ____________________________________________________________________________
550 | Offset | Size | Field |
551 | ----------------------------------------------------------------------------
552 | 0 | 4 | Tag (0x00001000) |
553 | 4 | 4 | Sub-tag. Represents the version of the header. |
554 | 8 | 8 | Length of header payload (40+8x) |
555 | 16 | 8 | Time base info. Two 32-bit numbers, numer/denom, |
556 | | | for converting timestamps to nanoseconds. |
557 | 24 | 8 | Timestamp of trace start. |
558 | 32 | 8 | Wall time seconds since Unix epoch. |
559 | | | As returned by gettimeofday(). |
560 | 40 | 4 | Wall time microseconds. As returned by gettimeofday(). |
561 | 44 | 4 | Local time zone offset in minutes. ( " ) |
562 | 48 | 4 | Type of daylight savings time correction to apply. ( " ) |
563 | 52 | 4 | Flags. 1 = 64-bit. Remaining bits should be written |
564 | | | as 0 and ignored when reading. |
565 | 56 | 8x | Variable number of sub-chunks. None are required. |
566 | | | Ignore unknown chunks. |
567 | ----------------------------------------------------------------------------
568 */
569 // NOTE: The header sub-chunks are considered part of the header chunk,
570 // so they must be included in the header chunk's length field.
571 // The CPU map is an optional sub-chunk of the header chunk. It provides
572 // information about the CPUs that are referenced from the trace events.
573 typedef struct {
574 uint32_t tag;
575 uint32_t sub_tag;
576 uint64_t length;
577 uint32_t timebase_numer;
578 uint32_t timebase_denom;
579 uint64_t timestamp;
580 uint64_t walltime_secs;
581 uint32_t walltime_usecs;
582 uint32_t timezone_minuteswest;
583 uint32_t timezone_dst;
584 uint32_t flags;
585 } __attribute__((packed)) kd_header_v3;
586
587 typedef struct {
588 uint32_t tag;
589 uint32_t sub_tag;
590 uint64_t length;
591 } __attribute__((packed)) kd_chunk_header_v3;
592
593 #define V3_CONFIG 0x00001b00
594 #define V3_CPU_MAP 0x00001c00
595 #define V3_THREAD_MAP 0x00001d00
596 #define V3_RAW_EVENTS 0x00001e00
597 #define V3_NULL_CHUNK 0x00002000
598
599 // The current version of all kernel-managed chunks is 1.
600 // V3_CURRENT_CHUNK_VERSION exists to simplify the common case where
601 // most or all of the kernel-managed chunks share the same version.
602
603 #define V3_CURRENT_CHUNK_VERSION 1
604 #define V3_HEADER_VERSION V3_CURRENT_CHUNK_VERSION
605 #define V3_CPUMAP_VERSION V3_CURRENT_CHUNK_VERSION
606 #define V3_THRMAP_VERSION V3_CURRENT_CHUNK_VERSION
607 #define V3_EVENT_DATA_VERSION V3_CURRENT_CHUNK_VERSION
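
/*
 * Illustrative sketch of a userspace reader walking the top-level chunks
 * of a version 3 file (assumes only the layout documented above; note
 * that the header chunk's length already covers its sub-chunks, such as
 * the cpu map, so those are found by descending into the header payload):
 *
 *	kd_chunk_header_v3 ch;
 *	while (read(fd, &ch, sizeof(ch)) == (ssize_t)sizeof(ch)) {
 *		off_t payload = lseek(fd, 0, SEEK_CUR);
 *		if (ch.tag == V3_RAW_EVENTS) {
 *			// ch.length bytes of event payload follow
 *		}
 *		// known or unknown, every chunk is advanced past by length
 *		lseek(fd, payload + (off_t)ch.length, SEEK_SET);
 *	}
 */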
608
609 typedef struct krt krt_t;
610
611 static uint32_t
612 kdbg_cpu_count(bool early_trace)
613 {
614 if (early_trace) {
615 #if defined(__x86_64__)
616 return max_ncpus;
617 #else /* defined(__x86_64__) */
618 return ml_get_cpu_count();
619 #endif /* !defined(__x86_64__) */
620 }
621
622 #if defined(__x86_64__)
623 host_basic_info_data_t hinfo;
624 mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
625 host_info((host_t)1 /* BSD_HOST */, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
626 assert(hinfo.logical_cpu_max > 0);
627 return hinfo.logical_cpu_max;
628 #else /* defined(__x86_64__) */
629 return ml_get_topology_info()->max_cpu_id + 1;
630 #endif /* !defined(__x86_64__) */
631 }
632
633 #if MACH_ASSERT
634
635 static bool
636 kdbg_iop_list_is_valid(kd_iop_t* iop)
637 {
638 if (iop) {
639 /* Is list sorted by cpu_id? */
640 kd_iop_t* temp = iop;
641 do {
642 assert(!temp->next || temp->next->cpu_id == temp->cpu_id - 1);
643 assert(temp->next || (temp->cpu_id == kdbg_cpu_count(false) || temp->cpu_id == kdbg_cpu_count(true)));
644 } while ((temp = temp->next));
645
646 /* Does each entry have a function and a name? */
647 temp = iop;
648 do {
649 assert(temp->callback.func);
650 assert(strlen(temp->callback.iop_name) < sizeof(temp->callback.iop_name));
651 } while ((temp = temp->next));
652 }
653
654 return true;
655 }
656
657 #endif /* MACH_ASSERT */
658
659 static void
660 kdbg_iop_list_callback(kd_iop_t* iop, kd_callback_type type, void* arg)
661 {
662 while (iop) {
663 iop->callback.func(iop->callback.context, type, arg);
664 iop = iop->next;
665 }
666 }
667
668 static lck_grp_t *kdebug_lck_grp = NULL;
669
670 static void
671 kdbg_set_tracing_enabled(bool enabled, uint32_t trace_type)
672 {
673 /*
674 * Drain any events from IOPs before making the state change. On
675 * enabling, this removes any stale events from before tracing. On
676 * disabling, this saves any events up to the point tracing is disabled.
677 */
678 kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_SYNC_FLUSH,
679 NULL);
680
681 int s = ml_set_interrupts_enabled(false);
682 lck_spin_lock_grp(kds_spin_lock, kdebug_lck_grp);
683
684 if (enabled) {
685 /*
686 * The oldest valid time is now; reject past events from IOPs.
687 */
688 kd_ctrl_page.oldest_time = kdbg_timestamp();
689 kdebug_enable |= trace_type;
690 kd_ctrl_page.kdebug_slowcheck &= ~SLOW_NOLOG;
691 kd_ctrl_page.enabled = 1;
692 commpage_update_kdebug_state();
693 } else {
694 kdebug_enable &= ~(KDEBUG_ENABLE_TRACE | KDEBUG_ENABLE_PPT);
695 kd_ctrl_page.kdebug_slowcheck |= SLOW_NOLOG;
696 kd_ctrl_page.enabled = 0;
697 commpage_update_kdebug_state();
698 }
699 lck_spin_unlock(kds_spin_lock);
700 ml_set_interrupts_enabled(s);
701
702 if (enabled) {
703 kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops,
704 KD_CALLBACK_KDEBUG_ENABLED, NULL);
705 } else {
706 kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops,
707 KD_CALLBACK_KDEBUG_DISABLED, NULL);
708 }
709 }
710
711 static void
712 kdbg_set_flags(int slowflag, int enableflag, bool enabled)
713 {
714 int s = ml_set_interrupts_enabled(false);
715 lck_spin_lock_grp(kds_spin_lock, kdebug_lck_grp);
716
717 if (enabled) {
718 kd_ctrl_page.kdebug_slowcheck |= slowflag;
719 kdebug_enable |= enableflag;
720 } else {
721 kd_ctrl_page.kdebug_slowcheck &= ~slowflag;
722 kdebug_enable &= ~enableflag;
723 }
724
725 lck_spin_unlock(kds_spin_lock);
726 ml_set_interrupts_enabled(s);
727 }
728
729 /*
730 * Disable wrapping and return true if trace wrapped, false otherwise.
731 */
732 static bool
733 disable_wrap(uint32_t *old_slowcheck, uint32_t *old_flags)
734 {
735 bool wrapped;
736 int s = ml_set_interrupts_enabled(false);
737 lck_spin_lock_grp(kds_spin_lock, kdebug_lck_grp);
738
739 *old_slowcheck = kd_ctrl_page.kdebug_slowcheck;
740 *old_flags = kd_ctrl_page.kdebug_flags;
741
742 wrapped = kd_ctrl_page.kdebug_flags & KDBG_WRAPPED;
743 kd_ctrl_page.kdebug_flags &= ~KDBG_WRAPPED;
744 kd_ctrl_page.kdebug_flags |= KDBG_NOWRAP;
745
746 lck_spin_unlock(kds_spin_lock);
747 ml_set_interrupts_enabled(s);
748
749 return wrapped;
750 }
751
752 static void
753 enable_wrap(uint32_t old_slowcheck)
754 {
755 int s = ml_set_interrupts_enabled(false);
756 lck_spin_lock_grp(kds_spin_lock, kdebug_lck_grp);
757
758 kd_ctrl_page.kdebug_flags &= ~KDBG_NOWRAP;
759
760 if (!(old_slowcheck & SLOW_NOLOG)) {
761 kd_ctrl_page.kdebug_slowcheck &= ~SLOW_NOLOG;
762 }
763
764 lck_spin_unlock(kds_spin_lock);
765 ml_set_interrupts_enabled(s);
766 }
767
768 static int
769 create_buffers(bool early_trace)
770 {
771 unsigned int i;
772 unsigned int p_buffer_size;
773 unsigned int f_buffer_size;
774 unsigned int f_buffers;
775 int error = 0;
776
777 /*
778 * For the duration of this allocation, trace code will only reference
779 * kdebug_iops. Any iops registered after this enabling will not be
780 * messaged until the buffers are reallocated.
781 *
782 * TLDR; Must read kd_iops once and only once!
783 */
784 kd_ctrl_page.kdebug_iops = kd_iops;
785
786 assert(kdbg_iop_list_is_valid(kd_ctrl_page.kdebug_iops));
787
788 /*
789 * If the list is valid, it is sorted, newest -> oldest. Each iop entry
790 * has a cpu_id of "the older entry's cpu_id + 1", so the list head holds
791 * the highest cpu_id and the cpu count is the head's cpu_id + 1.
792 */
793
794 kd_ctrl_page.kdebug_cpus = kd_ctrl_page.kdebug_iops ? kd_ctrl_page.kdebug_iops->cpu_id + 1 : kdbg_cpu_count(early_trace);
795
796 if (kmem_alloc(kernel_map, (vm_offset_t *)&kdbip, sizeof(struct kd_bufinfo) * kd_ctrl_page.kdebug_cpus, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
797 error = ENOSPC;
798 goto out;
799 }
800
801 if (nkdbufs < (kd_ctrl_page.kdebug_cpus * EVENTS_PER_STORAGE_UNIT * MIN_STORAGE_UNITS_PER_CPU)) {
802 n_storage_units = kd_ctrl_page.kdebug_cpus * MIN_STORAGE_UNITS_PER_CPU;
803 } else {
804 n_storage_units = nkdbufs / EVENTS_PER_STORAGE_UNIT;
805 }
806
807 nkdbufs = n_storage_units * EVENTS_PER_STORAGE_UNIT;
808
809 f_buffers = n_storage_units / N_STORAGE_UNITS_PER_BUFFER;
810 n_storage_buffers = f_buffers;
811
812 f_buffer_size = N_STORAGE_UNITS_PER_BUFFER * sizeof(struct kd_storage);
813 p_buffer_size = (n_storage_units % N_STORAGE_UNITS_PER_BUFFER) * sizeof(struct kd_storage);
814
815 if (p_buffer_size) {
816 n_storage_buffers++;
817 }
818
819 kd_bufs = NULL;
820
821 if (kdcopybuf == 0) {
822 if (kmem_alloc(kernel_map, (vm_offset_t *)&kdcopybuf, (vm_size_t)KDCOPYBUF_SIZE, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
823 error = ENOSPC;
824 goto out;
825 }
826 }
827 if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs, (vm_size_t)(n_storage_buffers * sizeof(struct kd_storage_buffers)), VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
828 error = ENOSPC;
829 goto out;
830 }
831 bzero(kd_bufs, n_storage_buffers * sizeof(struct kd_storage_buffers));
832
833 for (i = 0; i < f_buffers; i++) {
834 if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs[i].kdsb_addr, (vm_size_t)f_buffer_size, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
835 error = ENOSPC;
836 goto out;
837 }
838 bzero(kd_bufs[i].kdsb_addr, f_buffer_size);
839
840 kd_bufs[i].kdsb_size = f_buffer_size;
841 }
842 if (p_buffer_size) {
843 if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs[i].kdsb_addr, (vm_size_t)p_buffer_size, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
844 error = ENOSPC;
845 goto out;
846 }
847 bzero(kd_bufs[i].kdsb_addr, p_buffer_size);
848
849 kd_bufs[i].kdsb_size = p_buffer_size;
850 }
851 n_storage_units = 0;
852
853 for (i = 0; i < n_storage_buffers; i++) {
854 struct kd_storage *kds;
855 uint16_t n_elements;
856 static_assert(N_STORAGE_UNITS_PER_BUFFER <= UINT16_MAX);
857 assert(kd_bufs[i].kdsb_size <= N_STORAGE_UNITS_PER_BUFFER *
858 sizeof(struct kd_storage));
859
860 n_elements = kd_bufs[i].kdsb_size / sizeof(struct kd_storage);
861 kds = kd_bufs[i].kdsb_addr;
862
863 for (uint16_t n = 0; n < n_elements; n++) {
864 kds[n].kds_next.buffer_index = kd_ctrl_page.kds_free_list.buffer_index;
865 kds[n].kds_next.offset = kd_ctrl_page.kds_free_list.offset;
866
867 kd_ctrl_page.kds_free_list.buffer_index = i;
868 kd_ctrl_page.kds_free_list.offset = n;
869 }
870 n_storage_units += n_elements;
871 }
872
873 bzero((char *)kdbip, sizeof(struct kd_bufinfo) * kd_ctrl_page.kdebug_cpus);
874
875 for (i = 0; i < kd_ctrl_page.kdebug_cpus; i++) {
876 kdbip[i].kd_list_head.raw = KDS_PTR_NULL;
877 kdbip[i].kd_list_tail.raw = KDS_PTR_NULL;
878 kdbip[i].kd_lostevents = false;
879 kdbip[i].num_bufs = 0;
880 }
881
882 kd_ctrl_page.kdebug_flags |= KDBG_BUFINIT;
883
884 kd_ctrl_page.kds_inuse_count = 0;
885 n_storage_threshold = n_storage_units / 2;
886 out:
887 if (error) {
888 delete_buffers();
889 }
890
891 return error;
892 }
893
894 static void
895 delete_buffers(void)
896 {
897 unsigned int i;
898
899 if (kd_bufs) {
900 for (i = 0; i < n_storage_buffers; i++) {
901 if (kd_bufs[i].kdsb_addr) {
902 kmem_free(kernel_map, (vm_offset_t)kd_bufs[i].kdsb_addr, (vm_size_t)kd_bufs[i].kdsb_size);
903 }
904 }
905 kmem_free(kernel_map, (vm_offset_t)kd_bufs, (vm_size_t)(n_storage_buffers * sizeof(struct kd_storage_buffers)));
906
907 kd_bufs = NULL;
908 n_storage_buffers = 0;
909 }
910 if (kdcopybuf) {
911 kmem_free(kernel_map, (vm_offset_t)kdcopybuf, KDCOPYBUF_SIZE);
912
913 kdcopybuf = NULL;
914 }
915 kd_ctrl_page.kds_free_list.raw = KDS_PTR_NULL;
916
917 if (kdbip) {
918 kmem_free(kernel_map, (vm_offset_t)kdbip, sizeof(struct kd_bufinfo) * kd_ctrl_page.kdebug_cpus);
919
920 kdbip = NULL;
921 }
922 kd_ctrl_page.kdebug_iops = NULL;
923 kd_ctrl_page.kdebug_cpus = 0;
924 kd_ctrl_page.kdebug_flags &= ~KDBG_BUFINIT;
925 }
926
927 void
928 release_storage_unit(int cpu, uint32_t kdsp_raw)
929 {
930 int s = 0;
931 struct kd_storage *kdsp_actual;
932 struct kd_bufinfo *kdbp;
933 union kds_ptr kdsp;
934
935 kdsp.raw = kdsp_raw;
936
937 s = ml_set_interrupts_enabled(false);
938 lck_spin_lock_grp(kds_spin_lock, kdebug_lck_grp);
939
940 kdbp = &kdbip[cpu];
941
942 if (kdsp.raw == kdbp->kd_list_head.raw) {
943 /*
944 * it's possible for the storage unit pointed to
945 * by kdsp to have already been stolen... so
946 * check to see if it's still the head of the list
947 * now that we're behind the lock that protects
948 * adding and removing from the queue...
949 * since we only ever release and steal units from
950 * that position, if it's no longer the head
951 * we have nothing to do in this context
952 */
953 kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
954 kdbp->kd_list_head = kdsp_actual->kds_next;
955
956 kdsp_actual->kds_next = kd_ctrl_page.kds_free_list;
957 kd_ctrl_page.kds_free_list = kdsp;
958
959 kd_ctrl_page.kds_inuse_count--;
960 }
961 lck_spin_unlock(kds_spin_lock);
962 ml_set_interrupts_enabled(s);
963 }
964
965 bool
966 allocate_storage_unit(int cpu)
967 {
968 union kds_ptr kdsp;
969 struct kd_storage *kdsp_actual, *kdsp_next_actual;
970 struct kd_bufinfo *kdbp, *kdbp_vict, *kdbp_try;
971 uint64_t oldest_ts, ts;
972 bool retval = true;
973 int s = 0;
974
975 s = ml_set_interrupts_enabled(false);
976 lck_spin_lock_grp(kds_spin_lock, kdebug_lck_grp);
977
978 kdbp = &kdbip[cpu];
979
980 /* If someone beat us to the allocate, return success */
981 if (kdbp->kd_list_tail.raw != KDS_PTR_NULL) {
982 kdsp_actual = POINTER_FROM_KDS_PTR(kdbp->kd_list_tail);
983
984 if (kdsp_actual->kds_bufindx < EVENTS_PER_STORAGE_UNIT) {
985 goto out;
986 }
987 }
988
989 if ((kdsp = kd_ctrl_page.kds_free_list).raw != KDS_PTR_NULL) {
990 /*
991 * If there's a free page, grab it from the free list.
992 */
993 kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
994 kd_ctrl_page.kds_free_list = kdsp_actual->kds_next;
995
996 kd_ctrl_page.kds_inuse_count++;
997 } else {
998 /*
999 * Otherwise, we're going to lose events and repurpose the oldest
1000 * storage unit we can find.
1001 */
1002 if (kd_ctrl_page.kdebug_flags & KDBG_NOWRAP) {
1003 kd_ctrl_page.kdebug_slowcheck |= SLOW_NOLOG;
1004 kdbp->kd_lostevents = true;
1005 retval = false;
1006 goto out;
1007 }
1008 kdbp_vict = NULL;
1009 oldest_ts = UINT64_MAX;
1010
1011 for (kdbp_try = &kdbip[0]; kdbp_try < &kdbip[kd_ctrl_page.kdebug_cpus]; kdbp_try++) {
1012 if (kdbp_try->kd_list_head.raw == KDS_PTR_NULL) {
1013 /*
1014 * no storage unit to steal
1015 */
1016 continue;
1017 }
1018
1019 kdsp_actual = POINTER_FROM_KDS_PTR(kdbp_try->kd_list_head);
1020
1021 if (kdsp_actual->kds_bufcnt < EVENTS_PER_STORAGE_UNIT) {
1022 /*
1023 * make sure we don't steal the storage unit
1024 * being actively recorded to... need to
1025 * move on because we don't want an out-of-order
1026 * set of events showing up later
1027 */
1028 continue;
1029 }
1030
1031 /*
1032 * When wrapping, steal the storage unit with the
1033 * earliest timestamp on its last event, instead of the
1034 * earliest timestamp on the first event. This allows a
1035 * storage unit with more recent events to be preserved,
1036 * even if the storage unit contains events that are
1037 * older than those found in other CPUs.
1038 */
1039 ts = kdbg_get_timestamp(&kdsp_actual->kds_records[EVENTS_PER_STORAGE_UNIT - 1]);
1040 if (ts < oldest_ts) {
1041 oldest_ts = ts;
1042 kdbp_vict = kdbp_try;
1043 }
1044 }
1045 if (kdbp_vict == NULL) {
1046 kdebug_enable = 0;
1047 kd_ctrl_page.enabled = 0;
1048 commpage_update_kdebug_state();
1049 retval = false;
1050 goto out;
1051 }
1052 kdsp = kdbp_vict->kd_list_head;
1053 kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
1054 kdbp_vict->kd_list_head = kdsp_actual->kds_next;
1055
1056 if (kdbp_vict->kd_list_head.raw != KDS_PTR_NULL) {
1057 kdsp_next_actual = POINTER_FROM_KDS_PTR(kdbp_vict->kd_list_head);
1058 kdsp_next_actual->kds_lostevents = true;
1059 } else {
1060 kdbp_vict->kd_lostevents = true;
1061 }
1062
1063 if (kd_ctrl_page.oldest_time < oldest_ts) {
1064 kd_ctrl_page.oldest_time = oldest_ts;
1065 }
1066 kd_ctrl_page.kdebug_flags |= KDBG_WRAPPED;
1067 }
1068 kdsp_actual->kds_timestamp = kdbg_timestamp();
1069 kdsp_actual->kds_next.raw = KDS_PTR_NULL;
1070 kdsp_actual->kds_bufcnt = 0;
1071 kdsp_actual->kds_readlast = 0;
1072
1073 kdsp_actual->kds_lostevents = kdbp->kd_lostevents;
1074 kdbp->kd_lostevents = false;
1075 kdsp_actual->kds_bufindx = 0;
1076
1077 if (kdbp->kd_list_head.raw == KDS_PTR_NULL) {
1078 kdbp->kd_list_head = kdsp;
1079 } else {
1080 POINTER_FROM_KDS_PTR(kdbp->kd_list_tail)->kds_next = kdsp;
1081 }
1082 kdbp->kd_list_tail = kdsp;
1083 out:
1084 lck_spin_unlock(kds_spin_lock);
1085 ml_set_interrupts_enabled(s);
1086
1087 return retval;
1088 }
1089
1090 int
1091 kernel_debug_register_callback(kd_callback_t callback)
1092 {
1093 kd_iop_t* iop;
1094 if (kmem_alloc(kernel_map, (vm_offset_t *)&iop, sizeof(kd_iop_t), VM_KERN_MEMORY_DIAG) == KERN_SUCCESS) {
1095 memcpy(&iop->callback, &callback, sizeof(kd_callback_t));
1096
1097 /*
1098 * <rdar://problem/13351477> Some IOP clients are not providing a name.
1099 *
1100 * Remove when fixed.
1101 */
1102 {
1103 bool is_valid_name = false;
1104 for (uint32_t length = 0; length < sizeof(callback.iop_name); ++length) {
1105 /* This is roughly isprintable(c) */
1106 if (callback.iop_name[length] > 0x20 && callback.iop_name[length] < 0x7F) {
1107 continue;
1108 }
1109 if (callback.iop_name[length] == 0) {
1110 if (length) {
1111 is_valid_name = true;
1112 }
1113 break;
1114 }
1115 }
1116
1117 if (!is_valid_name) {
1118 strlcpy(iop->callback.iop_name, "IOP-???", sizeof(iop->callback.iop_name));
1119 }
1120 }
1121
1122 iop->last_timestamp = 0;
1123
1124 do {
1125 /*
1126 * We use two pieces of state, the old list head
1127 * pointer, and the value of old_list_head->cpu_id.
1128 * If we read kd_iops more than once, it can change
1129 * between reads.
1130 *
1131 * TLDR; Must not read kd_iops more than once per loop.
1132 */
1133 iop->next = kd_iops;
1134 iop->cpu_id = iop->next ? (iop->next->cpu_id + 1) : kdbg_cpu_count(false);
1135
1136 /*
1137 * Header says OSCompareAndSwapPtr has a memory barrier
1138 */
1139 } while (!OSCompareAndSwapPtr(iop->next, iop, (void* volatile*)&kd_iops));
1140
1141 return iop->cpu_id;
1142 }
1143
1144 return 0;
1145 }
1146
1147 void
1148 kernel_debug_enter(
1149 uint32_t coreid,
1150 uint32_t debugid,
1151 uint64_t timestamp,
1152 uintptr_t arg1,
1153 uintptr_t arg2,
1154 uintptr_t arg3,
1155 uintptr_t arg4,
1156 uintptr_t threadid
1157 )
1158 {
1159 uint32_t bindx;
1160 kd_buf *kd;
1161 struct kd_bufinfo *kdbp;
1162 struct kd_storage *kdsp_actual;
1163 union kds_ptr kds_raw;
1164
1165 if (kd_ctrl_page.kdebug_slowcheck) {
1166 if ((kd_ctrl_page.kdebug_slowcheck & SLOW_NOLOG) || !(kdebug_enable & (KDEBUG_ENABLE_TRACE | KDEBUG_ENABLE_PPT))) {
1167 goto out1;
1168 }
1169
1170 if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) {
1171 if (typefilter_is_debugid_allowed(kdbg_typefilter, debugid)) {
1172 goto record_event;
1173 }
1174 goto out1;
1175 } else if (kd_ctrl_page.kdebug_flags & KDBG_RANGECHECK) {
1176 if (debugid >= kdlog_beg && debugid <= kdlog_end) {
1177 goto record_event;
1178 }
1179 goto out1;
1180 } else if (kd_ctrl_page.kdebug_flags & KDBG_VALCHECK) {
1181 if ((debugid & KDBG_EVENTID_MASK) != kdlog_value1 &&
1182 (debugid & KDBG_EVENTID_MASK) != kdlog_value2 &&
1183 (debugid & KDBG_EVENTID_MASK) != kdlog_value3 &&
1184 (debugid & KDBG_EVENTID_MASK) != kdlog_value4) {
1185 goto out1;
1186 }
1187 }
1188 }
1189
1190 record_event:
1191 if (timestamp < kd_ctrl_page.oldest_time) {
1192 goto out1;
1193 }
1194
1195 disable_preemption();
1196
1197 if (kd_ctrl_page.enabled == 0) {
1198 goto out;
1199 }
1200
1201 kdbp = &kdbip[coreid];
1202 timestamp &= KDBG_TIMESTAMP_MASK;
1203
1204 retry_q:
1205 kds_raw = kdbp->kd_list_tail;
1206
1207 if (kds_raw.raw != KDS_PTR_NULL) {
1208 kdsp_actual = POINTER_FROM_KDS_PTR(kds_raw);
1209 bindx = kdsp_actual->kds_bufindx;
1210 } else {
1211 kdsp_actual = NULL;
1212 bindx = EVENTS_PER_STORAGE_UNIT;
1213 }
1214
1215 if (kdsp_actual == NULL || bindx >= EVENTS_PER_STORAGE_UNIT) {
1216 if (allocate_storage_unit(coreid) == false) {
1217 /*
1218 * this can only happen if wrapping
1219 * has been disabled
1220 */
1221 goto out;
1222 }
1223 goto retry_q;
1224 }
1225 if (!OSCompareAndSwap(bindx, bindx + 1, &kdsp_actual->kds_bufindx)) {
1226 goto retry_q;
1227 }
1228
1229 // IOP entries can be allocated before xnu allocates and inits the buffer
1230 if (timestamp < kdsp_actual->kds_timestamp) {
1231 kdsp_actual->kds_timestamp = timestamp;
1232 }
1233
1234 kd = &kdsp_actual->kds_records[bindx];
1235
1236 kd->debugid = debugid;
1237 kd->arg1 = arg1;
1238 kd->arg2 = arg2;
1239 kd->arg3 = arg3;
1240 kd->arg4 = arg4;
1241 kd->arg5 = threadid;
1242
1243 kdbg_set_timestamp_and_cpu(kd, timestamp, coreid);
1244
1245 OSAddAtomic(1, &kdsp_actual->kds_bufcnt);
1246 out:
1247 enable_preemption();
1248 out1:
1249 if ((kds_waiter && kd_ctrl_page.kds_inuse_count >= n_storage_threshold)) {
1250 kdbg_wakeup();
1251 }
1252 }
1253
1254 /*
1255 * Check if the given debug ID is allowed to be traced on the current process.
1256 *
1257 * Returns true if allowed and false otherwise.
1258 */
1259 static inline bool
1260 kdebug_debugid_procfilt_allowed(uint32_t debugid)
1261 {
1262 uint32_t procfilt_flags = kd_ctrl_page.kdebug_flags &
1263 (KDBG_PIDCHECK | KDBG_PIDEXCLUDE);
1264
1265 if (!procfilt_flags) {
1266 return true;
1267 }
1268
1269 /*
1270 * DBG_TRACE and MACH_SCHED tracepoints ignore the process filter.
1271 */
1272 if ((debugid & 0xffff0000) == MACHDBG_CODE(DBG_MACH_SCHED, 0) ||
1273 (debugid >> 24 == DBG_TRACE)) {
1274 return true;
1275 }
1276
1277 struct proc *curproc = current_proc();
1278 /*
1279 * If the process is missing (early in boot), allow it.
1280 */
1281 if (!curproc) {
1282 return true;
1283 }
1284
1285 if (procfilt_flags & KDBG_PIDCHECK) {
1286 /*
1287 * Allow only processes marked with the kdebug bit.
1288 */
1289 return curproc->p_kdebug;
1290 } else if (procfilt_flags & KDBG_PIDEXCLUDE) {
1291 /*
1292 * Exclude any process marked with the kdebug bit.
1293 */
1294 return !curproc->p_kdebug;
1295 } else {
1296 panic("kdebug: invalid procfilt flags %x", kd_ctrl_page.kdebug_flags);
1297 __builtin_unreachable();
1298 }
1299 }
1300
1301 static void
1302 kernel_debug_internal(
1303 uint32_t debugid,
1304 uintptr_t arg1,
1305 uintptr_t arg2,
1306 uintptr_t arg3,
1307 uintptr_t arg4,
1308 uintptr_t arg5,
1309 uint64_t flags)
1310 {
1311 uint64_t now;
1312 uint32_t bindx;
1313 kd_buf *kd;
1314 int cpu;
1315 struct kd_bufinfo *kdbp;
1316 struct kd_storage *kdsp_actual;
1317 union kds_ptr kds_raw;
1318 bool only_filter = flags & KDBG_FLAG_FILTERED;
1319 bool observe_procfilt = !(flags & KDBG_FLAG_NOPROCFILT);
1320
1321 if (kd_ctrl_page.kdebug_slowcheck) {
1322 if ((kd_ctrl_page.kdebug_slowcheck & SLOW_NOLOG) ||
1323 !(kdebug_enable & (KDEBUG_ENABLE_TRACE | KDEBUG_ENABLE_PPT))) {
1324 goto out1;
1325 }
1326
1327 if (!ml_at_interrupt_context() && observe_procfilt &&
1328 !kdebug_debugid_procfilt_allowed(debugid)) {
1329 goto out1;
1330 }
1331
1332 if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) {
1333 if (typefilter_is_debugid_allowed(kdbg_typefilter, debugid)) {
1334 goto record_event;
1335 }
1336
1337 goto out1;
1338 } else if (only_filter) {
1339 goto out1;
1340 } else if (kd_ctrl_page.kdebug_flags & KDBG_RANGECHECK) {
1341 /* Always record trace system info */
1342 if (KDBG_EXTRACT_CLASS(debugid) == DBG_TRACE) {
1343 goto record_event;
1344 }
1345
1346 if (debugid < kdlog_beg || debugid > kdlog_end) {
1347 goto out1;
1348 }
1349 } else if (kd_ctrl_page.kdebug_flags & KDBG_VALCHECK) {
1350 /* Always record trace system info */
1351 if (KDBG_EXTRACT_CLASS(debugid) == DBG_TRACE) {
1352 goto record_event;
1353 }
1354
1355 if ((debugid & KDBG_EVENTID_MASK) != kdlog_value1 &&
1356 (debugid & KDBG_EVENTID_MASK) != kdlog_value2 &&
1357 (debugid & KDBG_EVENTID_MASK) != kdlog_value3 &&
1358 (debugid & KDBG_EVENTID_MASK) != kdlog_value4) {
1359 goto out1;
1360 }
1361 }
1362 } else if (only_filter) {
1363 goto out1;
1364 }
1365
1366 record_event:
1367 disable_preemption();
1368
1369 if (kd_ctrl_page.enabled == 0) {
1370 goto out;
1371 }
1372
1373 cpu = cpu_number();
1374 kdbp = &kdbip[cpu];
1375
1376 retry_q:
1377 kds_raw = kdbp->kd_list_tail;
1378
1379 if (kds_raw.raw != KDS_PTR_NULL) {
1380 kdsp_actual = POINTER_FROM_KDS_PTR(kds_raw);
1381 bindx = kdsp_actual->kds_bufindx;
1382 } else {
1383 kdsp_actual = NULL;
1384 bindx = EVENTS_PER_STORAGE_UNIT;
1385 }
1386
1387 if (kdsp_actual == NULL || bindx >= EVENTS_PER_STORAGE_UNIT) {
1388 if (allocate_storage_unit(cpu) == false) {
1389 /*
1390 * this can only happen if wrapping
1391 * has been disabled
1392 */
1393 goto out;
1394 }
1395 goto retry_q;
1396 }
1397
1398 now = kdbg_timestamp() & KDBG_TIMESTAMP_MASK;
1399
1400 if (!OSCompareAndSwap(bindx, bindx + 1, &kdsp_actual->kds_bufindx)) {
1401 goto retry_q;
1402 }
1403
1404 kd = &kdsp_actual->kds_records[bindx];
1405
1406 kd->debugid = debugid;
1407 kd->arg1 = arg1;
1408 kd->arg2 = arg2;
1409 kd->arg3 = arg3;
1410 kd->arg4 = arg4;
1411 kd->arg5 = arg5;
1412
1413 kdbg_set_timestamp_and_cpu(kd, now, cpu);
1414
1415 OSAddAtomic(1, &kdsp_actual->kds_bufcnt);
1416
1417 #if KPERF
1418 kperf_kdebug_callback(debugid, __builtin_frame_address(0));
1419 #endif
1420 out:
1421 enable_preemption();
1422 out1:
1423 if (kds_waiter && kd_ctrl_page.kds_inuse_count >= n_storage_threshold) {
1424 uint32_t etype;
1425 uint32_t stype;
1426
1427 etype = debugid & KDBG_EVENTID_MASK;
1428 stype = debugid & KDBG_CSC_MASK;
1429
1430 if (etype == INTERRUPT || etype == MACH_vmfault ||
1431 stype == BSC_SysCall || stype == MACH_SysCall) {
1432 kdbg_wakeup();
1433 }
1434 }
1435 }
1436
1437 __attribute__((noinline))
1438 void
1439 kernel_debug(
1440 uint32_t debugid,
1441 uintptr_t arg1,
1442 uintptr_t arg2,
1443 uintptr_t arg3,
1444 uintptr_t arg4,
1445 __unused uintptr_t arg5)
1446 {
1447 kernel_debug_internal(debugid, arg1, arg2, arg3, arg4,
1448 (uintptr_t)thread_tid(current_thread()), 0);
1449 }
1450
1451 __attribute__((noinline))
1452 void
1453 kernel_debug1(
1454 uint32_t debugid,
1455 uintptr_t arg1,
1456 uintptr_t arg2,
1457 uintptr_t arg3,
1458 uintptr_t arg4,
1459 uintptr_t arg5)
1460 {
1461 kernel_debug_internal(debugid, arg1, arg2, arg3, arg4, arg5, 0);
1462 }
1463
1464 __attribute__((noinline))
1465 void
1466 kernel_debug_flags(
1467 uint32_t debugid,
1468 uintptr_t arg1,
1469 uintptr_t arg2,
1470 uintptr_t arg3,
1471 uintptr_t arg4,
1472 uint64_t flags)
1473 {
1474 kernel_debug_internal(debugid, arg1, arg2, arg3, arg4,
1475 (uintptr_t)thread_tid(current_thread()), flags);
1476 }
1477
1478 __attribute__((noinline))
1479 void
1480 kernel_debug_filtered(
1481 uint32_t debugid,
1482 uintptr_t arg1,
1483 uintptr_t arg2,
1484 uintptr_t arg3,
1485 uintptr_t arg4)
1486 {
1487 kernel_debug_flags(debugid, arg1, arg2, arg3, arg4, KDBG_FLAG_FILTERED);
1488 }
1489
1490 void
1491 kernel_debug_string_early(const char *message)
1492 {
1493 uintptr_t arg[4] = {0, 0, 0, 0};
1494
1495 /* Stuff the message string in the args and log it. */
1496 strncpy((char *)arg, message, MIN(sizeof(arg), strlen(message)));
1497 KERNEL_DEBUG_EARLY(
1498 TRACE_INFO_STRING,
1499 arg[0], arg[1], arg[2], arg[3]);
1500 }
1501
1502 #define SIMPLE_STR_LEN (64)
1503 static_assert(SIMPLE_STR_LEN % sizeof(uintptr_t) == 0);
1504
1505 void
1506 kernel_debug_string_simple(uint32_t eventid, const char *str)
1507 {
1508 if (!kdebug_enable) {
1509 return;
1510 }
1511
1512 /* array of uintptr_ts simplifies emitting the string as arguments */
1513 uintptr_t str_buf[(SIMPLE_STR_LEN / sizeof(uintptr_t)) + 1] = { 0 };
1514 size_t len = strlcpy((char *)str_buf, str, SIMPLE_STR_LEN + 1);
1515
1516 uintptr_t thread_id = (uintptr_t)thread_tid(current_thread());
1517 uint32_t debugid = eventid | DBG_FUNC_START;
1518
1519 /* string can fit in a single tracepoint */
1520 if (len <= (4 * sizeof(uintptr_t))) {
1521 debugid |= DBG_FUNC_END;
1522 }
1523
1524 kernel_debug_internal(debugid, str_buf[0],
1525 str_buf[1],
1526 str_buf[2],
1527 str_buf[3], thread_id, 0);
1528
1529 debugid &= KDBG_EVENTID_MASK;
1530 int i = 4;
1531 size_t written = 4 * sizeof(uintptr_t);
1532
1533 for (; written < len; i += 4, written += 4 * sizeof(uintptr_t)) {
1534 /* if this is the last tracepoint to be emitted */
1535 if ((written + (4 * sizeof(uintptr_t))) >= len) {
1536 debugid |= DBG_FUNC_END;
1537 }
1538 kernel_debug_internal(debugid, str_buf[i],
1539 str_buf[i + 1],
1540 str_buf[i + 2],
1541 str_buf[i + 3], thread_id, 0);
1542 }
1543 }
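
/*
 * Usage sketch (illustrative; the event ID is hypothetical): a caller can
 * emit a short annotation with
 *
 *	kernel_debug_string_simple(MY_SUBSYSTEM_EVENT, "checkpoint reached");
 *
 * Strings of up to 4 * sizeof(uintptr_t) bytes fit in one START|END event;
 * longer strings continue in follow-on events of 4 arguments each, with
 * DBG_FUNC_END set on the last one.
 */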
1544
1545 extern int master_cpu; /* MACH_KERNEL_PRIVATE */
1546 /*
1547 * Used prior to start_kern_tracing() being called.
1548 * Log temporarily into a static buffer.
1549 */
1550 void
1551 kernel_debug_early(
1552 uint32_t debugid,
1553 uintptr_t arg1,
1554 uintptr_t arg2,
1555 uintptr_t arg3,
1556 uintptr_t arg4)
1557 {
1558 #if defined(__x86_64__)
1559 extern int early_boot;
1560 /*
1561 * Note that "early" isn't early enough in some cases where
1562 * we're invoked before gsbase is set on x86, hence the
1563 * check of "early_boot".
1564 */
1565 if (early_boot) {
1566 return;
1567 }
1568 #endif
1569
1570 /* If early tracing is over, use the normal path. */
1571 if (kd_early_done) {
1572 KDBG_RELEASE(debugid, arg1, arg2, arg3, arg4);
1573 return;
1574 }
1575
1576 /* Do nothing if the buffer is full or we're not on the boot cpu. */
1577 kd_early_overflow = kd_early_index >= KD_EARLY_BUFFER_NBUFS;
1578 if (kd_early_overflow || cpu_number() != master_cpu) {
1579 return;
1580 }
1581
1582 kd_early_buffer[kd_early_index].debugid = debugid;
1583 kd_early_buffer[kd_early_index].timestamp = mach_absolute_time();
1584 kd_early_buffer[kd_early_index].arg1 = arg1;
1585 kd_early_buffer[kd_early_index].arg2 = arg2;
1586 kd_early_buffer[kd_early_index].arg3 = arg3;
1587 kd_early_buffer[kd_early_index].arg4 = arg4;
1588 kd_early_buffer[kd_early_index].arg5 = 0;
1589 kd_early_index++;
1590 }
1591
1592 /*
1593 * Transfer the contents of the temporary buffer into the trace buffers.
1594 * Precede that by logging the rebase time (offset), i.e. the TSC-based time
1595 * (in ns) at which mach_absolute_time is set to 0.
1596 */
1597 static void
1598 kernel_debug_early_end(void)
1599 {
1600 if (cpu_number() != master_cpu) {
1601 panic("kernel_debug_early_end() not called on boot processor");
1602 }
1603
1604 /* reset the current oldest time to allow early events */
1605 kd_ctrl_page.oldest_time = 0;
1606
1607 #if defined(__x86_64__)
1608 /* Fake sentinel marking the start of kernel time relative to TSC */
1609 kernel_debug_enter(0, TRACE_TIMESTAMPS, 0,
1610 (uint32_t)(tsc_rebase_abs_time >> 32), (uint32_t)tsc_rebase_abs_time,
1611 tsc_at_boot, 0, 0);
1612 #endif /* defined(__x86_64__) */
1613 for (unsigned int i = 0; i < kd_early_index; i++) {
1614 kernel_debug_enter(0,
1615 kd_early_buffer[i].debugid,
1616 kd_early_buffer[i].timestamp,
1617 kd_early_buffer[i].arg1,
1618 kd_early_buffer[i].arg2,
1619 kd_early_buffer[i].arg3,
1620 kd_early_buffer[i].arg4,
1621 0);
1622 }
1623
1624 /* Cut events-lost event on overflow */
1625 if (kd_early_overflow) {
1626 KDBG_RELEASE(TRACE_LOST_EVENTS, 1);
1627 }
1628
1629 kd_early_done = true;
1630
1631 /* This trace marks the start of kernel tracing */
1632 kernel_debug_string_early("early trace done");
1633 }
1634
1635 void
1636 kernel_debug_disable(void)
1637 {
1638 if (kdebug_enable) {
1639 kdbg_set_tracing_enabled(false, 0);
1640 }
1641 }
1642
1643 /*
1644 * Returns non-zero if debugid is in a reserved class.
1645 */
1646 static int
1647 kdebug_validate_debugid(uint32_t debugid)
1648 {
1649 uint8_t debugid_class;
1650
1651 debugid_class = KDBG_EXTRACT_CLASS(debugid);
1652 switch (debugid_class) {
1653 case DBG_TRACE:
1654 return EPERM;
1655 }
1656
1657 return 0;
1658 }
1659
1660 /*
1661 * Support syscall SYS_kdebug_typefilter.
1662 */
1663 int
1664 kdebug_typefilter(__unused struct proc* p,
1665 struct kdebug_typefilter_args* uap,
1666 __unused int *retval)
1667 {
1668 int ret = KERN_SUCCESS;
1669
1670 if (uap->addr == USER_ADDR_NULL ||
1671 uap->size == USER_ADDR_NULL) {
1672 return EINVAL;
1673 }
1674
1675 /*
1676 * The atomic load is to close a race window with setting the typefilter
1677 * and memory entry values. A description follows:
1678 *
1679 * Thread 1 (writer)
1680 *
1681 * Allocate Typefilter
1682 * Allocate MemoryEntry
1683 * Write Global MemoryEntry Ptr
1684 * Atomic Store (Release) Global Typefilter Ptr
1685 *
1686 * Thread 2 (reader, AKA us)
1687 *
1688 * if ((Atomic Load (Acquire) Global Typefilter Ptr) == NULL)
1689 * return;
1690 *
1691 * Without the atomic store, it isn't guaranteed that the write of
1692 * Global MemoryEntry Ptr is visible before we can see the write of
1693 * Global Typefilter Ptr.
1694 *
1695 * Without the atomic load, it isn't guaranteed that the loads of
1696 * Global MemoryEntry Ptr aren't speculated.
1697 *
1698 * The global pointers transition from NULL -> valid once and only once,
1699 * and never change after becoming valid. This means that having passed
1700 * the first atomic load test of Global Typefilter Ptr, this function
1701 * can then safely use the remaining global state without atomic checks.
1702 */
1703 if (!os_atomic_load(&kdbg_typefilter, acquire)) {
1704 return EINVAL;
1705 }
1706
1707 assert(kdbg_typefilter_memory_entry);
1708
1709 mach_vm_offset_t user_addr = 0;
1710 vm_map_t user_map = current_map();
1711
1712 ret = mach_to_bsd_errno(
1713 mach_vm_map_kernel(user_map, // target map
1714 &user_addr, // [in, out] target address
1715 TYPEFILTER_ALLOC_SIZE, // initial size
1716 0, // mask (alignment?)
1717 VM_FLAGS_ANYWHERE, // flags
1718 VM_MAP_KERNEL_FLAGS_NONE,
1719 VM_KERN_MEMORY_NONE,
1720 kdbg_typefilter_memory_entry, // port (memory entry!)
1721 0, // offset (in memory entry)
1722 false, // should copy
1723 VM_PROT_READ, // cur_prot
1724 VM_PROT_READ, // max_prot
1725 VM_INHERIT_SHARE)); // inherit behavior on fork
1726
1727 if (ret == KERN_SUCCESS) {
1728 vm_size_t user_ptr_size = vm_map_is_64bit(user_map) ? 8 : 4;
1729 ret = copyout(CAST_DOWN(void *, &user_addr), uap->addr, user_ptr_size );
1730
1731 if (ret != KERN_SUCCESS) {
1732 mach_vm_deallocate(user_map, user_addr, TYPEFILTER_ALLOC_SIZE);
1733 }
1734 }
1735
1736 return ret;
1737 }
1738
1739 /*
1740 * Support syscall SYS_kdebug_trace. U64->K32 args may get truncated in kdebug_trace64
1741 */
1742 int
1743 kdebug_trace(struct proc *p, struct kdebug_trace_args *uap, int32_t *retval)
1744 {
1745 struct kdebug_trace64_args uap64;
1746
1747 uap64.code = uap->code;
1748 uap64.arg1 = uap->arg1;
1749 uap64.arg2 = uap->arg2;
1750 uap64.arg3 = uap->arg3;
1751 uap64.arg4 = uap->arg4;
1752
1753 return kdebug_trace64(p, &uap64, retval);
1754 }
1755
1756 /*
1757 * Support syscall SYS_kdebug_trace64. 64-bit args on K32 will get truncated
1758 * to fit in 32-bit record format.
1759 *
1760 * It is intentional that error conditions are not checked until kdebug is
1761 * enabled. This matches the userspace wrapper behavior, which optimizes
1762 * for performance in the non-error case.
1763 */
1764 int
1765 kdebug_trace64(__unused struct proc *p, struct kdebug_trace64_args *uap, __unused int32_t *retval)
1766 {
1767 int err;
1768
1769 if (__probable(kdebug_enable == 0)) {
1770 return 0;
1771 }
1772
1773 if ((err = kdebug_validate_debugid(uap->code)) != 0) {
1774 return err;
1775 }
1776
1777 kernel_debug_internal(uap->code, (uintptr_t)uap->arg1,
1778 (uintptr_t)uap->arg2, (uintptr_t)uap->arg3, (uintptr_t)uap->arg4,
1779 (uintptr_t)thread_tid(current_thread()), 0);
1780
1781 return 0;
1782 }
1783
1784 /*
1785 * Adding enough padding to contain a full tracepoint for the last
1786 * portion of the string greatly simplifies the logic of splitting the
1787 * string between tracepoints. Full tracepoints can be generated using
1788 * the buffer itself, without having to manually add zeros to pad the
1789 * arguments.
1790 */
1791
1792 /* 2 string args in first tracepoint and 9 string data tracepoints */
1793 #define STR_BUF_ARGS (2 + (9 * 4))
1794 /* times the size of each arg on K64 */
1795 #define MAX_STR_LEN (STR_BUF_ARGS * sizeof(uint64_t))
1796 /* on K32, ending straddles a tracepoint, so reserve blanks */
1797 #define STR_BUF_SIZE (MAX_STR_LEN + (2 * sizeof(uint32_t)))
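
/*
 * Worked example (illustrative), on K64: the first tracepoint carries 2
 * string arguments (its other two hold the debugid and str_id) and each of
 * the 9 follow-on tracepoints carries 4, so
 *
 *	STR_BUF_ARGS = 2 + 9 * 4                  = 38 arguments
 *	MAX_STR_LEN  = 38 * sizeof(uint64_t)      = 304 bytes of string data
 *	STR_BUF_SIZE = 304 + 2 * sizeof(uint32_t) = 312 bytes
 *
 * so the final K32 tracepoint can always read four full arguments from the
 * buffer without running past it.
 */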
1798
1799 /*
1800 * This function does no error checking and assumes that it is called with
1801 * the correct arguments, including that the buffer pointed to by str is at
1802 * least STR_BUF_SIZE bytes. However, str must be aligned to word-size and
1803 * be NUL-terminated. In cases where a string can fit evenly into a final
1804 * tracepoint without its NUL-terminator, this function will not end those
1805 * strings with a NUL in trace. It's up to clients to look at the function
1806 * qualifier for DBG_FUNC_END in this case, to end the string.
1807 */
1808 static uint64_t
1809 kernel_debug_string_internal(uint32_t debugid, uint64_t str_id, void *vstr,
1810 size_t str_len)
1811 {
1812 /* str must be word-aligned */
1813 uintptr_t *str = vstr;
1814 size_t written = 0;
1815 uintptr_t thread_id;
1816 int i;
1817 uint32_t trace_debugid = TRACEDBG_CODE(DBG_TRACE_STRING,
1818 TRACE_STRING_GLOBAL);
1819
1820 thread_id = (uintptr_t)thread_tid(current_thread());
1821
1822 /* if the ID is being invalidated, just emit that */
1823 if (str_id != 0 && str_len == 0) {
1824 kernel_debug_internal(trace_debugid | DBG_FUNC_START | DBG_FUNC_END,
1825 (uintptr_t)debugid, (uintptr_t)str_id, 0, 0, thread_id, 0);
1826 return str_id;
1827 }
1828
1829 /* generate an ID, if necessary */
1830 if (str_id == 0) {
1831 str_id = OSIncrementAtomic64((SInt64 *)&g_curr_str_id);
1832 str_id = (str_id & STR_ID_MASK) | g_str_id_signature;
1833 }
1834
1835 trace_debugid |= DBG_FUNC_START;
1836 /* string can fit in a single tracepoint */
1837 if (str_len <= (2 * sizeof(uintptr_t))) {
1838 trace_debugid |= DBG_FUNC_END;
1839 }
1840
1841 kernel_debug_internal(trace_debugid, (uintptr_t)debugid, (uintptr_t)str_id,
1842 str[0], str[1], thread_id, 0);
1843
1844 trace_debugid &= KDBG_EVENTID_MASK;
1845 i = 2;
1846 written += 2 * sizeof(uintptr_t);
1847
1848 for (; written < str_len; i += 4, written += 4 * sizeof(uintptr_t)) {
1849 if ((written + (4 * sizeof(uintptr_t))) >= str_len) {
1850 trace_debugid |= DBG_FUNC_END;
1851 }
1852 kernel_debug_internal(trace_debugid, str[i],
1853 str[i + 1],
1854 str[i + 2],
1855 str[i + 3], thread_id, 0);
1856 }
1857
1858 return str_id;
1859 }
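
/*
 * Worked example (illustrative), on K64: a 20-byte string has str_len > 16,
 * so the first event carries DBG_FUNC_START with str[0] and str[1] (16
 * bytes) plus the debugid and str_id; written is then 16 < 20, so one
 * follow-on event carries str[2]..str[5] and, because 16 + 32 >= 20, it is
 * tagged DBG_FUNC_END. Two events total.
 */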
1860
1861 /*
1862 * Returns true if the current process can emit events, and false otherwise.
1863 * Trace system and scheduling events circumvent this check, as do events
1864 * emitted in interrupt context.
1865 */
1866 static bool
1867 kdebug_current_proc_enabled(uint32_t debugid)
1868 {
1869 /* can't determine current process in interrupt context */
1870 if (ml_at_interrupt_context()) {
1871 return true;
1872 }
1873
1874 /* always emit trace system and scheduling events */
1875 if ((KDBG_EXTRACT_CLASS(debugid) == DBG_TRACE ||
1876 (debugid & KDBG_CSC_MASK) == MACHDBG_CODE(DBG_MACH_SCHED, 0))) {
1877 return true;
1878 }
1879
1880 if (kd_ctrl_page.kdebug_flags & KDBG_PIDCHECK) {
1881 proc_t cur_proc = current_proc();
1882
1883 /* only the process with the kdebug bit set is allowed */
1884 if (cur_proc && !(cur_proc->p_kdebug)) {
1885 return false;
1886 }
1887 } else if (kd_ctrl_page.kdebug_flags & KDBG_PIDEXCLUDE) {
1888 proc_t cur_proc = current_proc();
1889
1890 /* every process except the one with the kdebug bit set is allowed */
1891 if (cur_proc && cur_proc->p_kdebug) {
1892 return false;
1893 }
1894 }
1895
1896 return true;
1897 }
1898
1899 bool
1900 kdebug_debugid_enabled(uint32_t debugid)
1901 {
1902 /* if no filtering is enabled */
1903 if (!kd_ctrl_page.kdebug_slowcheck) {
1904 return true;
1905 }
1906
1907 return kdebug_debugid_explicitly_enabled(debugid);
1908 }
1909
1910 bool
1911 kdebug_debugid_explicitly_enabled(uint32_t debugid)
1912 {
1913 if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) {
1914 return typefilter_is_debugid_allowed(kdbg_typefilter, debugid);
1915 } else if (KDBG_EXTRACT_CLASS(debugid) == DBG_TRACE) {
1916 return true;
1917 } else if (kd_ctrl_page.kdebug_flags & KDBG_RANGECHECK) {
1918 if (debugid < kdlog_beg || debugid > kdlog_end) {
1919 return false;
1920 }
1921 } else if (kd_ctrl_page.kdebug_flags & KDBG_VALCHECK) {
1922 if ((debugid & KDBG_EVENTID_MASK) != kdlog_value1 &&
1923 (debugid & KDBG_EVENTID_MASK) != kdlog_value2 &&
1924 (debugid & KDBG_EVENTID_MASK) != kdlog_value3 &&
1925 (debugid & KDBG_EVENTID_MASK) != kdlog_value4) {
1926 return false;
1927 }
1928 }
1929
1930 return true;
1931 }
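/*
 * An illustrative walk-through of the checks above, using an event ID built
 * with macros from <sys/kdebug.h>:
 *
 *   uint32_t debugid = KDBG_EVENTID(DBG_MACH, DBG_MACH_EXCP_SC, 1);
 *
 * With KDBG_TYPEFILTER_CHECK set, the decision is a single bitmap lookup
 * keyed on the event's class/subclass pair.  With KDBG_RANGECHECK set, the
 * event ID must fall within [kdlog_beg, kdlog_end].  With KDBG_VALCHECK set,
 * the event ID (function qualifiers masked off by KDBG_EVENTID_MASK) must
 * match one of kdlog_value1 through kdlog_value4.
 */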
1932
1933 bool
1934 kdebug_using_continuous_time(void)
1935 {
1936 return kdebug_enable & KDEBUG_ENABLE_CONT_TIME;
1937 }
1938
1939 /*
1940 * Returns 0 if a string can be traced with these arguments. Returns errno
1941 * value if error occurred.
1942 */
1943 static errno_t
1944 kdebug_check_trace_string(uint32_t debugid, uint64_t str_id)
1945 {
1946 /* if there are function qualifiers on the debugid */
1947 if (debugid & ~KDBG_EVENTID_MASK) {
1948 return EINVAL;
1949 }
1950
1951 if (kdebug_validate_debugid(debugid)) {
1952 return EPERM;
1953 }
1954
1955 if (str_id != 0 && (str_id & STR_ID_SIG_MASK) != g_str_id_signature) {
1956 return EINVAL;
1957 }
1958
1959 return 0;
1960 }
1961
1962 /*
1963 * Implementation of KPI kernel_debug_string.
1964 */
1965 int
1966 kernel_debug_string(uint32_t debugid, uint64_t *str_id, const char *str)
1967 {
1968 /* arguments to tracepoints must be word-aligned */
1969 __attribute__((aligned(sizeof(uintptr_t)))) char str_buf[STR_BUF_SIZE];
1970 static_assert(sizeof(str_buf) > MAX_STR_LEN);
1971 vm_size_t len_copied;
1972 int err;
1973
1974 assert(str_id);
1975
1976 if (__probable(kdebug_enable == 0)) {
1977 return 0;
1978 }
1979
1980 if (!kdebug_current_proc_enabled(debugid)) {
1981 return 0;
1982 }
1983
1984 if (!kdebug_debugid_enabled(debugid)) {
1985 return 0;
1986 }
1987
1988 if ((err = kdebug_check_trace_string(debugid, *str_id)) != 0) {
1989 return err;
1990 }
1991
1992 if (str == NULL) {
1993 if (*str_id == 0) {
1994 return EINVAL;
1995 }
1996
1997 *str_id = kernel_debug_string_internal(debugid, *str_id, NULL, 0);
1998 return 0;
1999 }
2000
2001 memset(str_buf, 0, sizeof(str_buf));
2002 len_copied = strlcpy(str_buf, str, MAX_STR_LEN + 1);
2003 *str_id = kernel_debug_string_internal(debugid, *str_id, str_buf,
2004 len_copied);
2005 return 0;
2006 }
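/*
 * A minimal usage sketch for the KPI above (MY_SUBSYS_EVENT is a
 * hypothetical subsystem code, not one defined here):
 *
 *   uint64_t str_id = 0;
 *   // First call: a new str_id is generated and returned through the
 *   // pointer, and the string is emitted into the trace stream.
 *   kernel_debug_string(MY_SUBSYS_EVENT, &str_id, "request started");
 *   ...
 *   // Later call with a NULL string: the existing str_id is invalidated.
 *   kernel_debug_string(MY_SUBSYS_EVENT, &str_id, NULL);
 */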
2007
2008 /*
2009 * Support syscall kdebug_trace_string.
2010 */
2011 int
2012 kdebug_trace_string(__unused struct proc *p,
2013 struct kdebug_trace_string_args *uap,
2014 uint64_t *retval)
2015 {
2016 __attribute__((aligned(sizeof(uintptr_t)))) char str_buf[STR_BUF_SIZE];
2017 static_assert(sizeof(str_buf) > MAX_STR_LEN);
2018 size_t len_copied;
2019 int err;
2020
2021 if (__probable(kdebug_enable == 0)) {
2022 return 0;
2023 }
2024
2025 if (!kdebug_current_proc_enabled(uap->debugid)) {
2026 return 0;
2027 }
2028
2029 if (!kdebug_debugid_enabled(uap->debugid)) {
2030 return 0;
2031 }
2032
2033 if ((err = kdebug_check_trace_string(uap->debugid, uap->str_id)) != 0) {
2034 return err;
2035 }
2036
2037 if (uap->str == USER_ADDR_NULL) {
2038 if (uap->str_id == 0) {
2039 return EINVAL;
2040 }
2041
2042 *retval = kernel_debug_string_internal(uap->debugid, uap->str_id,
2043 NULL, 0);
2044 return 0;
2045 }
2046
2047 memset(str_buf, 0, sizeof(str_buf));
2048 err = copyinstr(uap->str, str_buf, MAX_STR_LEN + 1, &len_copied);
2049
2050 /* it's alright to truncate the string, so allow ENAMETOOLONG */
2051 if (err == ENAMETOOLONG) {
2052 str_buf[MAX_STR_LEN] = '\0';
2053 } else if (err) {
2054 return err;
2055 }
2056
2057 if (len_copied <= 1) {
2058 return EINVAL;
2059 }
2060
2061 /* convert back to a length */
2062 len_copied--;
2063
2064 *retval = kernel_debug_string_internal(uap->debugid, uap->str_id, str_buf,
2065 len_copied);
2066 return 0;
2067 }
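/*
 * From user space this path is reached through the kdebug_trace_string()
 * wrapper declared for user programs in <sys/kdebug.h>.  A hedged sketch
 * (the event ID is a placeholder):
 *
 *   uint32_t debugid = KDBG_EVENTID(DBG_APPS, 0x1, 0x1);        // hypothetical
 *   uint64_t id = kdebug_trace_string(debugid, 0, "phase one");  // create
 *   ...
 *   kdebug_trace_string(debugid, id, NULL);                      // invalidate
 */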
2068
2069 static void
2070 kdbg_lock_init(void)
2071 {
2072 static lck_grp_attr_t *kdebug_lck_grp_attr = NULL;
2073 static lck_attr_t *kdebug_lck_attr = NULL;
2074
2075 if (kd_ctrl_page.kdebug_flags & KDBG_LOCKINIT) {
2076 return;
2077 }
2078
2079 assert(kdebug_lck_grp_attr == NULL);
2080 kdebug_lck_grp_attr = lck_grp_attr_alloc_init();
2081 kdebug_lck_grp = lck_grp_alloc_init("kdebug", kdebug_lck_grp_attr);
2082 kdebug_lck_attr = lck_attr_alloc_init();
2083
2084 kds_spin_lock = lck_spin_alloc_init(kdebug_lck_grp, kdebug_lck_attr);
2085 kdw_spin_lock = lck_spin_alloc_init(kdebug_lck_grp, kdebug_lck_attr);
2086
2087 kd_ctrl_page.kdebug_flags |= KDBG_LOCKINIT;
2088 }
2089
2090 int
2091 kdbg_bootstrap(bool early_trace)
2092 {
2093 kd_ctrl_page.kdebug_flags &= ~KDBG_WRAPPED;
2094
2095 return create_buffers(early_trace);
2096 }
2097
2098 int
2099 kdbg_reinit(bool early_trace)
2100 {
2101 int ret = 0;
2102
2103 /*
2104 * Disable trace collecting
2105 * First make sure we're not in
2106 * the middle of cutting a trace
2107 */
2108 kernel_debug_disable();
2109
2110 /*
2111 * make sure the SLOW_NOLOG is seen
2112 * by everyone that might be trying
2113 * to cut a trace.
2114 */
2115 IOSleep(100);
2116
2117 delete_buffers();
2118
2119 kdbg_clear_thread_map();
2120 ret = kdbg_bootstrap(early_trace);
2121
2122 RAW_file_offset = 0;
2123 RAW_file_written = 0;
2124
2125 return ret;
2126 }
2127
2128 void
2129 kdbg_trace_data(struct proc *proc, long *arg_pid, long *arg_uniqueid)
2130 {
2131 if (!proc) {
2132 *arg_pid = 0;
2133 *arg_uniqueid = 0;
2134 } else {
2135 *arg_pid = proc->p_pid;
2136 /* Fit in a trace point */
2137 *arg_uniqueid = (long)proc->p_uniqueid;
2138 if ((uint64_t) *arg_uniqueid != proc->p_uniqueid) {
2139 *arg_uniqueid = 0;
2140 }
2141 }
2142 }
2143
2144
2145 void
2146 kdbg_trace_string(struct proc *proc, long *arg1, long *arg2, long *arg3,
2147 long *arg4)
2148 {
2149 if (!proc) {
2150 *arg1 = 0;
2151 *arg2 = 0;
2152 *arg3 = 0;
2153 *arg4 = 0;
2154 return;
2155 }
2156
2157 const char *procname = proc_best_name(proc);
2158 size_t namelen = strlen(procname);
2159
2160 long args[4] = { 0 };
2161
2162 if (namelen > sizeof(args)) {
2163 namelen = sizeof(args);
2164 }
2165
2166 strncpy((char *)args, procname, namelen);
2167
2168 *arg1 = args[0];
2169 *arg2 = args[1];
2170 *arg3 = args[2];
2171 *arg4 = args[3];
2172 }
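/*
 * A worked example of the packing above, assuming LP64 (args[] is then 32
 * bytes): the name "kernel_task" (11 characters) fills args[0] and the first
 * three bytes of args[1], the rest stays zero, and the four longs are handed
 * back through arg1..arg4.  Names longer than 32 bytes are silently
 * truncated and not NUL-terminated.
 */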
2173
2174 /*
2175 *
2176 * Writes a cpumap for the given iops_list/cpu_count to the provided buffer.
2177 *
2178 * You may provide a buffer and size, or if you set the buffer to NULL, a
2179 * buffer of sufficient size will be allocated.
2180 *
2181 * If you provide a buffer and it is too small, sets cpumap_size to the number
2182 * of bytes required and returns EINVAL.
2183 *
2184 * On success, if you provided a buffer, cpumap_size is set to the number of
2185 * bytes written. If you did not provide a buffer, cpumap is set to the newly
2186 * allocated buffer and cpumap_size is set to the number of bytes allocated.
2187 *
2188 * NOTE: It may seem redundant to pass both iops and a cpu_count.
2189 *
2190 * We may be reporting data from "now", or from the "past".
2191 *
2192 * The "past" data would be for kdbg_readcpumap().
2193 *
2194 * If we do not pass both iops and cpu_count, and iops is NULL, this function
2195 * will need to read "now" state to get the number of cpus, which would be in
2196 * error if we were reporting "past" state.
2197 */
2198
2199 int
2200 kdbg_cpumap_init_internal(kd_iop_t* iops, uint32_t cpu_count, uint8_t** cpumap, uint32_t* cpumap_size)
2201 {
2202 assert(cpumap);
2203 assert(cpumap_size);
2204 assert(cpu_count);
2205 assert(!iops || iops->cpu_id + 1 == cpu_count);
2206
2207 uint32_t bytes_needed = sizeof(kd_cpumap_header) + cpu_count * sizeof(kd_cpumap);
2208 uint32_t bytes_available = *cpumap_size;
2209 *cpumap_size = bytes_needed;
2210
2211 if (*cpumap == NULL) {
2212 if (kmem_alloc(kernel_map, (vm_offset_t*)cpumap, (vm_size_t)*cpumap_size, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
2213 return ENOMEM;
2214 }
2215 bzero(*cpumap, *cpumap_size);
2216 } else if (bytes_available < bytes_needed) {
2217 return EINVAL;
2218 }
2219
2220 kd_cpumap_header* header = (kd_cpumap_header*)(uintptr_t)*cpumap;
2221
2222 header->version_no = RAW_VERSION1;
2223 header->cpu_count = cpu_count;
2224
2225 kd_cpumap* cpus = (kd_cpumap*)&header[1];
2226
2227 int32_t index = cpu_count - 1;
2228 while (iops) {
2229 cpus[index].cpu_id = iops->cpu_id;
2230 cpus[index].flags = KDBG_CPUMAP_IS_IOP;
2231 strlcpy(cpus[index].name, iops->callback.iop_name, sizeof(cpus->name));
2232
2233 iops = iops->next;
2234 index--;
2235 }
2236
2237 while (index >= 0) {
2238 cpus[index].cpu_id = index;
2239 cpus[index].flags = 0;
2240 strlcpy(cpus[index].name, "AP", sizeof(cpus->name));
2241
2242 index--;
2243 }
2244
2245 return KERN_SUCCESS;
2246 }
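/*
 * A minimal sketch of the two calling conventions documented above, as used
 * elsewhere in this file (error handling elided):
 *
 *   // 1. Let the function allocate: the buffer comes from kmem_alloc() and
 *   //    must later be released with kmem_free(kernel_map, ...).
 *   uint8_t *cpumap = NULL;
 *   uint32_t cpumap_size = 0;
 *   kdbg_cpumap_init_internal(kd_ctrl_page.kdebug_iops,
 *       kd_ctrl_page.kdebug_cpus, &cpumap, &cpumap_size);
 *
 *   // 2. Provide a buffer up front: if it is too small, EINVAL is returned
 *   //    and cpumap_size is set to the number of bytes required.
 */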
2247
2248 void
2249 kdbg_thrmap_init(void)
2250 {
2251 ktrace_assert_lock_held();
2252
2253 if (kd_ctrl_page.kdebug_flags & KDBG_MAPINIT) {
2254 return;
2255 }
2256
2257 kd_mapptr = kdbg_thrmap_init_internal(0, &kd_mapsize, &kd_mapcount);
2258
2259 if (kd_mapptr) {
2260 kd_ctrl_page.kdebug_flags |= KDBG_MAPINIT;
2261 }
2262 }
2263
2264 static void
2265 kd_resolve_map(thread_t thread, void *opaque)
2266 {
2267 struct kd_resolver *resolve = opaque;
2268
2269 if (resolve->krs_count < resolve->krs_maxcount) {
2270 kd_threadmap *map = &resolve->krs_map[resolve->krs_count];
2271 struct kd_task_name *task_name = resolve->krs_task;
2272 map->thread = (uintptr_t)thread_tid(thread);
2273
2274 (void)strlcpy(map->command, task_name->ktn_name, sizeof(map->command));
2275 /*
2276 * Kernel threads should still be marked with a non-zero valid bit.
2277 */
2278 pid_t pid = resolve->krs_task->ktn_pid;
2279 map->valid = pid == 0 ? 1 : pid;
2280 resolve->krs_count++;
2281 }
2282 }
2283
2284 static vm_size_t
2285 kd_resolve_tasks(struct kd_task_name *task_names, vm_size_t ntasks)
2286 {
2287 vm_size_t i = 0;
2288 proc_t p = PROC_NULL;
2289
2290 proc_list_lock();
2291 ALLPROC_FOREACH(p) {
2292 if (i >= ntasks) {
2293 break;
2294 }
2295 /*
2296 * Only record processes that can be referenced and are not exiting.
2297 */
2298 if (p->task && (p->p_lflag & P_LEXIT) == 0) {
2299 task_reference(p->task);
2300 task_names[i].ktn_task = p->task;
2301 task_names[i].ktn_pid = p->p_pid;
2302 (void)strlcpy(task_names[i].ktn_name, proc_best_name(p),
2303 sizeof(task_names[i].ktn_name));
2304 i++;
2305 }
2306 }
2307 proc_list_unlock();
2308
2309 return i;
2310 }
2311
2312 static vm_size_t
2313 kd_resolve_threads(kd_threadmap *map, struct kd_task_name *task_names,
2314 vm_size_t ntasks, vm_size_t nthreads)
2315 {
2316 struct kd_resolver resolver = {
2317 .krs_map = map, .krs_count = 0, .krs_maxcount = nthreads,
2318 };
2319
2320 for (int i = 0; i < ntasks; i++) {
2321 struct kd_task_name *cur_task = &task_names[i];
2322 resolver.krs_task = cur_task;
2323 task_act_iterate_wth_args(cur_task->ktn_task, kd_resolve_map,
2324 &resolver);
2325 task_deallocate(cur_task->ktn_task);
2326 }
2327
2328 return resolver.krs_count;
2329 }
2330
2331 static kd_threadmap *
2332 kdbg_thrmap_init_internal(size_t maxthreads, vm_size_t *mapsize,
2333 vm_size_t *mapcount)
2334 {
2335 kd_threadmap *thread_map = NULL;
2336 struct kd_task_name *task_names;
2337 vm_size_t names_size = 0;
2338
2339 assert(mapsize != NULL);
2340 assert(mapcount != NULL);
2341
2342 vm_size_t nthreads = threads_count;
2343 vm_size_t ntasks = tasks_count;
2344
2345 /*
2346 * Allow 25% more threads and tasks to be created between now and taking the
2347 * proc_list_lock.
2348 */
2349 if (os_add_overflow(nthreads, nthreads / 4, &nthreads) ||
2350 os_add_overflow(ntasks, ntasks / 4, &ntasks)) {
2351 return NULL;
2352 }
2353
2354 *mapcount = nthreads;
2355 if (os_mul_overflow(nthreads, sizeof(kd_threadmap), mapsize)) {
2356 return NULL;
2357 }
2358 if (os_mul_overflow(ntasks, sizeof(task_names[0]), &names_size)) {
2359 return NULL;
2360 }
2361
2362 /*
2363 * Wait until the out-parameters have been filled with the needed size to
2364 * do the bounds checking on the provided maximum.
2365 */
2366 if (maxthreads != 0 && maxthreads < nthreads) {
2367 return NULL;
2368 }
2369
2370 thread_map = kalloc_tag(*mapsize, VM_KERN_MEMORY_DIAG);
2371 bzero(thread_map, *mapsize);
2372 task_names = kheap_alloc(KHEAP_TEMP, names_size, Z_WAITOK | Z_ZERO);
2373 ntasks = kd_resolve_tasks(task_names, ntasks);
2374 *mapcount = kd_resolve_threads(thread_map, task_names, ntasks, nthreads);
2375 kheap_free(KHEAP_TEMP, task_names, names_size);
2376 return thread_map;
2377 }
2378
2379 static void
2380 kdbg_clear(void)
2381 {
2382 /*
2383 * Clean up the trace buffer
2384 * First make sure we're not in
2385 * the middle of cutting a trace
2386 */
2387 kernel_debug_disable();
2388 kdbg_disable_typefilter();
2389
2390 /*
2391 * make sure the SLOW_NOLOG is seen
2392 * by everyone that might be trying
2393 * to cut a trace.
2394 */
2395 IOSleep(100);
2396
2397 /* reset kdebug state for each process */
2398 if (kd_ctrl_page.kdebug_flags & (KDBG_PIDCHECK | KDBG_PIDEXCLUDE)) {
2399 proc_list_lock();
2400 proc_t p;
2401 ALLPROC_FOREACH(p) {
2402 p->p_kdebug = 0;
2403 }
2404 proc_list_unlock();
2405 }
2406
2407 kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
2408 kd_ctrl_page.kdebug_flags &= ~(KDBG_NOWRAP | KDBG_RANGECHECK | KDBG_VALCHECK);
2409 kd_ctrl_page.kdebug_flags &= ~(KDBG_PIDCHECK | KDBG_PIDEXCLUDE);
2410
2411 kd_ctrl_page.oldest_time = 0;
2412
2413 delete_buffers();
2414 nkdbufs = 0;
2415
2416 /* Clean up the thread map buffer */
2417 kdbg_clear_thread_map();
2418
2419 RAW_file_offset = 0;
2420 RAW_file_written = 0;
2421 }
2422
2423 void
2424 kdebug_reset(void)
2425 {
2426 ktrace_assert_lock_held();
2427
2428 kdbg_lock_init();
2429
2430 kdbg_clear();
2431 if (kdbg_typefilter) {
2432 typefilter_reject_all(kdbg_typefilter);
2433 typefilter_allow_class(kdbg_typefilter, DBG_TRACE);
2434 }
2435 }
2436
2437 void
2438 kdebug_free_early_buf(void)
2439 {
2440 #if defined(__x86_64__)
2441 /*
2442 * Make Intel aware that the early buffer is no longer being used. ARM
2443 * handles this as part of the BOOTDATA segment.
2444 */
2445 ml_static_mfree((vm_offset_t)&kd_early_buffer, sizeof(kd_early_buffer));
2446 #endif /* defined(__x86_64__) */
2447 }
2448
2449 int
2450 kdbg_setpid(kd_regtype *kdr)
2451 {
2452 pid_t pid;
2453 int flag, ret = 0;
2454 struct proc *p;
2455
2456 pid = (pid_t)kdr->value1;
2457 flag = (int)kdr->value2;
2458
2459 if (pid >= 0) {
2460 if ((p = proc_find(pid)) == NULL) {
2461 ret = ESRCH;
2462 } else {
2463 if (flag == 1) {
2464 /*
2465 * turn on pid check for this and all pids
2466 */
2467 kd_ctrl_page.kdebug_flags |= KDBG_PIDCHECK;
2468 kd_ctrl_page.kdebug_flags &= ~KDBG_PIDEXCLUDE;
2469 kdbg_set_flags(SLOW_CHECKS, 0, true);
2470
2471 p->p_kdebug = 1;
2472 } else {
2473 /*
2474 * turn off pid check for this pid value
2475 * Don't turn off all pid checking though
2476 *
2477 * kd_ctrl_page.kdebug_flags &= ~KDBG_PIDCHECK;
2478 */
2479 p->p_kdebug = 0;
2480 }
2481 proc_rele(p);
2482 }
2483 } else {
2484 ret = EINVAL;
2485 }
2486
2487 return ret;
2488 }
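/*
 * This is driven from kdbg_control() below: KERN_KDPIDTR copies a kd_regtype
 * in from user space and hands it here.  Restricting trace to a single pid
 * therefore amounts to (illustrative, in-kernel view):
 *
 *   kd_regtype reg = { .value1 = (unsigned int)pid, .value2 = 1 };
 *   kdbg_setpid(&reg);    // value2 == 0 clears the bit for that pid instead
 */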
2489
2490 /* This is for pid exclusion in the trace buffer */
2491 int
2492 kdbg_setpidex(kd_regtype *kdr)
2493 {
2494 pid_t pid;
2495 int flag, ret = 0;
2496 struct proc *p;
2497
2498 pid = (pid_t)kdr->value1;
2499 flag = (int)kdr->value2;
2500
2501 if (pid >= 0) {
2502 if ((p = proc_find(pid)) == NULL) {
2503 ret = ESRCH;
2504 } else {
2505 if (flag == 1) {
2506 /*
2507 * turn on pid exclusion
2508 */
2509 kd_ctrl_page.kdebug_flags |= KDBG_PIDEXCLUDE;
2510 kd_ctrl_page.kdebug_flags &= ~KDBG_PIDCHECK;
2511 kdbg_set_flags(SLOW_CHECKS, 0, true);
2512
2513 p->p_kdebug = 1;
2514 } else {
2515 /*
2516 * turn off pid exclusion for this pid value
2517 * Don't turn off all pid exclusion though
2518 *
2519 * kd_ctrl_page.kdebug_flags &= ~KDBG_PIDEXCLUDE;
2520 */
2521 p->p_kdebug = 0;
2522 }
2523 proc_rele(p);
2524 }
2525 } else {
2526 ret = EINVAL;
2527 }
2528
2529 return ret;
2530 }
2531
2532 /*
2533 * The following functions all operate on the "global" typefilter singleton.
2534 */
2535
2536 /*
2537 * The tf param is optional; you may pass either a valid typefilter or NULL.
2538 * If you pass a valid typefilter, you release ownership of that typefilter.
2539 */
2540 static int
2541 kdbg_initialize_typefilter(typefilter_t tf)
2542 {
2543 ktrace_assert_lock_held();
2544 assert(!kdbg_typefilter);
2545 assert(!kdbg_typefilter_memory_entry);
2546 typefilter_t deallocate_tf = NULL;
2547
2548 if (!tf && ((tf = deallocate_tf = typefilter_create()) == NULL)) {
2549 return ENOMEM;
2550 }
2551
2552 if ((kdbg_typefilter_memory_entry = typefilter_create_memory_entry(tf)) == MACH_PORT_NULL) {
2553 if (deallocate_tf) {
2554 typefilter_deallocate(deallocate_tf);
2555 }
2556 return ENOMEM;
2557 }
2558
2559 /*
2560 * The atomic store closes a race window with
2561 * the kdebug_typefilter syscall, which assumes
2562 * that any non-null kdbg_typefilter means a
2563 * valid memory_entry is available.
2564 */
2565 os_atomic_store(&kdbg_typefilter, tf, release);
2566
2567 return KERN_SUCCESS;
2568 }
2569
2570 static int
2571 kdbg_copyin_typefilter(user_addr_t addr, size_t size)
2572 {
2573 int ret = ENOMEM;
2574 typefilter_t tf;
2575
2576 ktrace_assert_lock_held();
2577
2578 if (size != KDBG_TYPEFILTER_BITMAP_SIZE) {
2579 return EINVAL;
2580 }
2581
2582 if ((tf = typefilter_create())) {
2583 if ((ret = copyin(addr, tf, KDBG_TYPEFILTER_BITMAP_SIZE)) == 0) {
2584 /* The kernel typefilter must always allow DBG_TRACE */
2585 typefilter_allow_class(tf, DBG_TRACE);
2586
2587 /*
2588 * If this is the first typefilter, claim it.
2589 * Otherwise copy and deallocate.
2590 *
2591 * Allocating a typefilter for the copyin allows
2592 * the kernel to hold the invariant that DBG_TRACE
2593 * must always be allowed.
2594 */
2595 if (!kdbg_typefilter) {
2596 if ((ret = kdbg_initialize_typefilter(tf))) {
2597 return ret;
2598 }
2599 tf = NULL;
2600 } else {
2601 typefilter_copy(kdbg_typefilter, tf);
2602 }
2603
2604 kdbg_enable_typefilter();
2605 kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_TYPEFILTER_CHANGED, kdbg_typefilter);
2606 }
2607
2608 if (tf) {
2609 typefilter_deallocate(tf);
2610 }
2611 }
2612
2613 return ret;
2614 }
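/*
 * The bitmap copied in above has one bit per class/subclass pair, indexed by
 * the same KDBG_EXTRACT_CSC() value used when events are checked.  A hedged
 * sketch of how a tracing tool might build one before handing it to the
 * KERN_KDSET_TYPEFILTER control (setbit() is from <sys/param.h>):
 *
 *   uint8_t bitmap[KDBG_TYPEFILTER_BITMAP_SIZE] = { 0 };
 *   setbit(bitmap, (DBG_MACH << 8) | DBG_MACH_SCHED);   // allow MACH/SCHED
 *   // ...pass `bitmap` and its size as the buffer for KERN_KDSET_TYPEFILTER.
 */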
2615
2616 /*
2617 * Enable the flags in the control page for the typefilter. Assumes that
2618 * kdbg_typefilter has already been allocated, so events being written
2619 * don't see a bad typefilter.
2620 */
2621 static void
2622 kdbg_enable_typefilter(void)
2623 {
2624 assert(kdbg_typefilter);
2625 kd_ctrl_page.kdebug_flags &= ~(KDBG_RANGECHECK | KDBG_VALCHECK);
2626 kd_ctrl_page.kdebug_flags |= KDBG_TYPEFILTER_CHECK;
2627 kdbg_set_flags(SLOW_CHECKS, 0, true);
2628 commpage_update_kdebug_state();
2629 }
2630
2631 /*
2632 * Disable the flags in the control page for the typefilter. The typefilter
2633 * may be safely deallocated shortly after this function returns.
2634 */
2635 static void
2636 kdbg_disable_typefilter(void)
2637 {
2638 bool notify_iops = kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK;
2639 kd_ctrl_page.kdebug_flags &= ~KDBG_TYPEFILTER_CHECK;
2640
2641 if ((kd_ctrl_page.kdebug_flags & (KDBG_PIDCHECK | KDBG_PIDEXCLUDE))) {
2642 kdbg_set_flags(SLOW_CHECKS, 0, true);
2643 } else {
2644 kdbg_set_flags(SLOW_CHECKS, 0, false);
2645 }
2646 commpage_update_kdebug_state();
2647
2648 if (notify_iops) {
2649 /*
2650 * Notify IOPs that the typefilter will now allow everything.
2651 * Otherwise, they won't know a typefilter is no longer in
2652 * effect.
2653 */
2654 typefilter_allow_all(kdbg_typefilter);
2655 kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops,
2656 KD_CALLBACK_TYPEFILTER_CHANGED, kdbg_typefilter);
2657 }
2658 }
2659
2660 uint32_t
2661 kdebug_commpage_state(void)
2662 {
2663 if (kdebug_enable) {
2664 if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) {
2665 return KDEBUG_COMMPAGE_ENABLE_TYPEFILTER | KDEBUG_COMMPAGE_ENABLE_TRACE;
2666 }
2667
2668 return KDEBUG_COMMPAGE_ENABLE_TRACE;
2669 }
2670
2671 return 0;
2672 }
2673
2674 int
2675 kdbg_setreg(kd_regtype * kdr)
2676 {
2677 int ret = 0;
2678 unsigned int val_1, val_2, val;
2679 switch (kdr->type) {
2680 case KDBG_CLASSTYPE:
2681 val_1 = (kdr->value1 & 0xff);
2682 val_2 = (kdr->value2 & 0xff);
2683 kdlog_beg = (val_1 << 24);
2684 kdlog_end = (val_2 << 24);
2685 kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
2686 kd_ctrl_page.kdebug_flags &= ~KDBG_VALCHECK; /* Turn off specific value check */
2687 kd_ctrl_page.kdebug_flags |= (KDBG_RANGECHECK | KDBG_CLASSTYPE);
2688 kdbg_set_flags(SLOW_CHECKS, 0, true);
2689 break;
2690 case KDBG_SUBCLSTYPE:
2691 val_1 = (kdr->value1 & 0xff);
2692 val_2 = (kdr->value2 & 0xff);
2693 val = val_2 + 1;
2694 kdlog_beg = ((val_1 << 24) | (val_2 << 16));
2695 kdlog_end = ((val_1 << 24) | (val << 16));
2696 kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
2697 kd_ctrl_page.kdebug_flags &= ~KDBG_VALCHECK; /* Turn off specific value check */
2698 kd_ctrl_page.kdebug_flags |= (KDBG_RANGECHECK | KDBG_SUBCLSTYPE);
2699 kdbg_set_flags(SLOW_CHECKS, 0, true);
2700 break;
2701 case KDBG_RANGETYPE:
2702 kdlog_beg = (kdr->value1);
2703 kdlog_end = (kdr->value2);
2704 kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
2705 kd_ctrl_page.kdebug_flags &= ~KDBG_VALCHECK; /* Turn off specific value check */
2706 kd_ctrl_page.kdebug_flags |= (KDBG_RANGECHECK | KDBG_RANGETYPE);
2707 kdbg_set_flags(SLOW_CHECKS, 0, true);
2708 break;
2709 case KDBG_VALCHECK:
2710 kdlog_value1 = (kdr->value1);
2711 kdlog_value2 = (kdr->value2);
2712 kdlog_value3 = (kdr->value3);
2713 kdlog_value4 = (kdr->value4);
2714 kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
2715 kd_ctrl_page.kdebug_flags &= ~KDBG_RANGECHECK; /* Turn off range check */
2716 kd_ctrl_page.kdebug_flags |= KDBG_VALCHECK; /* Turn on specific value check */
2717 kdbg_set_flags(SLOW_CHECKS, 0, true);
2718 break;
2719 case KDBG_TYPENONE:
2720 kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
2721
2722 if ((kd_ctrl_page.kdebug_flags & (KDBG_RANGECHECK | KDBG_VALCHECK |
2723 KDBG_PIDCHECK | KDBG_PIDEXCLUDE |
2724 KDBG_TYPEFILTER_CHECK))) {
2725 kdbg_set_flags(SLOW_CHECKS, 0, true);
2726 } else {
2727 kdbg_set_flags(SLOW_CHECKS, 0, false);
2728 }
2729
2730 kdlog_beg = 0;
2731 kdlog_end = 0;
2732 break;
2733 default:
2734 ret = EINVAL;
2735 break;
2736 }
2737 return ret;
2738 }
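/*
 * A worked example of the encodings above: KDBG_SUBCLSTYPE with
 * value1 = 0x01 (DBG_MACH) and value2 = 0x40 (DBG_MACH_SCHED) yields
 * kdlog_beg = 0x01400000 and kdlog_end = 0x01410000, so the range check in
 * kdebug_debugid_explicitly_enabled() admits events whose class/subclass is
 * MACH/SCHED (plus the single event ID at the upper bound itself).
 */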
2739
2740 static int
2741 kdbg_write_to_vnode(caddr_t buffer, size_t size, vnode_t vp, vfs_context_t ctx, off_t file_offset)
2742 {
2743 assert(size < INT_MAX);
2744 return vn_rdwr(UIO_WRITE, vp, buffer, (int)size, file_offset, UIO_SYSSPACE, IO_NODELOCKED | IO_UNIT,
2745 vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
2746 }
2747
2748 int
2749 kdbg_write_v3_chunk_header(user_addr_t buffer, uint32_t tag, uint32_t sub_tag, uint64_t length, vnode_t vp, vfs_context_t ctx)
2750 {
2751 int ret = KERN_SUCCESS;
2752 kd_chunk_header_v3 header = {
2753 .tag = tag,
2754 .sub_tag = sub_tag,
2755 .length = length,
2756 };
2757
2758 // Check that only one of them is valid
2759 assert(!buffer ^ !vp);
2760 assert((vp == NULL) || (ctx != NULL));
2761
2762 // Write the chunk header to the vnode or copy it out to the user buffer
2763 if (buffer || vp) {
2764 if (vp) {
2765 ret = kdbg_write_to_vnode((caddr_t)&header, sizeof(kd_chunk_header_v3), vp, ctx, RAW_file_offset);
2766 if (ret) {
2767 goto write_error;
2768 }
2769 RAW_file_offset += (sizeof(kd_chunk_header_v3));
2770 } else {
2771 ret = copyout(&header, buffer, sizeof(kd_chunk_header_v3));
2772 if (ret) {
2773 goto write_error;
2774 }
2775 }
2776 }
2777 write_error:
2778 return ret;
2779 }
2780
2781 static int
2782 kdbg_write_v3_chunk_to_fd(uint32_t tag, uint32_t sub_tag, uint64_t length, void *payload, uint64_t payload_size, int fd)
2783 {
2784 proc_t p;
2785 struct vfs_context context;
2786 struct fileproc *fp;
2787 vnode_t vp;
2788 p = current_proc();
2789
2790 if (fp_get_ftype(p, fd, DTYPE_VNODE, EBADF, &fp)) {
2791 return EBADF;
2792 }
2793
2794 vp = fp->fp_glob->fg_data;
2795 context.vc_thread = current_thread();
2796 context.vc_ucred = fp->fp_glob->fg_cred;
2797
2798 if ((vnode_getwithref(vp)) == 0) {
2799 RAW_file_offset = fp->fp_glob->fg_offset;
2800
2801 kd_chunk_header_v3 chunk_header = {
2802 .tag = tag,
2803 .sub_tag = sub_tag,
2804 .length = length,
2805 };
2806
2807 int ret = kdbg_write_to_vnode((caddr_t) &chunk_header, sizeof(kd_chunk_header_v3), vp, &context, RAW_file_offset);
2808 if (!ret) {
2809 RAW_file_offset += sizeof(kd_chunk_header_v3);
2810 }
2811
2812 ret = kdbg_write_to_vnode((caddr_t) payload, (size_t) payload_size, vp, &context, RAW_file_offset);
2813 if (!ret) {
2814 RAW_file_offset += payload_size;
2815 }
2816
2817 fp->fp_glob->fg_offset = RAW_file_offset;
2818 vnode_put(vp);
2819 }
2820
2821 fp_drop(p, fd, fp, 0);
2822 return KERN_SUCCESS;
2823 }
2824
2825 user_addr_t
2826 kdbg_write_v3_event_chunk_header(user_addr_t buffer, uint32_t tag, uint64_t length, vnode_t vp, vfs_context_t ctx)
2827 {
2828 uint64_t future_chunk_timestamp = 0;
2829 length += sizeof(uint64_t);
2830
2831 if (kdbg_write_v3_chunk_header(buffer, tag, V3_EVENT_DATA_VERSION, length, vp, ctx)) {
2832 return 0;
2833 }
2834 if (buffer) {
2835 buffer += sizeof(kd_chunk_header_v3);
2836 }
2837
2838 // Check that only one of them is valid
2839 assert(!buffer ^ !vp);
2840 assert((vp == NULL) || (ctx != NULL));
2841
2842 // Write the 8-byte future_chunk_timestamp field in the payload
2843 if (buffer || vp) {
2844 if (vp) {
2845 int ret = kdbg_write_to_vnode((caddr_t)&future_chunk_timestamp, sizeof(uint64_t), vp, ctx, RAW_file_offset);
2846 if (!ret) {
2847 RAW_file_offset += (sizeof(uint64_t));
2848 }
2849 } else {
2850 if (copyout(&future_chunk_timestamp, buffer, sizeof(uint64_t))) {
2851 return 0;
2852 }
2853 }
2854 }
2855
2856 return buffer + sizeof(uint64_t);
2857 }
2858
2859 int
2860 kdbg_write_v3_header(user_addr_t user_header, size_t *user_header_size, int fd)
2861 {
2862 int ret = KERN_SUCCESS;
2863
2864 uint8_t* cpumap = 0;
2865 uint32_t cpumap_size = 0;
2866 uint32_t thrmap_size = 0;
2867
2868 size_t bytes_needed = 0;
2869
2870 // Check that only one of them is valid
2871 assert(!user_header ^ !fd);
2872 assert(user_header_size);
2873
2874 if (!(kd_ctrl_page.kdebug_flags & KDBG_BUFINIT)) {
2875 ret = EINVAL;
2876 goto bail;
2877 }
2878
2879 if (!(user_header || fd)) {
2880 ret = EINVAL;
2881 goto bail;
2882 }
2883
2884 // Initialize the cpu map
2885 ret = kdbg_cpumap_init_internal(kd_ctrl_page.kdebug_iops, kd_ctrl_page.kdebug_cpus, &cpumap, &cpumap_size);
2886 if (ret != KERN_SUCCESS) {
2887 goto bail;
2888 }
2889
2890 // Check if a thread map is initialized
2891 if (!kd_mapptr) {
2892 ret = EINVAL;
2893 goto bail;
2894 }
2895 if (os_mul_overflow(kd_mapcount, sizeof(kd_threadmap), &thrmap_size)) {
2896 ret = ERANGE;
2897 goto bail;
2898 }
2899
2900 mach_timebase_info_data_t timebase = {0, 0};
2901 clock_timebase_info(&timebase);
2902
2903 // Setup the header.
2904 // See v3 header description in sys/kdebug.h for more information.
2905 kd_header_v3 header = {
2906 .tag = RAW_VERSION3,
2907 .sub_tag = V3_HEADER_VERSION,
2908 .length = (sizeof(kd_header_v3) + cpumap_size - sizeof(kd_cpumap_header)),
2909 .timebase_numer = timebase.numer,
2910 .timebase_denom = timebase.denom,
2911 .timestamp = 0, /* FIXME rdar://problem/22053009 */
2912 .walltime_secs = 0,
2913 .walltime_usecs = 0,
2914 .timezone_minuteswest = 0,
2915 .timezone_dst = 0,
2916 #if defined(__LP64__)
2917 .flags = 1,
2918 #else
2919 .flags = 0,
2920 #endif
2921 };
2922
2923 // If it's a buffer, check if we have enough space to copy the header and the maps.
2924 if (user_header) {
2925 bytes_needed = (size_t)header.length + thrmap_size + (2 * sizeof(kd_chunk_header_v3));
2926 if (*user_header_size < bytes_needed) {
2927 ret = EINVAL;
2928 goto bail;
2929 }
2930 }
2931
2932 // Start writing the header
2933 if (fd) {
2934 void *hdr_ptr = (void *)(((uintptr_t) &header) + sizeof(kd_chunk_header_v3));
2935 size_t payload_size = (sizeof(kd_header_v3) - sizeof(kd_chunk_header_v3));
2936
2937 ret = kdbg_write_v3_chunk_to_fd(RAW_VERSION3, V3_HEADER_VERSION, header.length, hdr_ptr, payload_size, fd);
2938 if (ret) {
2939 goto bail;
2940 }
2941 } else {
2942 if (copyout(&header, user_header, sizeof(kd_header_v3))) {
2943 ret = EFAULT;
2944 goto bail;
2945 }
2946 // Update the user pointer
2947 user_header += sizeof(kd_header_v3);
2948 }
2949
2950 // Write a cpu map. This is a sub-chunk of the header.
2951 cpumap = (uint8_t*)((uintptr_t) cpumap + sizeof(kd_cpumap_header));
2952 size_t payload_size = (size_t)(cpumap_size - sizeof(kd_cpumap_header));
2953 if (fd) {
2954 ret = kdbg_write_v3_chunk_to_fd(V3_CPU_MAP, V3_CPUMAP_VERSION, payload_size, (void *)cpumap, payload_size, fd);
2955 if (ret) {
2956 goto bail;
2957 }
2958 } else {
2959 ret = kdbg_write_v3_chunk_header(user_header, V3_CPU_MAP, V3_CPUMAP_VERSION, payload_size, NULL, NULL);
2960 if (ret) {
2961 goto bail;
2962 }
2963 user_header += sizeof(kd_chunk_header_v3);
2964 if (copyout(cpumap, user_header, payload_size)) {
2965 ret = EFAULT;
2966 goto bail;
2967 }
2968 // Update the user pointer
2969 user_header += payload_size;
2970 }
2971
2972 // Write a thread map
2973 if (fd) {
2974 ret = kdbg_write_v3_chunk_to_fd(V3_THREAD_MAP, V3_THRMAP_VERSION, thrmap_size, (void *)kd_mapptr, thrmap_size, fd);
2975 if (ret) {
2976 goto bail;
2977 }
2978 } else {
2979 ret = kdbg_write_v3_chunk_header(user_header, V3_THREAD_MAP, V3_THRMAP_VERSION, thrmap_size, NULL, NULL);
2980 if (ret) {
2981 goto bail;
2982 }
2983 user_header += sizeof(kd_chunk_header_v3);
2984 if (copyout(kd_mapptr, user_header, thrmap_size)) {
2985 ret = EFAULT;
2986 goto bail;
2987 }
2988 user_header += thrmap_size;
2989 }
2990
2991 if (fd) {
2992 RAW_file_written += bytes_needed;
2993 }
2994
2995 *user_header_size = bytes_needed;
2996 bail:
2997 if (cpumap) {
2998 kmem_free(kernel_map, (vm_offset_t)cpumap, cpumap_size);
2999 }
3000 return ret;
3001 }
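/*
 * The stream produced above, sketched chunk by chunk (each entry is a
 * kd_chunk_header_v3 followed by its payload):
 *
 *   [ RAW_VERSION3  / V3_HEADER_VERSION | remaining kd_header_v3 fields ]
 *   [ V3_CPU_MAP    / V3_CPUMAP_VERSION | kd_cpumap entries             ]
 *   [ V3_THREAD_MAP / V3_THRMAP_VERSION | kd_threadmap entries          ]
 *
 * followed by the V3_RAW_EVENTS chunks that kdbg_read() appends when a v3
 * write is requested.
 */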
3002
3003 int
3004 kdbg_readcpumap(user_addr_t user_cpumap, size_t *user_cpumap_size)
3005 {
3006 uint8_t* cpumap = NULL;
3007 uint32_t cpumap_size = 0;
3008 int ret = KERN_SUCCESS;
3009
3010 if (kd_ctrl_page.kdebug_flags & KDBG_BUFINIT) {
3011 if (kdbg_cpumap_init_internal(kd_ctrl_page.kdebug_iops, kd_ctrl_page.kdebug_cpus, &cpumap, &cpumap_size) == KERN_SUCCESS) {
3012 if (user_cpumap) {
3013 size_t bytes_to_copy = (*user_cpumap_size >= cpumap_size) ? cpumap_size : *user_cpumap_size;
3014 if (copyout(cpumap, user_cpumap, (size_t)bytes_to_copy)) {
3015 ret = EFAULT;
3016 }
3017 }
3018 *user_cpumap_size = cpumap_size;
3019 kmem_free(kernel_map, (vm_offset_t)cpumap, cpumap_size);
3020 } else {
3021 ret = EINVAL;
3022 }
3023 } else {
3024 ret = EINVAL;
3025 }
3026
3027 return ret;
3028 }
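/*
 * A typical calling pattern from kdbg_control(KERN_KDCPUMAP) is a two-step
 * size probe: call once with a zero user buffer so only *user_cpumap_size is
 * filled in, allocate that many bytes in user space, then call again to
 * receive the map itself.
 */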
3029
3030 int
3031 kdbg_readcurthrmap(user_addr_t buffer, size_t *bufsize)
3032 {
3033 kd_threadmap *mapptr;
3034 vm_size_t mapsize;
3035 vm_size_t mapcount;
3036 int ret = 0;
3037 size_t count = *bufsize / sizeof(kd_threadmap);
3038
3039 *bufsize = 0;
3040
3041 if ((mapptr = kdbg_thrmap_init_internal(count, &mapsize, &mapcount))) {
3042 if (copyout(mapptr, buffer, mapcount * sizeof(kd_threadmap))) {
3043 ret = EFAULT;
3044 } else {
3045 *bufsize = (mapcount * sizeof(kd_threadmap));
3046 }
3047
3048 kfree(mapptr, mapsize);
3049 } else {
3050 ret = EINVAL;
3051 }
3052
3053 return ret;
3054 }
3055
3056 static int
3057 kdbg_write_v1_header(bool write_thread_map, vnode_t vp, vfs_context_t ctx)
3058 {
3059 int ret = 0;
3060 RAW_header header;
3061 clock_sec_t secs;
3062 clock_usec_t usecs;
3063 char *pad_buf;
3064 uint32_t pad_size;
3065 uint32_t extra_thread_count = 0;
3066 uint32_t cpumap_size;
3067 size_t map_size = 0;
3068 uint32_t map_count = 0;
3069
3070 if (write_thread_map) {
3071 assert(kd_ctrl_page.kdebug_flags & KDBG_MAPINIT);
3072 if (kd_mapcount > UINT32_MAX) {
3073 return ERANGE;
3074 }
3075 map_count = (uint32_t)kd_mapcount;
3076 if (os_mul_overflow(map_count, sizeof(kd_threadmap), &map_size)) {
3077 return ERANGE;
3078 }
3079 if (map_size >= INT_MAX) {
3080 return ERANGE;
3081 }
3082 }
3083
3084 /*
3085 * Without the buffers initialized, we cannot construct a CPU map or a
3086 * thread map, and cannot write a header.
3087 */
3088 if (!(kd_ctrl_page.kdebug_flags & KDBG_BUFINIT)) {
3089 return EINVAL;
3090 }
3091
3092 /*
3093 * To write a RAW_VERSION1+ file, we must embed a cpumap in the
3094 * "padding" used to page align the events following the threadmap. If
3095 * the threadmap happens to not require enough padding, we artificially
3096 * increase its footprint until it needs enough padding.
3097 */
3098
3099 assert(vp);
3100 assert(ctx);
3101
3102 pad_size = PAGE_16KB - ((sizeof(RAW_header) + map_size) & PAGE_MASK);
3103 cpumap_size = sizeof(kd_cpumap_header) + kd_ctrl_page.kdebug_cpus * sizeof(kd_cpumap);
3104
3105 if (cpumap_size > pad_size) {
3106 /* If the cpu map doesn't fit in the currently available pad_size,
3107 * we increase the pad_size by 16K. We do this so that the event
3108 * data is always available on a page-aligned boundary for both
3109 * 4k and 16k systems. We enforce this alignment for the event
3110 * data so that we can take advantage of optimized file/disk writes.
3111 */
3112 pad_size += PAGE_16KB;
3113 }
3114
3115 /* The way we silently embed a cpumap in the "padding" is by artificially
3116 * increasing the number of thread entries. However, we also need to ensure that
3117 * the cpumap is embedded in the last 4K page before the event data is expected.
3118 * This way the tools can read the data starting at the next page boundary on both
3119 * 4K and 16K systems, preserving compatibility with older versions of the tools.
3120 */
3121 if (pad_size > PAGE_4KB) {
3122 pad_size -= PAGE_4KB;
3123 extra_thread_count = (pad_size / sizeof(kd_threadmap)) + 1;
3124 }
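/*
 * Worked through with illustrative numbers: if the RAW_header plus thread
 * map end 0x2100 bytes into a 16K page, the initial pad_size is
 * 0x4000 - 0x2100 = 0x1F00 bytes.  That easily holds a small cpumap, and
 * since it exceeds PAGE_4KB the map is padded with
 * (0x1F00 - 0x1000) / sizeof(kd_threadmap) + 1 fake entries, leaving the
 * cpumap itself to be written in the final sub-4K padding computed below.
 */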
3125
3126 memset(&header, 0, sizeof(header));
3127 header.version_no = RAW_VERSION1;
3128 header.thread_count = map_count + extra_thread_count;
3129
3130 clock_get_calendar_microtime(&secs, &usecs);
3131 header.TOD_secs = secs;
3132 header.TOD_usecs = usecs;
3133
3134 ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)&header, (int)sizeof(RAW_header), RAW_file_offset,
3135 UIO_SYSSPACE, IO_NODELOCKED | IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
3136 if (ret) {
3137 goto write_error;
3138 }
3139 RAW_file_offset += sizeof(RAW_header);
3140 RAW_file_written += sizeof(RAW_header);
3141
3142 if (write_thread_map) {
3143 assert(map_size < INT_MAX);
3144 ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)kd_mapptr, (int)map_size, RAW_file_offset,
3145 UIO_SYSSPACE, IO_NODELOCKED | IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
3146 if (ret) {
3147 goto write_error;
3148 }
3149
3150 RAW_file_offset += map_size;
3151 RAW_file_written += map_size;
3152 }
3153
3154 if (extra_thread_count) {
3155 pad_size = extra_thread_count * sizeof(kd_threadmap);
3156 pad_buf = kheap_alloc(KHEAP_TEMP, pad_size, Z_WAITOK | Z_ZERO);
3157 if (!pad_buf) {
3158 ret = ENOMEM;
3159 goto write_error;
3160 }
3161
3162 assert(pad_size < INT_MAX);
3163 ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)pad_buf, (int)pad_size, RAW_file_offset,
3164 UIO_SYSSPACE, IO_NODELOCKED | IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
3165 kheap_free(KHEAP_TEMP, pad_buf, pad_size);
3166 if (ret) {
3167 goto write_error;
3168 }
3169
3170 RAW_file_offset += pad_size;
3171 RAW_file_written += pad_size;
3172 }
3173
3174 pad_size = PAGE_SIZE - (RAW_file_offset & PAGE_MASK);
3175 if (pad_size) {
3176 pad_buf = (char *)kheap_alloc(KHEAP_TEMP, pad_size, Z_WAITOK | Z_ZERO);
3177 if (!pad_buf) {
3178 ret = ENOMEM;
3179 goto write_error;
3180 }
3181
3182 /*
3183 * embed a cpumap in the padding bytes.
3184 * older code will skip this.
3185 * newer code will know how to read it.
3186 */
3187 uint32_t temp = pad_size;
3188 if (kdbg_cpumap_init_internal(kd_ctrl_page.kdebug_iops, kd_ctrl_page.kdebug_cpus, (uint8_t**)&pad_buf, &temp) != KERN_SUCCESS) {
3189 memset(pad_buf, 0, pad_size);
3190 }
3191
3192 assert(pad_size < INT_MAX);
3193 ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)pad_buf, (int)pad_size, RAW_file_offset,
3194 UIO_SYSSPACE, IO_NODELOCKED | IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
3195 kheap_free(KHEAP_TEMP, pad_buf, pad_size);
3196 if (ret) {
3197 goto write_error;
3198 }
3199
3200 RAW_file_offset += pad_size;
3201 RAW_file_written += pad_size;
3202 }
3203
3204 write_error:
3205 return ret;
3206 }
3207
3208 static void
3209 kdbg_clear_thread_map(void)
3210 {
3211 ktrace_assert_lock_held();
3212
3213 if (kd_ctrl_page.kdebug_flags & KDBG_MAPINIT) {
3214 assert(kd_mapptr != NULL);
3215 kfree(kd_mapptr, kd_mapsize);
3216 kd_mapptr = NULL;
3217 kd_mapsize = 0;
3218 kd_mapcount = 0;
3219 kd_ctrl_page.kdebug_flags &= ~KDBG_MAPINIT;
3220 }
3221 }
3222
3223 /*
3224 * Write out a version 1 header and the thread map, if it is initialized, to a
3225 * vnode. Used by KDWRITEMAP and kdbg_dump_trace_to_file.
3226 *
3227 * Returns write errors from vn_rdwr if a write fails. Returns ENODATA if the
3228 * thread map has not been initialized, but the header will still be written.
3229 * Returns ENOMEM if padding could not be allocated. Returns 0 otherwise.
3230 */
3231 static int
3232 kdbg_write_thread_map(vnode_t vp, vfs_context_t ctx)
3233 {
3234 int ret = 0;
3235 bool map_initialized;
3236
3237 ktrace_assert_lock_held();
3238 assert(ctx != NULL);
3239
3240 map_initialized = (kd_ctrl_page.kdebug_flags & KDBG_MAPINIT);
3241
3242 ret = kdbg_write_v1_header(map_initialized, vp, ctx);
3243 if (ret == 0) {
3244 if (map_initialized) {
3245 kdbg_clear_thread_map();
3246 } else {
3247 ret = ENODATA;
3248 }
3249 }
3250
3251 return ret;
3252 }
3253
3254 /*
3255 * Copy out the thread map to a user space buffer. Used by KDTHRMAP.
3256 *
3257 * Returns copyout errors if the copyout fails. Returns ENODATA if the thread
3258 * map has not been initialized. Returns EINVAL if the buffer provided is not
3259 * large enough for the entire thread map. Returns 0 otherwise.
3260 */
3261 static int
3262 kdbg_copyout_thread_map(user_addr_t buffer, size_t *buffer_size)
3263 {
3264 bool map_initialized;
3265 size_t map_size;
3266 int ret = 0;
3267
3268 ktrace_assert_lock_held();
3269 assert(buffer_size != NULL);
3270
3271 map_initialized = (kd_ctrl_page.kdebug_flags & KDBG_MAPINIT);
3272 if (!map_initialized) {
3273 return ENODATA;
3274 }
3275
3276 map_size = kd_mapcount * sizeof(kd_threadmap);
3277 if (*buffer_size < map_size) {
3278 return EINVAL;
3279 }
3280
3281 ret = copyout(kd_mapptr, buffer, map_size);
3282 if (ret == 0) {
3283 kdbg_clear_thread_map();
3284 }
3285
3286 return ret;
3287 }
3288
3289 int
3290 kdbg_readthrmap_v3(user_addr_t buffer, size_t buffer_size, int fd)
3291 {
3292 int ret = 0;
3293 bool map_initialized;
3294 size_t map_size;
3295
3296 ktrace_assert_lock_held();
3297
3298 if ((!fd && !buffer) || (fd && buffer)) {
3299 return EINVAL;
3300 }
3301
3302 map_initialized = (kd_ctrl_page.kdebug_flags & KDBG_MAPINIT);
3303 map_size = kd_mapcount * sizeof(kd_threadmap);
3304
3305 if (map_initialized && (buffer_size >= map_size)) {
3306 ret = kdbg_write_v3_header(buffer, &buffer_size, fd);
3307
3308 if (ret == 0) {
3309 kdbg_clear_thread_map();
3310 }
3311 } else {
3312 ret = EINVAL;
3313 }
3314
3315 return ret;
3316 }
3317
3318 static void
3319 kdbg_set_nkdbufs(unsigned int req_nkdbufs)
3320 {
3321 /*
3322 * Only allow allocation up to half the available memory (sane_size).
3323 */
3324 uint64_t max_nkdbufs = (sane_size / 2) / sizeof(kd_buf);
3325 nkdbufs = (req_nkdbufs > max_nkdbufs) ? (unsigned int)max_nkdbufs :
3326 req_nkdbufs;
3327 }
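/*
 * For example (illustrative numbers, assuming a 64-byte kd_buf on a 64-bit
 * kernel): with sane_size = 8 GB the cap works out to
 * (4 GB / 64 bytes) = 67,108,864 events, and any larger request is silently
 * clamped to that value.
 */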
3328
3329 /*
3330 * Block until there are `n_storage_threshold` storage units filled with
3331 * events or `timeout_ms` milliseconds have passed. If `locked_wait` is true,
3332 * `ktrace_lock` is held while waiting. This is necessary while waiting to
3333 * write events out of the buffers.
3334 *
3335 * Returns true if the threshold was reached and false otherwise.
3336 *
3337 * Called with `ktrace_lock` locked and interrupts enabled.
3338 */
3339 static bool
3340 kdbg_wait(uint64_t timeout_ms, bool locked_wait)
3341 {
3342 int wait_result = THREAD_AWAKENED;
3343 uint64_t abstime = 0;
3344
3345 ktrace_assert_lock_held();
3346
3347 if (timeout_ms != 0) {
3348 uint64_t ns = timeout_ms * NSEC_PER_MSEC;
3349 nanoseconds_to_absolutetime(ns, &abstime);
3350 clock_absolutetime_interval_to_deadline(abstime, &abstime);
3351 }
3352
3353 bool s = ml_set_interrupts_enabled(false);
3354 if (!s) {
3355 panic("kdbg_wait() called with interrupts disabled");
3356 }
3357 lck_spin_lock_grp(kdw_spin_lock, kdebug_lck_grp);
3358
3359 if (!locked_wait) {
3360 /* drop the mutex to allow others to access trace */
3361 ktrace_unlock();
3362 }
3363
3364 while (wait_result == THREAD_AWAKENED &&
3365 kd_ctrl_page.kds_inuse_count < n_storage_threshold) {
3366 kds_waiter = 1;
3367
3368 if (abstime) {
3369 wait_result = lck_spin_sleep_deadline(kdw_spin_lock, 0, &kds_waiter, THREAD_ABORTSAFE, abstime);
3370 } else {
3371 wait_result = lck_spin_sleep(kdw_spin_lock, 0, &kds_waiter, THREAD_ABORTSAFE);
3372 }
3373
3374 kds_waiter = 0;
3375 }
3376
3377 /* check the count under the spinlock */
3378 bool threshold_exceeded = (kd_ctrl_page.kds_inuse_count >= n_storage_threshold);
3379
3380 lck_spin_unlock(kdw_spin_lock);
3381 ml_set_interrupts_enabled(s);
3382
3383 if (!locked_wait) {
3384 /* pick the mutex back up again */
3385 ktrace_lock();
3386 }
3387
3388 /* write out whether we've exceeded the threshold */
3389 return threshold_exceeded;
3390 }
3391
3392 /*
3393 * Wakeup a thread waiting using `kdbg_wait` if there are at least
3394 * `n_storage_threshold` storage units in use.
3395 */
3396 static void
3397 kdbg_wakeup(void)
3398 {
3399 bool need_kds_wakeup = false;
3400
3401 /*
3402 * Try to take the lock here to synchronize with the waiter entering
3403 * the blocked state. Use the try mode to prevent deadlocks caused by
3404 * re-entering this routine due to various trace points triggered in the
3405 * lck_spin_sleep_xxxx routines used to actually enter one of our two wait
3406 * conditions. It's no problem if we fail; there will be plenty of additional
3407 * events coming in that will eventually succeed in grabbing this lock.
3408 */
3409 bool s = ml_set_interrupts_enabled(false);
3410
3411 if (lck_spin_try_lock(kdw_spin_lock)) {
3412 if (kds_waiter &&
3413 (kd_ctrl_page.kds_inuse_count >= n_storage_threshold)) {
3414 kds_waiter = 0;
3415 need_kds_wakeup = true;
3416 }
3417 lck_spin_unlock(kdw_spin_lock);
3418 }
3419
3420 ml_set_interrupts_enabled(s);
3421
3422 if (need_kds_wakeup == true) {
3423 wakeup(&kds_waiter);
3424 }
3425 }
3426
3427 int
3428 kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep)
3429 {
3430 int ret = 0;
3431 size_t size = *sizep;
3432 unsigned int value = 0;
3433 kd_regtype kd_Reg;
3434 kbufinfo_t kd_bufinfo;
3435 proc_t p;
3436
3437 if (name[0] == KERN_KDWRITETR ||
3438 name[0] == KERN_KDWRITETR_V3 ||
3439 name[0] == KERN_KDWRITEMAP ||
3440 name[0] == KERN_KDWRITEMAP_V3 ||
3441 name[0] == KERN_KDEFLAGS ||
3442 name[0] == KERN_KDDFLAGS ||
3443 name[0] == KERN_KDENABLE ||
3444 name[0] == KERN_KDSETBUF) {
3445 if (namelen < 2) {
3446 return EINVAL;
3447 }
3448 value = name[1];
3449 }
3450
3451 kdbg_lock_init();
3452 assert(kd_ctrl_page.kdebug_flags & KDBG_LOCKINIT);
3453
3454 ktrace_lock();
3455
3456 /*
3457 * Some requests only require "read" access to kdebug trace. Regardless,
3458 * tell ktrace that a configuration or read is occurring (and see if it's
3459 * allowed).
3460 */
3461 if (name[0] != KERN_KDGETBUF &&
3462 name[0] != KERN_KDGETREG &&
3463 name[0] != KERN_KDREADCURTHRMAP) {
3464 if ((ret = ktrace_configure(KTRACE_KDEBUG))) {
3465 goto out;
3466 }
3467 } else {
3468 if ((ret = ktrace_read_check())) {
3469 goto out;
3470 }
3471 }
3472
3473 switch (name[0]) {
3474 case KERN_KDGETBUF:
3475 if (size < sizeof(kd_bufinfo.nkdbufs)) {
3476 /*
3477 * There is not enough room to return even
3478 * the first element of the info structure.
3479 */
3480 ret = EINVAL;
3481 break;
3482 }
3483
3484 memset(&kd_bufinfo, 0, sizeof(kd_bufinfo));
3485
3486 kd_bufinfo.nkdbufs = nkdbufs;
3487 kd_bufinfo.nkdthreads = kd_mapcount < INT_MAX ? (int)kd_mapcount :
3488 INT_MAX;
3489 if ((kd_ctrl_page.kdebug_slowcheck & SLOW_NOLOG)) {
3490 kd_bufinfo.nolog = 1;
3491 } else {
3492 kd_bufinfo.nolog = 0;
3493 }
3494
3495 kd_bufinfo.flags = kd_ctrl_page.kdebug_flags;
3496 #if defined(__LP64__)
3497 kd_bufinfo.flags |= KDBG_LP64;
3498 #endif
3499 {
3500 int pid = ktrace_get_owning_pid();
3501 kd_bufinfo.bufid = (pid == 0 ? -1 : pid);
3502 }
3503
3504 if (size >= sizeof(kd_bufinfo)) {
3505 /*
3506 * Provide all the info we have
3507 */
3508 if (copyout(&kd_bufinfo, where, sizeof(kd_bufinfo))) {
3509 ret = EINVAL;
3510 }
3511 } else {
3512 /*
3513 * For backwards compatibility, only provide
3514 * as much info as there is room for.
3515 */
3516 if (copyout(&kd_bufinfo, where, size)) {
3517 ret = EINVAL;
3518 }
3519 }
3520 break;
3521
3522 case KERN_KDREADCURTHRMAP:
3523 ret = kdbg_readcurthrmap(where, sizep);
3524 break;
3525
3526 case KERN_KDEFLAGS:
3527 value &= KDBG_USERFLAGS;
3528 kd_ctrl_page.kdebug_flags |= value;
3529 break;
3530
3531 case KERN_KDDFLAGS:
3532 value &= KDBG_USERFLAGS;
3533 kd_ctrl_page.kdebug_flags &= ~value;
3534 break;
3535
3536 case KERN_KDENABLE:
3537 /*
3538 * Enable tracing mechanism. Two types:
3539 * KDEBUG_TRACE is the standard one,
3540 * and KDEBUG_PPT which is a carefully
3541 * chosen subset to avoid performance impact.
3542 */
3543 if (value) {
3544 /*
3545 * enable only if buffer is initialized
3546 */
3547 if (!(kd_ctrl_page.kdebug_flags & KDBG_BUFINIT) ||
3548 !(value == KDEBUG_ENABLE_TRACE || value == KDEBUG_ENABLE_PPT)) {
3549 ret = EINVAL;
3550 break;
3551 }
3552 kdbg_thrmap_init();
3553
3554 kdbg_set_tracing_enabled(true, value);
3555 } else {
3556 if (!kdebug_enable) {
3557 break;
3558 }
3559
3560 kernel_debug_disable();
3561 }
3562 break;
3563
3564 case KERN_KDSETBUF:
3565 kdbg_set_nkdbufs(value);
3566 break;
3567
3568 case KERN_KDSETUP:
3569 ret = kdbg_reinit(false);
3570 break;
3571
3572 case KERN_KDREMOVE:
3573 ktrace_reset(KTRACE_KDEBUG);
3574 break;
3575
3576 case KERN_KDSETREG:
3577 if (size < sizeof(kd_regtype)) {
3578 ret = EINVAL;
3579 break;
3580 }
3581 if (copyin(where, &kd_Reg, sizeof(kd_regtype))) {
3582 ret = EINVAL;
3583 break;
3584 }
3585
3586 ret = kdbg_setreg(&kd_Reg);
3587 break;
3588
3589 case KERN_KDGETREG:
3590 ret = EINVAL;
3591 break;
3592
3593 case KERN_KDREADTR:
3594 ret = kdbg_read(where, sizep, NULL, NULL, RAW_VERSION1);
3595 break;
3596
3597 case KERN_KDWRITETR:
3598 case KERN_KDWRITETR_V3:
3599 case KERN_KDWRITEMAP:
3600 case KERN_KDWRITEMAP_V3:
3601 {
3602 struct vfs_context context;
3603 struct fileproc *fp;
3604 size_t number;
3605 vnode_t vp;
3606 int fd;
3607
3608 if (name[0] == KERN_KDWRITETR || name[0] == KERN_KDWRITETR_V3) {
3609 (void)kdbg_wait(size, true);
3610 }
3611 p = current_proc();
3612 fd = value;
3613
3614
3615 if (fp_get_ftype(p, fd, DTYPE_VNODE, EBADF, &fp)) {
3616 ret = EBADF;
3617 break;
3618 }
3619
3620 vp = fp->fp_glob->fg_data;
3621 context.vc_thread = current_thread();
3622 context.vc_ucred = fp->fp_glob->fg_cred;
3623
3624 if ((ret = vnode_getwithref(vp)) == 0) {
3625 RAW_file_offset = fp->fp_glob->fg_offset;
3626 if (name[0] == KERN_KDWRITETR || name[0] == KERN_KDWRITETR_V3) {
3627 number = nkdbufs * sizeof(kd_buf);
3628
3629 KDBG_RELEASE(TRACE_WRITING_EVENTS | DBG_FUNC_START);
3630 if (name[0] == KERN_KDWRITETR_V3) {
3631 ret = kdbg_read(0, &number, vp, &context, RAW_VERSION3);
3632 } else {
3633 ret = kdbg_read(0, &number, vp, &context, RAW_VERSION1);
3634 }
3635 KDBG_RELEASE(TRACE_WRITING_EVENTS | DBG_FUNC_END, number);
3636
3637 *sizep = number;
3638 } else {
3639 number = kd_mapcount * sizeof(kd_threadmap);
3640 if (name[0] == KERN_KDWRITEMAP_V3) {
3641 ret = kdbg_readthrmap_v3(0, number, fd);
3642 } else {
3643 ret = kdbg_write_thread_map(vp, &context);
3644 }
3645 }
3646 fp->fp_glob->fg_offset = RAW_file_offset;
3647 vnode_put(vp);
3648 }
3649 fp_drop(p, fd, fp, 0);
3650
3651 break;
3652 }
3653 case KERN_KDBUFWAIT:
3654 *sizep = kdbg_wait(size, false);
3655 break;
3656
3657 case KERN_KDPIDTR:
3658 if (size < sizeof(kd_regtype)) {
3659 ret = EINVAL;
3660 break;
3661 }
3662 if (copyin(where, &kd_Reg, sizeof(kd_regtype))) {
3663 ret = EINVAL;
3664 break;
3665 }
3666
3667 ret = kdbg_setpid(&kd_Reg);
3668 break;
3669
3670 case KERN_KDPIDEX:
3671 if (size < sizeof(kd_regtype)) {
3672 ret = EINVAL;
3673 break;
3674 }
3675 if (copyin(where, &kd_Reg, sizeof(kd_regtype))) {
3676 ret = EINVAL;
3677 break;
3678 }
3679
3680 ret = kdbg_setpidex(&kd_Reg);
3681 break;
3682
3683 case KERN_KDCPUMAP:
3684 ret = kdbg_readcpumap(where, sizep);
3685 break;
3686
3687 case KERN_KDTHRMAP:
3688 ret = kdbg_copyout_thread_map(where, sizep);
3689 break;
3690
3691 case KERN_KDSET_TYPEFILTER: {
3692 ret = kdbg_copyin_typefilter(where, size);
3693 break;
3694 }
3695
3696 case KERN_KDTEST:
3697 ret = kdbg_test(size);
3698 break;
3699
3700 default:
3701 ret = EINVAL;
3702 break;
3703 }
3704 out:
3705 ktrace_unlock();
3706
3707 return ret;
3708 }
3709
3710
3711 /*
3712 * This code can run, for the most part, concurrently with kernel_debug_internal().
3713 * 'release_storage_unit' will take the kds_spin_lock, which may cause us to briefly
3714 * synchronize with the recording side of this puzzle; otherwise, we are able to
3715 * move through the lists without taking any locks.
3716 */
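/*
 * Conceptually, the loop below is an N-way merge of the per-CPU buffers
 * (sketch only):
 *
 *   while (events remain && the caller's count is not exhausted) {
 *       for each CPU buffer:
 *           peek at its oldest unread event;
 *           drop it if it is older than barrier_min or newer than barrier_max;
 *       copy the event with the smallest timestamp into kdcopybuf;
 *       release the storage unit once it has been fully consumed;
 *       flush kdcopybuf to the user buffer or vnode in KDCOPYBUF_COUNT batches;
 *   }
 */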
3717 int
3718 kdbg_read(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx, uint32_t file_version)
3719 {
3720 size_t count;
3721 unsigned int cpu, min_cpu;
3722 uint64_t barrier_min = 0, barrier_max = 0, t, earliest_time;
3723 int error = 0;
3724 kd_buf *tempbuf;
3725 uint32_t rcursor;
3726 kd_buf lostevent;
3727 union kds_ptr kdsp;
3728 bool traced_retrograde = false;
3729 struct kd_storage *kdsp_actual;
3730 struct kd_bufinfo *kdbp;
3731 struct kd_bufinfo *min_kdbp;
3732 size_t tempbuf_count;
3733 uint32_t tempbuf_number;
3734 uint32_t old_kdebug_flags;
3735 uint32_t old_kdebug_slowcheck;
3736 bool out_of_events = false;
3737 bool wrapped = false;
3738
3739 assert(number != NULL);
3740 count = *number / sizeof(kd_buf);
3741 *number = 0;
3742
3743 ktrace_assert_lock_held();
3744
3745 if (count == 0 || !(kd_ctrl_page.kdebug_flags & KDBG_BUFINIT) || kdcopybuf == 0) {
3746 return EINVAL;
3747 }
3748
3749 thread_set_eager_preempt(current_thread());
3750
3751 memset(&lostevent, 0, sizeof(lostevent));
3752 lostevent.debugid = TRACE_LOST_EVENTS;
3753
3754 /*
3755 * Request each IOP to provide us with up to date entries before merging
3756 * buffers together.
3757 */
3758 kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_SYNC_FLUSH, NULL);
3759
3760 /*
3761 * Capture the current time. Only sort events that have occurred
3762 * before now. Since the IOPs are being flushed here, it is possible
3763 * that events occur on the AP while running live tracing.
3764 */
3765 barrier_max = kdbg_timestamp() & KDBG_TIMESTAMP_MASK;
3766
3767 /*
3768 * Disable wrap so storage units cannot be stolen out from underneath us
3769 * while merging events.
3770 *
3771 * Because we hold ktrace_lock, no other control threads can be playing
3772 * with kdebug_flags. The code that emits new events could be running,
3773 * but it grabs kds_spin_lock if it needs to acquire a new storage
3774 * chunk, which is where it examines kdebug_flags. If it is adding to
3775 * the same chunk we're reading from, check for that below.
3776 */
3777 wrapped = disable_wrap(&old_kdebug_slowcheck, &old_kdebug_flags);
3778
3779 if (count > nkdbufs) {
3780 count = nkdbufs;
3781 }
3782
3783 if ((tempbuf_count = count) > KDCOPYBUF_COUNT) {
3784 tempbuf_count = KDCOPYBUF_COUNT;
3785 }
3786
3787 /*
3788 * If the buffers have wrapped, do not emit additional lost events for the
3789 * oldest storage units.
3790 */
3791 if (wrapped) {
3792 kd_ctrl_page.kdebug_flags &= ~KDBG_WRAPPED;
3793
3794 for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_ctrl_page.kdebug_cpus; cpu++, kdbp++) {
3795 if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
3796 continue;
3797 }
3798 kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
3799 kdsp_actual->kds_lostevents = false;
3800 }
3801 }
3802 /*
3803 * Capture the earliest time where there are events for all CPUs and don't
3804 * emit events with timestamps prior.
3805 */
3806 barrier_min = kd_ctrl_page.oldest_time;
3807
3808 while (count) {
3809 tempbuf = kdcopybuf;
3810 tempbuf_number = 0;
3811
3812 if (wrapped) {
3813 /*
3814 * Emit a lost events tracepoint to indicate that previous events
3815 * were lost -- the thread map cannot be trusted. A new one must
3816 * be taken so tools can analyze the trace in a backwards-facing
3817 * fashion.
3818 */
3819 kdbg_set_timestamp_and_cpu(&lostevent, barrier_min, 0);
3820 *tempbuf = lostevent;
3821 wrapped = false;
3822 goto nextevent;
3823 }
3824
3825 /* While space left in merged events scratch buffer. */
3826 while (tempbuf_count) {
3827 bool lostevents = false;
3828 int lostcpu = 0;
3829 earliest_time = UINT64_MAX;
3830 min_kdbp = NULL;
3831 min_cpu = 0;
3832
3833 /* Check each CPU's buffers for the earliest event. */
3834 for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_ctrl_page.kdebug_cpus; cpu++, kdbp++) {
3835 /* Skip CPUs without data in their oldest storage unit. */
3836 if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
3837 next_cpu:
3838 continue;
3839 }
3840 /* From CPU data to buffer header to buffer. */
3841 kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
3842
3843 next_event:
3844 /* The next event to be read from this buffer. */
3845 rcursor = kdsp_actual->kds_readlast;
3846
3847 /* Skip this buffer if there are no events left. */
3848 if (rcursor == kdsp_actual->kds_bufindx) {
3849 continue;
3850 }
3851
3852 /*
3853 * Check that this storage unit wasn't stolen and events were
3854 * lost. This must have happened while wrapping was disabled
3855 * in this function.
3856 */
3857 if (kdsp_actual->kds_lostevents) {
3858 lostevents = true;
3859 kdsp_actual->kds_lostevents = false;
3860
3861 /*
3862 * The earliest event we can trust is the first one in this
3863 * stolen storage unit.
3864 */
3865 uint64_t lost_time =
3866 kdbg_get_timestamp(&kdsp_actual->kds_records[0]);
3867 if (kd_ctrl_page.oldest_time < lost_time) {
3868 /*
3869 * If this is the first time we've seen lost events for
3870 * this gap, record its timestamp as the oldest
3871 * timestamp we're willing to merge for the lost events
3872 * tracepoint.
3873 */
3874 kd_ctrl_page.oldest_time = barrier_min = lost_time;
3875 lostcpu = cpu;
3876 }
3877 }
3878
3879 t = kdbg_get_timestamp(&kdsp_actual->kds_records[rcursor]);
3880
3881 if (t > barrier_max) {
3882 if (kdbg_debug) {
3883 printf("kdebug: FUTURE EVENT: debugid %#8x: "
3884 "time %lld from CPU %u "
3885 "(barrier at time %lld, read %lu events)\n",
3886 kdsp_actual->kds_records[rcursor].debugid,
3887 t, cpu, barrier_max, *number + tempbuf_number);
3888 }
3889 goto next_cpu;
3890 }
3891 if (t < kdsp_actual->kds_timestamp) {
3892 /*
3893 * This indicates the event emitter hasn't completed
3894 * filling in the event (because we're looking at the
3895 * buffer that the record head is using). The max barrier
3896 * timestamp should have saved us from seeing these kinds
3897 * of things, but other CPUs might be slow on the up-take.
3898 *
3899 * Bail out so we don't get out-of-order events by
3900 * continuing to read events from other CPUs' buffers.
3901 */
3902 out_of_events = true;
3903 break;
3904 }
3905
3906 /*
3907 * Ignore events that have aged out due to wrapping or storage
3908 * unit exhaustion while merging events.
3909 */
3910 if (t < barrier_min) {
3911 kdsp_actual->kds_readlast++;
3912 if (kdbg_debug) {
3913 printf("kdebug: PAST EVENT: debugid %#8x: "
3914 "time %lld from CPU %u "
3915 "(barrier at time %lld)\n",
3916 kdsp_actual->kds_records[rcursor].debugid,
3917 t, cpu, barrier_min);
3918 }
3919
3920 if (kdsp_actual->kds_readlast >= EVENTS_PER_STORAGE_UNIT) {
3921 release_storage_unit(cpu, kdsp.raw);
3922
3923 if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
3924 goto next_cpu;
3925 }
3926 kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
3927 }
3928
3929 goto next_event;
3930 }
3931
3932 /*
3933 * Don't worry about merging any events -- just walk through
3934 * the CPUs and find the latest timestamp of lost events.
3935 */
3936 if (lostevents) {
3937 continue;
3938 }
3939
3940 if (t < earliest_time) {
3941 earliest_time = t;
3942 min_kdbp = kdbp;
3943 min_cpu = cpu;
3944 }
3945 }
3946 if (lostevents) {
3947 /*
3948 * If any lost events were hit in the buffers, emit an event
3949 * with the latest timestamp.
3950 */
3951 kdbg_set_timestamp_and_cpu(&lostevent, barrier_min, lostcpu);
3952 *tempbuf = lostevent;
3953 tempbuf->arg1 = 1;
3954 goto nextevent;
3955 }
3956 if (min_kdbp == NULL) {
3957 /* All buffers ran empty. */
3958 out_of_events = true;
3959 }
3960 if (out_of_events) {
3961 break;
3962 }
3963
3964 kdsp = min_kdbp->kd_list_head;
3965 kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
3966
3967 /* Copy earliest event into merged events scratch buffer. */
3968 *tempbuf = kdsp_actual->kds_records[kdsp_actual->kds_readlast++];
3969
3970 if (kdsp_actual->kds_readlast == EVENTS_PER_STORAGE_UNIT) {
3971 release_storage_unit(min_cpu, kdsp.raw);
3972 }
3973
3974 /*
3975 * Watch for out of order timestamps (from IOPs).
3976 */
3977 if (earliest_time < min_kdbp->kd_prev_timebase) {
3978 /*
3979 * If we haven't already, emit a TRACE_RETROGRADE_EVENTS event.
3980 * Otherwise, ignore this event.
3981 */
3982 if (traced_retrograde) {
3983 continue;
3984 }
3985 if (kdbg_debug) {
3986 printf("kdebug: RETRO EVENT: debugid %#8x: "
3987 "time %lld from CPU %u "
3988 "(barrier at time %lld)\n",
3989 kdsp_actual->kds_records[rcursor].debugid,
3990 t, cpu, barrier_min);
3991 }
3992
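/*
 * Rewrite the copied event in place as a retrograde marker: pin it to the
 * previously observed timestamp so the merged stream stays monotonic, and
 * stash the original debugid in arg1 and its timestamp in arg2.
 */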
3993 kdbg_set_timestamp_and_cpu(tempbuf, min_kdbp->kd_prev_timebase, kdbg_get_cpu(tempbuf));
3994 tempbuf->arg1 = tempbuf->debugid;
3995 tempbuf->arg2 = (kd_buf_argtype)earliest_time;
3996 tempbuf->arg3 = 0;
3997 tempbuf->arg4 = 0;
3998 tempbuf->debugid = TRACE_RETROGRADE_EVENTS;
3999 traced_retrograde = true;
4000 } else {
4001 min_kdbp->kd_prev_timebase = earliest_time;
4002 }
4003 nextevent:
4004 tempbuf_count--;
4005 tempbuf_number++;
4006 tempbuf++;
4007
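/*
 * Bound how much accumulates between flushes: once RAW_FLUSH_SIZE bytes
 * have been merged, drop out of the loop so the vnode path below can fsync.
 */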
4008 if ((RAW_file_written += sizeof(kd_buf)) >= RAW_FLUSH_SIZE) {
4009 break;
4010 }
4011 }
4012 if (tempbuf_number) {
4013 /*
4014 * Remember the latest timestamp of events that we've merged so we
4015 * don't think we've lost events later.
4016 */
4017 uint64_t latest_time = kdbg_get_timestamp(tempbuf - 1);
4018 if (kd_ctrl_page.oldest_time < latest_time) {
4019 kd_ctrl_page.oldest_time = latest_time;
4020 }
4021 if (file_version == RAW_VERSION3) {
4022 if (!(kdbg_write_v3_event_chunk_header(buffer, V3_RAW_EVENTS, (tempbuf_number * sizeof(kd_buf)), vp, ctx))) {
4023 error = EFAULT;
4024 goto check_error;
4025 }
4026 if (buffer) {
4027 buffer += (sizeof(kd_chunk_header_v3) + sizeof(uint64_t));
4028 }
4029
4030 assert(count >= (sizeof(kd_chunk_header_v3) + sizeof(uint64_t)));
4031 count -= (sizeof(kd_chunk_header_v3) + sizeof(uint64_t));
4032 *number += (sizeof(kd_chunk_header_v3) + sizeof(uint64_t));
4033 }
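/*
 * Push the merged events out: append to the trace file when writing to a
 * vnode, otherwise copy out to the caller's buffer.
 */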
4034 if (vp) {
4035 size_t write_size = tempbuf_number * sizeof(kd_buf);
4036 error = kdbg_write_to_vnode((caddr_t)kdcopybuf, write_size, vp, ctx, RAW_file_offset);
4037 if (!error) {
4038 RAW_file_offset += write_size;
4039 }
4040
4041 if (RAW_file_written >= RAW_FLUSH_SIZE) {
4042 error = VNOP_FSYNC(vp, MNT_NOWAIT, ctx);
4043
4044 RAW_file_written = 0;
4045 }
4046 } else {
4047 error = copyout(kdcopybuf, buffer, tempbuf_number * sizeof(kd_buf));
4048 buffer += (tempbuf_number * sizeof(kd_buf));
4049 }
4050 check_error:
4051 if (error) {
4052 *number = 0;
4053 error = EINVAL;
4054 break;
4055 }
4056 count -= tempbuf_number;
4057 *number += tempbuf_number;
4058 }
4059 if (out_of_events) {
4060 /*
4061 * All trace buffers are empty.
4062 */
4063 break;
4064 }
4065
4066 if ((tempbuf_count = count) > KDCOPYBUF_COUNT) {
4067 tempbuf_count = KDCOPYBUF_COUNT;
4068 }
4069 }
4070 if (!(old_kdebug_flags & KDBG_NOWRAP)) {
4071 enable_wrap(old_kdebug_slowcheck);
4072 }
4073 thread_clear_eager_preempt(current_thread());
4074 return error;
4075 }
4076
4077 #define KDEBUG_TEST_CODE(code) BSDDBG_CODE(DBG_BSD_KDEBUG_TEST, (code))
4078
4079 /*
4080 * A test IOP for the SYNC_FLUSH callback.
4081 */
4082
4083 static int sync_flush_iop = 0;
4084
4085 static void
4086 sync_flush_callback(void * __unused context, kd_callback_type reason,
4087 void * __unused arg)
4088 {
4089 assert(sync_flush_iop > 0);
4090
4091 if (reason == KD_CALLBACK_SYNC_FLUSH) {
4092 kernel_debug_enter(sync_flush_iop, KDEBUG_TEST_CODE(0xff),
4093 kdbg_timestamp(), 0, 0, 0, 0, 0);
4094 }
4095 }
4096
4097 static struct kd_callback sync_flush_kdcb = {
4098 .func = sync_flush_callback,
4099 .iop_name = "test_sf",
4100 };
4101
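/*
 * Self-test entry points, selected by flavor (the user-facing trigger is not
 * shown here): 1 exercises each KDBG* macro variant, 2 checks that
 * kernel_debug_enter does not emit events with stale IOP timestamps, 3 feeds
 * it a timestamp far in the future, and 4 registers the sync-flush test IOP
 * above.
 */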
4102 static int
4103 kdbg_test(size_t flavor)
4104 {
4105 int code = 0;
4106 int dummy_iop = 0;
4107
4108 switch (flavor) {
4109 case 1:
4110 /* try each macro */
4111 KDBG(KDEBUG_TEST_CODE(code)); code++;
4112 KDBG(KDEBUG_TEST_CODE(code), 1); code++;
4113 KDBG(KDEBUG_TEST_CODE(code), 1, 2); code++;
4114 KDBG(KDEBUG_TEST_CODE(code), 1, 2, 3); code++;
4115 KDBG(KDEBUG_TEST_CODE(code), 1, 2, 3, 4); code++;
4116
4117 KDBG_RELEASE(KDEBUG_TEST_CODE(code)); code++;
4118 KDBG_RELEASE(KDEBUG_TEST_CODE(code), 1); code++;
4119 KDBG_RELEASE(KDEBUG_TEST_CODE(code), 1, 2); code++;
4120 KDBG_RELEASE(KDEBUG_TEST_CODE(code), 1, 2, 3); code++;
4121 KDBG_RELEASE(KDEBUG_TEST_CODE(code), 1, 2, 3, 4); code++;
4122
4123 KDBG_FILTERED(KDEBUG_TEST_CODE(code)); code++;
4124 KDBG_FILTERED(KDEBUG_TEST_CODE(code), 1); code++;
4125 KDBG_FILTERED(KDEBUG_TEST_CODE(code), 1, 2); code++;
4126 KDBG_FILTERED(KDEBUG_TEST_CODE(code), 1, 2, 3); code++;
4127 KDBG_FILTERED(KDEBUG_TEST_CODE(code), 1, 2, 3, 4); code++;
4128
4129 KDBG_RELEASE_NOPROCFILT(KDEBUG_TEST_CODE(code)); code++;
4130 KDBG_RELEASE_NOPROCFILT(KDEBUG_TEST_CODE(code), 1); code++;
4131 KDBG_RELEASE_NOPROCFILT(KDEBUG_TEST_CODE(code), 1, 2); code++;
4132 KDBG_RELEASE_NOPROCFILT(KDEBUG_TEST_CODE(code), 1, 2, 3); code++;
4133 KDBG_RELEASE_NOPROCFILT(KDEBUG_TEST_CODE(code), 1, 2, 3, 4); code++;
4134
4135 KDBG_DEBUG(KDEBUG_TEST_CODE(code)); code++;
4136 KDBG_DEBUG(KDEBUG_TEST_CODE(code), 1); code++;
4137 KDBG_DEBUG(KDEBUG_TEST_CODE(code), 1, 2); code++;
4138 KDBG_DEBUG(KDEBUG_TEST_CODE(code), 1, 2, 3); code++;
4139 KDBG_DEBUG(KDEBUG_TEST_CODE(code), 1, 2, 3, 4); code++;
4140 break;
4141
4142 case 2:
4143 if (kd_ctrl_page.kdebug_iops) {
4144 /* use a real IOP cpu_id to avoid the assertion in kernel_debug_enter */
4145 dummy_iop = kd_ctrl_page.kdebug_iops[0].cpu_id;
4146 }
4147
4148 /* ensure old timestamps are not emitted from kernel_debug_enter */
4149 kernel_debug_enter(dummy_iop, KDEBUG_TEST_CODE(code),
4150 100 /* very old timestamp */, 0, 0, 0, 0, 0);
4151 code++;
4152 kernel_debug_enter(dummy_iop, KDEBUG_TEST_CODE(code),
4153 kdbg_timestamp(), 0, 0, 0, 0, 0);
4154 code++;
4155 break;
4156
4157 case 3:
4158 if (kd_ctrl_page.kdebug_iops) {
4159 dummy_iop = kd_ctrl_page.kdebug_iops[0].cpu_id;
4160 }
4161 kernel_debug_enter(dummy_iop, KDEBUG_TEST_CODE(code),
4162 kdbg_timestamp() * 2 /* deliberately in the future */, 0, 0, 0, 0, 0);
4163 break;
4164
4165 case 4:
4166 if (!sync_flush_iop) {
4167 sync_flush_iop = kernel_debug_register_callback(
4168 sync_flush_kdcb);
4169 assert(sync_flush_iop > 0);
4170 }
4171 break;
4172
4173 default:
4174 return ENOTSUP;
4175 }
4176
4177 return 0;
4178 }
4179
4180 #undef KDEBUG_TEST_CODE
4181
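/*
 * Early-boot entry point. n_events and filter_desc are supplied by the caller
 * (assumed to come from boot-args; see the call site), and opts carries flags
 * such as KDOPT_ATBOOT and KDOPT_WRAPPING used by kdebug_trace_start below.
 */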
4182 void
4183 kdebug_init(unsigned int n_events, char *filter_desc, enum kdebug_opts opts)
4184 {
4185 assert(filter_desc != NULL);
4186
4187 if (log_leaks && n_events == 0) {
4188 n_events = 200000;
4189 }
4190
4191 kdebug_trace_start(n_events, filter_desc, opts);
4192 }
4193
4194 static void
4195 kdbg_set_typefilter_string(const char *filter_desc)
4196 {
4197 char *end = NULL;
4198
4199 ktrace_assert_lock_held();
4200
4201 assert(filter_desc != NULL);
4202
4203 typefilter_reject_all(kdbg_typefilter);
4204 typefilter_allow_class(kdbg_typefilter, DBG_TRACE);
4205
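/*
 * For example (per the parsing below), "C1,S0x0521" allows every event in
 * class 1 plus class-subclass 0x0521.
 */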
4206 /* If the filter description starts with a number, assume it's a single class-subclass (csc) value. */
4207 if (filter_desc[0] >= '0' && filter_desc[0] <= '9') {
4208 unsigned long csc = strtoul(filter_desc, &end, 0);
4209 if (filter_desc != end && csc <= KDBG_CSC_MAX) {
4210 typefilter_allow_csc(kdbg_typefilter, (uint16_t)csc);
4211 }
4212 return;
4213 }
4214
4215 while (filter_desc[0] != '\0') {
4216 unsigned long allow_value;
4217
4218 char filter_type = filter_desc[0];
4219 if (filter_type != 'C' && filter_type != 'S') {
4220 printf("kdebug: unexpected filter type `%c'\n", filter_type);
4221 return;
4222 }
4223 filter_desc++;
4224
4225 allow_value = strtoul(filter_desc, &end, 0);
4226 if (filter_desc == end) {
4227 printf("kdebug: cannot parse `%s' as integer\n", filter_desc);
4228 return;
4229 }
4230
4231 switch (filter_type) {
4232 case 'C':
4233 if (allow_value > KDBG_CLASS_MAX) {
4234 printf("kdebug: class 0x%lx is invalid\n", allow_value);
4235 return;
4236 }
4237 printf("kdebug: C 0x%lx\n", allow_value);
4238 typefilter_allow_class(kdbg_typefilter, (uint8_t)allow_value);
4239 break;
4240 case 'S':
4241 if (allow_value > KDBG_CSC_MAX) {
4242 printf("kdebug: class-subclass 0x%lx is invalid\n", allow_value);
4243 return;
4244 }
4245 printf("kdebug: S 0x%lx\n", allow_value);
4246 typefilter_allow_csc(kdbg_typefilter, (uint16_t)allow_value);
4247 break;
4248 default:
4249 __builtin_unreachable();
4250 }
4251
4252 /* advance to next filter entry */
4253 filter_desc = end;
4254 if (filter_desc[0] == ',') {
4255 filter_desc++;
4256 }
4257 }
4258 }
4259
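/*
 * Intended to run on wake when wake_nkdbufs was configured; returns the
 * elapsed mach_absolute_time ticks spent restarting trace (presumably so the
 * caller can account for the setup cost).
 */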
4260 uint64_t
4261 kdebug_wake(void)
4262 {
4263 if (!wake_nkdbufs) {
4264 return 0;
4265 }
4266 uint64_t start = mach_absolute_time();
4267 kdebug_trace_start(wake_nkdbufs, NULL, trace_wrap ? KDOPT_WRAPPING : 0);
4268 return mach_absolute_time() - start;
4269 }
4270
4271 /*
4272 * This function is meant to be called from the bootstrap thread or kdebug_wake.
4273 */
4274 void
4275 kdebug_trace_start(unsigned int n_events, const char *filter_desc,
4276 enum kdebug_opts opts)
4277 {
4278 if (!n_events) {
4279 kd_early_done = true;
4280 return;
4281 }
4282
4283 ktrace_start_single_threaded();
4284
4285 kdbg_lock_init();
4286
4287 ktrace_kernel_configure(KTRACE_KDEBUG);
4288
4289 kdbg_set_nkdbufs(n_events);
4290
4291 kernel_debug_string_early("start_kern_tracing");
4292
4293 if (kdbg_reinit((opts & KDOPT_ATBOOT))) {
4294 printf("error from kdbg_reinit, kernel tracing not started\n");
4295 goto out;
4296 }
4297
4298 /*
4299 * Wrapping is disabled because boot and wake tracing is interested in
4300 * the earliest events, at the expense of later ones.
4301 */
4302 if (!(opts & KDOPT_WRAPPING)) {
4303 uint32_t old1, old2;
4304 (void)disable_wrap(&old1, &old2);
4305 }
4306
4307 if (filter_desc && filter_desc[0] != '\0') {
4308 if (kdbg_initialize_typefilter(NULL) == KERN_SUCCESS) {
4309 kdbg_set_typefilter_string(filter_desc);
4310 kdbg_enable_typefilter();
4311 }
4312 }
4313
4314 /*
4315 * Hold off interrupts between getting a thread map and enabling trace
4316 * and until the early traces are recorded.
4317 */
4318 bool s = ml_set_interrupts_enabled(false);
4319
4320 if (!(opts & KDOPT_ATBOOT)) {
4321 kdbg_thrmap_init();
4322 }
4323
4324 kdbg_set_tracing_enabled(true, KDEBUG_ENABLE_TRACE);
4325
4326 if ((opts & KDOPT_ATBOOT)) {
4327 /*
4328 * Transfer all very early events from the static buffer into the real
4329 * buffers.
4330 */
4331 kernel_debug_early_end();
4332 }
4333
4334 ml_set_interrupts_enabled(s);
4335
4336 printf("kernel tracing started with %u events, filter = %s\n", n_events,
4337 filter_desc ?: "none");
4338
4339 out:
4340 ktrace_end_single_threaded();
4341 }
4342
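/*
 * Dump the current trace to `filename` as a RAW_VERSION1 file: the thread
 * map followed by the merged events, bracketed by TRACE_WRITING_EVENTS
 * start/end events and synced to disk.
 */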
4343 void
4344 kdbg_dump_trace_to_file(const char *filename)
4345 {
4346 vfs_context_t ctx;
4347 vnode_t vp;
4348 size_t write_size;
4349 int ret;
4350
4351 ktrace_lock();
4352
4353 if (!(kdebug_enable & KDEBUG_ENABLE_TRACE)) {
4354 goto out;
4355 }
4356
4357 if (ktrace_get_owning_pid() != 0) {
4358 /*
4359 * Another process owns ktrace and is still active, disable tracing to
4360 * prevent wrapping.
4361 */
4362 kdebug_enable = 0;
4363 kd_ctrl_page.enabled = 0;
4364 commpage_update_kdebug_state();
4365 goto out;
4366 }
4367
4368 KDBG_RELEASE(TRACE_WRITING_EVENTS | DBG_FUNC_START);
4369
4370 kdebug_enable = 0;
4371 kd_ctrl_page.enabled = 0;
4372 commpage_update_kdebug_state();
4373
4374 ctx = vfs_context_kernel();
4375
4376 if (vnode_open(filename, (O_CREAT | FWRITE | O_NOFOLLOW), 0600, 0, &vp, ctx)) {
4377 goto out;
4378 }
4379
4380 kdbg_write_thread_map(vp, ctx);
4381
4382 write_size = nkdbufs * sizeof(kd_buf);
4383 ret = kdbg_read(0, &write_size, vp, ctx, RAW_VERSION1);
4384 if (ret) {
4385 goto out_close;
4386 }
4387
4388 /*
4389 * Wait to synchronize the file to capture the I/O in the
4390 * TRACE_WRITING_EVENTS interval.
4391 */
4392 ret = VNOP_FSYNC(vp, MNT_WAIT, ctx);
4393
4394 /*
4395 * Balance the starting TRACE_WRITING_EVENTS tracepoint manually.
4396 */
4397 kd_buf end_event = {
4398 .debugid = TRACE_WRITING_EVENTS | DBG_FUNC_END,
4399 .arg1 = write_size,
4400 .arg2 = ret,
4401 .arg5 = (kd_buf_argtype)thread_tid(current_thread()),
4402 };
4403 kdbg_set_timestamp_and_cpu(&end_event, kdbg_timestamp(),
4404 cpu_number());
4405
4406 /* this is best effort -- ignore any errors */
4407 (void)kdbg_write_to_vnode((caddr_t)&end_event, sizeof(kd_buf), vp, ctx,
4408 RAW_file_offset);
4409
4410 out_close:
4411 vnode_close(vp, FWRITE, ctx);
4412 sync(current_proc(), (void *)NULL, (int *)NULL);
4413
4414 out:
4415 ktrace_unlock();
4416 }
4417
4418 static int
4419 kdbg_sysctl_continuous SYSCTL_HANDLER_ARGS
4420 {
4421 #pragma unused(oidp, arg1, arg2)
4422 int value = kdbg_continuous_time;
4423 int ret = sysctl_io_number(req, value, sizeof(value), &value, NULL);
4424
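/* A nonzero ret is an error and newptr is 0 for pure reads, so only fall through when a new value was supplied. */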
4425 if (ret || !req->newptr) {
4426 return ret;
4427 }
4428
4429 kdbg_continuous_time = value;
4430 return 0;
4431 }
4432
4433 SYSCTL_NODE(_kern, OID_AUTO, kdbg, CTLFLAG_RD | CTLFLAG_LOCKED, 0,
4434 "kdbg");
4435
4436 SYSCTL_PROC(_kern_kdbg, OID_AUTO, experimental_continuous,
4437 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0,
4438 sizeof(int), kdbg_sysctl_continuous, "I",
4439 "Set kdebug to use mach_continuous_time");
4440
4441 SYSCTL_INT(_kern_kdbg, OID_AUTO, debug,
4442 CTLFLAG_RW | CTLFLAG_LOCKED,
4443 &kdbg_debug, 0, "Set kdebug debug mode");
4444
4445 SYSCTL_QUAD(_kern_kdbg, OID_AUTO, oldest_time,
4446 CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED,
4447 &kd_ctrl_page.oldest_time,
4448 "Find the oldest timestamp still in trace");