/*
 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License"). You may not use this file except in compliance with the
 * License. Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc_internal.h>
#include <sys/sysctl.h>
#include <sys/kdebug.h>
#include <sys/kauth.h>
#include <sys/ktrace.h>
#include <sys/sysproto.h>
#include <sys/bsdtask_info.h>
#include <sys/random.h>

#include <mach/clock_types.h>
#include <mach/mach_types.h>
#include <mach/mach_time.h>
#include <mach/mach_vm.h>
#include <machine/machine_routines.h>

#include <mach/machine.h>
#include <mach/vm_map.h>

#if defined(__i386__) || defined(__x86_64__)
#include <i386/rtclock_protos.h>
#include <i386/machine_routines.h>
#endif

#include <kern/clock.h>

#include <kern/thread.h>
#include <kern/task.h>
#include <kern/debug.h>
#include <kern/kalloc.h>
#include <kern/cpu_data.h>
#include <kern/assert.h>
#include <kern/telemetry.h>
#include <kern/sched_prim.h>
#include <vm/vm_kern.h>

#include <kperf/kperf.h>
#include <pexpert/device_tree.h>

#include <sys/malloc.h>
#include <sys/mcache.h>

#include <sys/vnode.h>
#include <sys/vnode_internal.h>
#include <sys/fcntl.h>
#include <sys/file_internal.h>

#include <sys/param.h>                  /* for isset() */

#include <mach/mach_host.h>             /* for host_info() */
#include <libkern/OSAtomic.h>

#include <machine/pal_routines.h>
#include <machine/atomic.h>
/*
 * https://coreoswiki.apple.com/wiki/pages/U6z3i0q9/Consistent_Logging_Implementers_Guide.html
 *
 * IOP(s) are auxiliary cores that want to participate in kdebug event logging.
 * They are registered dynamically. Each is assigned a cpu_id at registration.
 *
 * NOTE: IOP trace events may not use the same clock hardware as "normal"
 * cpus. There is an effort made to synchronize the IOP timebase with the
 * AP, but it should be understood that there may be discrepancies.
 *
 * Once registered, an IOP is permanent, it cannot be unloaded/unregistered.
 * The current implementation depends on this for thread safety.
 *
 * New registrations occur by allocating a kd_iop struct and assigning
 * a provisional cpu_id of list_head->cpu_id + 1. Then a CAS to claim the
 * list_head pointer resolves any races.
 *
 * You may safely walk the kd_iops list at any time, without holding locks.
 *
 * When allocating buffers, the current kd_iops head is captured. Any operations
 * that depend on the buffer state (such as flushing IOP traces on reads,
 * etc.) should use the captured list head. This will allow registrations to
 * take place while trace is in use.
 */
typedef struct kd_iop {
	kd_callback_t callback;
	uint32_t      cpu_id;
	uint64_t      last_timestamp; /* Prevent timer rollback */
	struct kd_iop *next;
} kd_iop_t;

static kd_iop_t *kd_iops = NULL;
/*
 * A typefilter is an 8KB bitmap that is used to selectively filter events
 * being recorded. It is able to individually address every class & subclass.
 *
 * There is a shared typefilter in the kernel which is lazily allocated. Once
 * allocated, the shared typefilter is never deallocated. The shared typefilter
 * is also mapped on demand into userspace processes that invoke the kdebug_trace
 * API from Libsyscall. When mapped into a userspace process, the memory is
 * read only, and does not have a fixed address.
 *
 * It is a requirement that the kernel's shared typefilter always pass DBG_TRACE
 * events. This is enforced automatically, by having the needed bits set any
 * time the shared typefilter is mutated.
 */

typedef uint8_t* typefilter_t;

static typefilter_t kdbg_typefilter;
static mach_port_t  kdbg_typefilter_memory_entry;
/*
 * There are 3 combinations of page sizes:
 *
 *  4KB /  4KB
 *  4KB / 16KB
 * 16KB / 16KB
 *
 * The typefilter is exactly 8KB. In the first two scenarios, we would like
 * to use 2 pages exactly; in the third scenario we must make certain that
 * a full page is allocated so we do not inadvertently share 8KB of random
 * data to userspace. The round_page_32 macro rounds to kernel page size.
 */
#define TYPEFILTER_ALLOC_SIZE MAX(round_page_32(KDBG_TYPEFILTER_BITMAP_SIZE), KDBG_TYPEFILTER_BITMAP_SIZE)
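/*
 * Worked example (added commentary, not from the original source): with an
 * 8KB KDBG_TYPEFILTER_BITMAP_SIZE, a 4KB kernel page size gives
 * round_page_32(8KB) == 8KB (two pages exactly), while a 16KB kernel page
 * size gives round_page_32(8KB) == 16KB, so the MAX() keeps the allocation
 * both page-rounded and at least bitmap-sized in every combination.
 */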
static typefilter_t
typefilter_create(void)
{
	typefilter_t tf;
	if (KERN_SUCCESS == kmem_alloc(kernel_map, (vm_offset_t *)&tf, TYPEFILTER_ALLOC_SIZE, VM_KERN_MEMORY_DIAG)) {
		memset(&tf[KDBG_TYPEFILTER_BITMAP_SIZE], 0, TYPEFILTER_ALLOC_SIZE - KDBG_TYPEFILTER_BITMAP_SIZE);
		return tf;
	}
	return NULL;
}

static void
typefilter_deallocate(typefilter_t tf)
{
	assert(tf != NULL);
	assert(tf != kdbg_typefilter);
	kmem_free(kernel_map, (vm_offset_t)tf, TYPEFILTER_ALLOC_SIZE);
}
static void
typefilter_copy(typefilter_t dst, typefilter_t src)
{
	assert(dst != NULL);
	assert(src != NULL);
	memcpy(dst, src, KDBG_TYPEFILTER_BITMAP_SIZE);
}

static void
typefilter_reject_all(typefilter_t tf)
{
	assert(tf != NULL);
	memset(tf, 0, KDBG_TYPEFILTER_BITMAP_SIZE);
}

static void
typefilter_allow_all(typefilter_t tf)
{
	assert(tf != NULL);
	memset(tf, ~0, KDBG_TYPEFILTER_BITMAP_SIZE);
}

static void
typefilter_allow_class(typefilter_t tf, uint8_t class)
{
	assert(tf != NULL);
	const uint32_t BYTES_PER_CLASS = 256 / 8; // 256 subclasses, 1 bit each
	memset(&tf[class * BYTES_PER_CLASS], 0xFF, BYTES_PER_CLASS);
}

static void
typefilter_allow_csc(typefilter_t tf, uint16_t csc)
{
	assert(tf != NULL);
	setbit(tf, csc);
}

static bool
typefilter_is_debugid_allowed(typefilter_t tf, uint32_t id)
{
	assert(tf != NULL);
	return isset(tf, KDBG_EXTRACT_CSC(id));
}
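/*
 * Bit-layout sketch (added commentary, not part of the original source): the
 * filter is indexed by the class-subclass code (CSC), so each of the 256
 * classes owns a contiguous run of 256 bits (32 bytes).  Checking a debugid
 * therefore reduces to:
 *
 *     uint16_t csc = KDBG_EXTRACT_CSC(debugid);   // (class << 8) | subclass
 *     bool allowed = isset(tf, csc);              // one bit per CSC
 */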
static mach_port_t
typefilter_create_memory_entry(typefilter_t tf)
{
	assert(tf != NULL);

	mach_port_t memory_entry = MACH_PORT_NULL;
	memory_object_size_t size = TYPEFILTER_ALLOC_SIZE;

	mach_make_memory_entry_64(kernel_map,
	    &size,
	    (memory_object_offset_t)tf,
	    VM_PROT_READ,
	    &memory_entry,
	    MACH_PORT_NULL);

	return memory_entry;
}
static int  kdbg_copyin_typefilter(user_addr_t addr, size_t size);
static void kdbg_enable_typefilter(void);
static void kdbg_disable_typefilter(void);

/*
 * External prototypes
 */

void task_act_iterate_wth_args(task_t, void (*)(thread_t, void *), void *);
int  cpu_number(void);                   /* XXX <machine/...> include path broken */
void commpage_update_kdebug_state(void); /* XXX sign */

extern int log_leaks;
/*
 * This flag is for testing purposes only -- it's highly experimental and tools
 * have not been updated to support it.
 */
static bool kdbg_continuous_time = false;

static inline uint64_t
kdbg_timestamp(void)
{
	if (kdbg_continuous_time) {
		return mach_continuous_time();
	} else {
		return mach_absolute_time();
	}
}

static int kdbg_debug = 0;
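/*
 * Added note: with kdbg_continuous_time set, timestamps come from
 * mach_continuous_time(), which keeps counting across sleep; otherwise
 * mach_absolute_time() is used, which is what existing trace tools expect.
 */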
#if KDEBUG_MOJO_TRACE
#include <sys/kdebugevents.h>
static void kdebug_serial_print(        /* forward */
	uint32_t, uint32_t, uint64_t,
	uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
#endif
int kdbg_control(int *, u_int, user_addr_t, size_t *);

static int kdbg_read(user_addr_t, size_t *, vnode_t, vfs_context_t, uint32_t);
static int kdbg_readcpumap(user_addr_t, size_t *);
static int kdbg_readthrmap_v3(user_addr_t, size_t, int);
static int kdbg_readcurthrmap(user_addr_t, size_t *);
static int kdbg_setreg(kd_regtype *);
static int kdbg_setpidex(kd_regtype *);
static int kdbg_setpid(kd_regtype *);
static void kdbg_thrmap_init(void);
static int kdbg_reinit(boolean_t);
static int kdbg_bootstrap(boolean_t);
static int kdbg_test(size_t flavor);

static int kdbg_write_v1_header(boolean_t write_thread_map, vnode_t vp, vfs_context_t ctx);
static int kdbg_write_thread_map(vnode_t vp, vfs_context_t ctx);
static int kdbg_copyout_thread_map(user_addr_t buffer, size_t *buffer_size);
static void kdbg_clear_thread_map(void);

static boolean_t kdbg_wait(uint64_t timeout_ms, boolean_t locked_wait);
static void kdbg_wakeup(void);

int kdbg_cpumap_init_internal(kd_iop_t* iops, uint32_t cpu_count,
    uint8_t** cpumap, uint32_t* cpumap_size);

static kd_threadmap *kdbg_thrmap_init_internal(unsigned int count,
    unsigned int *mapsize,
    unsigned int *mapcount);

static boolean_t kdebug_current_proc_enabled(uint32_t debugid);
static errno_t kdebug_check_trace_string(uint32_t debugid, uint64_t str_id);

int kdbg_write_v3_header(user_addr_t, size_t *, int);
int kdbg_write_v3_chunk_header(user_addr_t buffer, uint32_t tag,
    uint32_t sub_tag, uint64_t length,
    vnode_t vp, vfs_context_t ctx);

user_addr_t kdbg_write_v3_event_chunk_header(user_addr_t buffer, uint32_t tag,
    uint64_t length, vnode_t vp,
    vfs_context_t ctx);

static int create_buffers(boolean_t);
static void delete_buffers(void);

extern int tasks_count;
extern int threads_count;
extern void IOSleep(int);
/* trace enable status */
unsigned int kdebug_enable = 0;

/* A static buffer to record events prior to the start of regular logging */

#define KD_EARLY_BUFFER_SIZE  (16 * 1024)
#define KD_EARLY_BUFFER_NBUFS (KD_EARLY_BUFFER_SIZE / sizeof(kd_buf))
#if CONFIG_EMBEDDED
/*
 * On embedded, the space for this is carved out by osfmk/arm/data.s -- clang
 * has problems aligning to greater than 4K.
 */
extern kd_buf kd_early_buffer[KD_EARLY_BUFFER_NBUFS];
#else /* CONFIG_EMBEDDED */
__attribute__((aligned(KD_EARLY_BUFFER_SIZE)))
static kd_buf kd_early_buffer[KD_EARLY_BUFFER_NBUFS];
#endif /* !CONFIG_EMBEDDED */

static unsigned int kd_early_index = 0;
static bool kd_early_overflow = false;
static bool kd_early_done = false;
#define SLOW_NOLOG  0x01
#define SLOW_CHECKS 0x02

#define EVENTS_PER_STORAGE_UNIT   2048
#define MIN_STORAGE_UNITS_PER_CPU 4

#define POINTER_FROM_KDS_PTR(x) (&kd_bufs[x.buffer_index].kdsb_addr[x.offset])

union kds_ptr {
	struct {
		uint32_t buffer_index:21;
		uint16_t offset:11;
	};
	uint32_t raw;
};

struct kd_storage {
	union kds_ptr kds_next;
	uint32_t  kds_bufindx;
	uint32_t  kds_bufcnt;
	uint32_t  kds_readlast;
	boolean_t kds_lostevents;
	uint64_t  kds_timestamp;

	kd_buf kds_records[EVENTS_PER_STORAGE_UNIT];
};

#define MAX_BUFFER_SIZE            (1024 * 1024 * 128)
#define N_STORAGE_UNITS_PER_BUFFER (MAX_BUFFER_SIZE / sizeof(struct kd_storage))
static_assert(N_STORAGE_UNITS_PER_BUFFER <= 0x7ff,
    "shouldn't overflow kds_ptr.offset");
struct kd_storage_buffers {
	struct kd_storage *kdsb_addr;
	uint32_t           kdsb_size;
};

#define KDS_PTR_NULL 0xffffffff
struct kd_storage_buffers *kd_bufs = NULL;
int          n_storage_units = 0;
unsigned int n_storage_buffers = 0;
int          n_storage_threshold = 0;
int          kds_waiter = 0;
struct kd_bufinfo {
	union kds_ptr kd_list_head;
	union kds_ptr kd_list_tail;
	boolean_t kd_lostevents;
	uint64_t  kd_prev_timebase;
	uint32_t  num_bufs;
} __attribute__((aligned(MAX_CPU_CACHE_LINE_SIZE)));
/*
 * In principle, this control block can be shared in DRAM with other
 * coprocessors and runtimes, for configuring what tracing is enabled.
 */
struct kd_ctrl_page_t {
	union kds_ptr kds_free_list;
	uint32_t enabled    :1;
	uint32_t _pad0      :31;
	int      kds_inuse_count;
	uint32_t kdebug_flags;
	uint32_t kdebug_slowcheck;
	uint64_t oldest_time;
	/*
	 * The number of kd_bufinfo structs allocated may not match the current
	 * number of active cpus. We capture the iops list head at initialization
	 * which we could use to calculate the number of cpus we allocated data for,
	 * unless it happens to be null. To avoid that case, we explicitly also
	 * capture a cpu count.
	 */
	kd_iop_t* kdebug_iops;
	uint32_t  kdebug_cpus;
} kd_ctrl_page = {
	.kds_free_list = {.raw = KDS_PTR_NULL},
	.kdebug_slowcheck = SLOW_NOLOG,
	.oldest_time = 0
};
struct kd_bufinfo *kdbip = NULL;

#define KDCOPYBUF_COUNT 8192
#define KDCOPYBUF_SIZE  (KDCOPYBUF_COUNT * sizeof(kd_buf))

#define PAGE_4KB  4096
#define PAGE_16KB 16384

kd_buf *kdcopybuf = NULL;

unsigned int nkdbufs = 0;
unsigned int kdlog_beg = 0;
unsigned int kdlog_end = 0;
unsigned int kdlog_value1 = 0;
unsigned int kdlog_value2 = 0;
unsigned int kdlog_value3 = 0;
unsigned int kdlog_value4 = 0;
static lck_spin_t * kdw_spin_lock;
static lck_spin_t * kds_spin_lock;

kd_threadmap *kd_mapptr = 0;
unsigned int kd_mapsize = 0;
unsigned int kd_mapcount = 0;

off_t RAW_file_offset = 0;
int   RAW_file_written = 0;
#define RAW_FLUSH_SIZE (2 * 1024 * 1024)

/*
 * A globally increasing counter for identifying strings in trace. Starts at
 * 1 because 0 is a reserved return value.
 */
__attribute__((aligned(MAX_CPU_CACHE_LINE_SIZE)))
static uint64_t g_curr_str_id = 1;

#define STR_ID_SIG_OFFSET (48)
#define STR_ID_MASK       ((1ULL << STR_ID_SIG_OFFSET) - 1)
#define STR_ID_SIG_MASK   (~STR_ID_MASK)

/*
 * A bit pattern for identifying string IDs generated by
 * kdebug_trace_string(2).
 */
static uint64_t g_str_id_signature = (0x70acULL << STR_ID_SIG_OFFSET);

#define INTERRUPT     0x01050000
#define MACH_vmfault  0x01300008
#define BSC_SysCall   0x040c0000
#define MACH_SysCall  0x010c0000
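/*
 * Layout sketch for generated string IDs (added commentary): the low 48 bits
 * carry the monotonically increasing counter and the high 16 bits carry the
 * 0x70ac signature, i.e.
 *
 *     str_id = (counter & STR_ID_MASK) | g_str_id_signature;
 *
 * which lets kdebug_check_trace_string() reject IDs that were not produced
 * by kdebug_trace_string(2).
 */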
/* task to string structure */
struct tts {
	task_t task;            /* from procs task */
	pid_t  pid;             /* from procs p_pid */
	char   task_comm[20];   /* from procs p_comm */
};

typedef struct tts tts_t;

struct krt {
	kd_threadmap *map;      /* pointer to the map buffer */
	int count;
	int maxcount;
	struct tts *atts;
};

typedef struct krt krt_t;
static uint32_t
kdbg_cpu_count(boolean_t early_trace)
{
	if (early_trace) {
		return ml_get_cpu_count();
	}

	host_basic_info_data_t hinfo;
	mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
	host_info((host_t)1 /* BSD_HOST */, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
	assert(hinfo.logical_cpu_max > 0);
	return hinfo.logical_cpu_max;
}
#if MACH_ASSERT
#if CONFIG_EMBEDDED
static boolean_t
kdbg_iop_list_is_valid(kd_iop_t* iop)
{
	if (iop) {
		/* Is list sorted by cpu_id? */
		kd_iop_t* temp = iop;
		do {
			assert(!temp->next || temp->next->cpu_id == temp->cpu_id - 1);
			assert(temp->next || (temp->cpu_id == kdbg_cpu_count(FALSE) || temp->cpu_id == kdbg_cpu_count(TRUE)));
		} while ((temp = temp->next));

		/* Does each entry have a function and a name? */
		temp = iop;
		do {
			assert(temp->callback.func);
			assert(strlen(temp->callback.iop_name) < sizeof(temp->callback.iop_name));
		} while ((temp = temp->next));
	}

	return TRUE;
}

static boolean_t
kdbg_iop_list_contains_cpu_id(kd_iop_t* list, uint32_t cpu_id)
{
	while (list) {
		if (list->cpu_id == cpu_id) {
			return TRUE;
		}
		list = list->next;
	}

	return FALSE;
}
#endif /* CONFIG_EMBEDDED */
#endif /* MACH_ASSERT */
static void
kdbg_iop_list_callback(kd_iop_t* iop, kd_callback_type type, void* arg)
{
	while (iop) {
		iop->callback.func(iop->callback.context, type, arg);
		iop = iop->next;
	}
}

static lck_grp_t *kdebug_lck_grp = NULL;
static void
kdbg_set_tracing_enabled(boolean_t enabled, uint32_t trace_type)
{
	int s = ml_set_interrupts_enabled(FALSE);
	lck_spin_lock_grp(kds_spin_lock, kdebug_lck_grp);

	if (enabled) {
		/*
		 * The oldest valid time is now; reject old events from IOPs.
		 */
		kd_ctrl_page.oldest_time = kdbg_timestamp();
		kdebug_enable |= trace_type;
		kd_ctrl_page.kdebug_slowcheck &= ~SLOW_NOLOG;
		kd_ctrl_page.enabled = 1;
		commpage_update_kdebug_state();
	} else {
		kdebug_enable &= ~(KDEBUG_ENABLE_TRACE | KDEBUG_ENABLE_PPT);
		kd_ctrl_page.kdebug_slowcheck |= SLOW_NOLOG;
		kd_ctrl_page.enabled = 0;
		commpage_update_kdebug_state();
	}
	lck_spin_unlock(kds_spin_lock);
	ml_set_interrupts_enabled(s);

	if (enabled) {
		kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_KDEBUG_ENABLED, NULL);
	} else {
		/*
		 * If you do not flush the IOP trace buffers, they can linger
		 * for a considerable period; consider code which disables and
		 * deallocates without a final sync flush.
		 */
		kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_KDEBUG_DISABLED, NULL);
		kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_SYNC_FLUSH, NULL);
	}
}
static void
kdbg_set_flags(int slowflag, int enableflag, boolean_t enabled)
{
	int s = ml_set_interrupts_enabled(FALSE);
	lck_spin_lock_grp(kds_spin_lock, kdebug_lck_grp);

	if (enabled) {
		kd_ctrl_page.kdebug_slowcheck |= slowflag;
		kdebug_enable |= enableflag;
	} else {
		kd_ctrl_page.kdebug_slowcheck &= ~slowflag;
		kdebug_enable &= ~enableflag;
	}

	lck_spin_unlock(kds_spin_lock);
	ml_set_interrupts_enabled(s);
}
/*
 * Disable wrapping and return true if trace wrapped, false otherwise.
 */
static boolean_t
disable_wrap(uint32_t *old_slowcheck, uint32_t *old_flags)
{
	boolean_t wrapped;
	int s = ml_set_interrupts_enabled(FALSE);
	lck_spin_lock_grp(kds_spin_lock, kdebug_lck_grp);

	*old_slowcheck = kd_ctrl_page.kdebug_slowcheck;
	*old_flags = kd_ctrl_page.kdebug_flags;

	wrapped = kd_ctrl_page.kdebug_flags & KDBG_WRAPPED;
	kd_ctrl_page.kdebug_flags &= ~KDBG_WRAPPED;
	kd_ctrl_page.kdebug_flags |= KDBG_NOWRAP;

	lck_spin_unlock(kds_spin_lock);
	ml_set_interrupts_enabled(s);

	return wrapped;
}
static void
enable_wrap(uint32_t old_slowcheck)
{
	int s = ml_set_interrupts_enabled(FALSE);
	lck_spin_lock_grp(kds_spin_lock, kdebug_lck_grp);

	kd_ctrl_page.kdebug_flags &= ~KDBG_NOWRAP;

	if (!(old_slowcheck & SLOW_NOLOG)) {
		kd_ctrl_page.kdebug_slowcheck &= ~SLOW_NOLOG;
	}

	lck_spin_unlock(kds_spin_lock);
	ml_set_interrupts_enabled(s);
}
static int
create_buffers(boolean_t early_trace)
{
	unsigned int i;
	unsigned int p_buffer_size;
	unsigned int f_buffer_size;
	unsigned int f_buffers;
	int error = 0;

	/*
	 * For the duration of this allocation, trace code will only reference
	 * kdebug_iops. Any iops registered after this enabling will not be
	 * messaged until the buffers are reallocated.
	 *
	 * TLDR; Must read kd_iops once and only once!
	 */
	kd_ctrl_page.kdebug_iops = kd_iops;

#if CONFIG_EMBEDDED
	assert(kdbg_iop_list_is_valid(kd_ctrl_page.kdebug_iops));
#endif

	/*
	 * If the list is valid, it is sorted, newest -> oldest. Each iop entry
	 * has a cpu_id of "the older entry + 1", so the highest cpu_id will
	 * be the list head + 1.
	 */
	kd_ctrl_page.kdebug_cpus = kd_ctrl_page.kdebug_iops ? kd_ctrl_page.kdebug_iops->cpu_id + 1 : kdbg_cpu_count(early_trace);

	if (kmem_alloc(kernel_map, (vm_offset_t *)&kdbip, sizeof(struct kd_bufinfo) * kd_ctrl_page.kdebug_cpus, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
		error = ENOSPC;
		goto out;
	}

	if (nkdbufs < (kd_ctrl_page.kdebug_cpus * EVENTS_PER_STORAGE_UNIT * MIN_STORAGE_UNITS_PER_CPU)) {
		n_storage_units = kd_ctrl_page.kdebug_cpus * MIN_STORAGE_UNITS_PER_CPU;
	} else {
		n_storage_units = nkdbufs / EVENTS_PER_STORAGE_UNIT;
	}

	nkdbufs = n_storage_units * EVENTS_PER_STORAGE_UNIT;

	f_buffers = n_storage_units / N_STORAGE_UNITS_PER_BUFFER;
	n_storage_buffers = f_buffers;

	f_buffer_size = N_STORAGE_UNITS_PER_BUFFER * sizeof(struct kd_storage);
	p_buffer_size = (n_storage_units % N_STORAGE_UNITS_PER_BUFFER) * sizeof(struct kd_storage);

	if (p_buffer_size) {
		n_storage_buffers++;
	}

	kd_bufs = NULL;

	if (kdcopybuf == 0) {
		if (kmem_alloc(kernel_map, (vm_offset_t *)&kdcopybuf, (vm_size_t)KDCOPYBUF_SIZE, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
			error = ENOSPC;
			goto out;
		}
	}
	if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs, (vm_size_t)(n_storage_buffers * sizeof(struct kd_storage_buffers)), VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
		error = ENOSPC;
		goto out;
	}
	bzero(kd_bufs, n_storage_buffers * sizeof(struct kd_storage_buffers));

	for (i = 0; i < f_buffers; i++) {
		if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs[i].kdsb_addr, (vm_size_t)f_buffer_size, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
			error = ENOSPC;
			goto out;
		}
		bzero(kd_bufs[i].kdsb_addr, f_buffer_size);

		kd_bufs[i].kdsb_size = f_buffer_size;
	}
	if (p_buffer_size) {
		if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs[i].kdsb_addr, (vm_size_t)p_buffer_size, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
			error = ENOSPC;
			goto out;
		}
		bzero(kd_bufs[i].kdsb_addr, p_buffer_size);

		kd_bufs[i].kdsb_size = p_buffer_size;
	}
	n_storage_units = 0;

	for (i = 0; i < n_storage_buffers; i++) {
		struct kd_storage *kds;
		int n_elements;
		int n;

		n_elements = kd_bufs[i].kdsb_size / sizeof(struct kd_storage);
		kds = kd_bufs[i].kdsb_addr;

		for (n = 0; n < n_elements; n++) {
			kds[n].kds_next.buffer_index = kd_ctrl_page.kds_free_list.buffer_index;
			kds[n].kds_next.offset = kd_ctrl_page.kds_free_list.offset;

			kd_ctrl_page.kds_free_list.buffer_index = i;
			kd_ctrl_page.kds_free_list.offset = n;
		}
		n_storage_units += n_elements;
	}

	bzero((char *)kdbip, sizeof(struct kd_bufinfo) * kd_ctrl_page.kdebug_cpus);

	for (i = 0; i < kd_ctrl_page.kdebug_cpus; i++) {
		kdbip[i].kd_list_head.raw = KDS_PTR_NULL;
		kdbip[i].kd_list_tail.raw = KDS_PTR_NULL;
		kdbip[i].kd_lostevents = FALSE;
		kdbip[i].num_bufs = 0;
	}

	kd_ctrl_page.kdebug_flags |= KDBG_BUFINIT;

	kd_ctrl_page.kds_inuse_count = 0;
	n_storage_threshold = n_storage_units / 2;
out:
	if (error) {
		delete_buffers();
	}

	return error;
}
static void
delete_buffers(void)
{
	unsigned int i;

	if (kd_bufs) {
		for (i = 0; i < n_storage_buffers; i++) {
			if (kd_bufs[i].kdsb_addr) {
				kmem_free(kernel_map, (vm_offset_t)kd_bufs[i].kdsb_addr, (vm_size_t)kd_bufs[i].kdsb_size);
			}
		}
		kmem_free(kernel_map, (vm_offset_t)kd_bufs, (vm_size_t)(n_storage_buffers * sizeof(struct kd_storage_buffers)));

		kd_bufs = NULL;
		n_storage_buffers = 0;
	}
	if (kdcopybuf) {
		kmem_free(kernel_map, (vm_offset_t)kdcopybuf, KDCOPYBUF_SIZE);

		kdcopybuf = NULL;
	}
	kd_ctrl_page.kds_free_list.raw = KDS_PTR_NULL;

	if (kdbip) {
		kmem_free(kernel_map, (vm_offset_t)kdbip, sizeof(struct kd_bufinfo) * kd_ctrl_page.kdebug_cpus);

		kdbip = NULL;
	}
	kd_ctrl_page.kdebug_iops = NULL;
	kd_ctrl_page.kdebug_cpus = 0;
	kd_ctrl_page.kdebug_flags &= ~KDBG_BUFINIT;
}
static void
release_storage_unit(int cpu, uint32_t kdsp_raw)
{
	int s;
	struct kd_storage *kdsp_actual;
	struct kd_bufinfo *kdbp;
	union kds_ptr kdsp;

	kdsp.raw = kdsp_raw;

	s = ml_set_interrupts_enabled(FALSE);
	lck_spin_lock_grp(kds_spin_lock, kdebug_lck_grp);

	kdbp = &kdbip[cpu];

	if (kdsp.raw == kdbp->kd_list_head.raw) {
		/*
		 * it's possible for the storage unit pointed to
		 * by kdsp to have already been stolen... so
		 * check to see if it's still the head of the list
		 * now that we're behind the lock that protects
		 * adding and removing from the queue...
		 * since we only ever release and steal units from
		 * that position, if it's no longer the head
		 * we have nothing to do in this context
		 */
		kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
		kdbp->kd_list_head = kdsp_actual->kds_next;

		kdsp_actual->kds_next = kd_ctrl_page.kds_free_list;
		kd_ctrl_page.kds_free_list = kdsp;

		kd_ctrl_page.kds_inuse_count--;
	}
	lck_spin_unlock(kds_spin_lock);
	ml_set_interrupts_enabled(s);
}
861 allocate_storage_unit(int cpu
)
864 struct kd_storage
*kdsp_actual
, *kdsp_next_actual
;
865 struct kd_bufinfo
*kdbp
, *kdbp_vict
, *kdbp_try
;
866 uint64_t oldest_ts
, ts
;
867 boolean_t retval
= TRUE
;
870 s
= ml_set_interrupts_enabled(FALSE
);
871 lck_spin_lock_grp(kds_spin_lock
, kdebug_lck_grp
);
875 /* If someone beat us to the allocate, return success */
876 if (kdbp
->kd_list_tail
.raw
!= KDS_PTR_NULL
) {
877 kdsp_actual
= POINTER_FROM_KDS_PTR(kdbp
->kd_list_tail
);
879 if (kdsp_actual
->kds_bufindx
< EVENTS_PER_STORAGE_UNIT
) {
884 if ((kdsp
= kd_ctrl_page
.kds_free_list
).raw
!= KDS_PTR_NULL
) {
886 * If there's a free page, grab it from the free list.
888 kdsp_actual
= POINTER_FROM_KDS_PTR(kdsp
);
889 kd_ctrl_page
.kds_free_list
= kdsp_actual
->kds_next
;
891 kd_ctrl_page
.kds_inuse_count
++;
894 * Otherwise, we're going to lose events and repurpose the oldest
895 * storage unit we can find.
897 if (kd_ctrl_page
.kdebug_flags
& KDBG_NOWRAP
) {
898 kd_ctrl_page
.kdebug_slowcheck
|= SLOW_NOLOG
;
899 kdbp
->kd_lostevents
= TRUE
;
904 oldest_ts
= UINT64_MAX
;
906 for (kdbp_try
= &kdbip
[0]; kdbp_try
< &kdbip
[kd_ctrl_page
.kdebug_cpus
]; kdbp_try
++) {
907 if (kdbp_try
->kd_list_head
.raw
== KDS_PTR_NULL
) {
909 * no storage unit to steal
914 kdsp_actual
= POINTER_FROM_KDS_PTR(kdbp_try
->kd_list_head
);
916 if (kdsp_actual
->kds_bufcnt
< EVENTS_PER_STORAGE_UNIT
) {
918 * make sure we don't steal the storage unit
919 * being actively recorded to... need to
920 * move on because we don't want an out-of-order
921 * set of events showing up later
927 * When wrapping, steal the storage unit with the
928 * earliest timestamp on its last event, instead of the
929 * earliest timestamp on the first event. This allows a
930 * storage unit with more recent events to be preserved,
931 * even if the storage unit contains events that are
932 * older than those found in other CPUs.
934 ts
= kdbg_get_timestamp(&kdsp_actual
->kds_records
[EVENTS_PER_STORAGE_UNIT
- 1]);
935 if (ts
< oldest_ts
) {
937 kdbp_vict
= kdbp_try
;
940 if (kdbp_vict
== NULL
) {
942 kd_ctrl_page
.enabled
= 0;
943 commpage_update_kdebug_state();
947 kdsp
= kdbp_vict
->kd_list_head
;
948 kdsp_actual
= POINTER_FROM_KDS_PTR(kdsp
);
949 kdbp_vict
->kd_list_head
= kdsp_actual
->kds_next
;
951 if (kdbp_vict
->kd_list_head
.raw
!= KDS_PTR_NULL
) {
952 kdsp_next_actual
= POINTER_FROM_KDS_PTR(kdbp_vict
->kd_list_head
);
953 kdsp_next_actual
->kds_lostevents
= TRUE
;
955 kdbp_vict
->kd_lostevents
= TRUE
;
958 if (kd_ctrl_page
.oldest_time
< oldest_ts
) {
959 kd_ctrl_page
.oldest_time
= oldest_ts
;
961 kd_ctrl_page
.kdebug_flags
|= KDBG_WRAPPED
;
963 kdsp_actual
->kds_timestamp
= kdbg_timestamp();
964 kdsp_actual
->kds_next
.raw
= KDS_PTR_NULL
;
965 kdsp_actual
->kds_bufcnt
= 0;
966 kdsp_actual
->kds_readlast
= 0;
968 kdsp_actual
->kds_lostevents
= kdbp
->kd_lostevents
;
969 kdbp
->kd_lostevents
= FALSE
;
970 kdsp_actual
->kds_bufindx
= 0;
972 if (kdbp
->kd_list_head
.raw
== KDS_PTR_NULL
) {
973 kdbp
->kd_list_head
= kdsp
;
975 POINTER_FROM_KDS_PTR(kdbp
->kd_list_tail
)->kds_next
= kdsp
;
977 kdbp
->kd_list_tail
= kdsp
;
979 lck_spin_unlock(kds_spin_lock
);
980 ml_set_interrupts_enabled(s
);
int
kernel_debug_register_callback(kd_callback_t callback)
{
	kd_iop_t* iop;
	if (kmem_alloc(kernel_map, (vm_offset_t *)&iop, sizeof(kd_iop_t), VM_KERN_MEMORY_DIAG) == KERN_SUCCESS) {
		memcpy(&iop->callback, &callback, sizeof(kd_callback_t));

		/*
		 * <rdar://problem/13351477> Some IOP clients are not providing a name.
		 */
		{
			boolean_t is_valid_name = FALSE;
			for (uint32_t length = 0; length < sizeof(callback.iop_name); ++length) {
				/* This is roughly isprintable(c) */
				if (callback.iop_name[length] > 0x20 && callback.iop_name[length] < 0x7F) {
					continue;
				}
				if (callback.iop_name[length] == 0) {
					if (length) {
						is_valid_name = TRUE;
					}
					break;
				}
			}

			if (!is_valid_name) {
				strlcpy(iop->callback.iop_name, "IOP-???", sizeof(iop->callback.iop_name));
			}
		}

		iop->last_timestamp = 0;

		do {
			/*
			 * We use two pieces of state, the old list head
			 * pointer, and the value of old_list_head->cpu_id.
			 * If we read kd_iops more than once, it can change
			 * between reads.
			 *
			 * TLDR; Must not read kd_iops more than once per loop.
			 */
			iop->next = kd_iops;
			iop->cpu_id = iop->next ? (iop->next->cpu_id + 1) : kdbg_cpu_count(FALSE);

			/*
			 * Header says OSCompareAndSwapPtr has a memory barrier
			 */
		} while (!OSCompareAndSwapPtr(iop->next, iop, (void* volatile*)&kd_iops));

		return iop->cpu_id;
	}

	return 0;
}
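/*
 * Usage sketch (added commentary, not part of the original source): an IOP
 * driver registers once with a callback and a printable name, e.g.
 *
 *     kd_callback_t cb = {
 *         .func     = my_iop_callback,    // hypothetical driver function
 *         .context  = my_driver_state,    // hypothetical context pointer
 *         .iop_name = "MYIOP",
 *     };
 *     kernel_debug_register_callback(cb);
 *
 * Registration is permanent; the callback is later invoked with
 * KD_CALLBACK_KDEBUG_ENABLED/DISABLED, KD_CALLBACK_SYNC_FLUSH, and
 * KD_CALLBACK_TYPEFILTER_CHANGED notifications.
 */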
1056 struct kd_bufinfo
*kdbp
;
1057 struct kd_storage
*kdsp_actual
;
1058 union kds_ptr kds_raw
;
1060 if (kd_ctrl_page
.kdebug_slowcheck
) {
1061 if ((kd_ctrl_page
.kdebug_slowcheck
& SLOW_NOLOG
) || !(kdebug_enable
& (KDEBUG_ENABLE_TRACE
| KDEBUG_ENABLE_PPT
))) {
1065 if (kd_ctrl_page
.kdebug_flags
& KDBG_TYPEFILTER_CHECK
) {
1066 if (typefilter_is_debugid_allowed(kdbg_typefilter
, debugid
)) {
1070 } else if (kd_ctrl_page
.kdebug_flags
& KDBG_RANGECHECK
) {
1071 if (debugid
>= kdlog_beg
&& debugid
<= kdlog_end
) {
1075 } else if (kd_ctrl_page
.kdebug_flags
& KDBG_VALCHECK
) {
1076 if ((debugid
& KDBG_EVENTID_MASK
) != kdlog_value1
&&
1077 (debugid
& KDBG_EVENTID_MASK
) != kdlog_value2
&&
1078 (debugid
& KDBG_EVENTID_MASK
) != kdlog_value3
&&
1079 (debugid
& KDBG_EVENTID_MASK
) != kdlog_value4
) {
1086 if (timestamp
< kd_ctrl_page
.oldest_time
) {
1092 * When start_kern_tracing is called by the kernel to trace very
1093 * early kernel events, it saves data to a secondary buffer until
1094 * it is possible to initialize ktrace, and then dumps the events
1095 * into the ktrace buffer using this method. In this case, iops will
1096 * be NULL, and the coreid will be zero. It is not possible to have
1097 * a valid IOP coreid of zero, so pass if both iops is NULL and coreid
1100 assert(kdbg_iop_list_contains_cpu_id(kd_ctrl_page
.kdebug_iops
, coreid
) || (kd_ctrl_page
.kdebug_iops
== NULL
&& coreid
== 0));
1103 disable_preemption();
1105 if (kd_ctrl_page
.enabled
== 0) {
1109 kdbp
= &kdbip
[coreid
];
1110 timestamp
&= KDBG_TIMESTAMP_MASK
;
1112 #if KDEBUG_MOJO_TRACE
1113 if (kdebug_enable
& KDEBUG_ENABLE_SERIAL
) {
1114 kdebug_serial_print(coreid
, debugid
, timestamp
,
1115 arg1
, arg2
, arg3
, arg4
, threadid
);
1120 kds_raw
= kdbp
->kd_list_tail
;
1122 if (kds_raw
.raw
!= KDS_PTR_NULL
) {
1123 kdsp_actual
= POINTER_FROM_KDS_PTR(kds_raw
);
1124 bindx
= kdsp_actual
->kds_bufindx
;
1127 bindx
= EVENTS_PER_STORAGE_UNIT
;
1130 if (kdsp_actual
== NULL
|| bindx
>= EVENTS_PER_STORAGE_UNIT
) {
1131 if (allocate_storage_unit(coreid
) == FALSE
) {
1133 * this can only happen if wrapping
1140 if (!OSCompareAndSwap(bindx
, bindx
+ 1, &kdsp_actual
->kds_bufindx
)) {
1144 // IOP entries can be allocated before xnu allocates and inits the buffer
1145 if (timestamp
< kdsp_actual
->kds_timestamp
) {
1146 kdsp_actual
->kds_timestamp
= timestamp
;
1149 kd
= &kdsp_actual
->kds_records
[bindx
];
1151 kd
->debugid
= debugid
;
1156 kd
->arg5
= threadid
;
1158 kdbg_set_timestamp_and_cpu(kd
, timestamp
, coreid
);
1160 OSAddAtomic(1, &kdsp_actual
->kds_bufcnt
);
1162 enable_preemption();
1164 if ((kds_waiter
&& kd_ctrl_page
.kds_inuse_count
>= n_storage_threshold
)) {
/*
 * Check if the given debug ID is allowed to be traced on the current process.
 *
 * Returns true if allowed and false otherwise.
 */
static inline bool
kdebug_debugid_procfilt_allowed(uint32_t debugid)
{
	uint32_t procfilt_flags = kd_ctrl_page.kdebug_flags &
	    (KDBG_PIDCHECK | KDBG_PIDEXCLUDE);

	if (!procfilt_flags) {
		return true;
	}

	/*
	 * DBG_TRACE and MACH_SCHED tracepoints ignore the process filter.
	 */
	if ((debugid & 0xffff0000) == MACHDBG_CODE(DBG_MACH_SCHED, 0) ||
	    (debugid >> 24 == DBG_TRACE)) {
		return true;
	}

	struct proc *curproc = current_proc();
	/*
	 * If the process is missing (early in boot), allow it.
	 */
	if (!curproc) {
		return true;
	}

	if (procfilt_flags & KDBG_PIDCHECK) {
		/*
		 * Allow only processes marked with the kdebug bit.
		 */
		return curproc->p_kdebug;
	} else if (procfilt_flags & KDBG_PIDEXCLUDE) {
		/*
		 * Exclude any process marked with the kdebug bit.
		 */
		return !curproc->p_kdebug;
	} else {
		panic("kdebug: invalid procfilt flags %x", kd_ctrl_page.kdebug_flags);
		__builtin_unreachable();
	}
}
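/*
 * Added note: the two modes are mutually exclusive by construction --
 * kdbg_setpid() sets KDBG_PIDCHECK and clears KDBG_PIDEXCLUDE, while
 * kdbg_setpidex() does the opposite -- so exactly one branch above can be
 * taken whenever a process filter is active.
 */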
1217 kernel_debug_internal(
1230 struct kd_bufinfo
*kdbp
;
1231 struct kd_storage
*kdsp_actual
;
1232 union kds_ptr kds_raw
;
1233 bool only_filter
= flags
& KDBG_FLAG_FILTERED
;
1234 bool observe_procfilt
= !(flags
& KDBG_FLAG_NOPROCFILT
);
1236 if (kd_ctrl_page
.kdebug_slowcheck
) {
1237 if ((kd_ctrl_page
.kdebug_slowcheck
& SLOW_NOLOG
) ||
1238 !(kdebug_enable
& (KDEBUG_ENABLE_TRACE
| KDEBUG_ENABLE_PPT
))) {
1242 if (!ml_at_interrupt_context() && observe_procfilt
&&
1243 !kdebug_debugid_procfilt_allowed(debugid
)) {
1247 if (kd_ctrl_page
.kdebug_flags
& KDBG_TYPEFILTER_CHECK
) {
1248 if (typefilter_is_debugid_allowed(kdbg_typefilter
, debugid
)) {
1253 } else if (only_filter
) {
1255 } else if (kd_ctrl_page
.kdebug_flags
& KDBG_RANGECHECK
) {
1256 /* Always record trace system info */
1257 if (KDBG_EXTRACT_CLASS(debugid
) == DBG_TRACE
) {
1261 if (debugid
< kdlog_beg
|| debugid
> kdlog_end
) {
1264 } else if (kd_ctrl_page
.kdebug_flags
& KDBG_VALCHECK
) {
1265 /* Always record trace system info */
1266 if (KDBG_EXTRACT_CLASS(debugid
) == DBG_TRACE
) {
1270 if ((debugid
& KDBG_EVENTID_MASK
) != kdlog_value1
&&
1271 (debugid
& KDBG_EVENTID_MASK
) != kdlog_value2
&&
1272 (debugid
& KDBG_EVENTID_MASK
) != kdlog_value3
&&
1273 (debugid
& KDBG_EVENTID_MASK
) != kdlog_value4
) {
1277 } else if (only_filter
) {
1282 disable_preemption();
1284 if (kd_ctrl_page
.enabled
== 0) {
1291 #if KDEBUG_MOJO_TRACE
1292 if (kdebug_enable
& KDEBUG_ENABLE_SERIAL
) {
1293 kdebug_serial_print(cpu
, debugid
,
1294 kdbg_timestamp() & KDBG_TIMESTAMP_MASK
,
1295 arg1
, arg2
, arg3
, arg4
, arg5
);
1300 kds_raw
= kdbp
->kd_list_tail
;
1302 if (kds_raw
.raw
!= KDS_PTR_NULL
) {
1303 kdsp_actual
= POINTER_FROM_KDS_PTR(kds_raw
);
1304 bindx
= kdsp_actual
->kds_bufindx
;
1307 bindx
= EVENTS_PER_STORAGE_UNIT
;
1310 if (kdsp_actual
== NULL
|| bindx
>= EVENTS_PER_STORAGE_UNIT
) {
1311 if (allocate_storage_unit(cpu
) == FALSE
) {
1313 * this can only happen if wrapping
1321 now
= kdbg_timestamp() & KDBG_TIMESTAMP_MASK
;
1323 if (!OSCompareAndSwap(bindx
, bindx
+ 1, &kdsp_actual
->kds_bufindx
)) {
1327 kd
= &kdsp_actual
->kds_records
[bindx
];
1329 kd
->debugid
= debugid
;
1336 kdbg_set_timestamp_and_cpu(kd
, now
, cpu
);
1338 OSAddAtomic(1, &kdsp_actual
->kds_bufcnt
);
1341 kperf_kdebug_callback(debugid
, __builtin_frame_address(0));
1344 enable_preemption();
1346 if (kds_waiter
&& kd_ctrl_page
.kds_inuse_count
>= n_storage_threshold
) {
1350 etype
= debugid
& KDBG_EVENTID_MASK
;
1351 stype
= debugid
& KDBG_CSC_MASK
;
1353 if (etype
== INTERRUPT
|| etype
== MACH_vmfault
||
1354 stype
== BSC_SysCall
|| stype
== MACH_SysCall
) {
1367 __unused
uintptr_t arg5
)
1369 kernel_debug_internal(debugid
, arg1
, arg2
, arg3
, arg4
,
1370 (uintptr_t)thread_tid(current_thread()), 0);
1382 kernel_debug_internal(debugid
, arg1
, arg2
, arg3
, arg4
, arg5
, 0);
1394 kernel_debug_internal(debugid
, arg1
, arg2
, arg3
, arg4
,
1395 (uintptr_t)thread_tid(current_thread()), flags
);
1399 kernel_debug_filtered(
1406 kernel_debug_flags(debugid
, arg1
, arg2
, arg3
, arg4
, KDBG_FLAG_FILTERED
);
1410 kernel_debug_string_early(const char *message
)
1412 uintptr_t arg
[4] = {0, 0, 0, 0};
1414 /* Stuff the message string in the args and log it. */
1415 strncpy((char *)arg
, message
, MIN(sizeof(arg
), strlen(message
)));
1418 arg
[0], arg
[1], arg
[2], arg
[3]);
1421 #define SIMPLE_STR_LEN (64)
1422 static_assert(SIMPLE_STR_LEN
% sizeof(uintptr_t) == 0);
1425 kernel_debug_string_simple(uint32_t eventid
, const char *str
)
1427 if (!kdebug_enable
) {
1431 /* array of uintptr_ts simplifies emitting the string as arguments */
1432 uintptr_t str_buf
[(SIMPLE_STR_LEN
/ sizeof(uintptr_t)) + 1] = { 0 };
1433 size_t len
= strlcpy((char *)str_buf
, str
, SIMPLE_STR_LEN
+ 1);
1435 uintptr_t thread_id
= (uintptr_t)thread_tid(current_thread());
1436 uint32_t debugid
= eventid
| DBG_FUNC_START
;
1438 /* string can fit in a single tracepoint */
1439 if (len
<= (4 * sizeof(uintptr_t))) {
1440 debugid
|= DBG_FUNC_END
;
1443 kernel_debug_internal(debugid
, str_buf
[0],
1446 str_buf
[3], thread_id
, 0);
1448 debugid
&= KDBG_EVENTID_MASK
;
1450 size_t written
= 4 * sizeof(uintptr_t);
1452 for (; written
< len
; i
+= 4, written
+= 4 * sizeof(uintptr_t)) {
1453 /* if this is the last tracepoint to be emitted */
1454 if ((written
+ (4 * sizeof(uintptr_t))) >= len
) {
1455 debugid
|= DBG_FUNC_END
;
1457 kernel_debug_internal(debugid
, str_buf
[i
],
1460 str_buf
[i
+ 3], thread_id
, 0);
1464 extern int master_cpu
; /* MACH_KERNEL_PRIVATE */
1466 * Used prior to start_kern_tracing() being called.
1467 * Log temporarily into a static buffer.
1477 #if defined(__x86_64__)
1478 extern int early_boot
;
1480 * Note that "early" isn't early enough in some cases where
1481 * we're invoked before gsbase is set on x86, hence the
1482 * check of "early_boot".
1489 /* If early tracing is over, use the normal path. */
1490 if (kd_early_done
) {
1491 KERNEL_DEBUG_CONSTANT(debugid
, arg1
, arg2
, arg3
, arg4
, 0);
1495 /* Do nothing if the buffer is full or we're not on the boot cpu. */
1496 kd_early_overflow
= kd_early_index
>= KD_EARLY_BUFFER_NBUFS
;
1497 if (kd_early_overflow
|| cpu_number() != master_cpu
) {
1501 kd_early_buffer
[kd_early_index
].debugid
= debugid
;
1502 kd_early_buffer
[kd_early_index
].timestamp
= mach_absolute_time();
1503 kd_early_buffer
[kd_early_index
].arg1
= arg1
;
1504 kd_early_buffer
[kd_early_index
].arg2
= arg2
;
1505 kd_early_buffer
[kd_early_index
].arg3
= arg3
;
1506 kd_early_buffer
[kd_early_index
].arg4
= arg4
;
1507 kd_early_buffer
[kd_early_index
].arg5
= 0;
1512 * Transfer the contents of the temporary buffer into the trace buffers.
1513 * Precede that by logging the rebase time (offset) - the TSC-based time (in ns)
1514 * when mach_absolute_time is set to 0.
1517 kernel_debug_early_end(void)
1519 if (cpu_number() != master_cpu
) {
1520 panic("kernel_debug_early_end() not call on boot processor");
1523 /* reset the current oldest time to allow early events */
1524 kd_ctrl_page
.oldest_time
= 0;
1526 #if !CONFIG_EMBEDDED
1527 /* Fake sentinel marking the start of kernel time relative to TSC */
1528 kernel_debug_enter(0,
1531 (uint32_t)(tsc_rebase_abs_time
>> 32),
1532 (uint32_t)tsc_rebase_abs_time
,
1537 for (unsigned int i
= 0; i
< kd_early_index
; i
++) {
1538 kernel_debug_enter(0,
1539 kd_early_buffer
[i
].debugid
,
1540 kd_early_buffer
[i
].timestamp
,
1541 kd_early_buffer
[i
].arg1
,
1542 kd_early_buffer
[i
].arg2
,
1543 kd_early_buffer
[i
].arg3
,
1544 kd_early_buffer
[i
].arg4
,
1548 /* Cut events-lost event on overflow */
1549 if (kd_early_overflow
) {
1550 KDBG_RELEASE(TRACE_LOST_EVENTS
, 1);
1553 kd_early_done
= true;
1555 /* This trace marks the start of kernel tracing */
1556 kernel_debug_string_early("early trace done");
void
kernel_debug_disable(void)
{
	if (kdebug_enable) {
		kdbg_set_tracing_enabled(FALSE, 0);
	}
}

/*
 * Returns non-zero if debugid is in a reserved class.
 */
static int
kdebug_validate_debugid(uint32_t debugid)
{
	uint8_t debugid_class;

	debugid_class = KDBG_EXTRACT_CLASS(debugid);
	switch (debugid_class) {
	case DBG_TRACE:
		return EPERM;
	}

	return 0;
}
1585 * Support syscall SYS_kdebug_typefilter.
1588 kdebug_typefilter(__unused
struct proc
* p
,
1589 struct kdebug_typefilter_args
* uap
,
1590 __unused
int *retval
)
1592 int ret
= KERN_SUCCESS
;
1594 if (uap
->addr
== USER_ADDR_NULL
||
1595 uap
->size
== USER_ADDR_NULL
) {
1600 * The atomic load is to close a race window with setting the typefilter
1601 * and memory entry values. A description follows:
1605 * Allocate Typefilter
1606 * Allocate MemoryEntry
1607 * Write Global MemoryEntry Ptr
1608 * Atomic Store (Release) Global Typefilter Ptr
1610 * Thread 2 (reader, AKA us)
1612 * if ((Atomic Load (Acquire) Global Typefilter Ptr) == NULL)
1615 * Without the atomic store, it isn't guaranteed that the write of
1616 * Global MemoryEntry Ptr is visible before we can see the write of
1617 * Global Typefilter Ptr.
1619 * Without the atomic load, it isn't guaranteed that the loads of
1620 * Global MemoryEntry Ptr aren't speculated.
1622 * The global pointers transition from NULL -> valid once and only once,
1623 * and never change after becoming valid. This means that having passed
1624 * the first atomic load test of Global Typefilter Ptr, this function
1625 * can then safely use the remaining global state without atomic checks.
1627 if (!__c11_atomic_load((_Atomic typefilter_t
*)&kdbg_typefilter
, memory_order_acquire
)) {
1631 assert(kdbg_typefilter_memory_entry
);
1633 mach_vm_offset_t user_addr
= 0;
1634 vm_map_t user_map
= current_map();
1636 ret
= mach_to_bsd_errno(
1637 mach_vm_map_kernel(user_map
, // target map
1638 &user_addr
, // [in, out] target address
1639 TYPEFILTER_ALLOC_SIZE
, // initial size
1640 0, // mask (alignment?)
1641 VM_FLAGS_ANYWHERE
, // flags
1642 VM_MAP_KERNEL_FLAGS_NONE
,
1643 VM_KERN_MEMORY_NONE
,
1644 kdbg_typefilter_memory_entry
, // port (memory entry!)
1645 0, // offset (in memory entry)
1646 FALSE
, // should copy
1647 VM_PROT_READ
, // cur_prot
1648 VM_PROT_READ
, // max_prot
1649 VM_INHERIT_SHARE
)); // inherit behavior on fork
1651 if (ret
== KERN_SUCCESS
) {
1652 vm_size_t user_ptr_size
= vm_map_is_64bit(user_map
) ? 8 : 4;
1653 ret
= copyout(CAST_DOWN(void *, &user_addr
), uap
->addr
, user_ptr_size
);
1655 if (ret
!= KERN_SUCCESS
) {
1656 mach_vm_deallocate(user_map
, user_addr
, TYPEFILTER_ALLOC_SIZE
);
/*
 * Support syscall SYS_kdebug_trace. U64->K32 args may get truncated in kdebug_trace64.
 */
int
kdebug_trace(struct proc *p, struct kdebug_trace_args *uap, int32_t *retval)
{
	struct kdebug_trace64_args uap64;

	uap64.code = uap->code;
	uap64.arg1 = uap->arg1;
	uap64.arg2 = uap->arg2;
	uap64.arg3 = uap->arg3;
	uap64.arg4 = uap->arg4;

	return kdebug_trace64(p, &uap64, retval);
}

/*
 * Support syscall SYS_kdebug_trace64. 64-bit args on K32 will get truncated
 * to fit in the 32-bit record format.
 *
 * It is intentional that error conditions are not checked until kdebug is
 * enabled. This is to match the userspace wrapper behavior, which is optimizing
 * for non-error case performance.
 */
int
kdebug_trace64(__unused struct proc *p, struct kdebug_trace64_args *uap, __unused int32_t *retval)
{
	int err;

	if (__probable(kdebug_enable == 0)) {
		return 0;
	}

	if ((err = kdebug_validate_debugid(uap->code)) != 0) {
		return err;
	}

	kernel_debug_internal(uap->code, (uintptr_t)uap->arg1,
	    (uintptr_t)uap->arg2, (uintptr_t)uap->arg3, (uintptr_t)uap->arg4,
	    (uintptr_t)thread_tid(current_thread()), 0);

	return 0;
}
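/*
 * Added note (hedged): the Libsyscall wrapper for kdebug_trace(2) is expected
 * to consult the commpage kdebug state and skip the syscall entirely while
 * tracing is disabled, which is why the validation above deliberately comes
 * after the kdebug_enable fast-path test.
 */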
1709 * Adding enough padding to contain a full tracepoint for the last
1710 * portion of the string greatly simplifies the logic of splitting the
1711 * string between tracepoints. Full tracepoints can be generated using
1712 * the buffer itself, without having to manually add zeros to pad the
1716 /* 2 string args in first tracepoint and 9 string data tracepoints */
1717 #define STR_BUF_ARGS (2 + (9 * 4))
1718 /* times the size of each arg on K64 */
1719 #define MAX_STR_LEN (STR_BUF_ARGS * sizeof(uint64_t))
1720 /* on K32, ending straddles a tracepoint, so reserve blanks */
1721 #define STR_BUF_SIZE (MAX_STR_LEN + (2 * sizeof(uint32_t)))
1724 * This function does no error checking and assumes that it is called with
1725 * the correct arguments, including that the buffer pointed to by str is at
1726 * least STR_BUF_SIZE bytes. However, str must be aligned to word-size and
1727 * be NUL-terminated. In cases where a string can fit evenly into a final
1728 * tracepoint without its NUL-terminator, this function will not end those
1729 * strings with a NUL in trace. It's up to clients to look at the function
1730 * qualifier for DBG_FUNC_END in this case, to end the string.
1733 kernel_debug_string_internal(uint32_t debugid
, uint64_t str_id
, void *vstr
,
1736 /* str must be word-aligned */
1737 uintptr_t *str
= vstr
;
1739 uintptr_t thread_id
;
1741 uint32_t trace_debugid
= TRACEDBG_CODE(DBG_TRACE_STRING
,
1742 TRACE_STRING_GLOBAL
);
1744 thread_id
= (uintptr_t)thread_tid(current_thread());
1746 /* if the ID is being invalidated, just emit that */
1747 if (str_id
!= 0 && str_len
== 0) {
1748 kernel_debug_internal(trace_debugid
| DBG_FUNC_START
| DBG_FUNC_END
,
1749 (uintptr_t)debugid
, (uintptr_t)str_id
, 0, 0, thread_id
, 0);
1753 /* generate an ID, if necessary */
1755 str_id
= OSIncrementAtomic64((SInt64
*)&g_curr_str_id
);
1756 str_id
= (str_id
& STR_ID_MASK
) | g_str_id_signature
;
1759 trace_debugid
|= DBG_FUNC_START
;
1760 /* string can fit in a single tracepoint */
1761 if (str_len
<= (2 * sizeof(uintptr_t))) {
1762 trace_debugid
|= DBG_FUNC_END
;
1765 kernel_debug_internal(trace_debugid
, (uintptr_t)debugid
, (uintptr_t)str_id
,
1766 str
[0], str
[1], thread_id
, 0);
1768 trace_debugid
&= KDBG_EVENTID_MASK
;
1770 written
+= 2 * sizeof(uintptr_t);
1772 for (; written
< str_len
; i
+= 4, written
+= 4 * sizeof(uintptr_t)) {
1773 if ((written
+ (4 * sizeof(uintptr_t))) >= str_len
) {
1774 trace_debugid
|= DBG_FUNC_END
;
1776 kernel_debug_internal(trace_debugid
, str
[i
],
1779 str
[i
+ 3], thread_id
, 0);
1786 * Returns true if the current process can emit events, and false otherwise.
1787 * Trace system and scheduling events circumvent this check, as do events
1788 * emitted in interrupt context.
1791 kdebug_current_proc_enabled(uint32_t debugid
)
1793 /* can't determine current process in interrupt context */
1794 if (ml_at_interrupt_context()) {
1798 /* always emit trace system and scheduling events */
1799 if ((KDBG_EXTRACT_CLASS(debugid
) == DBG_TRACE
||
1800 (debugid
& KDBG_CSC_MASK
) == MACHDBG_CODE(DBG_MACH_SCHED
, 0))) {
1804 if (kd_ctrl_page
.kdebug_flags
& KDBG_PIDCHECK
) {
1805 proc_t cur_proc
= current_proc();
1807 /* only the process with the kdebug bit set is allowed */
1808 if (cur_proc
&& !(cur_proc
->p_kdebug
)) {
1811 } else if (kd_ctrl_page
.kdebug_flags
& KDBG_PIDEXCLUDE
) {
1812 proc_t cur_proc
= current_proc();
1814 /* every process except the one with the kdebug bit set is allowed */
1815 if (cur_proc
&& cur_proc
->p_kdebug
) {
1824 kdebug_debugid_enabled(uint32_t debugid
)
1826 /* if no filtering is enabled */
1827 if (!kd_ctrl_page
.kdebug_slowcheck
) {
1831 return kdebug_debugid_explicitly_enabled(debugid
);
1835 kdebug_debugid_explicitly_enabled(uint32_t debugid
)
1837 if (kd_ctrl_page
.kdebug_flags
& KDBG_TYPEFILTER_CHECK
) {
1838 return typefilter_is_debugid_allowed(kdbg_typefilter
, debugid
);
1839 } else if (KDBG_EXTRACT_CLASS(debugid
) == DBG_TRACE
) {
1841 } else if (kd_ctrl_page
.kdebug_flags
& KDBG_RANGECHECK
) {
1842 if (debugid
< kdlog_beg
|| debugid
> kdlog_end
) {
1845 } else if (kd_ctrl_page
.kdebug_flags
& KDBG_VALCHECK
) {
1846 if ((debugid
& KDBG_EVENTID_MASK
) != kdlog_value1
&&
1847 (debugid
& KDBG_EVENTID_MASK
) != kdlog_value2
&&
1848 (debugid
& KDBG_EVENTID_MASK
) != kdlog_value3
&&
1849 (debugid
& KDBG_EVENTID_MASK
) != kdlog_value4
) {
1858 * Returns 0 if a string can be traced with these arguments. Returns errno
1859 * value if error occurred.
1862 kdebug_check_trace_string(uint32_t debugid
, uint64_t str_id
)
1864 /* if there are function qualifiers on the debugid */
1865 if (debugid
& ~KDBG_EVENTID_MASK
) {
1869 if (kdebug_validate_debugid(debugid
)) {
1873 if (str_id
!= 0 && (str_id
& STR_ID_SIG_MASK
) != g_str_id_signature
) {
1881 * Implementation of KPI kernel_debug_string.
1884 kernel_debug_string(uint32_t debugid
, uint64_t *str_id
, const char *str
)
1886 /* arguments to tracepoints must be word-aligned */
1887 __attribute__((aligned(sizeof(uintptr_t)))) char str_buf
[STR_BUF_SIZE
];
1888 static_assert(sizeof(str_buf
) > MAX_STR_LEN
);
1889 vm_size_t len_copied
;
1894 if (__probable(kdebug_enable
== 0)) {
1898 if (!kdebug_current_proc_enabled(debugid
)) {
1902 if (!kdebug_debugid_enabled(debugid
)) {
1906 if ((err
= kdebug_check_trace_string(debugid
, *str_id
)) != 0) {
1915 *str_id
= kernel_debug_string_internal(debugid
, *str_id
, NULL
, 0);
1919 memset(str_buf
, 0, sizeof(str_buf
));
1920 len_copied
= strlcpy(str_buf
, str
, MAX_STR_LEN
+ 1);
1921 *str_id
= kernel_debug_string_internal(debugid
, *str_id
, str_buf
,
1927 * Support syscall kdebug_trace_string.
1930 kdebug_trace_string(__unused
struct proc
*p
,
1931 struct kdebug_trace_string_args
*uap
,
1934 __attribute__((aligned(sizeof(uintptr_t)))) char str_buf
[STR_BUF_SIZE
];
1935 static_assert(sizeof(str_buf
) > MAX_STR_LEN
);
1939 if (__probable(kdebug_enable
== 0)) {
1943 if (!kdebug_current_proc_enabled(uap
->debugid
)) {
1947 if (!kdebug_debugid_enabled(uap
->debugid
)) {
1951 if ((err
= kdebug_check_trace_string(uap
->debugid
, uap
->str_id
)) != 0) {
1955 if (uap
->str
== USER_ADDR_NULL
) {
1956 if (uap
->str_id
== 0) {
1960 *retval
= kernel_debug_string_internal(uap
->debugid
, uap
->str_id
,
1965 memset(str_buf
, 0, sizeof(str_buf
));
1966 err
= copyinstr(uap
->str
, str_buf
, MAX_STR_LEN
+ 1, &len_copied
);
1968 /* it's alright to truncate the string, so allow ENAMETOOLONG */
1969 if (err
== ENAMETOOLONG
) {
1970 str_buf
[MAX_STR_LEN
] = '\0';
1975 if (len_copied
<= 1) {
1979 /* convert back to a length */
1982 *retval
= kernel_debug_string_internal(uap
->debugid
, uap
->str_id
, str_buf
,
1988 kdbg_lock_init(void)
1990 static lck_grp_attr_t
*kdebug_lck_grp_attr
= NULL
;
1991 static lck_attr_t
*kdebug_lck_attr
= NULL
;
1993 if (kd_ctrl_page
.kdebug_flags
& KDBG_LOCKINIT
) {
1997 assert(kdebug_lck_grp_attr
== NULL
);
1998 kdebug_lck_grp_attr
= lck_grp_attr_alloc_init();
1999 kdebug_lck_grp
= lck_grp_alloc_init("kdebug", kdebug_lck_grp_attr
);
2000 kdebug_lck_attr
= lck_attr_alloc_init();
2002 kds_spin_lock
= lck_spin_alloc_init(kdebug_lck_grp
, kdebug_lck_attr
);
2003 kdw_spin_lock
= lck_spin_alloc_init(kdebug_lck_grp
, kdebug_lck_attr
);
2005 kd_ctrl_page
.kdebug_flags
|= KDBG_LOCKINIT
;
2009 kdbg_bootstrap(boolean_t early_trace
)
2011 kd_ctrl_page
.kdebug_flags
&= ~KDBG_WRAPPED
;
2013 return create_buffers(early_trace
);
2017 kdbg_reinit(boolean_t early_trace
)
2022 * Disable trace collecting
2023 * First make sure we're not in
2024 * the middle of cutting a trace
2026 kernel_debug_disable();
2029 * make sure the SLOW_NOLOG is seen
2030 * by everyone that might be trying
2037 kdbg_clear_thread_map();
2038 ret
= kdbg_bootstrap(early_trace
);
2040 RAW_file_offset
= 0;
2041 RAW_file_written
= 0;
2047 kdbg_trace_data(struct proc
*proc
, long *arg_pid
, long *arg_uniqueid
)
2053 *arg_pid
= proc
->p_pid
;
2054 *arg_uniqueid
= proc
->p_uniqueid
;
2055 if ((uint64_t) *arg_uniqueid
!= proc
->p_uniqueid
) {
2063 kdbg_trace_string(struct proc
*proc
, long *arg1
, long *arg2
, long *arg3
, long *arg4
)
2077 * Collect the pathname for tracing
2079 dbg_nameptr
= proc
->p_comm
;
2080 dbg_namelen
= (int)strlen(proc
->p_comm
);
2086 if (dbg_namelen
> (int)sizeof(dbg_parms
)) {
2087 dbg_namelen
= (int)sizeof(dbg_parms
);
2090 strncpy((char *)dbg_parms
, dbg_nameptr
, dbg_namelen
);
2092 *arg1
= dbg_parms
[0];
2093 *arg2
= dbg_parms
[1];
2094 *arg3
= dbg_parms
[2];
2095 *arg4
= dbg_parms
[3];
2099 kdbg_resolve_map(thread_t th_act
, void *opaque
)
2101 kd_threadmap
*mapptr
;
2102 krt_t
*t
= (krt_t
*)opaque
;
2104 if (t
->count
< t
->maxcount
) {
2105 mapptr
= &t
->map
[t
->count
];
2106 mapptr
->thread
= (uintptr_t)thread_tid(th_act
);
2108 (void) strlcpy(mapptr
->command
, t
->atts
->task_comm
,
2109 sizeof(t
->atts
->task_comm
));
2111 * Some kernel threads have no associated pid.
2112 * We still need to mark the entry as valid.
2115 mapptr
->valid
= t
->atts
->pid
;
2126 * Writes a cpumap for the given iops_list/cpu_count to the provided buffer.
2128 * You may provide a buffer and size, or if you set the buffer to NULL, a
2129 * buffer of sufficient size will be allocated.
2131 * If you provide a buffer and it is too small, sets cpumap_size to the number
2132 * of bytes required and returns EINVAL.
2134 * On success, if you provided a buffer, cpumap_size is set to the number of
2135 * bytes written. If you did not provide a buffer, cpumap is set to the newly
2136 * allocated buffer and cpumap_size is set to the number of bytes allocated.
2138 * NOTE: It may seem redundant to pass both iops and a cpu_count.
2140 * We may be reporting data from "now", or from the "past".
2142 * The "past" data would be for kdbg_readcpumap().
2144 * If we do not pass both iops and cpu_count, and iops is NULL, this function
2145 * will need to read "now" state to get the number of cpus, which would be in
2146 * error if we were reporting "past" state.
2150 kdbg_cpumap_init_internal(kd_iop_t
* iops
, uint32_t cpu_count
, uint8_t** cpumap
, uint32_t* cpumap_size
)
2153 assert(cpumap_size
);
2155 assert(!iops
|| iops
->cpu_id
+ 1 == cpu_count
);
2157 uint32_t bytes_needed
= sizeof(kd_cpumap_header
) + cpu_count
* sizeof(kd_cpumap
);
2158 uint32_t bytes_available
= *cpumap_size
;
2159 *cpumap_size
= bytes_needed
;
2161 if (*cpumap
== NULL
) {
2162 if (kmem_alloc(kernel_map
, (vm_offset_t
*)cpumap
, (vm_size_t
)*cpumap_size
, VM_KERN_MEMORY_DIAG
) != KERN_SUCCESS
) {
2165 bzero(*cpumap
, *cpumap_size
);
2166 } else if (bytes_available
< bytes_needed
) {
2170 kd_cpumap_header
* header
= (kd_cpumap_header
*)(uintptr_t)*cpumap
;
2172 header
->version_no
= RAW_VERSION1
;
2173 header
->cpu_count
= cpu_count
;
2175 kd_cpumap
* cpus
= (kd_cpumap
*)&header
[1];
2177 int32_t index
= cpu_count
- 1;
2179 cpus
[index
].cpu_id
= iops
->cpu_id
;
2180 cpus
[index
].flags
= KDBG_CPUMAP_IS_IOP
;
2181 strlcpy(cpus
[index
].name
, iops
->callback
.iop_name
, sizeof(cpus
->name
));
2187 while (index
>= 0) {
2188 cpus
[index
].cpu_id
= index
;
2189 cpus
[index
].flags
= 0;
2190 strlcpy(cpus
[index
].name
, "AP", sizeof(cpus
->name
));
2195 return KERN_SUCCESS
;
2199 kdbg_thrmap_init(void)
2201 ktrace_assert_lock_held();
2203 if (kd_ctrl_page
.kdebug_flags
& KDBG_MAPINIT
) {
2207 kd_mapptr
= kdbg_thrmap_init_internal(0, &kd_mapsize
, &kd_mapcount
);
2210 kd_ctrl_page
.kdebug_flags
|= KDBG_MAPINIT
;
2214 static kd_threadmap
*
2215 kdbg_thrmap_init_internal(unsigned int count
, unsigned int *mapsize
, unsigned int *mapcount
)
2217 kd_threadmap
*mapptr
;
2220 int tts_count
= 0; /* number of task-to-string structures */
2221 struct tts
*tts_mapptr
;
2222 unsigned int tts_mapsize
= 0;
2225 assert(mapsize
!= NULL
);
2226 assert(mapcount
!= NULL
);
2228 *mapcount
= threads_count
;
2229 tts_count
= tasks_count
;
2232 * The proc count could change during buffer allocation,
2233 * so introduce a small fudge factor to bump up the
2234 * buffer sizes. This gives new tasks some chance of
2235 * making into the tables. Bump up by 25%.
2237 *mapcount
+= *mapcount
/ 4;
2238 tts_count
+= tts_count
/ 4;
2240 *mapsize
= *mapcount
* sizeof(kd_threadmap
);
2242 if (count
&& count
< *mapcount
) {
2246 if ((kmem_alloc(kernel_map
, &kaddr
, (vm_size_t
)*mapsize
, VM_KERN_MEMORY_DIAG
) == KERN_SUCCESS
)) {
2247 bzero((void *)kaddr
, *mapsize
);
2248 mapptr
= (kd_threadmap
*)kaddr
;
2253 tts_mapsize
= tts_count
* sizeof(struct tts
);
2255 if ((kmem_alloc(kernel_map
, &kaddr
, (vm_size_t
)tts_mapsize
, VM_KERN_MEMORY_DIAG
) == KERN_SUCCESS
)) {
2256 bzero((void *)kaddr
, tts_mapsize
);
2257 tts_mapptr
= (struct tts
*)kaddr
;
2259 kmem_free(kernel_map
, (vm_offset_t
)mapptr
, *mapsize
);
2265 * Save the proc's name and take a reference for each task associated
2266 * with a valid process.
2271 ALLPROC_FOREACH(p
) {
2272 if (i
>= tts_count
) {
2275 if (p
->p_lflag
& P_LEXIT
) {
2279 task_reference(p
->task
);
2280 tts_mapptr
[i
].task
= p
->task
;
2281 tts_mapptr
[i
].pid
= p
->p_pid
;
2282 (void)strlcpy(tts_mapptr
[i
].task_comm
, proc_best_name(p
), sizeof(tts_mapptr
[i
].task_comm
));
2291 * Initialize thread map data
2295 akrt
.maxcount
= *mapcount
;
2297 for (i
= 0; i
< tts_count
; i
++) {
2298 akrt
.atts
= &tts_mapptr
[i
];
2299 task_act_iterate_wth_args(tts_mapptr
[i
].task
, kdbg_resolve_map
, &akrt
);
2300 task_deallocate((task_t
)tts_mapptr
[i
].task
);
2302 kmem_free(kernel_map
, (vm_offset_t
)tts_mapptr
, tts_mapsize
);
2304 *mapcount
= akrt
.count
;
2313 * Clean up the trace buffer
2314 * First make sure we're not in
2315 * the middle of cutting a trace
2317 kernel_debug_disable();
2318 kdbg_disable_typefilter();
2321 * make sure the SLOW_NOLOG is seen
2322 * by everyone that might be trying
2327 /* reset kdebug state for each process */
2328 if (kd_ctrl_page
.kdebug_flags
& (KDBG_PIDCHECK
| KDBG_PIDEXCLUDE
)) {
2331 ALLPROC_FOREACH(p
) {
2337 kd_ctrl_page
.kdebug_flags
&= (unsigned int)~KDBG_CKTYPES
;
2338 kd_ctrl_page
.kdebug_flags
&= ~(KDBG_NOWRAP
| KDBG_RANGECHECK
| KDBG_VALCHECK
);
2339 kd_ctrl_page
.kdebug_flags
&= ~(KDBG_PIDCHECK
| KDBG_PIDEXCLUDE
);
2341 kd_ctrl_page
.oldest_time
= 0;
2346 /* Clean up the thread map buffer */
2347 kdbg_clear_thread_map();
2349 RAW_file_offset
= 0;
2350 RAW_file_written
= 0;
2356 ktrace_assert_lock_held();
2361 if (kdbg_typefilter
) {
2362 typefilter_reject_all(kdbg_typefilter
);
2363 typefilter_allow_class(kdbg_typefilter
, DBG_TRACE
);
2368 kdebug_free_early_buf(void)
2370 #if !CONFIG_EMBEDDED
2371 /* Must be done with the buffer, so release it back to the VM.
2372 * On embedded targets this buffer is freed when the BOOTDATA segment is freed. */
2373 ml_static_mfree((vm_offset_t
)&kd_early_buffer
, sizeof(kd_early_buffer
));
2378 kdbg_setpid(kd_regtype
*kdr
)
2384 pid
= (pid_t
)kdr
->value1
;
2385 flag
= (int)kdr
->value2
;
2388 if ((p
= proc_find(pid
)) == NULL
) {
2393 * turn on pid check for this and all pids
2395 kd_ctrl_page
.kdebug_flags
|= KDBG_PIDCHECK
;
2396 kd_ctrl_page
.kdebug_flags
&= ~KDBG_PIDEXCLUDE
;
2397 kdbg_set_flags(SLOW_CHECKS
, 0, TRUE
);
2402 * turn off pid check for this pid value
2403 * Don't turn off all pid checking though
2405 * kd_ctrl_page.kdebug_flags &= ~KDBG_PIDCHECK;
2418 /* This is for pid exclusion in the trace buffer */
2420 kdbg_setpidex(kd_regtype
*kdr
)
2426 pid
= (pid_t
)kdr
->value1
;
2427 flag
= (int)kdr
->value2
;
2430 if ((p
= proc_find(pid
)) == NULL
) {
2435 * turn on pid exclusion
2437 kd_ctrl_page
.kdebug_flags
|= KDBG_PIDEXCLUDE
;
2438 kd_ctrl_page
.kdebug_flags
&= ~KDBG_PIDCHECK
;
2439 kdbg_set_flags(SLOW_CHECKS
, 0, TRUE
);
2444 * turn off pid exclusion for this pid value
2445 * Don't turn off all pid exclusion though
2447 * kd_ctrl_page.kdebug_flags &= ~KDBG_PIDEXCLUDE;
/*
 * The following functions all operate on the "global" typefilter singleton.
 */

/*
 * The tf param is optional, you may pass either a valid typefilter or NULL.
 * If you pass a valid typefilter, you release ownership of that typefilter.
 */
kdbg_initialize_typefilter(typefilter_t tf)
{
	ktrace_assert_lock_held();
	assert(!kdbg_typefilter);
	assert(!kdbg_typefilter_memory_entry);
	typefilter_t deallocate_tf = NULL;

	if (!tf && ((tf = deallocate_tf = typefilter_create()) == NULL)) {
		return ENOMEM;
	}

	if ((kdbg_typefilter_memory_entry = typefilter_create_memory_entry(tf)) == MACH_PORT_NULL) {
		if (deallocate_tf) {
			typefilter_deallocate(deallocate_tf);
		}
		return ENOMEM;
	}

	/*
	 * The atomic store closes a race window with
	 * the kdebug_typefilter syscall, which assumes
	 * that any non-null kdbg_typefilter means a
	 * valid memory_entry is available.
	 */
	__c11_atomic_store(((_Atomic typefilter_t *)&kdbg_typefilter), tf, memory_order_release);

	return KERN_SUCCESS;
}
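
/*
 * Illustrative sketch, not part of the original source: per the ownership
 * comment above, passing a non-NULL typefilter hands it over to kdebug, while
 * passing NULL asks kdbg_initialize_typefilter() to allocate one itself.
 *
 *	typefilter_t tf = typefilter_create();
 *	if (tf && kdbg_initialize_typefilter(tf) == KERN_SUCCESS) {
 *		// tf is now owned by kdebug; do not deallocate it here.
 *	}
 *
 *	// Equivalent lazy form:
 *	kdbg_initialize_typefilter(NULL);
 */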
kdbg_copyin_typefilter(user_addr_t addr, size_t size)
{
	ktrace_assert_lock_held();

	if (size != KDBG_TYPEFILTER_BITMAP_SIZE) {
		return EINVAL;
	}

	if ((tf = typefilter_create())) {
		if ((ret = copyin(addr, tf, KDBG_TYPEFILTER_BITMAP_SIZE)) == 0) {
			/* The kernel typefilter must always allow DBG_TRACE */
			typefilter_allow_class(tf, DBG_TRACE);

			/*
			 * If this is the first typefilter; claim it.
			 * Otherwise copy and deallocate.
			 *
			 * Allocating a typefilter for the copyin allows
			 * the kernel to hold the invariant that DBG_TRACE
			 * must always be allowed.
			 */
			if (!kdbg_typefilter) {
				if ((ret = kdbg_initialize_typefilter(tf))) {
					return ret;
				}
				tf = NULL;
			} else {
				typefilter_copy(kdbg_typefilter, tf);
			}

			kdbg_enable_typefilter();
			kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_TYPEFILTER_CHANGED, kdbg_typefilter);
		}

		if (tf) {
			typefilter_deallocate(tf);
		}
	}

	return ret;
}
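
/*
 * Illustrative sketch, not part of the original source: user space supplies a
 * class/subclass bitmap of exactly KDBG_TYPEFILTER_BITMAP_SIZE bytes; any
 * other size is rejected above, and DBG_TRACE is re-allowed after the copyin
 * so trace bookkeeping events are never filtered out.  The buffer and the
 * user address name below are assumptions for illustration.
 *
 *	uint8_t bitmap[KDBG_TYPEFILTER_BITMAP_SIZE] = { 0 };
 *	// set the bits for the class/subclass pairs of interest, then:
 *	int err = kdbg_copyin_typefilter(user_buffer_addr, sizeof(bitmap));
 */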
/*
 * Enable the flags in the control page for the typefilter. Assumes that
 * kdbg_typefilter has already been allocated, so events being written
 * don't see a bad typefilter.
 */
kdbg_enable_typefilter(void)
{
	assert(kdbg_typefilter);
	kd_ctrl_page.kdebug_flags &= ~(KDBG_RANGECHECK | KDBG_VALCHECK);
	kd_ctrl_page.kdebug_flags |= KDBG_TYPEFILTER_CHECK;
	kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
	commpage_update_kdebug_state();
}
/*
 * Disable the flags in the control page for the typefilter. The typefilter
 * may be safely deallocated shortly after this function returns.
 */
kdbg_disable_typefilter(void)
{
	bool notify_iops = kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK;
	kd_ctrl_page.kdebug_flags &= ~KDBG_TYPEFILTER_CHECK;

	if ((kd_ctrl_page.kdebug_flags & (KDBG_PIDCHECK | KDBG_PIDEXCLUDE))) {
		kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
	} else {
		kdbg_set_flags(SLOW_CHECKS, 0, FALSE);
	}
	commpage_update_kdebug_state();

	if (notify_iops) {
		/*
		 * Notify IOPs that the typefilter will now allow everything.
		 * Otherwise, they won't know a typefilter is no longer in
		 * effect.
		 */
		typefilter_allow_all(kdbg_typefilter);
		kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops,
		    KD_CALLBACK_TYPEFILTER_CHANGED, kdbg_typefilter);
	}
}
kdebug_commpage_state(void)
{
	if (kdebug_enable) {
		if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) {
			return KDEBUG_COMMPAGE_ENABLE_TYPEFILTER | KDEBUG_COMMPAGE_ENABLE_TRACE;
		}
		return KDEBUG_COMMPAGE_ENABLE_TRACE;
	}

	return 0;
}
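
/*
 * Summary sketch, not part of the original source: the commpage word exposes
 * at most two bits, derived from the control-page state above.
 *
 *	tracing disabled                 -> 0
 *	tracing enabled, no typefilter   -> KDEBUG_COMMPAGE_ENABLE_TRACE
 *	tracing enabled with typefilter  -> KDEBUG_COMMPAGE_ENABLE_TRACE |
 *	                                    KDEBUG_COMMPAGE_ENABLE_TYPEFILTER
 */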
kdbg_setreg(kd_regtype * kdr)
{
	unsigned int val_1, val_2, val;

	switch (kdr->type) {
	case KDBG_CLASSTYPE:
		val_1 = (kdr->value1 & 0xff);
		val_2 = (kdr->value2 & 0xff);
		kdlog_beg = (val_1 << 24);
		kdlog_end = (val_2 << 24);
		kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
		kd_ctrl_page.kdebug_flags &= ~KDBG_VALCHECK;    /* Turn off specific value check */
		kd_ctrl_page.kdebug_flags |= (KDBG_RANGECHECK | KDBG_CLASSTYPE);
		kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
		break;
	case KDBG_SUBCLSTYPE:
		val_1 = (kdr->value1 & 0xff);
		val_2 = (kdr->value2 & 0xff);
		val = val_2 + 1;
		kdlog_beg = ((val_1 << 24) | (val_2 << 16));
		kdlog_end = ((val_1 << 24) | (val << 16));
		kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
		kd_ctrl_page.kdebug_flags &= ~KDBG_VALCHECK;    /* Turn off specific value check */
		kd_ctrl_page.kdebug_flags |= (KDBG_RANGECHECK | KDBG_SUBCLSTYPE);
		kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
		break;
	case KDBG_RANGETYPE:
		kdlog_beg = (kdr->value1);
		kdlog_end = (kdr->value2);
		kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
		kd_ctrl_page.kdebug_flags &= ~KDBG_VALCHECK;    /* Turn off specific value check */
		kd_ctrl_page.kdebug_flags |= (KDBG_RANGECHECK | KDBG_RANGETYPE);
		kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
		break;
	case KDBG_VALCHECK:
		kdlog_value1 = (kdr->value1);
		kdlog_value2 = (kdr->value2);
		kdlog_value3 = (kdr->value3);
		kdlog_value4 = (kdr->value4);
		kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
		kd_ctrl_page.kdebug_flags &= ~KDBG_RANGECHECK;  /* Turn off range check */
		kd_ctrl_page.kdebug_flags |= KDBG_VALCHECK;     /* Turn on specific value check */
		kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
		break;
	case KDBG_TYPENONE:
		kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;

		if ((kd_ctrl_page.kdebug_flags & (KDBG_RANGECHECK | KDBG_VALCHECK |
		    KDBG_PIDCHECK | KDBG_PIDEXCLUDE |
		    KDBG_TYPEFILTER_CHECK))) {
			kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
		} else {
			kdbg_set_flags(SLOW_CHECKS, 0, FALSE);
		}
		break;
	}
}
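
/*
 * Worked example, not part of the original source: for KDBG_SUBCLSTYPE with
 * value1 = 0x01 (class) and value2 = 0x0A (subclass), the code above computes
 *
 *	kdlog_beg = (0x01 << 24) | (0x0A << 16) = 0x010A0000
 *	kdlog_end = (0x01 << 24) | (0x0B << 16) = 0x010B0000
 *
 * so the range check admits exactly those debugids whose class/subclass
 * prefix is 0x010A, i.e. one full subclass within class 0x01.
 */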
kdbg_write_to_vnode(caddr_t buffer, size_t size, vnode_t vp, vfs_context_t ctx, off_t file_offset)
{
	return vn_rdwr(UIO_WRITE, vp, buffer, size, file_offset, UIO_SYSSPACE, IO_NODELOCKED | IO_UNIT,
	    vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
}
kdbg_write_v3_chunk_header(user_addr_t buffer, uint32_t tag, uint32_t sub_tag, uint64_t length, vnode_t vp, vfs_context_t ctx)
{
	int ret = KERN_SUCCESS;
	kd_chunk_header_v3 header = {
		.tag = tag,
		.sub_tag = sub_tag,
		.length = length,
	};

	// Check that only one of them is valid
	assert(!buffer ^ !vp);
	assert((vp == NULL) || (ctx != NULL));

	// Write the 8-byte future_chunk_timestamp field in the payload
	if (vp) {
		ret = kdbg_write_to_vnode((caddr_t)&header, sizeof(kd_chunk_header_v3), vp, ctx, RAW_file_offset);
		if (!ret) {
			RAW_file_offset += (sizeof(kd_chunk_header_v3));
		}
	} else {
		ret = copyout(&header, buffer, sizeof(kd_chunk_header_v3));
	}

	return ret;
}
kdbg_write_v3_chunk_header_to_buffer(void * buffer, uint32_t tag, uint32_t sub_tag, uint64_t length)
{
	kd_chunk_header_v3 header = {
		.tag = tag,
		.sub_tag = sub_tag,
		.length = length,
	};

	memcpy(buffer, &header, sizeof(kd_chunk_header_v3));

	return sizeof(kd_chunk_header_v3);
}
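
/*
 * Illustrative layout sketch, not part of the original source: every v3 chunk
 * starts with a kd_chunk_header_v3 { tag, sub_tag, length } followed by
 * `length` bytes of payload, so a serializer can emit a chunk with the helper
 * above roughly as follows (the local buffer and payload variables are
 * assumptions for illustration):
 *
 *	char chunk[sizeof(kd_chunk_header_v3) + payload_len];
 *	size_t off = kdbg_write_v3_chunk_header_to_buffer(chunk, tag,
 *	    sub_tag, payload_len);
 *	memcpy(chunk + off, payload, payload_len);
 */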
kdbg_write_v3_chunk_to_fd(uint32_t tag, uint32_t sub_tag, uint64_t length, void *payload, uint64_t payload_size, int fd)
{
	proc_t p = current_proc();
	struct vfs_context context;
	struct fileproc *fp;
	vnode_t vp;

	if ((fp_lookup(p, fd, &fp, 1))) {
		return EFAULT;
	}

	context.vc_thread = current_thread();
	context.vc_ucred = fp->f_fglob->fg_cred;

	if (FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_VNODE) {
		fp_drop(p, fd, fp, 1);
		return EBADF;
	}
	vp = (struct vnode *) fp->f_fglob->fg_data;

	if ((vnode_getwithref(vp)) == 0) {
		RAW_file_offset = fp->f_fglob->fg_offset;

		kd_chunk_header_v3 chunk_header = {
			.tag = tag,
			.sub_tag = sub_tag,
			.length = length,
		};

		int ret = kdbg_write_to_vnode((caddr_t) &chunk_header, sizeof(kd_chunk_header_v3), vp, &context, RAW_file_offset);
		if (!ret) {
			RAW_file_offset += sizeof(kd_chunk_header_v3);
		}

		ret = kdbg_write_to_vnode((caddr_t) payload, (size_t) payload_size, vp, &context, RAW_file_offset);
		if (!ret) {
			RAW_file_offset += payload_size;
		}

		fp->f_fglob->fg_offset = RAW_file_offset;
		vnode_put(vp);
	}

	fp_drop(p, fd, fp, 0);
	return KERN_SUCCESS;
}
kdbg_write_v3_event_chunk_header(user_addr_t buffer, uint32_t tag, uint64_t length, vnode_t vp, vfs_context_t ctx)
{
	uint64_t future_chunk_timestamp = 0;
	length += sizeof(uint64_t);

	if (kdbg_write_v3_chunk_header(buffer, tag, V3_EVENT_DATA_VERSION, length, vp, ctx)) {
		return 0;
	}
	if (buffer) {
		buffer += sizeof(kd_chunk_header_v3);
	}

	// Check that only one of them is valid
	assert(!buffer ^ !vp);
	assert((vp == NULL) || (ctx != NULL));

	// Write the 8-byte future_chunk_timestamp field in the payload
	if (vp) {
		int ret = kdbg_write_to_vnode((caddr_t)&future_chunk_timestamp, sizeof(uint64_t), vp, ctx, RAW_file_offset);
		if (!ret) {
			RAW_file_offset += (sizeof(uint64_t));
		}
	} else {
		if (copyout(&future_chunk_timestamp, buffer, sizeof(uint64_t))) {
			return 0;
		}
	}

	return buffer + sizeof(uint64_t);
}
kdbg_write_v3_header(user_addr_t user_header, size_t *user_header_size, int fd)
{
	int ret = KERN_SUCCESS;

	uint8_t* cpumap = 0;
	uint32_t cpumap_size = 0;
	uint32_t thrmap_size = 0;

	size_t bytes_needed = 0;

	// Check that only one of them is valid
	assert(!user_header ^ !fd);
	assert(user_header_size);

	if (!(kd_ctrl_page.kdebug_flags & KDBG_BUFINIT)) {
		return EINVAL;
	}

	if (!(user_header || fd)) {
		return EINVAL;
	}

	// Initialize the cpu map
	ret = kdbg_cpumap_init_internal(kd_ctrl_page.kdebug_iops, kd_ctrl_page.kdebug_cpus, &cpumap, &cpumap_size);
	if (ret != KERN_SUCCESS) {
		return ret;
	}

	// Check if a thread map is initialized
	if (!kd_mapptr) {
		ret = EINVAL;
		goto bail;
	}
	thrmap_size = kd_mapcount * sizeof(kd_threadmap);

	mach_timebase_info_data_t timebase = {0, 0};
	clock_timebase_info(&timebase);

	// Setup the header.
	// See v3 header description in sys/kdebug.h for more inforamtion.
	kd_header_v3 header = {
		.tag = RAW_VERSION3,
		.sub_tag = V3_HEADER_VERSION,
		.length = (sizeof(kd_header_v3) + cpumap_size - sizeof(kd_cpumap_header)),
		.timebase_numer = timebase.numer,
		.timebase_denom = timebase.denom,
		.timestamp = 0, /* FIXME rdar://problem/22053009 */
		.walltime_secs = 0,
		.walltime_usecs = 0,
		.timezone_minuteswest = 0,
		.timezone_dst = 0,
#if defined(__LP64__)
		.flags = 1,
#else
		.flags = 0,
#endif
	};

	// If its a buffer, check if we have enough space to copy the header and the maps.
	bytes_needed = header.length + thrmap_size + (2 * sizeof(kd_chunk_header_v3));
	if (user_header && (*user_header_size < bytes_needed)) {
		ret = EINVAL;
		goto bail;
	}

	// Start writing the header
	if (fd) {
		void *hdr_ptr = (void *)(((uintptr_t) &header) + sizeof(kd_chunk_header_v3));
		size_t payload_size = (sizeof(kd_header_v3) - sizeof(kd_chunk_header_v3));

		ret = kdbg_write_v3_chunk_to_fd(RAW_VERSION3, V3_HEADER_VERSION, header.length, hdr_ptr, payload_size, fd);
		if (ret) {
			goto bail;
		}
	} else {
		if (copyout(&header, user_header, sizeof(kd_header_v3))) {
			ret = EFAULT;
			goto bail;
		}
		// Update the user pointer
		user_header += sizeof(kd_header_v3);
	}

	// Write a cpu map. This is a sub chunk of the header
	cpumap = (uint8_t*)((uintptr_t) cpumap + sizeof(kd_cpumap_header));
	size_t payload_size = (size_t)(cpumap_size - sizeof(kd_cpumap_header));
	if (fd) {
		ret = kdbg_write_v3_chunk_to_fd(V3_CPU_MAP, V3_CPUMAP_VERSION, payload_size, (void *)cpumap, payload_size, fd);
		if (ret) {
			goto bail;
		}
	} else {
		ret = kdbg_write_v3_chunk_header(user_header, V3_CPU_MAP, V3_CPUMAP_VERSION, payload_size, NULL, NULL);
		if (ret) {
			goto bail;
		}
		user_header += sizeof(kd_chunk_header_v3);
		if (copyout(cpumap, user_header, payload_size)) {
			ret = EFAULT;
			goto bail;
		}
		// Update the user pointer
		user_header += payload_size;
	}

	// Write a thread map
	if (fd) {
		ret = kdbg_write_v3_chunk_to_fd(V3_THREAD_MAP, V3_THRMAP_VERSION, thrmap_size, (void *)kd_mapptr, thrmap_size, fd);
		if (ret) {
			goto bail;
		}
	} else {
		ret = kdbg_write_v3_chunk_header(user_header, V3_THREAD_MAP, V3_THRMAP_VERSION, thrmap_size, NULL, NULL);
		if (ret) {
			goto bail;
		}
		user_header += sizeof(kd_chunk_header_v3);
		if (copyout(kd_mapptr, user_header, thrmap_size)) {
			ret = EFAULT;
			goto bail;
		}
		user_header += thrmap_size;
	}

	if (fd) {
		RAW_file_written += bytes_needed;
	}

	*user_header_size = bytes_needed;

bail:
	if (cpumap) {
		kmem_free(kernel_map, (vm_offset_t)cpumap, cpumap_size);
	}
	return ret;
}
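
/*
 * Illustrative sketch, not part of the original source, of the RAW_VERSION3
 * stream produced by kdbg_write_v3_header() plus the event writer later in
 * this file:
 *
 *	[ kd_header_v3 (tag RAW_VERSION3, sub_tag V3_HEADER_VERSION)    ]
 *	[ V3_CPU_MAP chunk     : cpumap payload                         ]
 *	[ V3_THREAD_MAP chunk  : kd_threadmap entries                   ]
 *	[ V3_RAW_EVENTS chunks : future_chunk_timestamp + kd_buf array  ]
 *
 * This only summarizes the chunk order emitted by the code above; it is not a
 * normative file-format description.
 */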
kdbg_readcpumap(user_addr_t user_cpumap, size_t *user_cpumap_size)
{
	uint8_t* cpumap = NULL;
	uint32_t cpumap_size = 0;
	int ret = KERN_SUCCESS;

	if (kd_ctrl_page.kdebug_flags & KDBG_BUFINIT) {
		if (kdbg_cpumap_init_internal(kd_ctrl_page.kdebug_iops, kd_ctrl_page.kdebug_cpus, &cpumap, &cpumap_size) == KERN_SUCCESS) {
			size_t bytes_to_copy = (*user_cpumap_size >= cpumap_size) ? cpumap_size : *user_cpumap_size;
			if (copyout(cpumap, user_cpumap, (size_t)bytes_to_copy)) {
				ret = EFAULT;
			}
			*user_cpumap_size = cpumap_size;
			kmem_free(kernel_map, (vm_offset_t)cpumap, cpumap_size);
		} else {
			ret = EINVAL;
		}
	} else {
		ret = EINVAL;
	}

	return ret;
}
kdbg_readcurthrmap(user_addr_t buffer, size_t *bufsize)
{
	kd_threadmap *mapptr;
	unsigned int mapsize;
	unsigned int mapcount;
	unsigned int count = 0;
	int ret = 0;

	count = *bufsize / sizeof(kd_threadmap);
	*bufsize = 0;

	if ((mapptr = kdbg_thrmap_init_internal(count, &mapsize, &mapcount))) {
		if (copyout(mapptr, buffer, mapcount * sizeof(kd_threadmap))) {
			ret = EFAULT;
		} else {
			*bufsize = (mapcount * sizeof(kd_threadmap));
		}

		kmem_free(kernel_map, (vm_offset_t)mapptr, mapsize);
	} else {
		ret = EINVAL;
	}

	return ret;
}
kdbg_write_v1_header(boolean_t write_thread_map, vnode_t vp, vfs_context_t ctx)
{
	int ret = 0;
	RAW_header header;
	clock_sec_t secs;
	clock_usec_t usecs;
	char *pad_buf;
	uint32_t pad_size;
	uint32_t extra_thread_count = 0;
	uint32_t cpumap_size;
	size_t map_size = 0;
	size_t map_count = 0;

	if (write_thread_map) {
		assert(kd_ctrl_page.kdebug_flags & KDBG_MAPINIT);
		map_count = kd_mapcount;
		map_size = map_count * sizeof(kd_threadmap);
	}

	/*
	 * Without the buffers initialized, we cannot construct a CPU map or a
	 * thread map, and cannot write a header.
	 */
	if (!(kd_ctrl_page.kdebug_flags & KDBG_BUFINIT)) {
		return EINVAL;
	}

	/*
	 * To write a RAW_VERSION1+ file, we must embed a cpumap in the
	 * "padding" used to page align the events following the threadmap. If
	 * the threadmap happens to not require enough padding, we artificially
	 * increase its footprint until it needs enough padding.
	 */
	pad_size = PAGE_16KB - ((sizeof(RAW_header) + map_size) & PAGE_MASK_64);
	cpumap_size = sizeof(kd_cpumap_header) + kd_ctrl_page.kdebug_cpus * sizeof(kd_cpumap);

	if (cpumap_size > pad_size) {
		/* If the cpu map doesn't fit in the current available pad_size,
		 * we increase the pad_size by 16K. We do this so that the event
		 * data is always available on a page aligned boundary for both
		 * 4k and 16k systems. We enforce this alignment for the event
		 * data so that we can take advantage of optimized file/disk writes.
		 */
		pad_size += PAGE_16KB;
	}

	/* The way we are silently embedding a cpumap in the "padding" is by artificially
	 * increasing the number of thread entries. However, we'll also need to ensure that
	 * the cpumap is embedded in the last 4K page before when the event data is expected.
	 * This way the tools can read the data starting the next page boundary on both
	 * 4K and 16K systems preserving compatibility with older versions of the tools
	 */
	if (pad_size > PAGE_4KB) {
		pad_size -= PAGE_4KB;
		extra_thread_count = (pad_size / sizeof(kd_threadmap)) + 1;
	}

	memset(&header, 0, sizeof(header));
	header.version_no = RAW_VERSION1;
	header.thread_count = map_count + extra_thread_count;

	clock_get_calendar_microtime(&secs, &usecs);
	header.TOD_secs = secs;
	header.TOD_usecs = usecs;

	ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)&header, sizeof(RAW_header), RAW_file_offset,
	    UIO_SYSSPACE, IO_NODELOCKED | IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
	if (ret) {
		return ret;
	}
	RAW_file_offset += sizeof(RAW_header);
	RAW_file_written += sizeof(RAW_header);

	if (write_thread_map) {
		ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)kd_mapptr, map_size, RAW_file_offset,
		    UIO_SYSSPACE, IO_NODELOCKED | IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
		if (ret) {
			return ret;
		}

		RAW_file_offset += map_size;
		RAW_file_written += map_size;
	}

	if (extra_thread_count) {
		pad_size = extra_thread_count * sizeof(kd_threadmap);
		pad_buf = kalloc(pad_size);
		if (!pad_buf) {
			return ENOMEM;
		}
		memset(pad_buf, 0, pad_size);

		ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)pad_buf, pad_size, RAW_file_offset,
		    UIO_SYSSPACE, IO_NODELOCKED | IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
		kfree(pad_buf, pad_size);
		if (ret) {
			return ret;
		}

		RAW_file_offset += pad_size;
		RAW_file_written += pad_size;
	}

	pad_size = PAGE_SIZE - (RAW_file_offset & PAGE_MASK_64);
	if (pad_size) {
		pad_buf = (char *)kalloc(pad_size);
		if (!pad_buf) {
			return ENOMEM;
		}
		memset(pad_buf, 0, pad_size);

		/*
		 * embed a cpumap in the padding bytes.
		 * older code will skip this.
		 * newer code will know how to read it.
		 */
		uint32_t temp = pad_size;
		if (kdbg_cpumap_init_internal(kd_ctrl_page.kdebug_iops, kd_ctrl_page.kdebug_cpus, (uint8_t**)&pad_buf, &temp) != KERN_SUCCESS) {
			memset(pad_buf, 0, pad_size);
		}

		ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)pad_buf, pad_size, RAW_file_offset,
		    UIO_SYSSPACE, IO_NODELOCKED | IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
		kfree(pad_buf, pad_size);
		if (ret) {
			return ret;
		}

		RAW_file_offset += pad_size;
		RAW_file_written += pad_size;
	}

	return ret;
}
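
/*
 * Worked example, not part of the original source, using made-up numbers: on
 * a 4K-page system with sizeof(RAW_header) + map_size == 0x1234, the first
 * computation above gives pad_size = PAGE_16KB - 0x1234 = 0x2DCC.  If the
 * cpumap needs more than that, pad_size grows by another 16K.  Because only
 * the final 4K page before the events may carry the cpumap, 4K is then
 * subtracted and the remainder is converted into extra (fake) thread map
 * entries:
 *
 *	extra_thread_count = ((pad_size - PAGE_4KB) / sizeof(kd_threadmap)) + 1;
 *
 * so older readers simply skip the padding as unused thread entries while
 * newer readers find the cpumap at the expected page boundary.
 */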
kdbg_clear_thread_map(void)
{
	ktrace_assert_lock_held();

	if (kd_ctrl_page.kdebug_flags & KDBG_MAPINIT) {
		assert(kd_mapptr != NULL);
		kmem_free(kernel_map, (vm_offset_t)kd_mapptr, kd_mapsize);
		kd_mapptr = NULL;
		kd_mapsize = 0;
		kd_mapcount = 0;
		kd_ctrl_page.kdebug_flags &= ~KDBG_MAPINIT;
	}
}
/*
 * Write out a version 1 header and the thread map, if it is initialized, to a
 * vnode. Used by KDWRITEMAP and kdbg_dump_trace_to_file.
 *
 * Returns write errors from vn_rdwr if a write fails. Returns ENODATA if the
 * thread map has not been initialized, but the header will still be written.
 * Returns ENOMEM if padding could not be allocated. Returns 0 otherwise.
 */
kdbg_write_thread_map(vnode_t vp, vfs_context_t ctx)
{
	int ret = 0;
	boolean_t map_initialized;

	ktrace_assert_lock_held();
	assert(ctx != NULL);

	map_initialized = (kd_ctrl_page.kdebug_flags & KDBG_MAPINIT);

	ret = kdbg_write_v1_header(map_initialized, vp, ctx);
	if (ret == 0) {
		if (map_initialized) {
			kdbg_clear_thread_map();
		} else {
			ret = ENODATA;
		}
	}

	return ret;
}
/*
 * Copy out the thread map to a user space buffer. Used by KDTHRMAP.
 *
 * Returns copyout errors if the copyout fails. Returns ENODATA if the thread
 * map has not been initialized. Returns EINVAL if the buffer provided is not
 * large enough for the entire thread map. Returns 0 otherwise.
 */
kdbg_copyout_thread_map(user_addr_t buffer, size_t *buffer_size)
{
	boolean_t map_initialized;
	size_t map_size;
	int ret = 0;

	ktrace_assert_lock_held();
	assert(buffer_size != NULL);

	map_initialized = (kd_ctrl_page.kdebug_flags & KDBG_MAPINIT);
	if (!map_initialized) {
		return ENODATA;
	}

	map_size = kd_mapcount * sizeof(kd_threadmap);
	if (*buffer_size < map_size) {
		return EINVAL;
	}

	ret = copyout(kd_mapptr, buffer, map_size);
	if (ret == 0) {
		kdbg_clear_thread_map();
	}

	return ret;
}
kdbg_readthrmap_v3(user_addr_t buffer, size_t buffer_size, int fd)
{
	int ret = 0;
	boolean_t map_initialized;
	size_t map_size;

	ktrace_assert_lock_held();

	if ((!fd && !buffer) || (fd && buffer)) {
		return EINVAL;
	}

	map_initialized = (kd_ctrl_page.kdebug_flags & KDBG_MAPINIT);
	map_size = kd_mapcount * sizeof(kd_threadmap);

	if (map_initialized && (buffer_size >= map_size)) {
		ret = kdbg_write_v3_header(buffer, &buffer_size, fd);
		if (ret == 0) {
			kdbg_clear_thread_map();
		}
	} else {
		ret = EINVAL;
	}

	return ret;
}
kdbg_set_nkdbufs(unsigned int req_nkdbufs)
{
	/*
	 * Only allow allocation up to half the available memory (sane_size).
	 */
	uint64_t max_nkdbufs = (sane_size / 2) / sizeof(kd_buf);
	nkdbufs = (req_nkdbufs > max_nkdbufs) ? max_nkdbufs : req_nkdbufs;
}
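
/*
 * Worked example, not part of the original source: with sane_size = 8 GB and
 * a 64-byte kd_buf (the LP64 layout; the size is an assumption here), the
 * clamp above is
 *
 *	max_nkdbufs = (8 GB / 2) / 64 = 67,108,864 events
 *
 * so any larger request is silently reduced to that ceiling.
 */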
/*
 * Block until there are `n_storage_threshold` storage units filled with
 * events or `timeout_ms` milliseconds have passed. If `locked_wait` is true,
 * `ktrace_lock` is held while waiting. This is necessary while waiting to
 * write events out of the buffers.
 *
 * Returns true if the threshold was reached and false otherwise.
 *
 * Called with `ktrace_lock` locked and interrupts enabled.
 */
kdbg_wait(uint64_t timeout_ms, boolean_t locked_wait)
{
	int wait_result = THREAD_AWAKENED;
	uint64_t abstime = 0;

	ktrace_assert_lock_held();

	if (timeout_ms != 0) {
		uint64_t ns = timeout_ms * NSEC_PER_MSEC;
		nanoseconds_to_absolutetime(ns, &abstime);
		clock_absolutetime_interval_to_deadline(abstime, &abstime);
	}

	boolean_t s = ml_set_interrupts_enabled(FALSE);
	if (!s) {
		panic("kdbg_wait() called with interrupts disabled");
	}
	lck_spin_lock_grp(kdw_spin_lock, kdebug_lck_grp);

	if (!locked_wait) {
		/* drop the mutex to allow others to access trace */
		ktrace_unlock();
	}

	while (wait_result == THREAD_AWAKENED &&
	    kd_ctrl_page.kds_inuse_count < n_storage_threshold) {
		kds_waiter = 1;

		if (abstime) {
			wait_result = lck_spin_sleep_deadline(kdw_spin_lock, 0, &kds_waiter, THREAD_ABORTSAFE, abstime);
		} else {
			wait_result = lck_spin_sleep(kdw_spin_lock, 0, &kds_waiter, THREAD_ABORTSAFE);
		}

		kds_waiter = 0;
	}

	/* check the count under the spinlock */
	boolean_t threshold_exceeded = (kd_ctrl_page.kds_inuse_count >= n_storage_threshold);

	lck_spin_unlock(kdw_spin_lock);
	ml_set_interrupts_enabled(s);

	if (!locked_wait) {
		/* pick the mutex back up again */
		ktrace_lock();
	}

	/* write out whether we've exceeded the threshold */
	return threshold_exceeded;
}
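
/*
 * Illustrative sketch, not part of the original source: KERN_KDWRITETR below
 * uses this as a flow-control gate before draining the buffers, roughly:
 *
 *	if (kdbg_wait(timeout_ms, TRUE)) {
 *		// threshold reached: enough storage units are full to make
 *		// a large sequential write worthwhile
 *	} else {
 *		// timed out (or woke spuriously) below the threshold
 *	}
 */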
/*
 * Wakeup a thread waiting using `kdbg_wait` if there are at least
 * `n_storage_threshold` storage units in use.
 */
kdbg_wakeup(void)
{
	boolean_t need_kds_wakeup = FALSE;

	/*
	 * Try to take the lock here to synchronize with the waiter entering
	 * the blocked state. Use the try mode to prevent deadlocks caused by
	 * re-entering this routine due to various trace points triggered in the
	 * lck_spin_sleep_xxxx routines used to actually enter one of our 2 wait
	 * conditions. No problem if we fail, there will be lots of additional
	 * events coming in that will eventually succeed in grabbing this lock.
	 */
	boolean_t s = ml_set_interrupts_enabled(FALSE);

	if (lck_spin_try_lock(kdw_spin_lock)) {
		if (kds_waiter &&
		    (kd_ctrl_page.kds_inuse_count >= n_storage_threshold)) {
			kds_waiter = 0;
			need_kds_wakeup = TRUE;
		}
		lck_spin_unlock(kdw_spin_lock);
	}

	ml_set_interrupts_enabled(s);

	if (need_kds_wakeup == TRUE) {
		wakeup(&kds_waiter);
	}
}
kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep)
{
	int ret = 0;
	size_t size = *sizep;
	unsigned int value = 0;
	kd_regtype kd_Reg;
	kbufinfo_t kd_bufinfo;
	proc_t p;

	if (name[0] == KERN_KDWRITETR ||
	    name[0] == KERN_KDWRITETR_V3 ||
	    name[0] == KERN_KDWRITEMAP ||
	    name[0] == KERN_KDWRITEMAP_V3 ||
	    name[0] == KERN_KDEFLAGS ||
	    name[0] == KERN_KDDFLAGS ||
	    name[0] == KERN_KDENABLE ||
	    name[0] == KERN_KDSETBUF) {
		if (namelen < 2) {
			return EINVAL;
		}
		value = name[1];
	}

	assert(kd_ctrl_page.kdebug_flags & KDBG_LOCKINIT);

	/*
	 * Some requests only require "read" access to kdebug trace. Regardless,
	 * tell ktrace that a configuration or read is occurring (and see if it's
	 * allowed).
	 */
	if (name[0] != KERN_KDGETBUF &&
	    name[0] != KERN_KDGETREG &&
	    name[0] != KERN_KDREADCURTHRMAP) {
		if ((ret = ktrace_configure(KTRACE_KDEBUG))) {
			return ret;
		}
	} else {
		if ((ret = ktrace_read_check())) {
			return ret;
		}
	}

	switch (name[0]) {
	case KERN_KDGETBUF:
		if (size < sizeof(kd_bufinfo.nkdbufs)) {
			/*
			 * There is not enough room to return even
			 * the first element of the info structure.
			 */
			ret = EINVAL;
			break;
		}

		memset(&kd_bufinfo, 0, sizeof(kd_bufinfo));

		kd_bufinfo.nkdbufs = nkdbufs;
		kd_bufinfo.nkdthreads = kd_mapcount;

		if ((kd_ctrl_page.kdebug_slowcheck & SLOW_NOLOG)) {
			kd_bufinfo.nolog = 1;
		} else {
			kd_bufinfo.nolog = 0;
		}

		kd_bufinfo.flags = kd_ctrl_page.kdebug_flags;
#if defined(__LP64__)
		kd_bufinfo.flags |= KDBG_LP64;
#endif
		int pid = ktrace_get_owning_pid();
		kd_bufinfo.bufid = (pid == 0 ? -1 : pid);

		if (size >= sizeof(kd_bufinfo)) {
			/*
			 * Provide all the info we have
			 */
			if (copyout(&kd_bufinfo, where, sizeof(kd_bufinfo))) {
				ret = EINVAL;
			}
		} else {
			/*
			 * For backwards compatibility, only provide
			 * as much info as there is room for.
			 */
			if (copyout(&kd_bufinfo, where, size)) {
				ret = EINVAL;
			}
		}
		break;

	case KERN_KDREADCURTHRMAP:
		ret = kdbg_readcurthrmap(where, sizep);
		break;

	case KERN_KDEFLAGS:
		value &= KDBG_USERFLAGS;
		kd_ctrl_page.kdebug_flags |= value;
		break;

	case KERN_KDDFLAGS:
		value &= KDBG_USERFLAGS;
		kd_ctrl_page.kdebug_flags &= ~value;
		break;

	case KERN_KDENABLE:
		/*
		 * Enable tracing mechanism. Two types:
		 * KDEBUG_TRACE is the standard one,
		 * and KDEBUG_PPT which is a carefully
		 * chosen subset to avoid performance impact.
		 */
		if (value) {
			/*
			 * enable only if buffer is initialized
			 */
			if (!(kd_ctrl_page.kdebug_flags & KDBG_BUFINIT) ||
			    !(value == KDEBUG_ENABLE_TRACE || value == KDEBUG_ENABLE_PPT)) {
				ret = EINVAL;
				break;
			}

			kdbg_set_tracing_enabled(TRUE, value);
		} else {
			if (!kdebug_enable) {
				break;
			}

			kernel_debug_disable();
		}
		break;

	case KERN_KDSETBUF:
		kdbg_set_nkdbufs(value);
		break;

	case KERN_KDSETUP:
		ret = kdbg_reinit(FALSE);
		break;

	case KERN_KDREMOVE:
		ktrace_reset(KTRACE_KDEBUG);
		break;

	case KERN_KDSETREG:
		if (size < sizeof(kd_regtype)) {
			ret = EINVAL;
			break;
		}
		if (copyin(where, &kd_Reg, sizeof(kd_regtype))) {
			ret = EINVAL;
			break;
		}

		ret = kdbg_setreg(&kd_Reg);
		break;

	case KERN_KDREADTR:
		ret = kdbg_read(where, sizep, NULL, NULL, RAW_VERSION1);
		break;

	case KERN_KDWRITETR:
	case KERN_KDWRITETR_V3:
	case KERN_KDWRITEMAP:
	case KERN_KDWRITEMAP_V3:
	{
		struct vfs_context context;
		struct fileproc *fp;
		size_t number;
		vnode_t vp;
		int fd;

		if (name[0] == KERN_KDWRITETR || name[0] == KERN_KDWRITETR_V3) {
			(void)kdbg_wait(size, TRUE);
		}
		p = current_proc();
		fd = value;

		if ((ret = fp_lookup(p, fd, &fp, 1))) {
			break;
		}

		context.vc_thread = current_thread();
		context.vc_ucred = fp->f_fglob->fg_cred;

		if (FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_VNODE) {
			fp_drop(p, fd, fp, 1);
			ret = EBADF;
			break;
		}
		vp = (struct vnode *)fp->f_fglob->fg_data;

		if ((ret = vnode_getwithref(vp)) == 0) {
			RAW_file_offset = fp->f_fglob->fg_offset;
			if (name[0] == KERN_KDWRITETR || name[0] == KERN_KDWRITETR_V3) {
				number = nkdbufs * sizeof(kd_buf);

				KDBG_RELEASE(TRACE_WRITING_EVENTS | DBG_FUNC_START);
				if (name[0] == KERN_KDWRITETR_V3) {
					ret = kdbg_read(0, &number, vp, &context, RAW_VERSION3);
				} else {
					ret = kdbg_read(0, &number, vp, &context, RAW_VERSION1);
				}
				KDBG_RELEASE(TRACE_WRITING_EVENTS | DBG_FUNC_END, number);
			} else {
				number = kd_mapcount * sizeof(kd_threadmap);
				if (name[0] == KERN_KDWRITEMAP_V3) {
					ret = kdbg_readthrmap_v3(0, number, fd);
				} else {
					ret = kdbg_write_thread_map(vp, &context);
				}
			}
			fp->f_fglob->fg_offset = RAW_file_offset;
			vnode_put(vp);
		}
		fp_drop(p, fd, fp, 0);
		break;
	}

	case KERN_KDBUFWAIT:
		*sizep = kdbg_wait(size, FALSE);
		break;

	case KERN_KDPIDTR:
		if (size < sizeof(kd_regtype)) {
			ret = EINVAL;
			break;
		}
		if (copyin(where, &kd_Reg, sizeof(kd_regtype))) {
			ret = EINVAL;
			break;
		}

		ret = kdbg_setpid(&kd_Reg);
		break;

	case KERN_KDPIDEX:
		if (size < sizeof(kd_regtype)) {
			ret = EINVAL;
			break;
		}
		if (copyin(where, &kd_Reg, sizeof(kd_regtype))) {
			ret = EINVAL;
			break;
		}

		ret = kdbg_setpidex(&kd_Reg);
		break;

	case KERN_KDCPUMAP:
		ret = kdbg_readcpumap(where, sizep);
		break;

	case KERN_KDTHRMAP:
		ret = kdbg_copyout_thread_map(where, sizep);
		break;

	case KERN_KDSET_TYPEFILTER: {
		ret = kdbg_copyin_typefilter(where, size);
		break;
	}

	case KERN_KDTEST:
		ret = kdbg_test(size);
		break;
/*
 * This code can run for the most part concurrently with kernel_debug_internal()...
 * 'release_storage_unit' will take the kds_spin_lock which may cause us to briefly
 * synchronize with the recording side of this puzzle... otherwise, we are able to
 * move through the lists w/o use of any locks
 */
kdbg_read(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx, uint32_t file_version)
{
	unsigned int count;
	unsigned int cpu, min_cpu;
	uint64_t barrier_min = 0, barrier_max = 0, t, earliest_time;
	int error = 0;
	kd_buf *tempbuf;
	uint32_t rcursor;
	kd_buf lostevent;
	union kds_ptr kdsp;
	bool traced_retrograde = false;
	struct kd_storage *kdsp_actual;
	struct kd_bufinfo *kdbp;
	struct kd_bufinfo *min_kdbp;
	uint32_t tempbuf_count;
	uint32_t tempbuf_number;
	uint32_t old_kdebug_flags;
	uint32_t old_kdebug_slowcheck;
	boolean_t out_of_events = FALSE;
	boolean_t wrapped = FALSE;

	count = *number / sizeof(kd_buf);
	*number = 0;

	ktrace_assert_lock_held();

	if (count == 0 || !(kd_ctrl_page.kdebug_flags & KDBG_BUFINIT) || kdcopybuf == 0) {
		return EINVAL;
	}

	thread_set_eager_preempt(current_thread());

	memset(&lostevent, 0, sizeof(lostevent));
	lostevent.debugid = TRACE_LOST_EVENTS;

	/*
	 * Capture the current time. Only sort events that have occured
	 * before now. Since the IOPs are being flushed here, it is possible
	 * that events occur on the AP while running live tracing. If we are
	 * disabled, no new events should occur on the AP.
	 */
	if (kd_ctrl_page.enabled) {
		barrier_max = kdbg_timestamp() & KDBG_TIMESTAMP_MASK;
	}

	/*
	 * Request each IOP to provide us with up to date entries before merging
	 * buffers together.
	 */
	kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_SYNC_FLUSH, NULL);

	/*
	 * Disable wrap so storage units cannot be stolen out from underneath us
	 * while merging events.
	 *
	 * Because we hold ktrace_lock, no other control threads can be playing
	 * with kdebug_flags. The code that emits new events could be running,
	 * but it grabs kds_spin_lock if it needs to acquire a new storage
	 * chunk, which is where it examines kdebug_flags. If it is adding to
	 * the same chunk we're reading from, check for that below.
	 */
	wrapped = disable_wrap(&old_kdebug_slowcheck, &old_kdebug_flags);

	if (count > nkdbufs) {
		count = nkdbufs;
	}

	if ((tempbuf_count = count) > KDCOPYBUF_COUNT) {
		tempbuf_count = KDCOPYBUF_COUNT;
	}

	/*
	 * If the buffers have wrapped, do not emit additional lost events for the
	 * oldest storage units.
	 */
	if (wrapped) {
		kd_ctrl_page.kdebug_flags &= ~KDBG_WRAPPED;

		for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_ctrl_page.kdebug_cpus; cpu++, kdbp++) {
			if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
				continue;
			}
			kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
			kdsp_actual->kds_lostevents = FALSE;
		}
	}

	/*
	 * Capture the earliest time where there are events for all CPUs and don't
	 * emit events with timestamps prior.
	 */
	barrier_min = kd_ctrl_page.oldest_time;

	while (count) {
		tempbuf = kdcopybuf;
		tempbuf_number = 0;

		if (wrapped) {
			/*
			 * Emit a lost events tracepoint to indicate that previous events
			 * were lost -- the thread map cannot be trusted. A new one must
			 * be taken so tools can analyze the trace in a backwards-facing
			 * fashion.
			 */
			kdbg_set_timestamp_and_cpu(&lostevent, barrier_min, 0);
			*tempbuf = lostevent;
			wrapped = FALSE;
			goto nextevent;
		}

		/* While space left in merged events scratch buffer. */
		while (tempbuf_count) {
			bool lostevents = false;
			int lostcpu = 0;
			earliest_time = UINT64_MAX;
			min_kdbp = NULL;
			min_cpu = 0;

			/* Check each CPU's buffers for the earliest event. */
			for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_ctrl_page.kdebug_cpus; cpu++, kdbp++) {
				/* Skip CPUs without data in their oldest storage unit. */
				if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
next_cpu:
					continue;
				}
				/* From CPU data to buffer header to buffer. */
				kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);

next_event:
				/* The next event to be read from this buffer. */
				rcursor = kdsp_actual->kds_readlast;

				/* Skip this buffer if there are no events left. */
				if (rcursor == kdsp_actual->kds_bufindx) {
					continue;
				}

				/*
				 * Check that this storage unit wasn't stolen and events were
				 * lost. This must have happened while wrapping was disabled
				 * in this function.
				 */
				if (kdsp_actual->kds_lostevents) {
					lostevents = true;
					kdsp_actual->kds_lostevents = FALSE;

					/*
					 * The earliest event we can trust is the first one in this
					 * stolen storage unit.
					 */
					uint64_t lost_time =
					    kdbg_get_timestamp(&kdsp_actual->kds_records[0]);
					if (kd_ctrl_page.oldest_time < lost_time) {
						/*
						 * If this is the first time we've seen lost events for
						 * this gap, record its timestamp as the oldest
						 * timestamp we're willing to merge for the lost events
						 * tracepoint.
						 */
						kd_ctrl_page.oldest_time = barrier_min = lost_time;
						lostcpu = cpu;
					}
				}

				t = kdbg_get_timestamp(&kdsp_actual->kds_records[rcursor]);

				if ((t > barrier_max) && (barrier_max > 0)) {
					if (kdbg_debug) {
						printf("kdebug: FUTURE EVENT: debugid %#8x: "
						    "time %lld from CPU %u "
						    "(barrier at time %lld, read %lu events)\n",
						    kdsp_actual->kds_records[rcursor].debugid,
						    t, cpu, barrier_max, *number + tempbuf_number);
					}
					/*
					 * Need to flush IOPs again before we can sort any more
					 * data from the buffers.
					 */
					out_of_events = TRUE;
					break;
				}
				if (t < kdsp_actual->kds_timestamp) {
					/*
					 * This indicates the event emitter hasn't completed
					 * filling in the event (becuase we're looking at the
					 * buffer that the record head is using). The max barrier
					 * timestamp should have saved us from seeing these kinds
					 * of things, but other CPUs might be slow on the up-take.
					 *
					 * Bail out so we don't get out-of-order events by
					 * continuing to read events from other CPUs' events.
					 */
					out_of_events = TRUE;
					break;
				}

				/*
				 * Ignore events that have aged out due to wrapping or storage
				 * unit exhaustion while merging events.
				 */
				if (t < barrier_min) {
					kdsp_actual->kds_readlast++;

					if (kdsp_actual->kds_readlast >= EVENTS_PER_STORAGE_UNIT) {
						release_storage_unit(cpu, kdsp.raw);

						if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
							goto next_cpu;
						}
						kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
					}

					goto next_event;
				}

				/*
				 * Don't worry about merging any events -- just walk through
				 * the CPUs and find the latest timestamp of lost events.
				 */
				if (lostevents) {
					continue;
				}

				if (t < earliest_time) {
					earliest_time = t;
					min_kdbp = kdbp;
					min_cpu = cpu;
				}
			}
			if (lostevents) {
				/*
				 * If any lost events were hit in the buffers, emit an event
				 * with the latest timestamp.
				 */
				kdbg_set_timestamp_and_cpu(&lostevent, barrier_min, lostcpu);
				*tempbuf = lostevent;
				goto nextevent;
			}
			if (min_kdbp == NULL) {
				/* All buffers ran empty. */
				out_of_events = TRUE;
			}
			if (out_of_events) {
				break;
			}

			kdsp = min_kdbp->kd_list_head;
			kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);

			/* Copy earliest event into merged events scratch buffer. */
			*tempbuf = kdsp_actual->kds_records[kdsp_actual->kds_readlast++];

			if (kdsp_actual->kds_readlast == EVENTS_PER_STORAGE_UNIT) {
				release_storage_unit(min_cpu, kdsp.raw);
			}

			/*
			 * Watch for out of order timestamps (from IOPs).
			 */
			if (earliest_time < min_kdbp->kd_prev_timebase) {
				/*
				 * If we haven't already, emit a retrograde events event.
				 * Otherwise, ignore this event.
				 */
				if (traced_retrograde) {
					continue;
				}

				kdbg_set_timestamp_and_cpu(tempbuf, min_kdbp->kd_prev_timebase, kdbg_get_cpu(tempbuf));
				tempbuf->arg1 = tempbuf->debugid;
				tempbuf->arg2 = earliest_time;
				tempbuf->debugid = TRACE_RETROGRADE_EVENTS;
				traced_retrograde = true;
			} else {
				min_kdbp->kd_prev_timebase = earliest_time;
			}
nextevent:
			tempbuf_count--;
			tempbuf_number++;
			tempbuf++;

			if ((RAW_file_written += sizeof(kd_buf)) >= RAW_FLUSH_SIZE) {
				break;
			}
		}
		if (tempbuf_number) {
			/*
			 * Remember the latest timestamp of events that we've merged so we
			 * don't think we've lost events later.
			 */
			uint64_t latest_time = kdbg_get_timestamp(tempbuf - 1);
			if (kd_ctrl_page.oldest_time < latest_time) {
				kd_ctrl_page.oldest_time = latest_time;
			}
			if (file_version == RAW_VERSION3) {
				if (!(kdbg_write_v3_event_chunk_header(buffer, V3_RAW_EVENTS, (tempbuf_number * sizeof(kd_buf)), vp, ctx))) {
					error = EFAULT;
					goto check_error;
				}
				if (buffer) {
					buffer += (sizeof(kd_chunk_header_v3) + sizeof(uint64_t));
				}

				assert(count >= (sizeof(kd_chunk_header_v3) + sizeof(uint64_t)));
				count -= (sizeof(kd_chunk_header_v3) + sizeof(uint64_t));
				*number += (sizeof(kd_chunk_header_v3) + sizeof(uint64_t));
			}
			if (vp) {
				size_t write_size = tempbuf_number * sizeof(kd_buf);

				error = kdbg_write_to_vnode((caddr_t)kdcopybuf, write_size, vp, ctx, RAW_file_offset);
				if (!error) {
					RAW_file_offset += write_size;
				}

				if (RAW_file_written >= RAW_FLUSH_SIZE) {
					error = VNOP_FSYNC(vp, MNT_NOWAIT, ctx);

					RAW_file_written = 0;
				}
			} else {
				error = copyout(kdcopybuf, buffer, tempbuf_number * sizeof(kd_buf));
				buffer += (tempbuf_number * sizeof(kd_buf));
			}
check_error:
			if (error) {
				*number = 0;
				error = EINVAL;
				break;
			}
			count -= tempbuf_number;
			*number += tempbuf_number;
		}
		if (out_of_events == TRUE) {
			/*
			 * all trace buffers are empty
			 */
			break;
		}

		if ((tempbuf_count = count) > KDCOPYBUF_COUNT) {
			tempbuf_count = KDCOPYBUF_COUNT;
		}
	}
	if (!(old_kdebug_flags & KDBG_NOWRAP)) {
		enable_wrap(old_kdebug_slowcheck);
	}
	thread_clear_eager_preempt(current_thread());
	return error;
}
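
/*
 * Illustrative sketch, not part of the original source, of the merge
 * performed above, reduced to its core: repeatedly pick the per-CPU list
 * whose oldest unread record has the smallest timestamp, bounded by
 * barrier_min/barrier_max.
 *
 *	while (space_left && !out_of_events) {
 *		pick the CPU with the smallest next-record timestamp in
 *		    [barrier_min, barrier_max];
 *		if (none) { out_of_events = true; break; }
 *		copy that record into kdcopybuf and advance its read cursor;
 *	}
 *	flush kdcopybuf to the user buffer or vnode;
 *
 * This pseudocode only restates the loop structure above; it is not a
 * separate implementation.
 */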
kdbg_test(size_t flavor)
{
	int code = 0;
	int dummy_iop = 0;

#define KDEBUG_TEST_CODE(code) BSDDBG_CODE(DBG_BSD_KDEBUG_TEST, (code))

	/* try each macro */
	KDBG(KDEBUG_TEST_CODE(code)); code++;
	KDBG(KDEBUG_TEST_CODE(code), 1); code++;
	KDBG(KDEBUG_TEST_CODE(code), 1, 2); code++;
	KDBG(KDEBUG_TEST_CODE(code), 1, 2, 3); code++;
	KDBG(KDEBUG_TEST_CODE(code), 1, 2, 3, 4); code++;

	KDBG_RELEASE(KDEBUG_TEST_CODE(code)); code++;
	KDBG_RELEASE(KDEBUG_TEST_CODE(code), 1); code++;
	KDBG_RELEASE(KDEBUG_TEST_CODE(code), 1, 2); code++;
	KDBG_RELEASE(KDEBUG_TEST_CODE(code), 1, 2, 3); code++;
	KDBG_RELEASE(KDEBUG_TEST_CODE(code), 1, 2, 3, 4); code++;

	KDBG_FILTERED(KDEBUG_TEST_CODE(code)); code++;
	KDBG_FILTERED(KDEBUG_TEST_CODE(code), 1); code++;
	KDBG_FILTERED(KDEBUG_TEST_CODE(code), 1, 2); code++;
	KDBG_FILTERED(KDEBUG_TEST_CODE(code), 1, 2, 3); code++;
	KDBG_FILTERED(KDEBUG_TEST_CODE(code), 1, 2, 3, 4); code++;

	KDBG_RELEASE_NOPROCFILT(KDEBUG_TEST_CODE(code)); code++;
	KDBG_RELEASE_NOPROCFILT(KDEBUG_TEST_CODE(code), 1); code++;
	KDBG_RELEASE_NOPROCFILT(KDEBUG_TEST_CODE(code), 1, 2); code++;
	KDBG_RELEASE_NOPROCFILT(KDEBUG_TEST_CODE(code), 1, 2, 3); code++;
	KDBG_RELEASE_NOPROCFILT(KDEBUG_TEST_CODE(code), 1, 2, 3, 4); code++;

	KDBG_DEBUG(KDEBUG_TEST_CODE(code)); code++;
	KDBG_DEBUG(KDEBUG_TEST_CODE(code), 1); code++;
	KDBG_DEBUG(KDEBUG_TEST_CODE(code), 1, 2); code++;
	KDBG_DEBUG(KDEBUG_TEST_CODE(code), 1, 2, 3); code++;
	KDBG_DEBUG(KDEBUG_TEST_CODE(code), 1, 2, 3, 4); code++;

	if (kd_ctrl_page.kdebug_iops) {
		/* avoid the assertion in kernel_debug_enter for a valid IOP */
		dummy_iop = kd_ctrl_page.kdebug_iops[0].cpu_id;
	}

	/* ensure old timestamps are not emitted from kernel_debug_enter */
	kernel_debug_enter(dummy_iop, KDEBUG_TEST_CODE(code),
	    100 /* very old timestamp */, 0, 0, 0,
	    0, (uintptr_t)thread_tid(current_thread()));

	kernel_debug_enter(dummy_iop, KDEBUG_TEST_CODE(code),
	    kdbg_timestamp(), 0, 0, 0, 0,
	    (uintptr_t)thread_tid(current_thread()));

#undef KDEBUG_TEST_CODE
}
kdebug_init(unsigned int n_events, char *filter_desc, boolean_t wrapping)
{
	assert(filter_desc != NULL);

#if defined(__x86_64__)
	/* only trace MACH events when outputting kdebug to serial */
	if (kdebug_serial) {
		n_events = 1;
		if (filter_desc[0] == '\0') {
			filter_desc[0] = 'C';
			filter_desc[1] = '1';
			filter_desc[2] = '\0';
		}
	}
#endif /* defined(__x86_64__) */

	if (log_leaks && n_events == 0) {
		n_events = 200000;
	}

	kdebug_trace_start(n_events, filter_desc, wrapping, FALSE);
}
kdbg_set_typefilter_string(const char *filter_desc)
{
	char *end = NULL;

	ktrace_assert_lock_held();

	assert(filter_desc != NULL);

	typefilter_reject_all(kdbg_typefilter);
	typefilter_allow_class(kdbg_typefilter, DBG_TRACE);

	/* if the filter description starts with a number, assume it's a csc */
	if (filter_desc[0] >= '0' && filter_desc[0] <= '9') {
		unsigned long csc = strtoul(filter_desc, &end, 0);
		if (filter_desc != end && csc <= KDBG_CSC_MAX) {
			typefilter_allow_csc(kdbg_typefilter, csc);
		}
		return;
	}

	while (filter_desc[0] != '\0') {
		unsigned long allow_value;

		char filter_type = filter_desc[0];
		if (filter_type != 'C' && filter_type != 'S') {
			return;
		}
		filter_desc++;

		allow_value = strtoul(filter_desc, &end, 0);
		if (filter_desc == end) {
			/* cannot parse as integer */
			return;
		}

		switch (filter_type) {
		case 'C':
			if (allow_value <= KDBG_CLASS_MAX) {
				typefilter_allow_class(kdbg_typefilter, allow_value);
			} else {
				/* illegal class */
				return;
			}
			break;
		case 'S':
			if (allow_value <= KDBG_CSC_MAX) {
				typefilter_allow_csc(kdbg_typefilter, allow_value);
			} else {
				/* illegal class subclass */
				return;
			}
			break;
		default:
			return;
		}

		/* advance to next filter entry */
		filter_desc = end;
		if (filter_desc[0] == ',') {
			filter_desc++;
		}
	}
}
/*
 * This function is meant to be called from the bootstrap thread or coming out
 * of acpi_idle_kernel.
 */
kdebug_trace_start(unsigned int n_events, const char *filter_desc,
    boolean_t wrapping, boolean_t at_wake)
{
	if (!n_events) {
		kd_early_done = true;
		return;
	}

	ktrace_start_single_threaded();

	ktrace_kernel_configure(KTRACE_KDEBUG);

	kdbg_set_nkdbufs(n_events);

	kernel_debug_string_early("start_kern_tracing");

	if (kdbg_reinit(TRUE)) {
		printf("error from kdbg_reinit, kernel tracing not started\n");
		goto out;
	}

	/*
	 * Wrapping is disabled because boot and wake tracing is interested in
	 * the earliest events, at the expense of later ones.
	 */
	if (!wrapping) {
		uint32_t old1, old2;
		(void)disable_wrap(&old1, &old2);
	}

	if (filter_desc && filter_desc[0] != '\0') {
		if (kdbg_initialize_typefilter(NULL) == KERN_SUCCESS) {
			kdbg_set_typefilter_string(filter_desc);
			kdbg_enable_typefilter();
		}
	}

	/*
	 * Hold off interrupts between getting a thread map and enabling trace
	 * and until the early traces are recorded.
	 */
	boolean_t s = ml_set_interrupts_enabled(FALSE);

	kdbg_set_tracing_enabled(TRUE, KDEBUG_ENABLE_TRACE | (kdebug_serial ?
	    KDEBUG_ENABLE_SERIAL : 0));

	/*
	 * Transfer all very early events from the static buffer into the real
	 * buffers.
	 */
	kernel_debug_early_end();

	ml_set_interrupts_enabled(s);

	printf("kernel tracing started with %u events\n", n_events);

#if KDEBUG_MOJO_TRACE
	if (kdebug_serial) {
		printf("serial output enabled with %lu named events\n",
		    sizeof(kd_events) / sizeof(kd_event_t));
	}
#endif /* KDEBUG_MOJO_TRACE */

out:
	ktrace_end_single_threaded();
}
kdbg_dump_trace_to_file(const char *filename)
{
	vfs_context_t ctx;
	vnode_t vp;
	size_t write_size;
	int ret;

	ktrace_lock();

	if (!(kdebug_enable & KDEBUG_ENABLE_TRACE)) {
		goto out;
	}

	if (ktrace_get_owning_pid() != 0) {
		/*
		 * Another process owns ktrace and is still active, disable tracing to
		 * prevent wrapping.
		 */
		kdebug_enable = 0;
		kd_ctrl_page.enabled = 0;
		commpage_update_kdebug_state();
		goto out;
	}

	KDBG_RELEASE(TRACE_WRITING_EVENTS | DBG_FUNC_START);

	kdebug_enable = 0;
	kd_ctrl_page.enabled = 0;
	commpage_update_kdebug_state();

	ctx = vfs_context_kernel();

	if (vnode_open(filename, (O_CREAT | FWRITE | O_NOFOLLOW), 0600, 0, &vp, ctx)) {
		goto out;
	}

	kdbg_write_thread_map(vp, ctx);

	write_size = nkdbufs * sizeof(kd_buf);
	ret = kdbg_read(0, &write_size, vp, ctx, RAW_VERSION1);
	if (ret) {
		goto out_close;
	}

	/*
	 * Wait to synchronize the file to capture the I/O in the
	 * TRACE_WRITING_EVENTS interval.
	 */
	ret = VNOP_FSYNC(vp, MNT_WAIT, ctx);

	/*
	 * Balance the starting TRACE_WRITING_EVENTS tracepoint manually.
	 */
	kd_buf end_event = {
		.debugid = TRACE_WRITING_EVENTS | DBG_FUNC_END,
		.arg1 = write_size,
		.arg2 = ret,
		.arg5 = thread_tid(current_thread()),
	};
	kdbg_set_timestamp_and_cpu(&end_event, kdbg_timestamp(),
	    cpu_number());

	/* this is best effort -- ignore any errors */
	(void)kdbg_write_to_vnode((caddr_t)&end_event, sizeof(kd_buf), vp, ctx,
	    RAW_file_offset);

out_close:
	vnode_close(vp, FWRITE, ctx);
	sync(current_proc(), (void *)NULL, (int *)NULL);

out:
	ktrace_unlock();
}
kdbg_sysctl_continuous SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int value = kdbg_continuous_time;
	int ret = sysctl_io_number(req, value, sizeof(value), &value, NULL);

	if (ret || !req->newptr) {
		return ret;
	}

	kdbg_continuous_time = value;
	return 0;
}
SYSCTL_NODE(_kern, OID_AUTO, kdbg, CTLFLAG_RD | CTLFLAG_LOCKED, 0,
    "kdbg");

SYSCTL_PROC(_kern_kdbg, OID_AUTO, experimental_continuous,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0,
    sizeof(int), kdbg_sysctl_continuous, "I",
    "Set kdebug to use mach_continuous_time");
SYSCTL_INT(_kern_kdbg, OID_AUTO, debug,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &kdbg_debug, 0, "Set kdebug debug mode");

SYSCTL_QUAD(_kern_kdbg, OID_AUTO, oldest_time,
    CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED,
    &kd_ctrl_page.oldest_time,
    "Find the oldest timestamp still in trace");
#if KDEBUG_MOJO_TRACE
binary_search(uint32_t id)
{
	int low, high, mid;

	low = 0;
	high = (int)(sizeof(kd_events) / sizeof(kd_event_t)) - 1;

	while (TRUE) {
		mid = (low + high) / 2;

		if (low > high) {
			return NULL; /* failed */
		} else if (low + 1 >= high) {
			/* We have a match */
			if (kd_events[high].id == id) {
				return &kd_events[high];
			} else if (kd_events[low].id == id) {
				return &kd_events[low];
			} else {
				return NULL;    /* search failed */
			}
		} else if (id < kd_events[mid].id) {
			high = mid;
		} else {
			low = mid;
		}
	}
}
/*
 * Look up event id to get name string.
 * Using a per-cpu cache of a single entry
 * before resorting to a binary search of the full table.
 */
static kd_event_t *last_hit[MAX_CPUS];
event_lookup_cache(uint32_t cpu, uint32_t id)
{
	if (last_hit[cpu] == NULL || last_hit[cpu]->id != id) {
		last_hit[cpu] = binary_search(id);
	}
	return last_hit[cpu];
}
static uint64_t kd_last_timstamp;

kdebug_serial_print(
	uint32_t  cpunum,
	uint32_t  debugid,
	uint64_t  timestamp,
	uintptr_t arg1,
	uintptr_t arg2,
	uintptr_t arg3,
	uintptr_t arg4,
	uintptr_t threadid
	)
{
	char kprintf_line[192];
	char event[40];
	uint64_t us = timestamp / NSEC_PER_USEC;
	uint64_t us_tenth = (timestamp % NSEC_PER_USEC) / 100;
	uint64_t delta = timestamp - kd_last_timstamp;
	uint64_t delta_us = delta / NSEC_PER_USEC;
	uint64_t delta_us_tenth = (delta % NSEC_PER_USEC) / 100;
	uint32_t event_id = debugid & KDBG_EVENTID_MASK;
	const char *command;
	const char *bra;
	const char *ket;
	kd_event_t *ep;

	/* event time and delta from last */
	snprintf(kprintf_line, sizeof(kprintf_line),
	    "%11llu.%1llu %8llu.%1llu ",
	    us, us_tenth, delta_us, delta_us_tenth);

	/* event (id or name) - start prefixed by "[", end postfixed by "]" */
	bra = (debugid & DBG_FUNC_START) ? "[" : " ";
	ket = (debugid & DBG_FUNC_END) ? "]" : " ";
	ep = event_lookup_cache(cpunum, event_id);
	if (ep) {
		if (strlen(ep->name) < sizeof(event) - 3) {
			snprintf(event, sizeof(event), "%s%s%s",
			    bra, ep->name, ket);
		} else {
			snprintf(event, sizeof(event), "%s%x(name too long)%s",
			    bra, event_id, ket);
		}
	} else {
		snprintf(event, sizeof(event), "%s%x%s",
		    bra, event_id, ket);
	}
	snprintf(kprintf_line + strlen(kprintf_line),
	    sizeof(kprintf_line) - strlen(kprintf_line),
	    "%-40s  ", event);

	/* arg1 .. arg4 with special cases for strings */
	switch (event_id) {
	case VFS_LOOKUP_DONE:
		if (debugid & DBG_FUNC_START) {
			/* arg1 hex then arg2..arg4 chars */
			snprintf(kprintf_line + strlen(kprintf_line),
			    sizeof(kprintf_line) - strlen(kprintf_line),
			    "%-16lx %-8s%-8s%-8s ",
			    arg1, (char*)&arg2, (char*)&arg3, (char*)&arg4);
			break;
		}
	/* else fall through for arg1..arg4 chars */
	case TRACE_STRING_EXEC:
	case TRACE_STRING_NEWTHREAD:
	case TRACE_INFO_STRING:
		snprintf(kprintf_line + strlen(kprintf_line),
		    sizeof(kprintf_line) - strlen(kprintf_line),
		    "%-8s%-8s%-8s%-8s ",
		    (char*)&arg1, (char*)&arg2, (char*)&arg3, (char*)&arg4);
		break;
	default:
		snprintf(kprintf_line + strlen(kprintf_line),
		    sizeof(kprintf_line) - strlen(kprintf_line),
		    "%-16lx %-16lx %-16lx %-16lx",
		    arg1, arg2, arg3, arg4);
		break;
	}

	/* threadid, cpu and command name */
	if (threadid == (uintptr_t)thread_tid(current_thread()) &&
	    current_proc() &&
	    current_proc()->p_comm[0]) {
		command = current_proc()->p_comm;
	} else {
		command = "-";
	}
	snprintf(kprintf_line + strlen(kprintf_line),
	    sizeof(kprintf_line) - strlen(kprintf_line),
	    " %-16lx %-2d %s\n",
	    threadid, cpunum, command);

	kprintf("%s", kprintf_line);
	kd_last_timstamp = timestamp;
}
#endif /* KDEBUG_MOJO_TRACE */