/*
 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
 *
 * @Apple_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License"). You may not use this file except in compliance with the
 * License. Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc_internal.h>
#include <sys/sysctl.h>
#include <sys/kdebug.h>
#include <sys/kauth.h>
#include <sys/ktrace.h>
#include <sys/sysproto.h>
#include <sys/bsdtask_info.h>
#include <sys/random.h>

#include <mach/clock_types.h>
#include <mach/mach_types.h>
#include <mach/mach_time.h>
#include <mach/mach_vm.h>
#include <machine/atomic.h>
#include <machine/machine_routines.h>

#include <mach/machine.h>
#include <mach/vm_map.h>

#if defined(__i386__) || defined(__x86_64__)
#include <i386/rtclock_protos.h>
#include <i386/machine_routines.h>
#endif

#include <kern/clock.h>
#include <kern/thread.h>
#include <kern/task.h>
#include <kern/debug.h>
#include <kern/kalloc.h>
#include <kern/cpu_number.h>
#include <kern/cpu_data.h>
#include <kern/assert.h>
#include <kern/telemetry.h>
#include <kern/sched_prim.h>
#include <vm/vm_kern.h>

#include <kperf/kperf.h>
#include <pexpert/device_tree.h>

#include <sys/malloc.h>
#include <sys/mcache.h>

#include <sys/vnode.h>
#include <sys/vnode_internal.h>
#include <sys/fcntl.h>
#include <sys/file_internal.h>

#include <sys/param.h> /* for isset() */

#include <mach/mach_host.h> /* for host_info() */
#include <libkern/OSAtomic.h>

#include <machine/pal_routines.h>
#include <machine/atomic.h>
extern unsigned int wake_nkdbufs;
extern unsigned int trace_wrap;
/*
 * IOP(s) are auxiliary cores that want to participate in kdebug event logging.
 * They are registered dynamically. Each is assigned a cpu_id at registration.
 *
 * NOTE: IOP trace events may not use the same clock hardware as "normal"
 * cpus. There is an effort made to synchronize the IOP timebase with the
 * AP, but it should be understood that there may be discrepancies.
 *
 * Once registered, an IOP is permanent; it cannot be unloaded/unregistered.
 * The current implementation depends on this for thread safety.
 *
 * New registrations occur by allocating a kd_iop struct and assigning
 * a provisional cpu_id of list_head->cpu_id + 1. Then a CAS to claim the
 * list_head pointer resolves any races.
 *
 * You may safely walk the kd_iops list at any time, without holding locks.
 *
 * When allocating buffers, the current kd_iops head is captured. Any operations
 * that depend on the buffer state (such as flushing IOP traces on reads,
 * etc.) should use the captured list head. This will allow registrations to
 * take place while trace is in use.
 */
typedef struct kd_iop {
	kd_callback_t callback;
	uint32_t      cpu_id;
	uint64_t      last_timestamp; /* Prevent timer rollback */
	struct kd_iop *next;
} kd_iop_t;

static kd_iop_t *kd_iops = NULL;
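
/*
 * A minimal sketch of the registration scheme described above (the real
 * logic lives in kernel_debug_register_callback(), later in this file):
 * sample the list head once, derive a provisional cpu_id from it, then
 * publish with a CAS, retrying if another registration won the race.
 *
 *   do {
 *       iop->next   = kd_iops;
 *       iop->cpu_id = iop->next ? (iop->next->cpu_id + 1) : kdbg_cpu_count(false);
 *   } while (!OSCompareAndSwapPtr(iop->next, iop, (void * volatile *)&kd_iops));
 */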
/*
 * A typefilter is an 8KB bitmap that is used to selectively filter events
 * being recorded. It is able to individually address every class & subclass.
 *
 * There is a shared typefilter in the kernel which is lazily allocated. Once
 * allocated, the shared typefilter is never deallocated. The shared typefilter
 * is also mapped on demand into userspace processes that invoke the kdebug_trace
 * API from Libsyscall. When mapped into a userspace process, the memory is
 * read only, and does not have a fixed address.
 *
 * It is a requirement that the kernel's shared typefilter always pass DBG_TRACE
 * events. This is enforced automatically, by having the needed bits set any
 * time the shared typefilter is mutated.
 */
typedef uint8_t* typefilter_t;

static typefilter_t kdbg_typefilter;
static mach_port_t  kdbg_typefilter_memory_entry;
/*
 * There are 3 combinations of page sizes:
 *
 * The typefilter is exactly 8KB. In the first two scenarios, we would like
 * to use 2 pages exactly; in the third scenario we must make certain that
 * a full page is allocated so we do not inadvertently share 8KB of random
 * data to userspace. The round_page_32 macro rounds to kernel page size.
 */
#define TYPEFILTER_ALLOC_SIZE MAX(round_page_32(KDBG_TYPEFILTER_BITMAP_SIZE), KDBG_TYPEFILTER_BITMAP_SIZE)
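
/*
 * Indexing sketch (illustration only): the bitmap holds one bit per
 * class/subclass pair -- 256 classes * 256 subclasses = 64 Kbit = 8KB.
 * A debugid's class-subclass (CSC) field selects the bit, e.g.:
 *
 *   uint32_t debugid = 0x01300008;                        // class 0x01, subclass 0x30
 *   bool allowed = isset(tf, KDBG_EXTRACT_CSC(debugid));  // tests bit 0x0130
 */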
static typefilter_t
typefilter_create(void)
{
	typefilter_t tf;
	if (KERN_SUCCESS == kmem_alloc(kernel_map, (vm_offset_t *)&tf, TYPEFILTER_ALLOC_SIZE, VM_KERN_MEMORY_DIAG)) {
		memset(&tf[KDBG_TYPEFILTER_BITMAP_SIZE], 0, TYPEFILTER_ALLOC_SIZE - KDBG_TYPEFILTER_BITMAP_SIZE);
		return tf;
	}
	return NULL;
}

static void
typefilter_deallocate(typefilter_t tf)
{
	assert(tf != kdbg_typefilter);
	kmem_free(kernel_map, (vm_offset_t)tf, TYPEFILTER_ALLOC_SIZE);
}

static void
typefilter_copy(typefilter_t dst, typefilter_t src)
{
	memcpy(dst, src, KDBG_TYPEFILTER_BITMAP_SIZE);
}
static void
typefilter_reject_all(typefilter_t tf)
{
	memset(tf, 0, KDBG_TYPEFILTER_BITMAP_SIZE);
}

static void
typefilter_allow_all(typefilter_t tf)
{
	memset(tf, ~0, KDBG_TYPEFILTER_BITMAP_SIZE);
}

static void
typefilter_allow_class(typefilter_t tf, uint8_t class)
{
	const uint32_t BYTES_PER_CLASS = 256 / 8; // 256 subclasses, 1 bit each
	memset(&tf[class * BYTES_PER_CLASS], 0xFF, BYTES_PER_CLASS);
}
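
/*
 * For example (illustration only): allowing class 0x01 (DBG_MACH) sets
 * bytes [0x01 * 32, 0x01 * 32 + 31] of the bitmap to 0xFF, i.e. all 256
 * subclass bits for that class.
 */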
static void
typefilter_allow_csc(typefilter_t tf, uint16_t csc)
{
	setbit(tf, csc);
}

static bool
typefilter_is_debugid_allowed(typefilter_t tf, uint32_t id)
{
	return isset(tf, KDBG_EXTRACT_CSC(id));
}

static mach_port_t
typefilter_create_memory_entry(typefilter_t tf)
{
	mach_port_t memory_entry = MACH_PORT_NULL;
	memory_object_size_t size = TYPEFILTER_ALLOC_SIZE;

	mach_make_memory_entry_64(kernel_map,
	    &size,
	    (memory_object_offset_t)tf,
	    VM_PROT_READ,
	    &memory_entry,
	    MACH_PORT_NULL);

	return memory_entry;
}
static int  kdbg_copyin_typefilter(user_addr_t addr, size_t size);
static void kdbg_enable_typefilter(void);
static void kdbg_disable_typefilter(void);

/*
 * External prototypes
 */

void task_act_iterate_wth_args(task_t, void (*)(thread_t, void *), void *);
void commpage_update_kdebug_state(void); /* XXX sign */

extern int log_leaks;

/*
 * This flag is for testing purposes only -- it's highly experimental and tools
 * have not been updated to support it.
 */
static bool kdbg_continuous_time = false;

static inline uint64_t
kdbg_timestamp(void)
{
	if (kdbg_continuous_time) {
		return mach_continuous_time();
	} else {
		return mach_absolute_time();
	}
}

static int kdbg_debug = 0;

int kdbg_control(int *, u_int, user_addr_t, size_t *);

static int kdbg_read(user_addr_t, size_t *, vnode_t, vfs_context_t, uint32_t);
static int kdbg_readcpumap(user_addr_t, size_t *);
static int kdbg_readthrmap_v3(user_addr_t, size_t, int);
static int kdbg_readcurthrmap(user_addr_t, size_t *);
static int kdbg_setreg(kd_regtype *);
static int kdbg_setpidex(kd_regtype *);
static int kdbg_setpid(kd_regtype *);
static void kdbg_thrmap_init(void);
static int kdbg_reinit(bool);
static int kdbg_bootstrap(bool);
static int kdbg_test(size_t flavor);

static int kdbg_write_v1_header(bool write_thread_map, vnode_t vp, vfs_context_t ctx);
static int kdbg_write_thread_map(vnode_t vp, vfs_context_t ctx);
static int kdbg_copyout_thread_map(user_addr_t buffer, size_t *buffer_size);
static void kdbg_clear_thread_map(void);

static bool kdbg_wait(uint64_t timeout_ms, bool locked_wait);
static void kdbg_wakeup(void);
int kdbg_cpumap_init_internal(kd_iop_t *iops, uint32_t cpu_count,
    uint8_t **cpumap, uint32_t *cpumap_size);

static kd_threadmap *kdbg_thrmap_init_internal(size_t max_count,
    vm_size_t *map_size, vm_size_t *map_count);

static bool kdebug_current_proc_enabled(uint32_t debugid);
static errno_t kdebug_check_trace_string(uint32_t debugid, uint64_t str_id);

int kdbg_write_v3_header(user_addr_t, size_t *, int);
int kdbg_write_v3_chunk_header(user_addr_t buffer, uint32_t tag,
    uint32_t sub_tag, uint64_t length,
    vnode_t vp, vfs_context_t ctx);

user_addr_t kdbg_write_v3_event_chunk_header(user_addr_t buffer, uint32_t tag,
    uint64_t length, vnode_t vp,
    vfs_context_t ctx);

static int create_buffers(bool);
static void delete_buffers(void);
extern int tasks_count;
extern int threads_count;
extern void IOSleep(int);

/* trace enable status */
unsigned int kdebug_enable = 0;

/* A static buffer to record events prior to the start of regular logging */

#define KD_EARLY_BUFFER_SIZE  (16 * 1024)
#define KD_EARLY_BUFFER_NBUFS (KD_EARLY_BUFFER_SIZE / sizeof(kd_buf))
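
/*
 * Sizing note (assuming the usual 64-byte LP64 kd_buf layout): 16KB of
 * static buffer gives KD_EARLY_BUFFER_NBUFS = 16384 / 64 = 256 early events.
 */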
#if defined(__x86_64__)
__attribute__((aligned(KD_EARLY_BUFFER_SIZE)))
static kd_buf kd_early_buffer[KD_EARLY_BUFFER_NBUFS];
#else /* defined(__x86_64__) */
/*
 * On ARM, the space for this is carved out by osfmk/arm/data.s -- clang
 * has problems aligning to greater than 4K.
 */
extern kd_buf kd_early_buffer[KD_EARLY_BUFFER_NBUFS];
#endif /* !defined(__x86_64__) */

static unsigned int kd_early_index = 0;
static bool kd_early_overflow = false;
static bool kd_early_done = false;
#define SLOW_NOLOG  0x01
#define SLOW_CHECKS 0x02

#define EVENTS_PER_STORAGE_UNIT   2048
#define MIN_STORAGE_UNITS_PER_CPU 4

#define POINTER_FROM_KDS_PTR(x) (&kd_bufs[x.buffer_index].kdsb_addr[x.offset])

union kds_ptr {
	struct {
		uint32_t buffer_index:21;
		uint32_t offset:11;
	};
	uint32_t raw;
};

struct kd_storage {
	union kds_ptr kds_next;
	uint32_t kds_bufindx;
	uint32_t kds_bufcnt;
	uint32_t kds_readlast;
	bool     kds_lostevents;
	uint64_t kds_timestamp;

	kd_buf kds_records[EVENTS_PER_STORAGE_UNIT];
};
#define MAX_BUFFER_SIZE            (1024 * 1024 * 128)
#define N_STORAGE_UNITS_PER_BUFFER (MAX_BUFFER_SIZE / sizeof(struct kd_storage))
static_assert(N_STORAGE_UNITS_PER_BUFFER <= 0x7ff,
    "shouldn't overflow kds_ptr.offset");
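
/*
 * Decoding sketch: a kds_ptr is a compact two-level index rather than a
 * real pointer -- buffer_index selects one of the kd_bufs[] allocations and
 * offset selects a kd_storage within it, so
 *
 *   struct kd_storage *kds = POINTER_FROM_KDS_PTR(p);
 *
 * expands to &kd_bufs[p.buffer_index].kdsb_addr[p.offset]. The 11-bit
 * offset field is why N_STORAGE_UNITS_PER_BUFFER must not exceed 0x7ff.
 */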
struct kd_storage_buffers {
	struct kd_storage *kdsb_addr;
	uint32_t kdsb_size;
};

#define KDS_PTR_NULL 0xffffffff
struct kd_storage_buffers *kd_bufs = NULL;
int n_storage_units = 0;
unsigned int n_storage_buffers = 0;
int n_storage_threshold = 0;
int kds_waiter = 0;

struct kd_bufinfo {
	union kds_ptr kd_list_head;
	union kds_ptr kd_list_tail;
	bool kd_lostevents;
	uint32_t num_bufs;
	uint64_t kd_prev_timebase;
} __attribute__((aligned(MAX_CPU_CACHE_LINE_SIZE)));
/*
 * In principle, this control block can be shared in DRAM with other
 * coprocessors and runtimes, for configuring what tracing is enabled.
 */
struct kd_ctrl_page_t {
	union kds_ptr kds_free_list;
	uint32_t enabled:1;
	int kds_inuse_count;
	uint32_t kdebug_flags;
	uint32_t kdebug_slowcheck;
	uint64_t oldest_time;
	/*
	 * The number of kd_bufinfo structs allocated may not match the current
	 * number of active cpus. We capture the iops list head at initialization
	 * which we could use to calculate the number of cpus we allocated data for,
	 * unless it happens to be null. To avoid that case, we explicitly also
	 * capture a cpu count.
	 */
	kd_iop_t *kdebug_iops;
	uint32_t kdebug_cpus;
} kd_ctrl_page = {
	.kds_free_list = { .raw = KDS_PTR_NULL },
	.kdebug_slowcheck = SLOW_NOLOG,
};

struct kd_bufinfo *kdbip = NULL;
#define KDCOPYBUF_COUNT 8192
#define KDCOPYBUF_SIZE  (KDCOPYBUF_COUNT * sizeof(kd_buf))

#define PAGE_4KB  4096
#define PAGE_16KB 16384

kd_buf *kdcopybuf = NULL;

unsigned int nkdbufs = 0;
unsigned int kdlog_beg = 0;
unsigned int kdlog_end = 0;
unsigned int kdlog_value1 = 0;
unsigned int kdlog_value2 = 0;
unsigned int kdlog_value3 = 0;
unsigned int kdlog_value4 = 0;
static lck_spin_t *kdw_spin_lock;
static lck_spin_t *kds_spin_lock;

kd_threadmap *kd_mapptr = 0;
vm_size_t kd_mapsize = 0;
vm_size_t kd_mapcount = 0;

off_t RAW_file_offset = 0;
int RAW_file_written = 0;
#define RAW_FLUSH_SIZE (2 * 1024 * 1024)

/*
 * A globally increasing counter for identifying strings in trace. Starts at
 * 1 because 0 is a reserved return value.
 */
__attribute__((aligned(MAX_CPU_CACHE_LINE_SIZE)))
static uint64_t g_curr_str_id = 1;

#define STR_ID_SIG_OFFSET (48)
#define STR_ID_MASK       ((1ULL << STR_ID_SIG_OFFSET) - 1)
#define STR_ID_SIG_MASK   (~STR_ID_MASK)

/*
 * A bit pattern for identifying string IDs generated by
 * kdebug_trace_string(2).
 */
static uint64_t g_str_id_signature = (0x70acULL << STR_ID_SIG_OFFSET);
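
/*
 * Layout sketch for a generated string ID: the low 48 bits come from
 * g_curr_str_id and the high 16 bits carry the signature, roughly
 *
 *   str_id = (OSIncrementAtomic64((SInt64 *)&g_curr_str_id) & STR_ID_MASK)
 *       | g_str_id_signature;            // 0x70ac000000000000 | counter
 *
 * kdebug_check_trace_string() later verifies caller-supplied IDs against
 * STR_ID_SIG_MASK before accepting them.
 */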
#define INTERRUPT    0x01050000
#define MACH_vmfault 0x01300008
#define BSC_SysCall  0x040c0000
#define MACH_SysCall 0x010c0000

struct kd_task_name {
	task_t ktn_task;
	pid_t  ktn_pid;
	char   ktn_name[20];
};

struct kd_resolver {
	kd_threadmap *krs_map;
	vm_size_t krs_count;
	vm_size_t krs_maxcount;
	struct kd_task_name *krs_task;
};
/*
 * TRACE file formats...
 *
 * RAW_VERSION0
 *
 * uint32_t #threadmaps
 * kd_threadmap[]
 * kd_buf[]
 *
 * RAW_VERSION1
 *
 * RAW_header, with version_no set to RAW_VERSION1
 * kd_threadmap[]
 * Empty space to pad alignment to the nearest page boundary.
 * kd_buf[]
 *
 * RAW_VERSION1+
 *
 * RAW_header, with version_no set to RAW_VERSION1
 * kd_threadmap[]
 * kd_cpumap_header, with version_no set to RAW_VERSION1
 * kd_cpumap[]
 * Empty space to pad alignment to the nearest page boundary.
 * kd_buf[]
 *
 * V1+ implementation details...
 *
 * It would have been nice to add the cpumap data "correctly", but there were
 * several obstacles. Existing code attempts to parse both V1 and V0 files.
 * Due to the fact that V0 has no versioning or header, the test looks like
 * this:
 *
 *   // Read header
 *   if (header.version_no != RAW_VERSION1) { // Assume V0 }
 *
 * If we add a VERSION2 file format, all existing code is going to treat that
 * as a VERSION0 file when reading it, and crash terribly when trying to read
 * RAW_VERSION2 threadmap entries.
 *
 * To differentiate between a V1 and V1+ file, read as V1 until you reach
 * the padding bytes. Then:
 *
 *   boolean_t is_v1plus = FALSE;
 *   if (padding_bytes >= sizeof(kd_cpumap_header)) {
 *       kd_cpumap_header header = // read header;
 *       if (header.version_no == RAW_VERSION1) {
 *           is_v1plus = TRUE;
 *       }
 *   }
 */
#define RAW_VERSION3 0x00001000

// The header chunk has the tag 0x00001000 which also serves as a magic word
// that identifies the file as a version 3 trace file. The header payload is
// a set of fixed fields followed by a variable number of sub-chunks:
/*
 * ____________________________________________________________________________
 | Offset | Size | Field                                                       |
 | ----------------------------------------------------------------------------
 |    0   |  4   | Tag (0x00001000)                                            |
 |    4   |  4   | Sub-tag. Represents the version of the header.              |
 |    8   |  8   | Length of header payload (40+8x)                            |
 |   16   |  8   | Time base info. Two 32-bit numbers, numer/denom,            |
 |        |      | for converting timestamps to nanoseconds.                   |
 |   24   |  8   | Timestamp of trace start.                                   |
 |   32   |  8   | Wall time seconds since Unix epoch.                         |
 |        |      | As returned by gettimeofday().                              |
 |   40   |  4   | Wall time microseconds. As returned by gettimeofday().      |
 |   44   |  4   | Local time zone offset in minutes. ( " )                    |
 |   48   |  4   | Type of daylight savings time correction to apply. ( " )    |
 |   52   |  4   | Flags. 1 = 64-bit. Remaining bits should be written         |
 |        |      | as 0 and ignored when reading.                              |
 |   56   |  8x  | Variable number of sub-chunks. None are required.           |
 |        |      | Ignore unknown chunks.                                      |
 | ----------------------------------------------------------------------------
 */
// NOTE: The header sub-chunks are considered part of the header chunk,
// so they must be included in the header chunk's length field.
// The CPU map is an optional sub-chunk of the header chunk. It provides
// information about the CPUs that are referenced from the trace events.
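
/*
 * Worked example (derived from the table above): a header with no
 * sub-chunks contains only the 40 bytes of fixed fields at offsets 16..55,
 * so the file begins with tag 0x00001000, sub-tag V3_HEADER_VERSION, and
 * length 40. Any sub-chunks appended at offset 56 must be counted in that
 * length as well.
 */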
typedef struct {
	uint32_t tag;
	uint32_t sub_tag;
	uint64_t length;
	uint32_t timebase_numer;
	uint32_t timebase_denom;
	uint64_t timestamp;
	uint64_t walltime_secs;
	uint32_t walltime_usecs;
	uint32_t timezone_minuteswest;
	uint32_t timezone_dst;
	uint32_t flags;
} __attribute__((packed)) kd_header_v3;

typedef struct {
	uint32_t tag;
	uint32_t sub_tag;
	uint64_t length;
} __attribute__((packed)) kd_chunk_header_v3;
#define V3_CONFIG     0x00001b00
#define V3_CPU_MAP    0x00001c00
#define V3_THREAD_MAP 0x00001d00
#define V3_RAW_EVENTS 0x00001e00
#define V3_NULL_CHUNK 0x00002000

// The current version of all kernel managed chunks is 1. The
// V3_CURRENT_CHUNK_VERSION is added to ease the simple case
// when most/all the kernel managed chunks have the same version.

#define V3_CURRENT_CHUNK_VERSION 1
#define V3_HEADER_VERSION     V3_CURRENT_CHUNK_VERSION
#define V3_CPUMAP_VERSION     V3_CURRENT_CHUNK_VERSION
#define V3_THRMAP_VERSION     V3_CURRENT_CHUNK_VERSION
#define V3_EVENT_DATA_VERSION V3_CURRENT_CHUNK_VERSION
typedef struct krt krt_t;

static uint32_t
kdbg_cpu_count(bool early_trace)
{
	if (early_trace) {
#if defined(__x86_64__)
		return max_ncpus;
#else /* defined(__x86_64__) */
		return ml_get_cpu_count();
#endif /* !defined(__x86_64__) */
	}

#if defined(__x86_64__)
	host_basic_info_data_t hinfo;
	mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
	host_info((host_t)1 /* BSD_HOST */, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
	assert(hinfo.logical_cpu_max > 0);
	return hinfo.logical_cpu_max;
#else /* defined(__x86_64__) */
	return ml_get_topology_info()->max_cpu_id + 1;
#endif /* !defined(__x86_64__) */
}
#if MACH_ASSERT

static bool
kdbg_iop_list_is_valid(kd_iop_t *iop)
{
	if (iop) {
		/* Is list sorted by cpu_id? */
		kd_iop_t *temp = iop;
		do {
			assert(!temp->next || temp->next->cpu_id == temp->cpu_id - 1);
			assert(temp->next || (temp->cpu_id == kdbg_cpu_count(false) || temp->cpu_id == kdbg_cpu_count(true)));
		} while ((temp = temp->next));

		/* Does each entry have a function and a name? */
		temp = iop;
		do {
			assert(temp->callback.func);
			assert(strlen(temp->callback.iop_name) < sizeof(temp->callback.iop_name));
		} while ((temp = temp->next));
	}

	return true;
}

#endif /* MACH_ASSERT */
static void
kdbg_iop_list_callback(kd_iop_t *iop, kd_callback_type type, void *arg)
{
	while (iop) {
		iop->callback.func(iop->callback.context, type, arg);
		iop = iop->next;
	}
}

static lck_grp_t *kdebug_lck_grp = NULL;
static void
kdbg_set_tracing_enabled(bool enabled, uint32_t trace_type)
{
	/*
	 * Drain any events from IOPs before making the state change. On
	 * enabling, this removes any stale events from before tracing. On
	 * disabling, this saves any events up to the point tracing is disabled.
	 */
	kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_SYNC_FLUSH,
	    NULL);

	int s = ml_set_interrupts_enabled(false);
	lck_spin_lock_grp(kds_spin_lock, kdebug_lck_grp);

	if (enabled) {
		/*
		 * The oldest valid time is now; reject past events from IOPs.
		 */
		kd_ctrl_page.oldest_time = kdbg_timestamp();
		kdebug_enable |= trace_type;
		kd_ctrl_page.kdebug_slowcheck &= ~SLOW_NOLOG;
		kd_ctrl_page.enabled = 1;
		commpage_update_kdebug_state();
	} else {
		kdebug_enable &= ~(KDEBUG_ENABLE_TRACE | KDEBUG_ENABLE_PPT);
		kd_ctrl_page.kdebug_slowcheck |= SLOW_NOLOG;
		kd_ctrl_page.enabled = 0;
		commpage_update_kdebug_state();
	}
	lck_spin_unlock(kds_spin_lock);
	ml_set_interrupts_enabled(s);

	if (enabled) {
		kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops,
		    KD_CALLBACK_KDEBUG_ENABLED, NULL);
	} else {
		kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops,
		    KD_CALLBACK_KDEBUG_DISABLED, NULL);
	}
}
static void
kdbg_set_flags(int slowflag, int enableflag, bool enabled)
{
	int s = ml_set_interrupts_enabled(false);
	lck_spin_lock_grp(kds_spin_lock, kdebug_lck_grp);

	if (enabled) {
		kd_ctrl_page.kdebug_slowcheck |= slowflag;
		kdebug_enable |= enableflag;
	} else {
		kd_ctrl_page.kdebug_slowcheck &= ~slowflag;
		kdebug_enable &= ~enableflag;
	}

	lck_spin_unlock(kds_spin_lock);
	ml_set_interrupts_enabled(s);
}
/*
 * Disable wrapping and return true if trace wrapped, false otherwise.
 */
static bool
disable_wrap(uint32_t *old_slowcheck, uint32_t *old_flags)
{
	bool wrapped;
	int s = ml_set_interrupts_enabled(false);
	lck_spin_lock_grp(kds_spin_lock, kdebug_lck_grp);

	*old_slowcheck = kd_ctrl_page.kdebug_slowcheck;
	*old_flags = kd_ctrl_page.kdebug_flags;

	wrapped = kd_ctrl_page.kdebug_flags & KDBG_WRAPPED;
	kd_ctrl_page.kdebug_flags &= ~KDBG_WRAPPED;
	kd_ctrl_page.kdebug_flags |= KDBG_NOWRAP;

	lck_spin_unlock(kds_spin_lock);
	ml_set_interrupts_enabled(s);

	return wrapped;
}
static void
enable_wrap(uint32_t old_slowcheck)
{
	int s = ml_set_interrupts_enabled(false);
	lck_spin_lock_grp(kds_spin_lock, kdebug_lck_grp);

	kd_ctrl_page.kdebug_flags &= ~KDBG_NOWRAP;

	if (!(old_slowcheck & SLOW_NOLOG)) {
		kd_ctrl_page.kdebug_slowcheck &= ~SLOW_NOLOG;
	}

	lck_spin_unlock(kds_spin_lock);
	ml_set_interrupts_enabled(s);
}
769 create_buffers(bool early_trace
)
772 unsigned int p_buffer_size
;
773 unsigned int f_buffer_size
;
774 unsigned int f_buffers
;
778 * For the duration of this allocation, trace code will only reference
779 * kdebug_iops. Any iops registered after this enabling will not be
780 * messaged until the buffers are reallocated.
782 * TLDR; Must read kd_iops once and only once!
784 kd_ctrl_page
.kdebug_iops
= kd_iops
;
786 assert(kdbg_iop_list_is_valid(kd_ctrl_page
.kdebug_iops
));
789 * If the list is valid, it is sorted, newest -> oldest. Each iop entry
790 * has a cpu_id of "the older entry + 1", so the highest cpu_id will
791 * be the list head + 1.
794 kd_ctrl_page
.kdebug_cpus
= kd_ctrl_page
.kdebug_iops
? kd_ctrl_page
.kdebug_iops
->cpu_id
+ 1 : kdbg_cpu_count(early_trace
);
796 if (kmem_alloc(kernel_map
, (vm_offset_t
*)&kdbip
, sizeof(struct kd_bufinfo
) * kd_ctrl_page
.kdebug_cpus
, VM_KERN_MEMORY_DIAG
) != KERN_SUCCESS
) {
801 if (nkdbufs
< (kd_ctrl_page
.kdebug_cpus
* EVENTS_PER_STORAGE_UNIT
* MIN_STORAGE_UNITS_PER_CPU
)) {
802 n_storage_units
= kd_ctrl_page
.kdebug_cpus
* MIN_STORAGE_UNITS_PER_CPU
;
804 n_storage_units
= nkdbufs
/ EVENTS_PER_STORAGE_UNIT
;
807 nkdbufs
= n_storage_units
* EVENTS_PER_STORAGE_UNIT
;
809 f_buffers
= n_storage_units
/ N_STORAGE_UNITS_PER_BUFFER
;
810 n_storage_buffers
= f_buffers
;
812 f_buffer_size
= N_STORAGE_UNITS_PER_BUFFER
* sizeof(struct kd_storage
);
813 p_buffer_size
= (n_storage_units
% N_STORAGE_UNITS_PER_BUFFER
) * sizeof(struct kd_storage
);
821 if (kdcopybuf
== 0) {
822 if (kmem_alloc(kernel_map
, (vm_offset_t
*)&kdcopybuf
, (vm_size_t
)KDCOPYBUF_SIZE
, VM_KERN_MEMORY_DIAG
) != KERN_SUCCESS
) {
827 if (kmem_alloc(kernel_map
, (vm_offset_t
*)&kd_bufs
, (vm_size_t
)(n_storage_buffers
* sizeof(struct kd_storage_buffers
)), VM_KERN_MEMORY_DIAG
) != KERN_SUCCESS
) {
831 bzero(kd_bufs
, n_storage_buffers
* sizeof(struct kd_storage_buffers
));
833 for (i
= 0; i
< f_buffers
; i
++) {
834 if (kmem_alloc(kernel_map
, (vm_offset_t
*)&kd_bufs
[i
].kdsb_addr
, (vm_size_t
)f_buffer_size
, VM_KERN_MEMORY_DIAG
) != KERN_SUCCESS
) {
838 bzero(kd_bufs
[i
].kdsb_addr
, f_buffer_size
);
840 kd_bufs
[i
].kdsb_size
= f_buffer_size
;
843 if (kmem_alloc(kernel_map
, (vm_offset_t
*)&kd_bufs
[i
].kdsb_addr
, (vm_size_t
)p_buffer_size
, VM_KERN_MEMORY_DIAG
) != KERN_SUCCESS
) {
847 bzero(kd_bufs
[i
].kdsb_addr
, p_buffer_size
);
849 kd_bufs
[i
].kdsb_size
= p_buffer_size
;
853 for (i
= 0; i
< n_storage_buffers
; i
++) {
854 struct kd_storage
*kds
;
856 static_assert(N_STORAGE_UNITS_PER_BUFFER
<= UINT16_MAX
);
857 assert(kd_bufs
[i
].kdsb_size
<= N_STORAGE_UNITS_PER_BUFFER
*
858 sizeof(struct kd_storage
));
860 n_elements
= kd_bufs
[i
].kdsb_size
/ sizeof(struct kd_storage
);
861 kds
= kd_bufs
[i
].kdsb_addr
;
863 for (uint16_t n
= 0; n
< n_elements
; n
++) {
864 kds
[n
].kds_next
.buffer_index
= kd_ctrl_page
.kds_free_list
.buffer_index
;
865 kds
[n
].kds_next
.offset
= kd_ctrl_page
.kds_free_list
.offset
;
867 kd_ctrl_page
.kds_free_list
.buffer_index
= i
;
868 kd_ctrl_page
.kds_free_list
.offset
= n
;
870 n_storage_units
+= n_elements
;
873 bzero((char *)kdbip
, sizeof(struct kd_bufinfo
) * kd_ctrl_page
.kdebug_cpus
);
875 for (i
= 0; i
< kd_ctrl_page
.kdebug_cpus
; i
++) {
876 kdbip
[i
].kd_list_head
.raw
= KDS_PTR_NULL
;
877 kdbip
[i
].kd_list_tail
.raw
= KDS_PTR_NULL
;
878 kdbip
[i
].kd_lostevents
= false;
879 kdbip
[i
].num_bufs
= 0;
882 kd_ctrl_page
.kdebug_flags
|= KDBG_BUFINIT
;
884 kd_ctrl_page
.kds_inuse_count
= 0;
885 n_storage_threshold
= n_storage_units
/ 2;
900 for (i
= 0; i
< n_storage_buffers
; i
++) {
901 if (kd_bufs
[i
].kdsb_addr
) {
902 kmem_free(kernel_map
, (vm_offset_t
)kd_bufs
[i
].kdsb_addr
, (vm_size_t
)kd_bufs
[i
].kdsb_size
);
905 kmem_free(kernel_map
, (vm_offset_t
)kd_bufs
, (vm_size_t
)(n_storage_buffers
* sizeof(struct kd_storage_buffers
)));
908 n_storage_buffers
= 0;
911 kmem_free(kernel_map
, (vm_offset_t
)kdcopybuf
, KDCOPYBUF_SIZE
);
915 kd_ctrl_page
.kds_free_list
.raw
= KDS_PTR_NULL
;
918 kmem_free(kernel_map
, (vm_offset_t
)kdbip
, sizeof(struct kd_bufinfo
) * kd_ctrl_page
.kdebug_cpus
);
922 kd_ctrl_page
.kdebug_iops
= NULL
;
923 kd_ctrl_page
.kdebug_cpus
= 0;
924 kd_ctrl_page
.kdebug_flags
&= ~KDBG_BUFINIT
;
928 release_storage_unit(int cpu
, uint32_t kdsp_raw
)
931 struct kd_storage
*kdsp_actual
;
932 struct kd_bufinfo
*kdbp
;
937 s
= ml_set_interrupts_enabled(false);
938 lck_spin_lock_grp(kds_spin_lock
, kdebug_lck_grp
);
942 if (kdsp
.raw
== kdbp
->kd_list_head
.raw
) {
		 * It's possible for the storage unit pointed to by kdsp
		 * to have already been stolen, so check whether it's
		 * still the head of the list now that we're behind the
		 * lock that protects adding and removing from the queue.
		 * Since we only ever release and steal units from that
		 * position, if it's no longer the head we have nothing
		 * to do in this context.
953 kdsp_actual
= POINTER_FROM_KDS_PTR(kdsp
);
954 kdbp
->kd_list_head
= kdsp_actual
->kds_next
;
956 kdsp_actual
->kds_next
= kd_ctrl_page
.kds_free_list
;
957 kd_ctrl_page
.kds_free_list
= kdsp
;
959 kd_ctrl_page
.kds_inuse_count
--;
961 lck_spin_unlock(kds_spin_lock
);
962 ml_set_interrupts_enabled(s
);
966 allocate_storage_unit(int cpu
)
969 struct kd_storage
*kdsp_actual
, *kdsp_next_actual
;
970 struct kd_bufinfo
*kdbp
, *kdbp_vict
, *kdbp_try
;
971 uint64_t oldest_ts
, ts
;
975 s
= ml_set_interrupts_enabled(false);
976 lck_spin_lock_grp(kds_spin_lock
, kdebug_lck_grp
);
980 /* If someone beat us to the allocate, return success */
981 if (kdbp
->kd_list_tail
.raw
!= KDS_PTR_NULL
) {
982 kdsp_actual
= POINTER_FROM_KDS_PTR(kdbp
->kd_list_tail
);
984 if (kdsp_actual
->kds_bufindx
< EVENTS_PER_STORAGE_UNIT
) {
989 if ((kdsp
= kd_ctrl_page
.kds_free_list
).raw
!= KDS_PTR_NULL
) {
991 * If there's a free page, grab it from the free list.
993 kdsp_actual
= POINTER_FROM_KDS_PTR(kdsp
);
994 kd_ctrl_page
.kds_free_list
= kdsp_actual
->kds_next
;
996 kd_ctrl_page
.kds_inuse_count
++;
999 * Otherwise, we're going to lose events and repurpose the oldest
1000 * storage unit we can find.
1002 if (kd_ctrl_page
.kdebug_flags
& KDBG_NOWRAP
) {
1003 kd_ctrl_page
.kdebug_slowcheck
|= SLOW_NOLOG
;
1004 kdbp
->kd_lostevents
= true;
1009 oldest_ts
= UINT64_MAX
;
1011 for (kdbp_try
= &kdbip
[0]; kdbp_try
< &kdbip
[kd_ctrl_page
.kdebug_cpus
]; kdbp_try
++) {
1012 if (kdbp_try
->kd_list_head
.raw
== KDS_PTR_NULL
) {
1014 * no storage unit to steal
1019 kdsp_actual
= POINTER_FROM_KDS_PTR(kdbp_try
->kd_list_head
);
1021 if (kdsp_actual
->kds_bufcnt
< EVENTS_PER_STORAGE_UNIT
) {
1023 * make sure we don't steal the storage unit
1024 * being actively recorded to... need to
1025 * move on because we don't want an out-of-order
1026 * set of events showing up later
1032 * When wrapping, steal the storage unit with the
1033 * earliest timestamp on its last event, instead of the
1034 * earliest timestamp on the first event. This allows a
1035 * storage unit with more recent events to be preserved,
1036 * even if the storage unit contains events that are
1037 * older than those found in other CPUs.
1039 ts
= kdbg_get_timestamp(&kdsp_actual
->kds_records
[EVENTS_PER_STORAGE_UNIT
- 1]);
1040 if (ts
< oldest_ts
) {
1042 kdbp_vict
= kdbp_try
;
1045 if (kdbp_vict
== NULL
) {
1047 kd_ctrl_page
.enabled
= 0;
1048 commpage_update_kdebug_state();
1052 kdsp
= kdbp_vict
->kd_list_head
;
1053 kdsp_actual
= POINTER_FROM_KDS_PTR(kdsp
);
1054 kdbp_vict
->kd_list_head
= kdsp_actual
->kds_next
;
1056 if (kdbp_vict
->kd_list_head
.raw
!= KDS_PTR_NULL
) {
1057 kdsp_next_actual
= POINTER_FROM_KDS_PTR(kdbp_vict
->kd_list_head
);
1058 kdsp_next_actual
->kds_lostevents
= true;
1060 kdbp_vict
->kd_lostevents
= true;
1063 if (kd_ctrl_page
.oldest_time
< oldest_ts
) {
1064 kd_ctrl_page
.oldest_time
= oldest_ts
;
1066 kd_ctrl_page
.kdebug_flags
|= KDBG_WRAPPED
;
1068 kdsp_actual
->kds_timestamp
= kdbg_timestamp();
1069 kdsp_actual
->kds_next
.raw
= KDS_PTR_NULL
;
1070 kdsp_actual
->kds_bufcnt
= 0;
1071 kdsp_actual
->kds_readlast
= 0;
1073 kdsp_actual
->kds_lostevents
= kdbp
->kd_lostevents
;
1074 kdbp
->kd_lostevents
= false;
1075 kdsp_actual
->kds_bufindx
= 0;
1077 if (kdbp
->kd_list_head
.raw
== KDS_PTR_NULL
) {
1078 kdbp
->kd_list_head
= kdsp
;
1080 POINTER_FROM_KDS_PTR(kdbp
->kd_list_tail
)->kds_next
= kdsp
;
1082 kdbp
->kd_list_tail
= kdsp
;
1084 lck_spin_unlock(kds_spin_lock
);
1085 ml_set_interrupts_enabled(s
);
1091 kernel_debug_register_callback(kd_callback_t callback
)
1094 if (kmem_alloc(kernel_map
, (vm_offset_t
*)&iop
, sizeof(kd_iop_t
), VM_KERN_MEMORY_DIAG
) == KERN_SUCCESS
) {
1095 memcpy(&iop
->callback
, &callback
, sizeof(kd_callback_t
));
1098 * <rdar://problem/13351477> Some IOP clients are not providing a name.
1100 * Remove when fixed.
1103 bool is_valid_name
= false;
1104 for (uint32_t length
= 0; length
< sizeof(callback
.iop_name
); ++length
) {
1105 /* This is roughly isprintable(c) */
1106 if (callback
.iop_name
[length
] > 0x20 && callback
.iop_name
[length
] < 0x7F) {
1109 if (callback
.iop_name
[length
] == 0) {
1111 is_valid_name
= true;
1117 if (!is_valid_name
) {
1118 strlcpy(iop
->callback
.iop_name
, "IOP-???", sizeof(iop
->callback
.iop_name
));
1122 iop
->last_timestamp
= 0;
1126 * We use two pieces of state, the old list head
1127 * pointer, and the value of old_list_head->cpu_id.
1128 * If we read kd_iops more than once, it can change
1131 * TLDR; Must not read kd_iops more than once per loop.
1133 iop
->next
= kd_iops
;
1134 iop
->cpu_id
= iop
->next
? (iop
->next
->cpu_id
+ 1) : kdbg_cpu_count(false);
1137 * Header says OSCompareAndSwapPtr has a memory barrier
1139 } while (!OSCompareAndSwapPtr(iop
->next
, iop
, (void* volatile*)&kd_iops
));
1161 struct kd_bufinfo
*kdbp
;
1162 struct kd_storage
*kdsp_actual
;
1163 union kds_ptr kds_raw
;
1165 if (kd_ctrl_page
.kdebug_slowcheck
) {
1166 if ((kd_ctrl_page
.kdebug_slowcheck
& SLOW_NOLOG
) || !(kdebug_enable
& (KDEBUG_ENABLE_TRACE
| KDEBUG_ENABLE_PPT
))) {
1170 if (kd_ctrl_page
.kdebug_flags
& KDBG_TYPEFILTER_CHECK
) {
1171 if (typefilter_is_debugid_allowed(kdbg_typefilter
, debugid
)) {
1175 } else if (kd_ctrl_page
.kdebug_flags
& KDBG_RANGECHECK
) {
1176 if (debugid
>= kdlog_beg
&& debugid
<= kdlog_end
) {
1180 } else if (kd_ctrl_page
.kdebug_flags
& KDBG_VALCHECK
) {
1181 if ((debugid
& KDBG_EVENTID_MASK
) != kdlog_value1
&&
1182 (debugid
& KDBG_EVENTID_MASK
) != kdlog_value2
&&
1183 (debugid
& KDBG_EVENTID_MASK
) != kdlog_value3
&&
1184 (debugid
& KDBG_EVENTID_MASK
) != kdlog_value4
) {
1191 if (timestamp
< kd_ctrl_page
.oldest_time
) {
1195 disable_preemption();
1197 if (kd_ctrl_page
.enabled
== 0) {
1201 kdbp
= &kdbip
[coreid
];
1202 timestamp
&= KDBG_TIMESTAMP_MASK
;
1205 kds_raw
= kdbp
->kd_list_tail
;
1207 if (kds_raw
.raw
!= KDS_PTR_NULL
) {
1208 kdsp_actual
= POINTER_FROM_KDS_PTR(kds_raw
);
1209 bindx
= kdsp_actual
->kds_bufindx
;
1212 bindx
= EVENTS_PER_STORAGE_UNIT
;
1215 if (kdsp_actual
== NULL
|| bindx
>= EVENTS_PER_STORAGE_UNIT
) {
1216 if (allocate_storage_unit(coreid
) == false) {
1218 * this can only happen if wrapping
1225 if (!OSCompareAndSwap(bindx
, bindx
+ 1, &kdsp_actual
->kds_bufindx
)) {
1229 // IOP entries can be allocated before xnu allocates and inits the buffer
1230 if (timestamp
< kdsp_actual
->kds_timestamp
) {
1231 kdsp_actual
->kds_timestamp
= timestamp
;
1234 kd
= &kdsp_actual
->kds_records
[bindx
];
1236 kd
->debugid
= debugid
;
1241 kd
->arg5
= threadid
;
1243 kdbg_set_timestamp_and_cpu(kd
, timestamp
, coreid
);
1245 OSAddAtomic(1, &kdsp_actual
->kds_bufcnt
);
1247 enable_preemption();
1249 if ((kds_waiter
&& kd_ctrl_page
.kds_inuse_count
>= n_storage_threshold
)) {
1255 * Check if the given debug ID is allowed to be traced on the current process.
1257 * Returns true if allowed and false otherwise.
1260 kdebug_debugid_procfilt_allowed(uint32_t debugid
)
1262 uint32_t procfilt_flags
= kd_ctrl_page
.kdebug_flags
&
1263 (KDBG_PIDCHECK
| KDBG_PIDEXCLUDE
);
1265 if (!procfilt_flags
) {
1270 * DBG_TRACE and MACH_SCHED tracepoints ignore the process filter.
1272 if ((debugid
& 0xffff0000) == MACHDBG_CODE(DBG_MACH_SCHED
, 0) ||
1273 (debugid
>> 24 == DBG_TRACE
)) {
1277 struct proc
*curproc
= current_proc();
1279 * If the process is missing (early in boot), allow it.
1285 if (procfilt_flags
& KDBG_PIDCHECK
) {
1287 * Allow only processes marked with the kdebug bit.
1289 return curproc
->p_kdebug
;
1290 } else if (procfilt_flags
& KDBG_PIDEXCLUDE
) {
1292 * Exclude any process marked with the kdebug bit.
1294 return !curproc
->p_kdebug
;
1296 panic("kdebug: invalid procfilt flags %x", kd_ctrl_page
.kdebug_flags
);
1297 __builtin_unreachable();
1302 kernel_debug_internal(
1315 struct kd_bufinfo
*kdbp
;
1316 struct kd_storage
*kdsp_actual
;
1317 union kds_ptr kds_raw
;
1318 bool only_filter
= flags
& KDBG_FLAG_FILTERED
;
1319 bool observe_procfilt
= !(flags
& KDBG_FLAG_NOPROCFILT
);
1321 if (kd_ctrl_page
.kdebug_slowcheck
) {
1322 if ((kd_ctrl_page
.kdebug_slowcheck
& SLOW_NOLOG
) ||
1323 !(kdebug_enable
& (KDEBUG_ENABLE_TRACE
| KDEBUG_ENABLE_PPT
))) {
1327 if (!ml_at_interrupt_context() && observe_procfilt
&&
1328 !kdebug_debugid_procfilt_allowed(debugid
)) {
1332 if (kd_ctrl_page
.kdebug_flags
& KDBG_TYPEFILTER_CHECK
) {
1333 if (typefilter_is_debugid_allowed(kdbg_typefilter
, debugid
)) {
1338 } else if (only_filter
) {
1340 } else if (kd_ctrl_page
.kdebug_flags
& KDBG_RANGECHECK
) {
1341 /* Always record trace system info */
1342 if (KDBG_EXTRACT_CLASS(debugid
) == DBG_TRACE
) {
1346 if (debugid
< kdlog_beg
|| debugid
> kdlog_end
) {
1349 } else if (kd_ctrl_page
.kdebug_flags
& KDBG_VALCHECK
) {
1350 /* Always record trace system info */
1351 if (KDBG_EXTRACT_CLASS(debugid
) == DBG_TRACE
) {
1355 if ((debugid
& KDBG_EVENTID_MASK
) != kdlog_value1
&&
1356 (debugid
& KDBG_EVENTID_MASK
) != kdlog_value2
&&
1357 (debugid
& KDBG_EVENTID_MASK
) != kdlog_value3
&&
1358 (debugid
& KDBG_EVENTID_MASK
) != kdlog_value4
) {
1362 } else if (only_filter
) {
1367 disable_preemption();
1369 if (kd_ctrl_page
.enabled
== 0) {
1377 kds_raw
= kdbp
->kd_list_tail
;
1379 if (kds_raw
.raw
!= KDS_PTR_NULL
) {
1380 kdsp_actual
= POINTER_FROM_KDS_PTR(kds_raw
);
1381 bindx
= kdsp_actual
->kds_bufindx
;
1384 bindx
= EVENTS_PER_STORAGE_UNIT
;
1387 if (kdsp_actual
== NULL
|| bindx
>= EVENTS_PER_STORAGE_UNIT
) {
1388 if (allocate_storage_unit(cpu
) == false) {
1390 * this can only happen if wrapping
1398 now
= kdbg_timestamp() & KDBG_TIMESTAMP_MASK
;
1400 if (!OSCompareAndSwap(bindx
, bindx
+ 1, &kdsp_actual
->kds_bufindx
)) {
1404 kd
= &kdsp_actual
->kds_records
[bindx
];
1406 kd
->debugid
= debugid
;
1413 kdbg_set_timestamp_and_cpu(kd
, now
, cpu
);
1415 OSAddAtomic(1, &kdsp_actual
->kds_bufcnt
);
1418 kperf_kdebug_callback(debugid
, __builtin_frame_address(0));
1421 enable_preemption();
1423 if (kds_waiter
&& kd_ctrl_page
.kds_inuse_count
>= n_storage_threshold
) {
1427 etype
= debugid
& KDBG_EVENTID_MASK
;
1428 stype
= debugid
& KDBG_CSC_MASK
;
1430 if (etype
== INTERRUPT
|| etype
== MACH_vmfault
||
1431 stype
== BSC_SysCall
|| stype
== MACH_SysCall
) {
1437 __attribute__((noinline
))
1445 __unused
uintptr_t arg5
)
1447 kernel_debug_internal(debugid
, arg1
, arg2
, arg3
, arg4
,
1448 (uintptr_t)thread_tid(current_thread()), 0);
1451 __attribute__((noinline
))
1461 kernel_debug_internal(debugid
, arg1
, arg2
, arg3
, arg4
, arg5
, 0);
1464 __attribute__((noinline
))
1474 kernel_debug_internal(debugid
, arg1
, arg2
, arg3
, arg4
,
1475 (uintptr_t)thread_tid(current_thread()), flags
);
1478 __attribute__((noinline
))
1480 kernel_debug_filtered(
1487 kernel_debug_flags(debugid
, arg1
, arg2
, arg3
, arg4
, KDBG_FLAG_FILTERED
);
1491 kernel_debug_string_early(const char *message
)
1493 uintptr_t arg
[4] = {0, 0, 0, 0};
1495 /* Stuff the message string in the args and log it. */
1496 strncpy((char *)arg
, message
, MIN(sizeof(arg
), strlen(message
)));
1499 arg
[0], arg
[1], arg
[2], arg
[3]);
1502 #define SIMPLE_STR_LEN (64)
1503 static_assert(SIMPLE_STR_LEN
% sizeof(uintptr_t) == 0);
1506 kernel_debug_string_simple(uint32_t eventid
, const char *str
)
1508 if (!kdebug_enable
) {
1512 /* array of uintptr_ts simplifies emitting the string as arguments */
1513 uintptr_t str_buf
[(SIMPLE_STR_LEN
/ sizeof(uintptr_t)) + 1] = { 0 };
1514 size_t len
= strlcpy((char *)str_buf
, str
, SIMPLE_STR_LEN
+ 1);
1516 uintptr_t thread_id
= (uintptr_t)thread_tid(current_thread());
1517 uint32_t debugid
= eventid
| DBG_FUNC_START
;
1519 /* string can fit in a single tracepoint */
1520 if (len
<= (4 * sizeof(uintptr_t))) {
1521 debugid
|= DBG_FUNC_END
;
1524 kernel_debug_internal(debugid
, str_buf
[0],
1527 str_buf
[3], thread_id
, 0);
1529 debugid
&= KDBG_EVENTID_MASK
;
1531 size_t written
= 4 * sizeof(uintptr_t);
1533 for (; written
< len
; i
+= 4, written
+= 4 * sizeof(uintptr_t)) {
1534 /* if this is the last tracepoint to be emitted */
1535 if ((written
+ (4 * sizeof(uintptr_t))) >= len
) {
1536 debugid
|= DBG_FUNC_END
;
1538 kernel_debug_internal(debugid
, str_buf
[i
],
1541 str_buf
[i
+ 3], thread_id
, 0);
1545 extern int master_cpu
; /* MACH_KERNEL_PRIVATE */
1547 * Used prior to start_kern_tracing() being called.
1548 * Log temporarily into a static buffer.
1558 #if defined(__x86_64__)
1559 extern int early_boot
;
1561 * Note that "early" isn't early enough in some cases where
1562 * we're invoked before gsbase is set on x86, hence the
1563 * check of "early_boot".
1570 /* If early tracing is over, use the normal path. */
1571 if (kd_early_done
) {
1572 KDBG_RELEASE(debugid
, arg1
, arg2
, arg3
, arg4
);
1576 /* Do nothing if the buffer is full or we're not on the boot cpu. */
1577 kd_early_overflow
= kd_early_index
>= KD_EARLY_BUFFER_NBUFS
;
1578 if (kd_early_overflow
|| cpu_number() != master_cpu
) {
1582 kd_early_buffer
[kd_early_index
].debugid
= debugid
;
1583 kd_early_buffer
[kd_early_index
].timestamp
= mach_absolute_time();
1584 kd_early_buffer
[kd_early_index
].arg1
= arg1
;
1585 kd_early_buffer
[kd_early_index
].arg2
= arg2
;
1586 kd_early_buffer
[kd_early_index
].arg3
= arg3
;
1587 kd_early_buffer
[kd_early_index
].arg4
= arg4
;
1588 kd_early_buffer
[kd_early_index
].arg5
= 0;
1593 * Transfer the contents of the temporary buffer into the trace buffers.
1594 * Precede that by logging the rebase time (offset) - the TSC-based time (in ns)
1595 * when mach_absolute_time is set to 0.
1598 kernel_debug_early_end(void)
1600 if (cpu_number() != master_cpu
) {
		panic("kernel_debug_early_end() not called on boot processor");
1604 /* reset the current oldest time to allow early events */
1605 kd_ctrl_page
.oldest_time
= 0;
1607 #if defined(__x86_64__)
1608 /* Fake sentinel marking the start of kernel time relative to TSC */
1609 kernel_debug_enter(0, TRACE_TIMESTAMPS
, 0,
1610 (uint32_t)(tsc_rebase_abs_time
>> 32), (uint32_t)tsc_rebase_abs_time
,
1612 #endif /* defined(__x86_64__) */
1613 for (unsigned int i
= 0; i
< kd_early_index
; i
++) {
1614 kernel_debug_enter(0,
1615 kd_early_buffer
[i
].debugid
,
1616 kd_early_buffer
[i
].timestamp
,
1617 kd_early_buffer
[i
].arg1
,
1618 kd_early_buffer
[i
].arg2
,
1619 kd_early_buffer
[i
].arg3
,
1620 kd_early_buffer
[i
].arg4
,
1624 /* Cut events-lost event on overflow */
1625 if (kd_early_overflow
) {
1626 KDBG_RELEASE(TRACE_LOST_EVENTS
, 1);
1629 kd_early_done
= true;
1631 /* This trace marks the start of kernel tracing */
1632 kernel_debug_string_early("early trace done");
1636 kernel_debug_disable(void)
1638 if (kdebug_enable
) {
1639 kdbg_set_tracing_enabled(false, 0);
1644 * Returns non-zero if debugid is in a reserved class.
1647 kdebug_validate_debugid(uint32_t debugid
)
1649 uint8_t debugid_class
;
1651 debugid_class
= KDBG_EXTRACT_CLASS(debugid
);
1652 switch (debugid_class
) {
1661 * Support syscall SYS_kdebug_typefilter.
1664 kdebug_typefilter(__unused
struct proc
* p
,
1665 struct kdebug_typefilter_args
* uap
,
1666 __unused
int *retval
)
1668 int ret
= KERN_SUCCESS
;
1670 if (uap
->addr
== USER_ADDR_NULL
||
1671 uap
->size
== USER_ADDR_NULL
) {
1676 * The atomic load is to close a race window with setting the typefilter
1677 * and memory entry values. A description follows:
1681 * Allocate Typefilter
1682 * Allocate MemoryEntry
1683 * Write Global MemoryEntry Ptr
1684 * Atomic Store (Release) Global Typefilter Ptr
1686 * Thread 2 (reader, AKA us)
1688 * if ((Atomic Load (Acquire) Global Typefilter Ptr) == NULL)
1691 * Without the atomic store, it isn't guaranteed that the write of
1692 * Global MemoryEntry Ptr is visible before we can see the write of
1693 * Global Typefilter Ptr.
1695 * Without the atomic load, it isn't guaranteed that the loads of
1696 * Global MemoryEntry Ptr aren't speculated.
1698 * The global pointers transition from NULL -> valid once and only once,
1699 * and never change after becoming valid. This means that having passed
1700 * the first atomic load test of Global Typefilter Ptr, this function
1701 * can then safely use the remaining global state without atomic checks.
1703 if (!os_atomic_load(&kdbg_typefilter
, acquire
)) {
1707 assert(kdbg_typefilter_memory_entry
);
1709 mach_vm_offset_t user_addr
= 0;
1710 vm_map_t user_map
= current_map();
1712 ret
= mach_to_bsd_errno(
1713 mach_vm_map_kernel(user_map
, // target map
1714 &user_addr
, // [in, out] target address
1715 TYPEFILTER_ALLOC_SIZE
, // initial size
1716 0, // mask (alignment?)
1717 VM_FLAGS_ANYWHERE
, // flags
1718 VM_MAP_KERNEL_FLAGS_NONE
,
1719 VM_KERN_MEMORY_NONE
,
1720 kdbg_typefilter_memory_entry
, // port (memory entry!)
1721 0, // offset (in memory entry)
1722 false, // should copy
1723 VM_PROT_READ
, // cur_prot
1724 VM_PROT_READ
, // max_prot
1725 VM_INHERIT_SHARE
)); // inherit behavior on fork
1727 if (ret
== KERN_SUCCESS
) {
1728 vm_size_t user_ptr_size
= vm_map_is_64bit(user_map
) ? 8 : 4;
1729 ret
= copyout(CAST_DOWN(void *, &user_addr
), uap
->addr
, user_ptr_size
);
1731 if (ret
!= KERN_SUCCESS
) {
1732 mach_vm_deallocate(user_map
, user_addr
, TYPEFILTER_ALLOC_SIZE
);
1740 * Support syscall SYS_kdebug_trace. U64->K32 args may get truncated in kdebug_trace64
1743 kdebug_trace(struct proc
*p
, struct kdebug_trace_args
*uap
, int32_t *retval
)
1745 struct kdebug_trace64_args uap64
;
1747 uap64
.code
= uap
->code
;
1748 uap64
.arg1
= uap
->arg1
;
1749 uap64
.arg2
= uap
->arg2
;
1750 uap64
.arg3
= uap
->arg3
;
1751 uap64
.arg4
= uap
->arg4
;
1753 return kdebug_trace64(p
, &uap64
, retval
);
1757 * Support syscall SYS_kdebug_trace64. 64-bit args on K32 will get truncated
1758 * to fit in 32-bit record format.
1760 * It is intentional that error conditions are not checked until kdebug is
1761 * enabled. This is to match the userspace wrapper behavior, which is optimizing
1762 * for non-error case performance.
1765 kdebug_trace64(__unused
struct proc
*p
, struct kdebug_trace64_args
*uap
, __unused
int32_t *retval
)
1769 if (__probable(kdebug_enable
== 0)) {
1773 if ((err
= kdebug_validate_debugid(uap
->code
)) != 0) {
1777 kernel_debug_internal(uap
->code
, (uintptr_t)uap
->arg1
,
1778 (uintptr_t)uap
->arg2
, (uintptr_t)uap
->arg3
, (uintptr_t)uap
->arg4
,
1779 (uintptr_t)thread_tid(current_thread()), 0);
1785 * Adding enough padding to contain a full tracepoint for the last
1786 * portion of the string greatly simplifies the logic of splitting the
1787 * string between tracepoints. Full tracepoints can be generated using
1788 * the buffer itself, without having to manually add zeros to pad the
1792 /* 2 string args in first tracepoint and 9 string data tracepoints */
1793 #define STR_BUF_ARGS (2 + (9 * 4))
1794 /* times the size of each arg on K64 */
1795 #define MAX_STR_LEN (STR_BUF_ARGS * sizeof(uint64_t))
1796 /* on K32, ending straddles a tracepoint, so reserve blanks */
1797 #define STR_BUF_SIZE (MAX_STR_LEN + (2 * sizeof(uint32_t)))
1800 * This function does no error checking and assumes that it is called with
1801 * the correct arguments, including that the buffer pointed to by str is at
1802 * least STR_BUF_SIZE bytes. However, str must be aligned to word-size and
1803 * be NUL-terminated. In cases where a string can fit evenly into a final
1804 * tracepoint without its NUL-terminator, this function will not end those
1805 * strings with a NUL in trace. It's up to clients to look at the function
1806 * qualifier for DBG_FUNC_END in this case, to end the string.
1809 kernel_debug_string_internal(uint32_t debugid
, uint64_t str_id
, void *vstr
,
1812 /* str must be word-aligned */
1813 uintptr_t *str
= vstr
;
1815 uintptr_t thread_id
;
1817 uint32_t trace_debugid
= TRACEDBG_CODE(DBG_TRACE_STRING
,
1818 TRACE_STRING_GLOBAL
);
1820 thread_id
= (uintptr_t)thread_tid(current_thread());
1822 /* if the ID is being invalidated, just emit that */
1823 if (str_id
!= 0 && str_len
== 0) {
1824 kernel_debug_internal(trace_debugid
| DBG_FUNC_START
| DBG_FUNC_END
,
1825 (uintptr_t)debugid
, (uintptr_t)str_id
, 0, 0, thread_id
, 0);
1829 /* generate an ID, if necessary */
1831 str_id
= OSIncrementAtomic64((SInt64
*)&g_curr_str_id
);
1832 str_id
= (str_id
& STR_ID_MASK
) | g_str_id_signature
;
1835 trace_debugid
|= DBG_FUNC_START
;
1836 /* string can fit in a single tracepoint */
1837 if (str_len
<= (2 * sizeof(uintptr_t))) {
1838 trace_debugid
|= DBG_FUNC_END
;
1841 kernel_debug_internal(trace_debugid
, (uintptr_t)debugid
, (uintptr_t)str_id
,
1842 str
[0], str
[1], thread_id
, 0);
1844 trace_debugid
&= KDBG_EVENTID_MASK
;
1846 written
+= 2 * sizeof(uintptr_t);
1848 for (; written
< str_len
; i
+= 4, written
+= 4 * sizeof(uintptr_t)) {
1849 if ((written
+ (4 * sizeof(uintptr_t))) >= str_len
) {
1850 trace_debugid
|= DBG_FUNC_END
;
1852 kernel_debug_internal(trace_debugid
, str
[i
],
1855 str
[i
+ 3], thread_id
, 0);
1862 * Returns true if the current process can emit events, and false otherwise.
1863 * Trace system and scheduling events circumvent this check, as do events
1864 * emitted in interrupt context.
1867 kdebug_current_proc_enabled(uint32_t debugid
)
1869 /* can't determine current process in interrupt context */
1870 if (ml_at_interrupt_context()) {
1874 /* always emit trace system and scheduling events */
1875 if ((KDBG_EXTRACT_CLASS(debugid
) == DBG_TRACE
||
1876 (debugid
& KDBG_CSC_MASK
) == MACHDBG_CODE(DBG_MACH_SCHED
, 0))) {
1880 if (kd_ctrl_page
.kdebug_flags
& KDBG_PIDCHECK
) {
1881 proc_t cur_proc
= current_proc();
1883 /* only the process with the kdebug bit set is allowed */
1884 if (cur_proc
&& !(cur_proc
->p_kdebug
)) {
1887 } else if (kd_ctrl_page
.kdebug_flags
& KDBG_PIDEXCLUDE
) {
1888 proc_t cur_proc
= current_proc();
1890 /* every process except the one with the kdebug bit set is allowed */
1891 if (cur_proc
&& cur_proc
->p_kdebug
) {
1900 kdebug_debugid_enabled(uint32_t debugid
)
1902 /* if no filtering is enabled */
1903 if (!kd_ctrl_page
.kdebug_slowcheck
) {
1907 return kdebug_debugid_explicitly_enabled(debugid
);
1911 kdebug_debugid_explicitly_enabled(uint32_t debugid
)
1913 if (kd_ctrl_page
.kdebug_flags
& KDBG_TYPEFILTER_CHECK
) {
1914 return typefilter_is_debugid_allowed(kdbg_typefilter
, debugid
);
1915 } else if (KDBG_EXTRACT_CLASS(debugid
) == DBG_TRACE
) {
1917 } else if (kd_ctrl_page
.kdebug_flags
& KDBG_RANGECHECK
) {
1918 if (debugid
< kdlog_beg
|| debugid
> kdlog_end
) {
1921 } else if (kd_ctrl_page
.kdebug_flags
& KDBG_VALCHECK
) {
1922 if ((debugid
& KDBG_EVENTID_MASK
) != kdlog_value1
&&
1923 (debugid
& KDBG_EVENTID_MASK
) != kdlog_value2
&&
1924 (debugid
& KDBG_EVENTID_MASK
) != kdlog_value3
&&
1925 (debugid
& KDBG_EVENTID_MASK
) != kdlog_value4
) {
1934 kdebug_using_continuous_time(void)
1936 return kdebug_enable
& KDEBUG_ENABLE_CONT_TIME
;
1940 * Returns 0 if a string can be traced with these arguments. Returns errno
1941 * value if error occurred.
1944 kdebug_check_trace_string(uint32_t debugid
, uint64_t str_id
)
1946 /* if there are function qualifiers on the debugid */
1947 if (debugid
& ~KDBG_EVENTID_MASK
) {
1951 if (kdebug_validate_debugid(debugid
)) {
1955 if (str_id
!= 0 && (str_id
& STR_ID_SIG_MASK
) != g_str_id_signature
) {
1963 * Implementation of KPI kernel_debug_string.
1966 kernel_debug_string(uint32_t debugid
, uint64_t *str_id
, const char *str
)
1968 /* arguments to tracepoints must be word-aligned */
1969 __attribute__((aligned(sizeof(uintptr_t)))) char str_buf
[STR_BUF_SIZE
];
1970 static_assert(sizeof(str_buf
) > MAX_STR_LEN
);
1971 vm_size_t len_copied
;
1976 if (__probable(kdebug_enable
== 0)) {
1980 if (!kdebug_current_proc_enabled(debugid
)) {
1984 if (!kdebug_debugid_enabled(debugid
)) {
1988 if ((err
= kdebug_check_trace_string(debugid
, *str_id
)) != 0) {
1997 *str_id
= kernel_debug_string_internal(debugid
, *str_id
, NULL
, 0);
2001 memset(str_buf
, 0, sizeof(str_buf
));
2002 len_copied
= strlcpy(str_buf
, str
, MAX_STR_LEN
+ 1);
2003 *str_id
= kernel_debug_string_internal(debugid
, *str_id
, str_buf
,
2009 * Support syscall kdebug_trace_string.
2012 kdebug_trace_string(__unused
struct proc
*p
,
2013 struct kdebug_trace_string_args
*uap
,
2016 __attribute__((aligned(sizeof(uintptr_t)))) char str_buf
[STR_BUF_SIZE
];
2017 static_assert(sizeof(str_buf
) > MAX_STR_LEN
);
2021 if (__probable(kdebug_enable
== 0)) {
2025 if (!kdebug_current_proc_enabled(uap
->debugid
)) {
2029 if (!kdebug_debugid_enabled(uap
->debugid
)) {
2033 if ((err
= kdebug_check_trace_string(uap
->debugid
, uap
->str_id
)) != 0) {
2037 if (uap
->str
== USER_ADDR_NULL
) {
2038 if (uap
->str_id
== 0) {
2042 *retval
= kernel_debug_string_internal(uap
->debugid
, uap
->str_id
,
2047 memset(str_buf
, 0, sizeof(str_buf
));
2048 err
= copyinstr(uap
->str
, str_buf
, MAX_STR_LEN
+ 1, &len_copied
);
2050 /* it's alright to truncate the string, so allow ENAMETOOLONG */
2051 if (err
== ENAMETOOLONG
) {
2052 str_buf
[MAX_STR_LEN
] = '\0';
2057 if (len_copied
<= 1) {
2061 /* convert back to a length */
2064 *retval
= kernel_debug_string_internal(uap
->debugid
, uap
->str_id
, str_buf
,
2070 kdbg_lock_init(void)
2072 static lck_grp_attr_t
*kdebug_lck_grp_attr
= NULL
;
2073 static lck_attr_t
*kdebug_lck_attr
= NULL
;
2075 if (kd_ctrl_page
.kdebug_flags
& KDBG_LOCKINIT
) {
2079 assert(kdebug_lck_grp_attr
== NULL
);
2080 kdebug_lck_grp_attr
= lck_grp_attr_alloc_init();
2081 kdebug_lck_grp
= lck_grp_alloc_init("kdebug", kdebug_lck_grp_attr
);
2082 kdebug_lck_attr
= lck_attr_alloc_init();
2084 kds_spin_lock
= lck_spin_alloc_init(kdebug_lck_grp
, kdebug_lck_attr
);
2085 kdw_spin_lock
= lck_spin_alloc_init(kdebug_lck_grp
, kdebug_lck_attr
);
2087 kd_ctrl_page
.kdebug_flags
|= KDBG_LOCKINIT
;
2091 kdbg_bootstrap(bool early_trace
)
2093 kd_ctrl_page
.kdebug_flags
&= ~KDBG_WRAPPED
;
2095 return create_buffers(early_trace
);
2099 kdbg_reinit(bool early_trace
)
2104 * Disable trace collecting
2105 * First make sure we're not in
2106 * the middle of cutting a trace
2108 kernel_debug_disable();
2111 * make sure the SLOW_NOLOG is seen
2112 * by everyone that might be trying
2119 kdbg_clear_thread_map();
2120 ret
= kdbg_bootstrap(early_trace
);
2122 RAW_file_offset
= 0;
2123 RAW_file_written
= 0;
2129 kdbg_trace_data(struct proc
*proc
, long *arg_pid
, long *arg_uniqueid
)
2135 *arg_pid
= proc
->p_pid
;
2136 /* Fit in a trace point */
2137 *arg_uniqueid
= (long)proc
->p_uniqueid
;
2138 if ((uint64_t) *arg_uniqueid
!= proc
->p_uniqueid
) {
2146 kdbg_trace_string(struct proc
*proc
, long *arg1
, long *arg2
, long *arg3
,
2157 const char *procname
= proc_best_name(proc
);
2158 size_t namelen
= strlen(procname
);
2160 long args
[4] = { 0 };
2162 if (namelen
> sizeof(args
)) {
2163 namelen
= sizeof(args
);
2166 strncpy((char *)args
, procname
, namelen
);
2176 * Writes a cpumap for the given iops_list/cpu_count to the provided buffer.
2178 * You may provide a buffer and size, or if you set the buffer to NULL, a
2179 * buffer of sufficient size will be allocated.
2181 * If you provide a buffer and it is too small, sets cpumap_size to the number
2182 * of bytes required and returns EINVAL.
2184 * On success, if you provided a buffer, cpumap_size is set to the number of
2185 * bytes written. If you did not provide a buffer, cpumap is set to the newly
2186 * allocated buffer and cpumap_size is set to the number of bytes allocated.
2188 * NOTE: It may seem redundant to pass both iops and a cpu_count.
2190 * We may be reporting data from "now", or from the "past".
2192 * The "past" data would be for kdbg_readcpumap().
2194 * If we do not pass both iops and cpu_count, and iops is NULL, this function
2195 * will need to read "now" state to get the number of cpus, which would be in
2196 * error if we were reporting "past" state.
2200 kdbg_cpumap_init_internal(kd_iop_t
* iops
, uint32_t cpu_count
, uint8_t** cpumap
, uint32_t* cpumap_size
)
2203 assert(cpumap_size
);
2205 assert(!iops
|| iops
->cpu_id
+ 1 == cpu_count
);
2207 uint32_t bytes_needed
= sizeof(kd_cpumap_header
) + cpu_count
* sizeof(kd_cpumap
);
2208 uint32_t bytes_available
= *cpumap_size
;
2209 *cpumap_size
= bytes_needed
;
2211 if (*cpumap
== NULL
) {
2212 if (kmem_alloc(kernel_map
, (vm_offset_t
*)cpumap
, (vm_size_t
)*cpumap_size
, VM_KERN_MEMORY_DIAG
) != KERN_SUCCESS
) {
2215 bzero(*cpumap
, *cpumap_size
);
2216 } else if (bytes_available
< bytes_needed
) {
2220 kd_cpumap_header
* header
= (kd_cpumap_header
*)(uintptr_t)*cpumap
;
2222 header
->version_no
= RAW_VERSION1
;
2223 header
->cpu_count
= cpu_count
;
2225 kd_cpumap
* cpus
= (kd_cpumap
*)&header
[1];
2227 int32_t index
= cpu_count
- 1;
2229 cpus
[index
].cpu_id
= iops
->cpu_id
;
2230 cpus
[index
].flags
= KDBG_CPUMAP_IS_IOP
;
2231 strlcpy(cpus
[index
].name
, iops
->callback
.iop_name
, sizeof(cpus
->name
));
2237 while (index
>= 0) {
2238 cpus
[index
].cpu_id
= index
;
2239 cpus
[index
].flags
= 0;
2240 strlcpy(cpus
[index
].name
, "AP", sizeof(cpus
->name
));
2245 return KERN_SUCCESS
;
2249 kdbg_thrmap_init(void)
2251 ktrace_assert_lock_held();
2253 if (kd_ctrl_page
.kdebug_flags
& KDBG_MAPINIT
) {
2257 kd_mapptr
= kdbg_thrmap_init_internal(0, &kd_mapsize
, &kd_mapcount
);
2260 kd_ctrl_page
.kdebug_flags
|= KDBG_MAPINIT
;
2265 kd_resolve_map(thread_t thread
, void *opaque
)
2267 struct kd_resolver
*resolve
= opaque
;
2269 if (resolve
->krs_count
< resolve
->krs_maxcount
) {
2270 kd_threadmap
*map
= &resolve
->krs_map
[resolve
->krs_count
];
2271 struct kd_task_name
*task_name
= resolve
->krs_task
;
2272 map
->thread
= (uintptr_t)thread_tid(thread
);
2274 (void)strlcpy(map
->command
, task_name
->ktn_name
, sizeof(map
->command
));
2276 * Kernel threads should still be marked with non-zero valid bit.
2278 pid_t pid
= resolve
->krs_task
->ktn_pid
;
2279 map
->valid
= pid
== 0 ? 1 : pid
;
2280 resolve
->krs_count
++;
2285 kd_resolve_tasks(struct kd_task_name
*task_names
, vm_size_t ntasks
)
2288 proc_t p
= PROC_NULL
;
2291 ALLPROC_FOREACH(p
) {
2296 * Only record processes that can be referenced and are not exiting.
2298 if (p
->task
&& (p
->p_lflag
& P_LEXIT
) == 0) {
2299 task_reference(p
->task
);
2300 task_names
[i
].ktn_task
= p
->task
;
2301 task_names
[i
].ktn_pid
= p
->p_pid
;
2302 (void)strlcpy(task_names
[i
].ktn_name
, proc_best_name(p
),
2303 sizeof(task_names
[i
].ktn_name
));
2313 kd_resolve_threads(kd_threadmap
*map
, struct kd_task_name
*task_names
,
2314 vm_size_t ntasks
, vm_size_t nthreads
)
2316 struct kd_resolver resolver
= {
2317 .krs_map
= map
, .krs_count
= 0, .krs_maxcount
= nthreads
,
2320 for (int i
= 0; i
< ntasks
; i
++) {
2321 struct kd_task_name
*cur_task
= &task_names
[i
];
2322 resolver
.krs_task
= cur_task
;
2323 task_act_iterate_wth_args(cur_task
->ktn_task
, kd_resolve_map
,
2325 task_deallocate(cur_task
->ktn_task
);
2328 return resolver
.krs_count
;
static kd_threadmap *
kdbg_thrmap_init_internal(size_t maxthreads, vm_size_t *mapsize,
    vm_size_t *mapcount)
{
	kd_threadmap *thread_map = NULL;
	struct kd_task_name *task_names;
	vm_size_t names_size = 0;

	assert(mapsize != NULL);
	assert(mapcount != NULL);

	vm_size_t nthreads = threads_count;
	vm_size_t ntasks = tasks_count;

	/*
	 * Allow 25% more threads and tasks to be created between now and taking the
	 * proc_list_lock.
	 */
	if (os_add_overflow(nthreads, nthreads / 4, &nthreads) ||
	    os_add_overflow(ntasks, ntasks / 4, &ntasks)) {
		return NULL;
	}

	*mapcount = nthreads;
	if (os_mul_overflow(nthreads, sizeof(kd_threadmap), mapsize)) {
		return NULL;
	}
	if (os_mul_overflow(ntasks, sizeof(task_names[0]), &names_size)) {
		return NULL;
	}

	/*
	 * Wait until the out-parameters have been filled with the needed size to
	 * do the bounds checking on the provided maximum.
	 */
	if (maxthreads != 0 && maxthreads < nthreads) {
		return NULL;
	}

	thread_map = kalloc_tag(*mapsize, VM_KERN_MEMORY_DIAG);
	bzero(thread_map, *mapsize);
	task_names = kheap_alloc(KHEAP_TEMP, names_size, Z_WAITOK | Z_ZERO);
	ntasks = kd_resolve_tasks(task_names, ntasks);
	*mapcount = kd_resolve_threads(thread_map, task_names, ntasks, nthreads);
	kheap_free(KHEAP_TEMP, task_names, names_size);

	return thread_map;
}
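
/*
 * Worked sizing example for the function above, using hypothetical counts:
 * with threads_count == 400 and tasks_count == 100, the 25% headroom gives
 * nthreads == 500 and ntasks == 125, so *mapsize becomes
 * 500 * sizeof(kd_threadmap) and names_size becomes
 * 125 * sizeof(struct kd_task_name).  Real boot-time counts will differ.
 */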
static void
kdbg_clear(void)
{
	/*
	 * Clean up the trace buffer
	 * First make sure we're not in
	 * the middle of cutting a trace
	 */
	kernel_debug_disable();
	kdbg_disable_typefilter();

	/*
	 * make sure the SLOW_NOLOG is seen
	 * by everyone that might be trying
	 * to cut a trace
	 */
	IOSleep(100);

	/* reset kdebug state for each process */
	if (kd_ctrl_page.kdebug_flags & (KDBG_PIDCHECK | KDBG_PIDEXCLUDE)) {
		proc_list_lock();
		proc_t p;
		ALLPROC_FOREACH(p) {
			p->p_kdebug = 0;
		}
		proc_list_unlock();
	}

	kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
	kd_ctrl_page.kdebug_flags &= ~(KDBG_NOWRAP | KDBG_RANGECHECK | KDBG_VALCHECK);
	kd_ctrl_page.kdebug_flags &= ~(KDBG_PIDCHECK | KDBG_PIDEXCLUDE);

	kd_ctrl_page.oldest_time = 0;

	delete_buffers();
	nkdbufs = 0;

	/* Clean up the thread map buffer */
	kdbg_clear_thread_map();

	RAW_file_offset = 0;
	RAW_file_written = 0;
}
void
kdebug_reset(void)
{
	ktrace_assert_lock_held();

	kdbg_lock_init();

	kdbg_clear();
	if (kdbg_typefilter) {
		typefilter_reject_all(kdbg_typefilter);
		typefilter_allow_class(kdbg_typefilter, DBG_TRACE);
	}
}
void
kdebug_free_early_buf(void)
{
#if defined(__x86_64__)
	/*
	 * Make Intel aware that the early buffer is no longer being used.  ARM
	 * handles this as part of the BOOTDATA segment.
	 */
	ml_static_mfree((vm_offset_t)&kd_early_buffer, sizeof(kd_early_buffer));
#endif /* defined(__x86_64__) */
}
static int
kdbg_setpid(kd_regtype *kdr)
{
	pid_t pid;
	int flag, ret = 0;
	struct proc *p;

	pid = (pid_t)kdr->value1;
	flag = (int)kdr->value2;

	if (pid >= 0) {
		if ((p = proc_find(pid)) == NULL) {
			ret = ESRCH;
		} else {
			if (flag) {
				/*
				 * turn on pid check for this and all pids
				 */
				kd_ctrl_page.kdebug_flags |= KDBG_PIDCHECK;
				kd_ctrl_page.kdebug_flags &= ~KDBG_PIDEXCLUDE;
				kdbg_set_flags(SLOW_CHECKS, 0, true);

				p->p_kdebug = 1;
			} else {
				/*
				 * turn off pid check for this pid value
				 * Don't turn off all pid checking though
				 *
				 * kd_ctrl_page.kdebug_flags &= ~KDBG_PIDCHECK;
				 */
				p->p_kdebug = 0;
			}
			proc_rele(p);
		}
	} else {
		ret = EINVAL;
	}

	return ret;
}
/* This is for pid exclusion in the trace buffer */
static int
kdbg_setpidex(kd_regtype *kdr)
{
	pid_t pid;
	int flag, ret = 0;
	struct proc *p;

	pid = (pid_t)kdr->value1;
	flag = (int)kdr->value2;

	if (pid >= 0) {
		if ((p = proc_find(pid)) == NULL) {
			ret = ESRCH;
		} else {
			if (flag) {
				/*
				 * turn on pid exclusion
				 */
				kd_ctrl_page.kdebug_flags |= KDBG_PIDEXCLUDE;
				kd_ctrl_page.kdebug_flags &= ~KDBG_PIDCHECK;
				kdbg_set_flags(SLOW_CHECKS, 0, true);

				p->p_kdebug = 1;
			} else {
				/*
				 * turn off pid exclusion for this pid value
				 * Don't turn off all pid exclusion though
				 *
				 * kd_ctrl_page.kdebug_flags &= ~KDBG_PIDEXCLUDE;
				 */
				p->p_kdebug = 0;
			}
			proc_rele(p);
		}
	} else {
		ret = EINVAL;
	}

	return ret;
}
/*
 * The following functions all operate on the "global" typefilter singleton.
 */

/*
 * The tf param is optional, you may pass either a valid typefilter or NULL.
 * If you pass a valid typefilter, you release ownership of that typefilter.
 */
static int
kdbg_initialize_typefilter(typefilter_t tf)
{
	ktrace_assert_lock_held();
	assert(!kdbg_typefilter);
	assert(!kdbg_typefilter_memory_entry);
	typefilter_t deallocate_tf = NULL;

	if (!tf && ((tf = deallocate_tf = typefilter_create()) == NULL)) {
		return ENOMEM;
	}

	if ((kdbg_typefilter_memory_entry = typefilter_create_memory_entry(tf)) == MACH_PORT_NULL) {
		if (deallocate_tf) {
			typefilter_deallocate(deallocate_tf);
		}
		return ENOMEM;
	}

	/*
	 * The atomic store closes a race window with
	 * the kdebug_typefilter syscall, which assumes
	 * that any non-null kdbg_typefilter means a
	 * valid memory_entry is available.
	 */
	os_atomic_store(&kdbg_typefilter, tf, release);

	return KERN_SUCCESS;
}
int
kdbg_copyin_typefilter(user_addr_t addr, size_t size)
{
	int ret = ENOMEM;
	typefilter_t tf;

	ktrace_assert_lock_held();

	if (size != KDBG_TYPEFILTER_BITMAP_SIZE) {
		return EINVAL;
	}

	if ((tf = typefilter_create())) {
		if ((ret = copyin(addr, tf, KDBG_TYPEFILTER_BITMAP_SIZE)) == 0) {
			/* The kernel typefilter must always allow DBG_TRACE */
			typefilter_allow_class(tf, DBG_TRACE);

			/*
			 * If this is the first typefilter; claim it.
			 * Otherwise copy and deallocate.
			 *
			 * Allocating a typefilter for the copyin allows
			 * the kernel to hold the invariant that DBG_TRACE
			 * must always be allowed.
			 */
			if (!kdbg_typefilter) {
				if ((ret = kdbg_initialize_typefilter(tf))) {
					return ret;
				}
				tf = NULL;
			} else {
				typefilter_copy(kdbg_typefilter, tf);
			}
			kdbg_enable_typefilter();
			kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_TYPEFILTER_CHANGED, kdbg_typefilter);
		}

		if (tf) {
			typefilter_deallocate(tf);
		}
	}

	return ret;
}
/*
 * Enable the flags in the control page for the typefilter.  Assumes that
 * kdbg_typefilter has already been allocated, so events being written
 * don't see a bad typefilter.
 */
static void
kdbg_enable_typefilter(void)
{
	assert(kdbg_typefilter);
	kd_ctrl_page.kdebug_flags &= ~(KDBG_RANGECHECK | KDBG_VALCHECK);
	kd_ctrl_page.kdebug_flags |= KDBG_TYPEFILTER_CHECK;
	kdbg_set_flags(SLOW_CHECKS, 0, true);
	commpage_update_kdebug_state();
}
/*
 * Disable the flags in the control page for the typefilter.  The typefilter
 * may be safely deallocated shortly after this function returns.
 */
static void
kdbg_disable_typefilter(void)
{
	bool notify_iops = kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK;
	kd_ctrl_page.kdebug_flags &= ~KDBG_TYPEFILTER_CHECK;

	if ((kd_ctrl_page.kdebug_flags & (KDBG_PIDCHECK | KDBG_PIDEXCLUDE))) {
		kdbg_set_flags(SLOW_CHECKS, 0, true);
	} else {
		kdbg_set_flags(SLOW_CHECKS, 0, false);
	}
	commpage_update_kdebug_state();

	if (notify_iops) {
		/*
		 * Notify IOPs that the typefilter will now allow everything.
		 * Otherwise, they won't know a typefilter is no longer in
		 * effect.
		 */
		typefilter_allow_all(kdbg_typefilter);
		kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops,
		    KD_CALLBACK_TYPEFILTER_CHANGED, kdbg_typefilter);
	}
}
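
/*
 * For illustration: the typefilter is a bitmap with one bit per
 * class/subclass pair, so allowing class 0x1, subclass 0x90 corresponds to
 * setting bit ((0x1 << 8) | 0x90) in a KDBG_TYPEFILTER_BITMAP_SIZE buffer
 * handed to KERN_KDSET_TYPEFILTER.  This is a sketch of the expected
 * encoding; see typefilter_allow_class()/typefilter_allow_csc() for the
 * authoritative behavior.
 */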
uint32_t
kdebug_commpage_state(void)
{
	if (kdebug_enable) {
		if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) {
			return KDEBUG_COMMPAGE_ENABLE_TYPEFILTER | KDEBUG_COMMPAGE_ENABLE_TRACE;
		}
		return KDEBUG_COMMPAGE_ENABLE_TRACE;
	}
	return 0;
}
static int
kdbg_setreg(kd_regtype * kdr)
{
	int ret = 0;
	unsigned int val_1, val_2, val;
	switch (kdr->type) {
	case KDBG_CLASSTYPE:
		val_1 = (kdr->value1 & 0xff);
		val_2 = (kdr->value2 & 0xff);
		kdlog_beg = (val_1 << 24);
		kdlog_end = (val_2 << 24);
		kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
		kd_ctrl_page.kdebug_flags &= ~KDBG_VALCHECK;       /* Turn off specific value check */
		kd_ctrl_page.kdebug_flags |= (KDBG_RANGECHECK | KDBG_CLASSTYPE);
		kdbg_set_flags(SLOW_CHECKS, 0, true);
		break;
	case KDBG_SUBCLSTYPE:
		val_1 = (kdr->value1 & 0xff);
		val_2 = (kdr->value2 & 0xff);
		val = val_2 + 1;
		kdlog_beg = ((val_1 << 24) | (val_2 << 16));
		kdlog_end = ((val_1 << 24) | (val << 16));
		kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
		kd_ctrl_page.kdebug_flags &= ~KDBG_VALCHECK;       /* Turn off specific value check */
		kd_ctrl_page.kdebug_flags |= (KDBG_RANGECHECK | KDBG_SUBCLSTYPE);
		kdbg_set_flags(SLOW_CHECKS, 0, true);
		break;
	case KDBG_RANGETYPE:
		kdlog_beg = (kdr->value1);
		kdlog_end = (kdr->value2);
		kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
		kd_ctrl_page.kdebug_flags &= ~KDBG_VALCHECK;       /* Turn off specific value check */
		kd_ctrl_page.kdebug_flags |= (KDBG_RANGECHECK | KDBG_RANGETYPE);
		kdbg_set_flags(SLOW_CHECKS, 0, true);
		break;
	case KDBG_VALCHECK:
		kdlog_value1 = (kdr->value1);
		kdlog_value2 = (kdr->value2);
		kdlog_value3 = (kdr->value3);
		kdlog_value4 = (kdr->value4);
		kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
		kd_ctrl_page.kdebug_flags &= ~KDBG_RANGECHECK;     /* Turn off range check */
		kd_ctrl_page.kdebug_flags |= KDBG_VALCHECK;        /* Turn on specific value check */
		kdbg_set_flags(SLOW_CHECKS, 0, true);
		break;
	case KDBG_TYPENONE:
		kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;

		if ((kd_ctrl_page.kdebug_flags & (KDBG_RANGECHECK | KDBG_VALCHECK |
		    KDBG_PIDCHECK | KDBG_PIDEXCLUDE |
		    KDBG_TYPEFILTER_CHECK))) {
			kdbg_set_flags(SLOW_CHECKS, 0, true);
		} else {
			kdbg_set_flags(SLOW_CHECKS, 0, false);
		}

		kdlog_beg = 0;
		kdlog_end = 0;
		break;
	default:
		ret = EINVAL;
		break;
	}
	return ret;
}
static int
kdbg_write_to_vnode(caddr_t buffer, size_t size, vnode_t vp, vfs_context_t ctx, off_t file_offset)
{
	assert(size < INT_MAX);
	return vn_rdwr(UIO_WRITE, vp, buffer, (int)size, file_offset, UIO_SYSSPACE, IO_NODELOCKED | IO_UNIT,
	           vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
}
static int
kdbg_write_v3_chunk_header(user_addr_t buffer, uint32_t tag, uint32_t sub_tag, uint64_t length, vnode_t vp, vfs_context_t ctx)
{
	int ret = KERN_SUCCESS;
	kd_chunk_header_v3 header = {
		.tag = tag,
		.sub_tag = sub_tag,
		.length = length,
	};

	// Check that only one of them is valid
	assert(!buffer ^ !vp);
	assert((vp == NULL) || (ctx != NULL));

	// Write the chunk header to the file, or copy it out to the user buffer
	if (vp) {
		ret = kdbg_write_to_vnode((caddr_t)&header, sizeof(kd_chunk_header_v3), vp, ctx, RAW_file_offset);
		if (ret == KERN_SUCCESS) {
			RAW_file_offset += (sizeof(kd_chunk_header_v3));
		}
	} else {
		ret = copyout(&header, buffer, sizeof(kd_chunk_header_v3));
	}

	return ret;
}
static int
kdbg_write_v3_chunk_to_fd(uint32_t tag, uint32_t sub_tag, uint64_t length, void *payload, uint64_t payload_size, int fd)
{
	proc_t p = current_proc();
	struct vfs_context context;
	struct fileproc *fp;
	vnode_t vp;

	if (fp_get_ftype(p, fd, DTYPE_VNODE, EBADF, &fp)) {
		return EBADF;
	}

	vp = fp->fp_glob->fg_data;
	context.vc_thread = current_thread();
	context.vc_ucred = fp->fp_glob->fg_cred;

	if ((vnode_getwithref(vp)) == 0) {
		RAW_file_offset = fp->fp_glob->fg_offset;

		kd_chunk_header_v3 chunk_header = {
			.tag = tag,
			.sub_tag = sub_tag,
			.length = length,
		};

		int ret = kdbg_write_to_vnode((caddr_t) &chunk_header, sizeof(kd_chunk_header_v3), vp, &context, RAW_file_offset);
		if (!ret) {
			RAW_file_offset += sizeof(kd_chunk_header_v3);
		}

		ret = kdbg_write_to_vnode((caddr_t) payload, (size_t) payload_size, vp, &context, RAW_file_offset);
		if (!ret) {
			RAW_file_offset += payload_size;
		}

		fp->fp_glob->fg_offset = RAW_file_offset;
		vnode_put(vp);
	}

	fp_drop(p, fd, fp, 0);
	return KERN_SUCCESS;
}
static user_addr_t
kdbg_write_v3_event_chunk_header(user_addr_t buffer, uint32_t tag, uint64_t length, vnode_t vp, vfs_context_t ctx)
{
	uint64_t future_chunk_timestamp = 0;
	length += sizeof(uint64_t);

	if (kdbg_write_v3_chunk_header(buffer, tag, V3_EVENT_DATA_VERSION, length, vp, ctx)) {
		return 0;
	}
	if (buffer) {
		buffer += sizeof(kd_chunk_header_v3);
	}

	// Check that only one of them is valid
	assert(!buffer ^ !vp);
	assert((vp == NULL) || (ctx != NULL));

	// Write the 8-byte future_chunk_timestamp field in the payload
	if (vp) {
		int ret = kdbg_write_to_vnode((caddr_t)&future_chunk_timestamp, sizeof(uint64_t), vp, ctx, RAW_file_offset);
		if (!ret) {
			RAW_file_offset += (sizeof(uint64_t));
		}
	} else {
		if (copyout(&future_chunk_timestamp, buffer, sizeof(uint64_t))) {
			return 0;
		}
	}

	return buffer + sizeof(uint64_t);
}
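
/*
 * Sketch of a v3 events chunk as emitted above:
 *
 *   [kd_chunk_header_v3 tag/sub_tag/length][future_chunk_timestamp, 8 bytes,
 *    currently written as zero][tempbuf_number * sizeof(kd_buf) of events]
 *
 * The 8-byte timestamp field is counted in the chunk length, which is why
 * `length` is bumped by sizeof(uint64_t) before the header is written.
 */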
static int
kdbg_write_v3_header(user_addr_t user_header, size_t *user_header_size, int fd)
{
	int ret = KERN_SUCCESS;

	uint8_t* cpumap = 0;
	uint32_t cpumap_size = 0;
	uint32_t thrmap_size = 0;

	size_t bytes_needed = 0;

	// Check that only one of them is valid
	assert(!user_header ^ !fd);
	assert(user_header_size);

	if (!(kd_ctrl_page.kdebug_flags & KDBG_BUFINIT)) {
		ret = EINVAL;
		goto bail;
	}

	if (!(user_header || fd)) {
		ret = EINVAL;
		goto bail;
	}

	// Initialize the cpu map
	ret = kdbg_cpumap_init_internal(kd_ctrl_page.kdebug_iops, kd_ctrl_page.kdebug_cpus, &cpumap, &cpumap_size);
	if (ret != KERN_SUCCESS) {
		goto bail;
	}

	// Check if a thread map is initialized
	if (!kd_mapptr) {
		ret = EINVAL;
		goto bail;
	}
	if (os_mul_overflow(kd_mapcount, sizeof(kd_threadmap), &thrmap_size)) {
		ret = ERANGE;
		goto bail;
	}

	mach_timebase_info_data_t timebase = {0, 0};
	clock_timebase_info(&timebase);

	// Setup the header.
	// See v3 header description in sys/kdebug.h for more information.
	kd_header_v3 header = {
		.tag = RAW_VERSION3,
		.sub_tag = V3_HEADER_VERSION,
		.length = (sizeof(kd_header_v3) + cpumap_size - sizeof(kd_cpumap_header)),
		.timebase_numer = timebase.numer,
		.timebase_denom = timebase.denom,
		.timestamp = 0, /* FIXME rdar://problem/22053009 */
		.walltime_usecs = 0,
		.timezone_minuteswest = 0,
#if defined(__LP64__)
		.flags = 1,
#else
		.flags = 0,
#endif
	};

	// If its a buffer, check if we have enough space to copy the header and the maps.
	if (user_header) {
		bytes_needed = (size_t)header.length + thrmap_size + (2 * sizeof(kd_chunk_header_v3));
		if (*user_header_size < bytes_needed) {
			ret = EINVAL;
			goto bail;
		}
	}

	// Start writing the header
	if (fd) {
		void *hdr_ptr = (void *)(((uintptr_t) &header) + sizeof(kd_chunk_header_v3));
		size_t payload_size = (sizeof(kd_header_v3) - sizeof(kd_chunk_header_v3));

		ret = kdbg_write_v3_chunk_to_fd(RAW_VERSION3, V3_HEADER_VERSION, header.length, hdr_ptr, payload_size, fd);
		if (ret) {
			goto bail;
		}
	} else {
		if (copyout(&header, user_header, sizeof(kd_header_v3))) {
			ret = EFAULT;
			goto bail;
		}
		// Update the user pointer
		user_header += sizeof(kd_header_v3);
	}

	// Write a cpu map. This is a sub chunk of the header
	cpumap = (uint8_t*)((uintptr_t) cpumap + sizeof(kd_cpumap_header));
	size_t payload_size = (size_t)(cpumap_size - sizeof(kd_cpumap_header));
	if (fd) {
		ret = kdbg_write_v3_chunk_to_fd(V3_CPU_MAP, V3_CPUMAP_VERSION, payload_size, (void *)cpumap, payload_size, fd);
		if (ret) {
			goto bail;
		}
	} else {
		ret = kdbg_write_v3_chunk_header(user_header, V3_CPU_MAP, V3_CPUMAP_VERSION, payload_size, NULL, NULL);
		if (ret) {
			goto bail;
		}
		user_header += sizeof(kd_chunk_header_v3);
		if (copyout(cpumap, user_header, payload_size)) {
			ret = EFAULT;
			goto bail;
		}
		// Update the user pointer
		user_header += payload_size;
	}

	// Write a thread map
	if (fd) {
		ret = kdbg_write_v3_chunk_to_fd(V3_THREAD_MAP, V3_THRMAP_VERSION, thrmap_size, (void *)kd_mapptr, thrmap_size, fd);
		if (ret) {
			goto bail;
		}
	} else {
		ret = kdbg_write_v3_chunk_header(user_header, V3_THREAD_MAP, V3_THRMAP_VERSION, thrmap_size, NULL, NULL);
		if (ret) {
			goto bail;
		}
		user_header += sizeof(kd_chunk_header_v3);
		if (copyout(kd_mapptr, user_header, thrmap_size)) {
			ret = EFAULT;
			goto bail;
		}
		user_header += thrmap_size;
	}

	if (fd) {
		RAW_file_written += bytes_needed;
	}

	*user_header_size = bytes_needed;

bail:
	if (cpumap) {
		kmem_free(kernel_map, (vm_offset_t)cpumap, cpumap_size);
	}
	return ret;
}
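
/*
 * Overall shape of a RAW_VERSION3 stream produced by the header writer above
 * plus the event chunks written later (sizes follow sys/kdebug.h):
 *
 *   [kd_header_v3][V3_CPU_MAP chunk][V3_THREAD_MAP chunk][V3_RAW_EVENTS ...]
 *
 * When writing to a user buffer instead of a file descriptor, the same chunks
 * are laid out back-to-back starting at user_header.
 */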
int
kdbg_readcpumap(user_addr_t user_cpumap, size_t *user_cpumap_size)
{
	uint8_t* cpumap = NULL;
	uint32_t cpumap_size = 0;
	int ret = KERN_SUCCESS;

	if (kd_ctrl_page.kdebug_flags & KDBG_BUFINIT) {
		if (kdbg_cpumap_init_internal(kd_ctrl_page.kdebug_iops, kd_ctrl_page.kdebug_cpus, &cpumap, &cpumap_size) == KERN_SUCCESS) {
			if (user_cpumap) {
				size_t bytes_to_copy = (*user_cpumap_size >= cpumap_size) ? cpumap_size : *user_cpumap_size;
				if (copyout(cpumap, user_cpumap, (size_t)bytes_to_copy)) {
					ret = EFAULT;
				}
			}
			*user_cpumap_size = cpumap_size;
			kmem_free(kernel_map, (vm_offset_t)cpumap, cpumap_size);
		} else {
			ret = EINVAL;
		}
	} else {
		ret = EINVAL;
	}

	return ret;
}
static int
kdbg_readcurthrmap(user_addr_t buffer, size_t *bufsize)
{
	kd_threadmap *mapptr;
	vm_size_t mapsize;
	vm_size_t mapcount;
	int ret = 0;
	size_t count = *bufsize / sizeof(kd_threadmap);

	*bufsize = 0;

	if ((mapptr = kdbg_thrmap_init_internal(count, &mapsize, &mapcount))) {
		if (copyout(mapptr, buffer, mapcount * sizeof(kd_threadmap))) {
			ret = EFAULT;
		} else {
			*bufsize = (mapcount * sizeof(kd_threadmap));
		}

		kfree(mapptr, mapsize);
	} else {
		ret = EINVAL;
	}

	return ret;
}
static int
kdbg_write_v1_header(bool write_thread_map, vnode_t vp, vfs_context_t ctx)
{
	int ret = 0;
	RAW_header header;
	clock_sec_t secs;
	clock_usec_t usecs;
	char *pad_buf;
	uint32_t pad_size;
	uint32_t extra_thread_count = 0;
	uint32_t cpumap_size;
	size_t map_size = 0;
	uint32_t map_count = 0;

	if (write_thread_map) {
		assert(kd_ctrl_page.kdebug_flags & KDBG_MAPINIT);
		if (kd_mapcount > UINT32_MAX) {
			return ERANGE;
		}
		map_count = (uint32_t)kd_mapcount;
		if (os_mul_overflow(map_count, sizeof(kd_threadmap), &map_size)) {
			return ERANGE;
		}
		if (map_size >= INT_MAX) {
			return ERANGE;
		}
	}

	/*
	 * Without the buffers initialized, we cannot construct a CPU map or a
	 * thread map, and cannot write a header.
	 */
	if (!(kd_ctrl_page.kdebug_flags & KDBG_BUFINIT)) {
		return EINVAL;
	}

	/*
	 * To write a RAW_VERSION1+ file, we must embed a cpumap in the
	 * "padding" used to page align the events following the threadmap.  If
	 * the threadmap happens to not require enough padding, we artificially
	 * increase its footprint until it needs enough padding.
	 */
	pad_size = PAGE_16KB - ((sizeof(RAW_header) + map_size) & PAGE_MASK);
	cpumap_size = sizeof(kd_cpumap_header) + kd_ctrl_page.kdebug_cpus * sizeof(kd_cpumap);

	if (cpumap_size > pad_size) {
		/* If the cpu map doesn't fit in the current available pad_size,
		 * we increase the pad_size by 16K. We do this so that the event
		 * data is always available on a page aligned boundary for both
		 * 4k and 16k systems. We enforce this alignment for the event
		 * data so that we can take advantage of optimized file/disk writes.
		 */
		pad_size += PAGE_16KB;
	}

	/* The way we are silently embedding a cpumap in the "padding" is by artificially
	 * increasing the number of thread entries. However, we'll also need to ensure that
	 * the cpumap is embedded in the last 4K page before when the event data is expected.
	 * This way the tools can read the data starting the next page boundary on both
	 * 4K and 16K systems, preserving compatibility with older versions of the tools.
	 */
	if (pad_size > PAGE_4KB) {
		pad_size -= PAGE_4KB;
		extra_thread_count = (pad_size / sizeof(kd_threadmap)) + 1;
	}

	memset(&header, 0, sizeof(header));
	header.version_no = RAW_VERSION1;
	header.thread_count = map_count + extra_thread_count;

	clock_get_calendar_microtime(&secs, &usecs);
	header.TOD_secs = secs;
	header.TOD_usecs = usecs;

	ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)&header, (int)sizeof(RAW_header), RAW_file_offset,
	    UIO_SYSSPACE, IO_NODELOCKED | IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
	if (ret) {
		goto write_error;
	}
	RAW_file_offset += sizeof(RAW_header);
	RAW_file_written += sizeof(RAW_header);

	if (write_thread_map) {
		assert(map_size < INT_MAX);
		ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)kd_mapptr, (int)map_size, RAW_file_offset,
		    UIO_SYSSPACE, IO_NODELOCKED | IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
		if (ret) {
			goto write_error;
		}

		RAW_file_offset += map_size;
		RAW_file_written += map_size;
	}

	if (extra_thread_count) {
		pad_size = extra_thread_count * sizeof(kd_threadmap);
		pad_buf = kheap_alloc(KHEAP_TEMP, pad_size, Z_WAITOK | Z_ZERO);
		if (!pad_buf) {
			ret = ENOMEM;
			goto write_error;
		}

		assert(pad_size < INT_MAX);
		ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)pad_buf, (int)pad_size, RAW_file_offset,
		    UIO_SYSSPACE, IO_NODELOCKED | IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
		kheap_free(KHEAP_TEMP, pad_buf, pad_size);
		if (ret) {
			goto write_error;
		}

		RAW_file_offset += pad_size;
		RAW_file_written += pad_size;
	}

	pad_size = PAGE_SIZE - (RAW_file_offset & PAGE_MASK);
	if (pad_size) {
		pad_buf = (char *)kheap_alloc(KHEAP_TEMP, pad_size, Z_WAITOK | Z_ZERO);
		if (!pad_buf) {
			ret = ENOMEM;
			goto write_error;
		}

		/*
		 * embed a cpumap in the padding bytes.
		 * older code will skip this.
		 * newer code will know how to read it.
		 */
		uint32_t temp = pad_size;
		if (kdbg_cpumap_init_internal(kd_ctrl_page.kdebug_iops, kd_ctrl_page.kdebug_cpus, (uint8_t**)&pad_buf, &temp) != KERN_SUCCESS) {
			memset(pad_buf, 0, pad_size);
		}

		assert(pad_size < INT_MAX);
		ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)pad_buf, (int)pad_size, RAW_file_offset,
		    UIO_SYSSPACE, IO_NODELOCKED | IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
		kheap_free(KHEAP_TEMP, pad_buf, pad_size);
		if (ret) {
			goto write_error;
		}

		RAW_file_offset += pad_size;
		RAW_file_written += pad_size;
	}

write_error:
	return ret;
}
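
/*
 * Worked example of the padding math above (hypothetical numbers): if
 * sizeof(RAW_header) + map_size == 0x3F80, pad_size starts at 0x80 bytes.  If
 * the cpumap needs more than 0x80 bytes, pad_size grows by PAGE_16KB; any
 * padding beyond the final 4 KB page is then converted into
 * extra_thread_count dummy thread map entries, so the embedded cpumap always
 * sits in the last 4 KB page before the page-aligned event data.
 */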
static void
kdbg_clear_thread_map(void)
{
	ktrace_assert_lock_held();

	if (kd_ctrl_page.kdebug_flags & KDBG_MAPINIT) {
		assert(kd_mapptr != NULL);
		kfree(kd_mapptr, kd_mapsize);
		kd_mapptr = NULL;
		kd_mapsize = 0;
		kd_mapcount = 0;
		kd_ctrl_page.kdebug_flags &= ~KDBG_MAPINIT;
	}
}
/*
 * Write out a version 1 header and the thread map, if it is initialized, to a
 * vnode.  Used by KDWRITEMAP and kdbg_dump_trace_to_file.
 *
 * Returns write errors from vn_rdwr if a write fails.  Returns ENODATA if the
 * thread map has not been initialized, but the header will still be written.
 * Returns ENOMEM if padding could not be allocated.  Returns 0 otherwise.
 */
static int
kdbg_write_thread_map(vnode_t vp, vfs_context_t ctx)
{
	int ret = 0;
	bool map_initialized;

	ktrace_assert_lock_held();
	assert(ctx != NULL);

	map_initialized = (kd_ctrl_page.kdebug_flags & KDBG_MAPINIT);

	ret = kdbg_write_v1_header(map_initialized, vp, ctx);
	if (ret == 0) {
		if (map_initialized) {
			kdbg_clear_thread_map();
		} else {
			ret = ENODATA;
		}
	}

	return ret;
}
/*
 * Copy out the thread map to a user space buffer.  Used by KDTHRMAP.
 *
 * Returns copyout errors if the copyout fails.  Returns ENODATA if the thread
 * map has not been initialized.  Returns EINVAL if the buffer provided is not
 * large enough for the entire thread map.  Returns 0 otherwise.
 */
static int
kdbg_copyout_thread_map(user_addr_t buffer, size_t *buffer_size)
{
	bool map_initialized;
	size_t map_size;
	int ret = 0;

	ktrace_assert_lock_held();
	assert(buffer_size != NULL);

	map_initialized = (kd_ctrl_page.kdebug_flags & KDBG_MAPINIT);
	if (!map_initialized) {
		return ENODATA;
	}

	map_size = kd_mapcount * sizeof(kd_threadmap);
	if (*buffer_size < map_size) {
		return EINVAL;
	}

	ret = copyout(kd_mapptr, buffer, map_size);
	if (ret == 0) {
		kdbg_clear_thread_map();
	}

	return ret;
}
int
kdbg_readthrmap_v3(user_addr_t buffer, size_t buffer_size, int fd)
{
	int ret = 0;
	bool map_initialized;
	size_t map_size;

	ktrace_assert_lock_held();

	if ((!fd && !buffer) || (fd && buffer)) {
		return EINVAL;
	}

	map_initialized = (kd_ctrl_page.kdebug_flags & KDBG_MAPINIT);
	map_size = kd_mapcount * sizeof(kd_threadmap);

	if (map_initialized && (buffer_size >= map_size)) {
		ret = kdbg_write_v3_header(buffer, &buffer_size, fd);
		if (ret == 0) {
			kdbg_clear_thread_map();
		}
	} else {
		ret = EINVAL;
	}

	return ret;
}
static void
kdbg_set_nkdbufs(unsigned int req_nkdbufs)
{
	/*
	 * Only allow allocation up to half the available memory (sane_size).
	 */
	uint64_t max_nkdbufs = (sane_size / 2) / sizeof(kd_buf);
	nkdbufs = (req_nkdbufs > max_nkdbufs) ? (unsigned int)max_nkdbufs :
	    req_nkdbufs;
}
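
/*
 * Example of the clamp above, assuming sane_size == 8 GB and a 64-byte
 * kd_buf: max_nkdbufs == (8 GB / 2) / 64 == 64M events, so any larger request
 * is silently reduced to that ceiling.  The figures are illustrative;
 * sane_size and sizeof(kd_buf) vary by configuration.
 */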
/*
 * Block until there are `n_storage_threshold` storage units filled with
 * events or `timeout_ms` milliseconds have passed.  If `locked_wait` is true,
 * `ktrace_lock` is held while waiting.  This is necessary while waiting to
 * write events out of the buffers.
 *
 * Returns true if the threshold was reached and false otherwise.
 *
 * Called with `ktrace_lock` locked and interrupts enabled.
 */
static bool
kdbg_wait(uint64_t timeout_ms, bool locked_wait)
{
	int wait_result = THREAD_AWAKENED;
	uint64_t abstime = 0;

	ktrace_assert_lock_held();

	if (timeout_ms != 0) {
		uint64_t ns = timeout_ms * NSEC_PER_MSEC;
		nanoseconds_to_absolutetime(ns, &abstime);
		clock_absolutetime_interval_to_deadline(abstime, &abstime);
	}

	bool s = ml_set_interrupts_enabled(false);
	if (!s) {
		panic("kdbg_wait() called with interrupts disabled");
	}
	lck_spin_lock_grp(kdw_spin_lock, kdebug_lck_grp);

	if (!locked_wait) {
		/* drop the mutex to allow others to access trace */
		ktrace_unlock();
	}

	while (wait_result == THREAD_AWAKENED &&
	    kd_ctrl_page.kds_inuse_count < n_storage_threshold) {
		kds_waiter = 1;

		if (abstime) {
			wait_result = lck_spin_sleep_deadline(kdw_spin_lock, 0, &kds_waiter, THREAD_ABORTSAFE, abstime);
		} else {
			wait_result = lck_spin_sleep(kdw_spin_lock, 0, &kds_waiter, THREAD_ABORTSAFE);
		}

		kds_waiter = 0;
	}

	/* check the count under the spinlock */
	bool threshold_exceeded = (kd_ctrl_page.kds_inuse_count >= n_storage_threshold);

	lck_spin_unlock(kdw_spin_lock);
	ml_set_interrupts_enabled(s);

	if (!locked_wait) {
		/* pick the mutex back up again */
		ktrace_lock();
	}

	/* write out whether we've exceeded the threshold */
	return threshold_exceeded;
}
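
/*
 * Usage note: kdbg_control passes the user-supplied `size` argument as
 * `timeout_ms` for KERN_KDWRITETR and KERN_KDBUFWAIT (see below), so a zero
 * size means "wait indefinitely for the storage threshold".
 */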
/*
 * Wakeup a thread waiting using `kdbg_wait` if there are at least
 * `n_storage_threshold` storage units in use.
 */
static void
kdbg_wakeup(void)
{
	bool need_kds_wakeup = false;

	/*
	 * Try to take the lock here to synchronize with the waiter entering
	 * the blocked state.  Use the try mode to prevent deadlocks caused by
	 * re-entering this routine due to various trace points triggered in the
	 * lck_spin_sleep_xxxx routines used to actually enter one of our 2 wait
	 * conditions.  No problem if we fail, there will be lots of additional
	 * events coming in that will eventually succeed in grabbing this lock.
	 */
	bool s = ml_set_interrupts_enabled(false);

	if (lck_spin_try_lock(kdw_spin_lock)) {
		if (kds_waiter &&
		    (kd_ctrl_page.kds_inuse_count >= n_storage_threshold)) {
			kds_waiter = 0;
			need_kds_wakeup = true;
		}
		lck_spin_unlock(kdw_spin_lock);
	}

	ml_set_interrupts_enabled(s);

	if (need_kds_wakeup == true) {
		wakeup(&kds_waiter);
	}
}
int
kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep)
{
	int ret = 0;
	size_t size = *sizep;
	unsigned int value = 0;
	kd_regtype kd_Reg;
	kbufinfo_t kd_bufinfo;
	proc_t p;

	if (name[0] == KERN_KDWRITETR ||
	    name[0] == KERN_KDWRITETR_V3 ||
	    name[0] == KERN_KDWRITEMAP ||
	    name[0] == KERN_KDWRITEMAP_V3 ||
	    name[0] == KERN_KDEFLAGS ||
	    name[0] == KERN_KDDFLAGS ||
	    name[0] == KERN_KDENABLE ||
	    name[0] == KERN_KDSETBUF) {
		if (namelen < 2) {
			return EINVAL;
		}
		value = name[1];
	}

	kdbg_lock_init();
	assert(kd_ctrl_page.kdebug_flags & KDBG_LOCKINIT);

	ktrace_lock();

	/*
	 * Some requests only require "read" access to kdebug trace.  Regardless,
	 * tell ktrace that a configuration or read is occurring (and see if it's
	 * allowed).
	 */
	if (name[0] != KERN_KDGETBUF &&
	    name[0] != KERN_KDGETREG &&
	    name[0] != KERN_KDREADCURTHRMAP) {
		if ((ret = ktrace_configure(KTRACE_KDEBUG))) {
			goto out;
		}
	} else {
		if ((ret = ktrace_read_check())) {
			goto out;
		}
	}

	switch (name[0]) {
	case KERN_KDGETBUF:
		if (size < sizeof(kd_bufinfo.nkdbufs)) {
			/*
			 * There is not enough room to return even
			 * the first element of the info structure.
			 */
			ret = EINVAL;
			break;
		}

		memset(&kd_bufinfo, 0, sizeof(kd_bufinfo));

		kd_bufinfo.nkdbufs = nkdbufs;
		kd_bufinfo.nkdthreads = kd_mapcount < INT_MAX ? (int)kd_mapcount :
		    INT_MAX;
		if ((kd_ctrl_page.kdebug_slowcheck & SLOW_NOLOG)) {
			kd_bufinfo.nolog = 1;
		} else {
			kd_bufinfo.nolog = 0;
		}

		kd_bufinfo.flags = kd_ctrl_page.kdebug_flags;
#if defined(__LP64__)
		kd_bufinfo.flags |= KDBG_LP64;
#endif
		{
			int pid = ktrace_get_owning_pid();
			kd_bufinfo.bufid = (pid == 0 ? -1 : pid);
		}

		if (size >= sizeof(kd_bufinfo)) {
			/*
			 * Provide all the info we have
			 */
			if (copyout(&kd_bufinfo, where, sizeof(kd_bufinfo))) {
				ret = EINVAL;
			}
		} else {
			/*
			 * For backwards compatibility, only provide
			 * as much info as there is room for.
			 */
			if (copyout(&kd_bufinfo, where, size)) {
				ret = EINVAL;
			}
		}
		break;

	case KERN_KDREADCURTHRMAP:
		ret = kdbg_readcurthrmap(where, sizep);
		break;

	case KERN_KDEFLAGS:
		value &= KDBG_USERFLAGS;
		kd_ctrl_page.kdebug_flags |= value;
		break;

	case KERN_KDDFLAGS:
		value &= KDBG_USERFLAGS;
		kd_ctrl_page.kdebug_flags &= ~value;
		break;

	case KERN_KDENABLE:
		/*
		 * Enable tracing mechanism.  Two types:
		 * KDEBUG_TRACE is the standard one,
		 * and KDEBUG_PPT which is a carefully
		 * chosen subset to avoid performance impact.
		 */
		if (value) {
			/*
			 * enable only if buffer is initialized
			 */
			if (!(kd_ctrl_page.kdebug_flags & KDBG_BUFINIT) ||
			    !(value == KDEBUG_ENABLE_TRACE || value == KDEBUG_ENABLE_PPT)) {
				ret = EINVAL;
				break;
			}
			kdbg_thrmap_init();

			kdbg_set_tracing_enabled(true, value);
		} else {
			if (!kdebug_enable) {
				break;
			}

			kernel_debug_disable();
		}
		break;

	case KERN_KDSETBUF:
		kdbg_set_nkdbufs(value);
		break;

	case KERN_KDSETUP:
		ret = kdbg_reinit(false);
		break;

	case KERN_KDREMOVE:
		ktrace_reset(KTRACE_KDEBUG);
		break;

	case KERN_KDSETREG:
		if (size < sizeof(kd_regtype)) {
			ret = EINVAL;
			break;
		}
		if (copyin(where, &kd_Reg, sizeof(kd_regtype))) {
			ret = EINVAL;
			break;
		}

		ret = kdbg_setreg(&kd_Reg);
		break;

	case KERN_KDREADTR:
		ret = kdbg_read(where, sizep, NULL, NULL, RAW_VERSION1);
		break;

	case KERN_KDWRITETR:
	case KERN_KDWRITETR_V3:
	case KERN_KDWRITEMAP:
	case KERN_KDWRITEMAP_V3:
	{
		struct vfs_context context;
		struct fileproc *fp;
		size_t number;
		vnode_t vp;
		int fd;

		if (name[0] == KERN_KDWRITETR || name[0] == KERN_KDWRITETR_V3) {
			(void)kdbg_wait(size, true);
		}
		p = current_proc();
		fd = value;

		if (fp_get_ftype(p, fd, DTYPE_VNODE, EBADF, &fp)) {
			ret = EBADF;
			break;
		}

		vp = fp->fp_glob->fg_data;
		context.vc_thread = current_thread();
		context.vc_ucred = fp->fp_glob->fg_cred;

		if ((ret = vnode_getwithref(vp)) == 0) {
			RAW_file_offset = fp->fp_glob->fg_offset;
			if (name[0] == KERN_KDWRITETR || name[0] == KERN_KDWRITETR_V3) {
				number = nkdbufs * sizeof(kd_buf);

				KDBG_RELEASE(TRACE_WRITING_EVENTS | DBG_FUNC_START);
				if (name[0] == KERN_KDWRITETR_V3) {
					ret = kdbg_read(0, &number, vp, &context, RAW_VERSION3);
				} else {
					ret = kdbg_read(0, &number, vp, &context, RAW_VERSION1);
				}
				KDBG_RELEASE(TRACE_WRITING_EVENTS | DBG_FUNC_END, number);

				*sizep = number;
			} else {
				number = kd_mapcount * sizeof(kd_threadmap);
				if (name[0] == KERN_KDWRITEMAP_V3) {
					ret = kdbg_readthrmap_v3(0, number, fd);
				} else {
					ret = kdbg_write_thread_map(vp, &context);
				}
			}
			fp->fp_glob->fg_offset = RAW_file_offset;
			vnode_put(vp);
		}
		fp_drop(p, fd, fp, 0);
		break;
	}
	case KERN_KDBUFWAIT:
		*sizep = kdbg_wait(size, false);
		break;

	case KERN_KDPIDTR:
		if (size < sizeof(kd_regtype)) {
			ret = EINVAL;
			break;
		}
		if (copyin(where, &kd_Reg, sizeof(kd_regtype))) {
			ret = EINVAL;
			break;
		}

		ret = kdbg_setpid(&kd_Reg);
		break;

	case KERN_KDPIDEX:
		if (size < sizeof(kd_regtype)) {
			ret = EINVAL;
			break;
		}
		if (copyin(where, &kd_Reg, sizeof(kd_regtype))) {
			ret = EINVAL;
			break;
		}

		ret = kdbg_setpidex(&kd_Reg);
		break;

	case KERN_KDCPUMAP:
		ret = kdbg_readcpumap(where, sizep);
		break;

	case KERN_KDTHRMAP:
		ret = kdbg_copyout_thread_map(where, sizep);
		break;

	case KERN_KDSET_TYPEFILTER: {
		ret = kdbg_copyin_typefilter(where, size);
		break;
	}

	case KERN_KDTEST:
		ret = kdbg_test(size);
		break;

	default:
		ret = EINVAL;
		break;
	}
out:
	ktrace_unlock();

	return ret;
}
/*
 * This code can run for the most part concurrently with kernel_debug_internal()...
 * 'release_storage_unit' will take the kds_spin_lock which may cause us to briefly
 * synchronize with the recording side of this puzzle... otherwise, we are able to
 * move through the lists w/o use of any locks.
 */
static int
kdbg_read(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx, uint32_t file_version)
{
	size_t count;
	unsigned int cpu, min_cpu;
	uint64_t barrier_min = 0, barrier_max = 0, t, earliest_time;
	int error = 0;
	kd_buf *tempbuf;
	uint32_t rcursor;
	kd_buf lostevent;
	union kds_ptr kdsp;
	bool traced_retrograde = false;
	struct kd_storage *kdsp_actual;
	struct kd_bufinfo *kdbp;
	struct kd_bufinfo *min_kdbp;
	size_t tempbuf_count;
	uint32_t tempbuf_number;
	uint32_t old_kdebug_flags;
	uint32_t old_kdebug_slowcheck;
	bool out_of_events = false;
	bool wrapped = false;

	assert(number != NULL);
	count = *number / sizeof(kd_buf);
	*number = 0;

	ktrace_assert_lock_held();

	if (count == 0 || !(kd_ctrl_page.kdebug_flags & KDBG_BUFINIT) || kdcopybuf == 0) {
		return EINVAL;
	}

	thread_set_eager_preempt(current_thread());

	memset(&lostevent, 0, sizeof(lostevent));
	lostevent.debugid = TRACE_LOST_EVENTS;

	/*
	 * Request each IOP to provide us with up to date entries before merging
	 * buffers together.
	 */
	kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_SYNC_FLUSH, NULL);

	/*
	 * Capture the current time.  Only sort events that have occurred
	 * before now.  Since the IOPs are being flushed here, it is possible
	 * that events occur on the AP while running live tracing.
	 */
	barrier_max = kdbg_timestamp() & KDBG_TIMESTAMP_MASK;

	/*
	 * Disable wrap so storage units cannot be stolen out from underneath us
	 * while merging events.
	 *
	 * Because we hold ktrace_lock, no other control threads can be playing
	 * with kdebug_flags.  The code that emits new events could be running,
	 * but it grabs kds_spin_lock if it needs to acquire a new storage
	 * chunk, which is where it examines kdebug_flags.  If it is adding to
	 * the same chunk we're reading from, check for that below.
	 */
	wrapped = disable_wrap(&old_kdebug_slowcheck, &old_kdebug_flags);

	if (count > nkdbufs) {
		count = nkdbufs;
	}

	if ((tempbuf_count = count) > KDCOPYBUF_COUNT) {
		tempbuf_count = KDCOPYBUF_COUNT;
	}

	/*
	 * If the buffers have wrapped, do not emit additional lost events for the
	 * oldest storage units.
	 */
	if (wrapped) {
		kd_ctrl_page.kdebug_flags &= ~KDBG_WRAPPED;

		for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_ctrl_page.kdebug_cpus; cpu++, kdbp++) {
			if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
				continue;
			}
			kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
			kdsp_actual->kds_lostevents = false;
		}
	}
	/*
	 * Capture the earliest time where there are events for all CPUs and don't
	 * emit events with timestamps prior.
	 */
	barrier_min = kd_ctrl_page.oldest_time;

	while (count) {
		tempbuf = kdcopybuf;
		tempbuf_number = 0;

		if (wrapped) {
			/*
			 * Emit a lost events tracepoint to indicate that previous events
			 * were lost -- the thread map cannot be trusted.  A new one must
			 * be taken so tools can analyze the trace in a backwards-facing
			 * fashion.
			 */
			kdbg_set_timestamp_and_cpu(&lostevent, barrier_min, 0);
			*tempbuf = lostevent;
			wrapped = false;
			goto nextevent;
		}

		/* While space left in merged events scratch buffer. */
		while (tempbuf_count) {
			bool lostevents = false;
			int lostcpu = 0;
			earliest_time = UINT64_MAX;
			min_kdbp = NULL;
			min_cpu = 0;

			/* Check each CPU's buffers for the earliest event. */
			for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_ctrl_page.kdebug_cpus; cpu++, kdbp++) {
				/* Skip CPUs without data in their oldest storage unit. */
				if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
next_cpu:
					continue;
				}
				/* From CPU data to buffer header to buffer. */
				kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);

next_event:
				/* The next event to be read from this buffer. */
				rcursor = kdsp_actual->kds_readlast;

				/* Skip this buffer if there are no events left. */
				if (rcursor == kdsp_actual->kds_bufindx) {
					continue;
				}

				/*
				 * Check that this storage unit wasn't stolen and events were
				 * lost.  This must have happened while wrapping was disabled
				 * in this function.
				 */
				if (kdsp_actual->kds_lostevents) {
					lostevents = true;
					kdsp_actual->kds_lostevents = false;

					/*
					 * The earliest event we can trust is the first one in this
					 * stolen storage unit.
					 */
					uint64_t lost_time =
					    kdbg_get_timestamp(&kdsp_actual->kds_records[0]);
					if (kd_ctrl_page.oldest_time < lost_time) {
						/*
						 * If this is the first time we've seen lost events for
						 * this gap, record its timestamp as the oldest
						 * timestamp we're willing to merge for the lost events
						 * tracepoint.
						 */
						kd_ctrl_page.oldest_time = barrier_min = lost_time;
						lostcpu = cpu;
					}
				}

				t = kdbg_get_timestamp(&kdsp_actual->kds_records[rcursor]);

				if (t > barrier_max) {
					if (kdbg_debug) {
						printf("kdebug: FUTURE EVENT: debugid %#8x: "
						    "time %lld from CPU %u "
						    "(barrier at time %lld, read %lu events)\n",
						    kdsp_actual->kds_records[rcursor].debugid,
						    t, cpu, barrier_max, *number + tempbuf_number);
					}
					goto next_cpu;
				}
				if (t < kdsp_actual->kds_timestamp) {
					/*
					 * This indicates the event emitter hasn't completed
					 * filling in the event (because we're looking at the
					 * buffer that the record head is using).  The max barrier
					 * timestamp should have saved us from seeing these kinds
					 * of things, but other CPUs might be slow on the up-take.
					 *
					 * Bail out so we don't get out-of-order events by
					 * continuing to read events from other CPUs' events.
					 */
					out_of_events = true;
					break;
				}

				/*
				 * Ignore events that have aged out due to wrapping or storage
				 * unit exhaustion while merging events.
				 */
				if (t < barrier_min) {
					kdsp_actual->kds_readlast++;
					if (kdbg_debug) {
						printf("kdebug: PAST EVENT: debugid %#8x: "
						    "time %lld from CPU %u "
						    "(barrier at time %lld)\n",
						    kdsp_actual->kds_records[rcursor].debugid,
						    t, cpu, barrier_min);
					}

					if (kdsp_actual->kds_readlast >= EVENTS_PER_STORAGE_UNIT) {
						release_storage_unit(cpu, kdsp.raw);

						if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
							goto next_cpu;
						}
						kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
					}
					goto next_event;
				}

				/*
				 * Don't worry about merging any events -- just walk through
				 * the CPUs and find the latest timestamp of lost events.
				 */
				if (lostevents) {
					continue;
				}

				if (t < earliest_time) {
					earliest_time = t;
					min_kdbp = kdbp;
					min_cpu = cpu;
				}
			}
			if (lostevents) {
				/*
				 * If any lost events were hit in the buffers, emit an event
				 * with the latest timestamp.
				 */
				kdbg_set_timestamp_and_cpu(&lostevent, barrier_min, lostcpu);
				*tempbuf = lostevent;
				tempbuf->arg1 = 0;
				goto nextevent;
			}
			if (min_kdbp == NULL) {
				/* All buffers ran empty. */
				out_of_events = true;
			}
			if (out_of_events) {
				break;
			}

			kdsp = min_kdbp->kd_list_head;
			kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);

			/* Copy earliest event into merged events scratch buffer. */
			*tempbuf = kdsp_actual->kds_records[kdsp_actual->kds_readlast++];

			if (kdsp_actual->kds_readlast == EVENTS_PER_STORAGE_UNIT) {
				release_storage_unit(min_cpu, kdsp.raw);
			}

			/*
			 * Watch for out of order timestamps (from IOPs).
			 */
			if (earliest_time < min_kdbp->kd_prev_timebase) {
				/*
				 * If we haven't already, emit a retrograde events event.
				 * Otherwise, ignore this event.
				 */
				if (traced_retrograde) {
					continue;
				}
				if (kdbg_debug) {
					printf("kdebug: RETRO EVENT: debugid %#8x: "
					    "time %lld from CPU %u "
					    "(barrier at time %lld)\n",
					    kdsp_actual->kds_records[rcursor].debugid,
					    t, cpu, barrier_min);
				}

				kdbg_set_timestamp_and_cpu(tempbuf, min_kdbp->kd_prev_timebase, kdbg_get_cpu(tempbuf));
				tempbuf->arg1 = tempbuf->debugid;
				tempbuf->arg2 = (kd_buf_argtype)earliest_time;
				tempbuf->arg3 = 0;
				tempbuf->arg4 = 0;
				tempbuf->debugid = TRACE_RETROGRADE_EVENTS;
				traced_retrograde = true;
			} else {
				min_kdbp->kd_prev_timebase = earliest_time;
			}
nextevent:
			tempbuf_count--;
			tempbuf_number++;
			tempbuf++;

			if ((RAW_file_written += sizeof(kd_buf)) >= RAW_FLUSH_SIZE) {
				break;
			}
		}
		if (tempbuf_number) {
			/*
			 * Remember the latest timestamp of events that we've merged so we
			 * don't think we've lost events later.
			 */
			uint64_t latest_time = kdbg_get_timestamp(tempbuf - 1);
			if (kd_ctrl_page.oldest_time < latest_time) {
				kd_ctrl_page.oldest_time = latest_time;
			}
			if (file_version == RAW_VERSION3) {
				if (!(kdbg_write_v3_event_chunk_header(buffer, V3_RAW_EVENTS, (tempbuf_number * sizeof(kd_buf)), vp, ctx))) {
					error = EFAULT;
					goto check_error;
				}
				if (buffer) {
					buffer += (sizeof(kd_chunk_header_v3) + sizeof(uint64_t));
				}

				assert(count >= (sizeof(kd_chunk_header_v3) + sizeof(uint64_t)));
				count -= (sizeof(kd_chunk_header_v3) + sizeof(uint64_t));
				*number += (sizeof(kd_chunk_header_v3) + sizeof(uint64_t));
			}
			if (vp) {
				size_t write_size = tempbuf_number * sizeof(kd_buf);
				error = kdbg_write_to_vnode((caddr_t)kdcopybuf, write_size, vp, ctx, RAW_file_offset);
				if (!error) {
					RAW_file_offset += write_size;
				}

				if (RAW_file_written >= RAW_FLUSH_SIZE) {
					error = VNOP_FSYNC(vp, MNT_NOWAIT, ctx);

					RAW_file_written = 0;
				}
			} else {
				error = copyout(kdcopybuf, buffer, tempbuf_number * sizeof(kd_buf));
				buffer += (tempbuf_number * sizeof(kd_buf));
			}
check_error:
			if (error) {
				*number = 0;
				error = EINVAL;
				break;
			}
			count -= tempbuf_number;
			*number += tempbuf_number;
		}
		if (out_of_events == true) {
			/*
			 * all trace buffers are empty
			 */
			break;
		}

		if ((tempbuf_count = count) > KDCOPYBUF_COUNT) {
			tempbuf_count = KDCOPYBUF_COUNT;
		}
	}
	if (!(old_kdebug_flags & KDBG_NOWRAP)) {
		enable_wrap(old_kdebug_slowcheck);
	}
	thread_clear_eager_preempt(current_thread());
	return error;
}
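
/*
 * In short, kdbg_read performs an N-way merge by timestamp across the per-CPU
 * storage lists into kdcopybuf, clamped to [barrier_min, barrier_max], and
 * then either copies each merged batch out to user space or appends it to the
 * vnode as RAW_VERSION1 data or RAW_VERSION3 chunks.
 */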
#define KDEBUG_TEST_CODE(code) BSDDBG_CODE(DBG_BSD_KDEBUG_TEST, (code))

/*
 * A test IOP for the SYNC_FLUSH callback.
 */

static int sync_flush_iop = 0;

static void
sync_flush_callback(void * __unused context, kd_callback_type reason,
    void * __unused arg)
{
	assert(sync_flush_iop > 0);

	if (reason == KD_CALLBACK_SYNC_FLUSH) {
		kernel_debug_enter(sync_flush_iop, KDEBUG_TEST_CODE(0xff),
		    kdbg_timestamp(), 0, 0, 0, 0, 0);
	}
}

static struct kd_callback sync_flush_kdcb = {
	.func = sync_flush_callback,
	.iop_name = "test_sf",
};

static int
kdbg_test(size_t flavor)
{
	int code = 0;
	int dummy_iop = 0;

	switch (flavor) {
	case 1:
		/* try each macro */
		KDBG(KDEBUG_TEST_CODE(code)); code++;
		KDBG(KDEBUG_TEST_CODE(code), 1); code++;
		KDBG(KDEBUG_TEST_CODE(code), 1, 2); code++;
		KDBG(KDEBUG_TEST_CODE(code), 1, 2, 3); code++;
		KDBG(KDEBUG_TEST_CODE(code), 1, 2, 3, 4); code++;

		KDBG_RELEASE(KDEBUG_TEST_CODE(code)); code++;
		KDBG_RELEASE(KDEBUG_TEST_CODE(code), 1); code++;
		KDBG_RELEASE(KDEBUG_TEST_CODE(code), 1, 2); code++;
		KDBG_RELEASE(KDEBUG_TEST_CODE(code), 1, 2, 3); code++;
		KDBG_RELEASE(KDEBUG_TEST_CODE(code), 1, 2, 3, 4); code++;

		KDBG_FILTERED(KDEBUG_TEST_CODE(code)); code++;
		KDBG_FILTERED(KDEBUG_TEST_CODE(code), 1); code++;
		KDBG_FILTERED(KDEBUG_TEST_CODE(code), 1, 2); code++;
		KDBG_FILTERED(KDEBUG_TEST_CODE(code), 1, 2, 3); code++;
		KDBG_FILTERED(KDEBUG_TEST_CODE(code), 1, 2, 3, 4); code++;

		KDBG_RELEASE_NOPROCFILT(KDEBUG_TEST_CODE(code)); code++;
		KDBG_RELEASE_NOPROCFILT(KDEBUG_TEST_CODE(code), 1); code++;
		KDBG_RELEASE_NOPROCFILT(KDEBUG_TEST_CODE(code), 1, 2); code++;
		KDBG_RELEASE_NOPROCFILT(KDEBUG_TEST_CODE(code), 1, 2, 3); code++;
		KDBG_RELEASE_NOPROCFILT(KDEBUG_TEST_CODE(code), 1, 2, 3, 4); code++;

		KDBG_DEBUG(KDEBUG_TEST_CODE(code)); code++;
		KDBG_DEBUG(KDEBUG_TEST_CODE(code), 1); code++;
		KDBG_DEBUG(KDEBUG_TEST_CODE(code), 1, 2); code++;
		KDBG_DEBUG(KDEBUG_TEST_CODE(code), 1, 2, 3); code++;
		KDBG_DEBUG(KDEBUG_TEST_CODE(code), 1, 2, 3, 4); code++;
		break;

	case 2:
		if (kd_ctrl_page.kdebug_iops) {
			/* avoid the assertion in kernel_debug_enter for a valid IOP */
			dummy_iop = kd_ctrl_page.kdebug_iops[0].cpu_id;
		}

		/* ensure old timestamps are not emitted from kernel_debug_enter */
		kernel_debug_enter(dummy_iop, KDEBUG_TEST_CODE(code),
		    100 /* very old timestamp */, 0, 0, 0, 0, 0);
		code++;
		kernel_debug_enter(dummy_iop, KDEBUG_TEST_CODE(code),
		    kdbg_timestamp(), 0, 0, 0, 0, 0);
		code++;
		break;

	case 3:
		if (kd_ctrl_page.kdebug_iops) {
			dummy_iop = kd_ctrl_page.kdebug_iops[0].cpu_id;
		}
		kernel_debug_enter(dummy_iop, KDEBUG_TEST_CODE(code),
		    kdbg_timestamp() * 2 /* !!! */, 0, 0, 0, 0, 0);
		break;

	case 4:
		if (!sync_flush_iop) {
			sync_flush_iop = kernel_debug_register_callback(
				sync_flush_kdcb);
			assert(sync_flush_iop > 0);
		}
		break;

	default:
		return ENOTSUP;
	}

	return 0;
}

#undef KDEBUG_TEST_CODE
void
kdebug_init(unsigned int n_events, char *filter_desc, enum kdebug_opts opts)
{
	assert(filter_desc != NULL);

	if (log_leaks && n_events == 0) {
		n_events = 200000;
	}

	kdebug_trace_start(n_events, filter_desc, opts);
}

static void
kdbg_set_typefilter_string(const char *filter_desc)
{
	char *end = NULL;

	ktrace_assert_lock_held();

	assert(filter_desc != NULL);

	typefilter_reject_all(kdbg_typefilter);
	typefilter_allow_class(kdbg_typefilter, DBG_TRACE);

	/* if the filter description starts with a number, assume it's a csc */
	if (filter_desc[0] >= '0' && filter_desc[0] <= '9') {
		unsigned long csc = strtoul(filter_desc, NULL, 0);
		if (filter_desc != end && csc <= KDBG_CSC_MAX) {
			typefilter_allow_csc(kdbg_typefilter, (uint16_t)csc);
		}
		return;
	}

	while (filter_desc[0] != '\0') {
		unsigned long allow_value;

		char filter_type = filter_desc[0];
		if (filter_type != 'C' && filter_type != 'S') {
			printf("kdebug: unexpected filter type `%c'\n", filter_type);
			return;
		}
		filter_desc++;

		allow_value = strtoul(filter_desc, &end, 0);
		if (filter_desc == end) {
			printf("kdebug: cannot parse `%s' as integer\n", filter_desc);
			return;
		}

		switch (filter_type) {
		case 'C':
			if (allow_value > KDBG_CLASS_MAX) {
				printf("kdebug: class 0x%lx is invalid\n", allow_value);
				return;
			}
			printf("kdebug: C 0x%lx\n", allow_value);
			typefilter_allow_class(kdbg_typefilter, (uint8_t)allow_value);
			break;
		case 'S':
			if (allow_value > KDBG_CSC_MAX) {
				printf("kdebug: class-subclass 0x%lx is invalid\n", allow_value);
				return;
			}
			printf("kdebug: S 0x%lx\n", allow_value);
			typefilter_allow_csc(kdbg_typefilter, (uint16_t)allow_value);
			break;
		default:
			__builtin_unreachable();
		}

		/* advance to next filter entry */
		filter_desc = end;
		if (filter_desc[0] == ',') {
			filter_desc++;
		}
	}
}

uint64_t
kdebug_wake(void)
{
	if (!wake_nkdbufs) {
		return 0;
	}
	uint64_t start = mach_absolute_time();
	kdebug_trace_start(wake_nkdbufs, NULL, trace_wrap ? KDOPT_WRAPPING : 0);
	return mach_absolute_time() - start;
}

/*
 * This function is meant to be called from the bootstrap thread or kdebug_wake.
 */
void
kdebug_trace_start(unsigned int n_events, const char *filter_desc,
    enum kdebug_opts opts)
{
	if (!n_events) {
		kd_early_done = true;
		return;
	}

	ktrace_start_single_threaded();

	kdbg_lock_init();

	ktrace_kernel_configure(KTRACE_KDEBUG);

	kdbg_set_nkdbufs(n_events);

	kernel_debug_string_early("start_kern_tracing");

	if (kdbg_reinit((opts & KDOPT_ATBOOT))) {
		printf("error from kdbg_reinit, kernel tracing not started\n");
		goto out;
	}

	/*
	 * Wrapping is disabled because boot and wake tracing is interested in
	 * the earliest events, at the expense of later ones.
	 */
	if (!(opts & KDOPT_WRAPPING)) {
		uint32_t old1, old2;
		(void)disable_wrap(&old1, &old2);
	}

	if (filter_desc && filter_desc[0] != '\0') {
		if (kdbg_initialize_typefilter(NULL) == KERN_SUCCESS) {
			kdbg_set_typefilter_string(filter_desc);
			kdbg_enable_typefilter();
		}
	}

	/*
	 * Hold off interrupts between getting a thread map and enabling trace
	 * and until the early traces are recorded.
	 */
	bool s = ml_set_interrupts_enabled(false);

	if (!(opts & KDOPT_ATBOOT)) {
		kdbg_thrmap_init();
	}

	kdbg_set_tracing_enabled(true, KDEBUG_ENABLE_TRACE);

	if ((opts & KDOPT_ATBOOT)) {
		/*
		 * Transfer all very early events from the static buffer into the real
		 * buffers.
		 */
		kernel_debug_early_end();
	}

	ml_set_interrupts_enabled(s);

	printf("kernel tracing started with %u events, filter = %s\n", n_events,
	    filter_desc ?: "none");

out:
	ktrace_end_single_threaded();
}
void
kdbg_dump_trace_to_file(const char *filename)
{
	vfs_context_t ctx;
	vnode_t vp;
	size_t write_size;
	int ret;

	ktrace_lock();

	if (!(kdebug_enable & KDEBUG_ENABLE_TRACE)) {
		goto out;
	}

	if (ktrace_get_owning_pid() != 0) {
		/*
		 * Another process owns ktrace and is still active, disable tracing to
		 * prevent wrapping.
		 */
		kdebug_enable = 0;
		kd_ctrl_page.enabled = 0;
		commpage_update_kdebug_state();
		goto out;
	}

	KDBG_RELEASE(TRACE_WRITING_EVENTS | DBG_FUNC_START);

	kdebug_enable = 0;
	kd_ctrl_page.enabled = 0;
	commpage_update_kdebug_state();

	ctx = vfs_context_kernel();

	if (vnode_open(filename, (O_CREAT | FWRITE | O_NOFOLLOW), 0600, 0, &vp, ctx)) {
		goto out;
	}

	kdbg_write_thread_map(vp, ctx);

	write_size = nkdbufs * sizeof(kd_buf);
	ret = kdbg_read(0, &write_size, vp, ctx, RAW_VERSION1);
	if (ret) {
		goto out_close;
	}

	/*
	 * Wait to synchronize the file to capture the I/O in the
	 * TRACE_WRITING_EVENTS interval.
	 */
	ret = VNOP_FSYNC(vp, MNT_WAIT, ctx);

	/*
	 * Balance the starting TRACE_WRITING_EVENTS tracepoint manually.
	 */
	kd_buf end_event = {
		.debugid = TRACE_WRITING_EVENTS | DBG_FUNC_END,
		.arg1 = write_size,
		.arg2 = ret,
		.arg5 = (kd_buf_argtype)thread_tid(current_thread()),
	};
	kdbg_set_timestamp_and_cpu(&end_event, kdbg_timestamp(),
	    cpu_number());

	/* this is best effort -- ignore any errors */
	(void)kdbg_write_to_vnode((caddr_t)&end_event, sizeof(kd_buf), vp, ctx,
	    RAW_file_offset);

out_close:
	vnode_close(vp, FWRITE, ctx);
	sync(current_proc(), (void *)NULL, (int *)NULL);

out:
	ktrace_unlock();
}
static int
kdbg_sysctl_continuous SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int value = kdbg_continuous_time;
	int ret = sysctl_io_number(req, value, sizeof(value), &value, NULL);

	if (ret || !req->newptr) {
		return ret;
	}

	kdbg_continuous_time = value;
	return 0;
}

SYSCTL_NODE(_kern, OID_AUTO, kdbg, CTLFLAG_RD | CTLFLAG_LOCKED, 0,
    "kdbg");

SYSCTL_PROC(_kern_kdbg, OID_AUTO, experimental_continuous,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0,
    sizeof(int), kdbg_sysctl_continuous, "I",
    "Set kdebug to use mach_continuous_time");

SYSCTL_INT(_kern_kdbg, OID_AUTO, debug,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &kdbg_debug, 0, "Set kdebug debug mode");

SYSCTL_QUAD(_kern_kdbg, OID_AUTO, oldest_time,
    CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED,
    &kd_ctrl_page.oldest_time,
    "Find the oldest timestamp still in trace");