/*
 * Copyright (c) 2015 Apple Inc. All rights reserved.
 *
 * @APPLE_APACHE_LICENSE_HEADER_START@
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @APPLE_APACHE_LICENSE_HEADER_END@
 */
#include <mach/vm_statistics.h> // VM_MEMORY_GENEALOGY
#ifdef KERNEL

#define OS_VOUCHER_ACTIVITY_SPI_TYPES 1
#define OS_FIREHOSE_SPI 1
#define __OS_EXPOSE_INTERNALS_INDIRECT__ 1

#define DISPATCH_PURE_C 1
#define _safe_cast_to_long(x) \
		({ _Static_assert(sizeof(typeof(x)) <= sizeof(long), \
				"__builtin_expect doesn't support types wider than long"); \
				(long)(x); })
#define fastpath(x) ((typeof(x))__builtin_expect(_safe_cast_to_long(x), ~0l))
#define slowpath(x) ((typeof(x))__builtin_expect(_safe_cast_to_long(x), 0l))
#define os_likely(x) __builtin_expect(!!(x), 1)
#define os_unlikely(x) __builtin_expect(!!(x), 0)
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)

#define DISPATCH_INTERNAL_CRASH(ac, msg) ({ panic(msg); __builtin_trap(); })

#if defined(__x86_64__) || defined(__i386__)
#define dispatch_hardware_pause() __asm__("pause")
#elif (defined(__arm__) && defined(_ARM_ARCH_7) && defined(__thumb__)) || \
		defined(__arm64__)
#define dispatch_hardware_pause() __asm__("yield")
#define dispatch_hardware_wfe()   __asm__("wfe")
#else
#define dispatch_hardware_pause() __asm__("")
#endif

#define _dispatch_wait_until(c) do { \
		while (!fastpath(c)) { \
			dispatch_hardware_pause(); \
		} } while (0)
#define dispatch_compiler_barrier() __asm__ __volatile__("" ::: "memory")
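// For illustration: _dispatch_wait_until() is the spin-wait primitive used
// throughout this file. A call such as
//
//     _dispatch_wait_until(fbh_ring[idx] == gen);
//
// expands (roughly) to
//
//     do {
//         while (!fastpath(fbh_ring[idx] == gen)) {
//             dispatch_hardware_pause(); // "pause"/"yield" hint, or a no-op
//         }
//     } while (0);
//
// i.e. a plain busy-wait that only hints the CPU that it is spinning; it
// never blocks and implies no memory ordering of its own.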
typedef uint32_t dispatch_lock;
typedef struct dispatch_gate_s {
	dispatch_lock dgl_lock;
} dispatch_gate_s, *dispatch_gate_t;
#define DLOCK_LOCK_DATA_CONTENTION 0
static void _dispatch_gate_wait(dispatch_gate_t l, uint32_t flags);
#include <kern/debug.h>
#include <machine/cpu_number.h>
#include <kern/thread.h>
#include <mach/port.h>
#include <sys/param.h>
#include <sys/types.h>
#include <vm/vm_kern.h>
#include <firehose_types_private.h> // <firehose/firehose_types_private.h>
#include <tracepoint_private.h> // <firehose/tracepoint_private.h>
#include <internal/atomic.h> // os/internal/atomic.h
#include "os/firehose_buffer_private.h"
#include "firehose_buffer_internal.h"
#include "firehose_inline_internal.h"
#else
#include "internal.h"
#include "firehose.h" // MiG
#include "firehose_replyServer.h" // MiG
#endif

#if OS_FIREHOSE_SPI
#if __has_feature(c_static_assert)
_Static_assert(sizeof(((firehose_stream_state_u *)NULL)->fss_gate) ==
		sizeof(((firehose_stream_state_u *)NULL)->fss_allocator),
		"fss_gate and fss_allocator alias");
_Static_assert(offsetof(firehose_stream_state_u, fss_gate) ==
		offsetof(firehose_stream_state_u, fss_allocator),
		"fss_gate and fss_allocator alias");
_Static_assert(sizeof(struct firehose_buffer_header_s) ==
		FIREHOSE_BUFFER_CHUNK_SIZE,
		"firehose buffer header must be 4k");
_Static_assert(offsetof(struct firehose_buffer_header_s, fbh_unused) <=
		FIREHOSE_BUFFER_CHUNK_SIZE - FIREHOSE_BUFFER_LIBTRACE_HEADER_SIZE,
		"we must have enough space for the libtrace header");
_Static_assert(sizeof(struct firehose_buffer_chunk_s) ==
		FIREHOSE_BUFFER_CHUNK_SIZE,
		"firehose buffer chunks must be 4k");
_Static_assert(powerof2(FIREHOSE_BUFFER_CHUNK_COUNT),
		"CHUNK_COUNT must be a power of two");
_Static_assert(FIREHOSE_BUFFER_CHUNK_COUNT <= 64,
		"CHUNK_COUNT must be less than 64 (bitmap in uint64_t)");
#ifdef FIREHOSE_BUFFER_MADVISE_CHUNK_COUNT
_Static_assert(powerof2(FIREHOSE_BUFFER_MADVISE_CHUNK_COUNT),
		"madvise chunk count must be a power of two");
#endif
_Static_assert(howmany(sizeof(struct firehose_tracepoint_s),
		sizeof(struct firehose_buffer_chunk_s)) < 255,
		"refcount assumes that you cannot have more than 255 tracepoints");
// FIXME: we should have an event-count instead here
_Static_assert(sizeof(struct firehose_buffer_stream_s) == 128,
		"firehose buffer stream must be small (single cacheline if possible)");
_Static_assert(offsetof(struct firehose_buffer_chunk_s, fbc_data) % 8 == 0,
		"Page header is 8 byte aligned");
_Static_assert(sizeof(struct firehose_tracepoint_s) == 24,
		"tracepoint header should be exactly 24 bytes");
#endif
static firehose_buffer_t kernel_firehose_buffer = NULL;

#pragma mark -
#pragma mark Client IPC to the log daemon
static mach_port_t
firehose_client_reconnect(firehose_buffer_t fb, mach_port_t oldsendp)
{
	mach_port_t sendp = MACH_PORT_NULL;
	mach_port_t mem_port = MACH_PORT_NULL, extra_info_port = MACH_PORT_NULL;
	mach_vm_size_t extra_info_size = 0;
	kern_return_t kr;

	dispatch_assert(fb->fb_header.fbh_logd_port);
	dispatch_assert(fb->fb_header.fbh_recvp);
	dispatch_assert(fb->fb_header.fbh_uniquepid != 0);

	_dispatch_unfair_lock_lock(&fb->fb_header.fbh_logd_lock);
	sendp = fb->fb_header.fbh_sendp;
	if (sendp != oldsendp || sendp == MACH_PORT_DEAD) {
		// someone beat us to reconnecting or logd was unloaded, just go away
		goto unlock;
	}

	if (oldsendp) {
		// same trick as _xpc_pipe_dispose: keeping a send right
		// maintains the name, so that we can destroy the receive right
		// in case we still have it.
		(void)firehose_mach_port_recv_dispose(oldsendp, fb);
		firehose_mach_port_send_release(oldsendp);
		fb->fb_header.fbh_sendp = MACH_PORT_NULL;
	}

	/* Create a memory port for the buffer VM region */
	vm_prot_t flags = VM_PROT_READ | MAP_MEM_VM_SHARE;
	memory_object_size_t size = sizeof(union firehose_buffer_u);
	mach_vm_address_t addr = (vm_address_t)fb;

	kr = mach_make_memory_entry_64(mach_task_self(), &size, addr,
			flags, &mem_port, MACH_PORT_NULL);
	if (size < sizeof(union firehose_buffer_u)) {
		DISPATCH_CLIENT_CRASH(size, "Invalid size for the firehose buffer");
	}
	if (unlikely(kr)) {
		// the client probably has some form of memory corruption
		// and/or a port leak
		DISPATCH_CLIENT_CRASH(kr, "Unable to make memory port");
	}

	/* Create a communication port to the logging daemon */
	uint32_t opts = MPO_CONTEXT_AS_GUARD | MPO_TEMPOWNER | MPO_INSERT_SEND_RIGHT;
	sendp = firehose_mach_port_allocate(opts, fb);

	if (oldsendp && _voucher_libtrace_hooks->vah_version >= 3) {
		if (_voucher_libtrace_hooks->vah_get_reconnect_info) {
			kr = _voucher_libtrace_hooks->vah_get_reconnect_info(&addr, &size);
			if (likely(kr == KERN_SUCCESS) && addr && size) {
				extra_info_size = size;
				kr = mach_make_memory_entry_64(mach_task_self(), &size, addr,
						flags, &extra_info_port, MACH_PORT_NULL);
				if (unlikely(kr)) {
					// the client probably has some form of memory corruption
					// and/or a port leak
					DISPATCH_CLIENT_CRASH(kr, "Unable to make memory port");
				}
				kr = mach_vm_deallocate(mach_task_self(), addr, size);
				(void)dispatch_assume_zero(kr);
			}
		}
	}

	/* Call the firehose_register() MIG routine */
	kr = firehose_send_register(fb->fb_header.fbh_logd_port, mem_port,
			sizeof(union firehose_buffer_u), sendp, fb->fb_header.fbh_recvp,
			extra_info_port, extra_info_size);
	if (likely(kr == KERN_SUCCESS)) {
		fb->fb_header.fbh_sendp = sendp;
	} else if (unlikely(kr == MACH_SEND_INVALID_DEST)) {
		// MACH_SEND_INVALID_DEST here means that logd's bootstrap port
		// turned into a dead name, which in turn means that logd has been
		// unloaded. The only option here is to give up permanently.
		//
		// same trick as _xpc_pipe_dispose: keeping a send right
		// maintains the name, so that we can destroy the receive right
		// in case we still have it.
		(void)firehose_mach_port_recv_dispose(sendp, fb);
		firehose_mach_port_send_release(sendp);
		firehose_mach_port_send_release(mem_port);
		if (extra_info_port) firehose_mach_port_send_release(extra_info_port);
		sendp = fb->fb_header.fbh_sendp = MACH_PORT_DEAD;
	} else {
		// the client probably has some form of memory corruption
		// and/or a port leak
		DISPATCH_CLIENT_CRASH(kr, "Unable to register with logd");
	}

unlock:
	_dispatch_unfair_lock_unlock(&fb->fb_header.fbh_logd_lock);
	return sendp;
}
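// Usage sketch (mirroring the push paths later in this file): callers treat
// the returned send right as the authoritative one and retry at most once
// after reconnecting, e.g.
//
//     sendp = firehose_client_reconnect(fb, sendp);
//     if (fastpath(MACH_PORT_VALID(sendp))) {
//         kr = firehose_send_push_async(sendp, qos, for_io, ask_for_notifs);
//     }
//
// MACH_PORT_DEAD is sticky: once logd is known to be unloaded, later callers
// observe the dead name and give up without attempting to reconnect again.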
static void
firehose_buffer_update_limits_unlocked(firehose_buffer_t fb)
{
	firehose_bank_state_u old, new;
	firehose_buffer_bank_t fbb = &fb->fb_header.fbh_bank;
	unsigned long fbb_flags = fbb->fbb_flags;
	uint16_t io_streams = 0, mem_streams = 0;
	uint16_t total = 0;

	for (size_t i = 0; i < countof(fb->fb_header.fbh_stream); i++) {
		firehose_buffer_stream_t fbs = fb->fb_header.fbh_stream + i;

		if (fbs->fbs_state.fss_current == FIREHOSE_STREAM_STATE_PRISTINE) {
			continue;
		}
		if ((1UL << i) & firehose_stream_uses_io_bank) {
			io_streams++;
		} else {
			mem_streams++;
		}
	}

	if (fbb_flags & FIREHOSE_BUFFER_BANK_FLAG_LOW_MEMORY) {
		if (fbb_flags & FIREHOSE_BUFFER_BANK_FLAG_HIGH_RATE) {
			total = 1 + 4 * mem_streams + io_streams; // usually 10
		} else {
			total = 1 + 2 + mem_streams + io_streams; // usually 6
		}
	} else {
		if (fbb_flags & FIREHOSE_BUFFER_BANK_FLAG_HIGH_RATE) {
			total = 1 + 6 * mem_streams + 3 * io_streams; // usually 16
		} else {
			total = 1 + 2 * (mem_streams + io_streams); // usually 7
		}
	}

	uint16_t ratio = (uint16_t)(PAGE_SIZE / FIREHOSE_BUFFER_CHUNK_SIZE);
	if (ratio > 1) {
		total = roundup(total, ratio);
	}
	total = MAX(total, FIREHOSE_BUFFER_CHUNK_PREALLOCATED_COUNT);
	if (!(fbb_flags & FIREHOSE_BUFFER_BANK_FLAG_LOW_MEMORY)) {
		total = MAX(total, TARGET_OS_EMBEDDED ? 8 : 12);
	}

	new.fbs_max_ref = total;
	new.fbs_mem_bank = FIREHOSE_BANK_UNAVAIL_BIT - (total - 1);
	new.fbs_io_bank = FIREHOSE_BANK_UNAVAIL_BIT -
			MAX(3 * total / 8, 2 * io_streams);

	old = fbb->fbb_limits;
	fbb->fbb_limits = new;
	if (old.fbs_atomic_state == new.fbs_atomic_state) {
		return;
	}
	os_atomic_add2o(&fb->fb_header, fbh_bank.fbb_state.fbs_atomic_state,
			new.fbs_atomic_state - old.fbs_atomic_state, relaxed);
}
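// Worked example, inferred from the "usually" annotations above: with the
// typical 2 memory streams and 1 I/O stream active,
//
//     default:                 total = 1 + 2 * (2 + 1)   = 7
//     high rate:               total = 1 + 6*2 + 3*1     = 16
//     low memory:              total = 1 + 2 + 2 + 1     = 6
//     low memory + high rate:  total = 1 + 4*2 + 1       = 10
//
// before total is rounded up (when PAGE_SIZE exceeds the chunk size) and
// clamped to at least FIREHOSE_BUFFER_CHUNK_PREALLOCATED_COUNT.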
firehose_buffer_t
firehose_buffer_create(mach_port_t logd_port, uint64_t unique_pid,
		unsigned long bank_flags)
{
	firehose_buffer_header_t fbh;
	firehose_buffer_t fb;

#ifndef KERNEL
	mach_vm_address_t vm_addr = 0;
	kern_return_t kr;

	vm_addr = vm_page_size;
	const size_t madvise_bytes = FIREHOSE_BUFFER_MADVISE_CHUNK_COUNT *
			FIREHOSE_BUFFER_CHUNK_SIZE;
	if (slowpath(madvise_bytes % PAGE_SIZE)) {
		DISPATCH_INTERNAL_CRASH(madvise_bytes,
				"Invalid values for MADVISE_CHUNK_COUNT / CHUNK_SIZE");
	}

	kr = mach_vm_map(mach_task_self(), &vm_addr, sizeof(*fb), 0,
			VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE |
			VM_MAKE_TAG(VM_MEMORY_GENEALOGY), MEMORY_OBJECT_NULL, 0, FALSE,
			VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_NONE);
	if (slowpath(kr)) {
		if (kr != KERN_NO_SPACE) dispatch_assume_zero(kr);
		firehose_mach_port_send_release(logd_port);
		return NULL;
	}

	uint32_t opts = MPO_CONTEXT_AS_GUARD | MPO_STRICT | MPO_INSERT_SEND_RIGHT;
#else
	vm_offset_t vm_addr = 0;
	vm_size_t size;

	size = FIREHOSE_BUFFER_KERNEL_CHUNK_COUNT * FIREHOSE_BUFFER_CHUNK_SIZE;
	__firehose_allocate(&vm_addr, size);

	(void)logd_port; (void)unique_pid;
#endif // KERNEL

	fb = (firehose_buffer_t)vm_addr;
	fbh = &fb->fb_header;
#ifndef KERNEL
	fbh->fbh_logd_port = logd_port;
	fbh->fbh_pid = getpid();
	fbh->fbh_uniquepid = unique_pid;
	fbh->fbh_recvp = firehose_mach_port_allocate(opts, fb);
#endif // !KERNEL
	fbh->fbh_spi_version = OS_FIREHOSE_SPI_VERSION;
	fbh->fbh_bank.fbb_flags = bank_flags;

#ifndef KERNEL
	for (size_t i = 0; i < countof(fbh->fbh_stream); i++) {
		firehose_buffer_stream_t fbs = fbh->fbh_stream + i;
		if (i != firehose_stream_metadata) {
			fbs->fbs_state.fss_current = FIREHOSE_STREAM_STATE_PRISTINE;
		}
	}
	firehose_buffer_update_limits_unlocked(fb);
#else
	uint16_t total = FIREHOSE_BUFFER_CHUNK_PREALLOCATED_COUNT + 1;
	const uint16_t num_kernel_io_pages = 8;
	uint16_t io_pages = num_kernel_io_pages;
	fbh->fbh_bank.fbb_state = (firehose_bank_state_u){
		.fbs_max_ref = total,
		.fbs_io_bank = FIREHOSE_BANK_UNAVAIL_BIT - io_pages,
		.fbs_mem_bank = FIREHOSE_BANK_UNAVAIL_BIT - (total - io_pages - 1),
	};
	fbh->fbh_bank.fbb_limits = fbh->fbh_bank.fbb_state;
#endif // KERNEL

	// now pre-allocate some chunks in the ring directly
#ifdef KERNEL
	const uint16_t pre_allocated = FIREHOSE_BUFFER_CHUNK_PREALLOCATED_COUNT - 1;
#else
	const uint16_t pre_allocated = FIREHOSE_BUFFER_CHUNK_PREALLOCATED_COUNT;
#endif

	fbh->fbh_bank.fbb_bitmap = (1U << (1 + pre_allocated)) - 1;

	for (uint16_t i = 0; i < pre_allocated; i++) {
		fbh->fbh_mem_ring[i] = i + 1;
	}
	fbh->fbh_bank.fbb_mem_flushed = pre_allocated;
	fbh->fbh_ring_mem_head = pre_allocated;

#ifdef KERNEL
	// install the early boot page as the current one for persist
	fbh->fbh_stream[firehose_stream_persist].fbs_state.fss_current =
			FIREHOSE_BUFFER_CHUNK_PREALLOCATED_COUNT;
	fbh->fbh_bank.fbb_state.fbs_io_bank += 1;
#endif

	fbh->fbh_ring_tail = (firehose_ring_tail_u){
		.frp_mem_flushed = pre_allocated,
	};
	return fb;
}
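// Illustration of the ring pre-allocation above, assuming (hypothetically)
// a userspace FIREHOSE_BUFFER_CHUNK_PREALLOCATED_COUNT of 4:
//
//     fbh_mem_ring[0..3] = { 1, 2, 3, 4 };  // chunk refs 1..4 pre-enqueued
//     fbb_bitmap         = 0x1f;            // bit 0 (the header chunk) plus
//                                           // bits 1..4 marked as in use
//     fbh_ring_mem_head  = 4;               // next free enqueue position
//
// Chunk 0 is never handed out as a tracepoint chunk: the static asserts at
// the top of the file pin the buffer header to exactly one chunk, so ref 0
// doubles as the "no chunk" value throughout this file.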
static void
firehose_notify_source_invoke(mach_msg_header_t *hdr)
{
	const size_t reply_size =
			sizeof(union __ReplyUnion__firehose_client_firehoseReply_subsystem);
	firehose_mig_server(firehoseReply_server, reply_size, hdr);
}
static void
firehose_client_register_for_notifications(firehose_buffer_t fb)
{
	static const struct dispatch_continuation_s dc = {
		.dc_func = (void *)firehose_notify_source_invoke,
	};
	firehose_buffer_header_t fbh = &fb->fb_header;

	dispatch_once(&fbh->fbh_notifs_pred, ^{
		dispatch_source_t ds = _dispatch_source_create_mach_msg_direct_recv(
				fbh->fbh_recvp, &dc);
		dispatch_set_context(ds, fb);
		dispatch_activate(ds);
		fbh->fbh_notifs_source = ds;
	});
}
static void
firehose_client_send_push_async(firehose_buffer_t fb, qos_class_t qos,
		bool for_io)
{
	bool ask_for_notifs = fb->fb_header.fbh_notifs_source != NULL;
	mach_port_t sendp = fb->fb_header.fbh_sendp;
	kern_return_t kr = KERN_FAILURE;

	if (!ask_for_notifs && _dispatch_is_multithreaded_inline()) {
		firehose_client_register_for_notifications(fb);
		ask_for_notifs = true;
	}

	if (slowpath(sendp == MACH_PORT_DEAD)) {
		return;
	}

	if (fastpath(sendp)) {
		kr = firehose_send_push_async(sendp, qos, for_io, ask_for_notifs);
		if (likely(kr == KERN_SUCCESS)) {
			return;
		}
		if (kr != MACH_SEND_INVALID_DEST) {
			DISPATCH_VERIFY_MIG(kr);
			dispatch_assume_zero(kr);
		}
	}

	sendp = firehose_client_reconnect(fb, sendp);
	if (fastpath(MACH_PORT_VALID(sendp))) {
		kr = firehose_send_push_async(sendp, qos, for_io, ask_for_notifs);
		if (likely(kr == KERN_SUCCESS)) {
			return;
		}
		if (kr != MACH_SEND_INVALID_DEST) {
			DISPATCH_VERIFY_MIG(kr);
			dispatch_assume_zero(kr);
		}
	}
}
static void
firehose_client_merge_updates(firehose_buffer_t fb, bool async_notif,
		firehose_push_reply_t reply, firehose_bank_state_u *state_out)
{
	firehose_bank_state_u state;
	firehose_ring_tail_u otail, ntail;
	uint64_t old_flushed_pos, bank_updates;
	uint16_t io_delta = 0;
	uint16_t mem_delta = 0;

	if (firehose_atomic_maxv2o(&fb->fb_header, fbh_bank.fbb_mem_flushed,
			reply.fpr_mem_flushed_pos, &old_flushed_pos, relaxed)) {
		mem_delta = (uint16_t)(reply.fpr_mem_flushed_pos - old_flushed_pos);
	}
	if (firehose_atomic_maxv2o(&fb->fb_header, fbh_bank.fbb_io_flushed,
			reply.fpr_io_flushed_pos, &old_flushed_pos, relaxed)) {
		io_delta = (uint16_t)(reply.fpr_io_flushed_pos - old_flushed_pos);
	}

	_dispatch_debug("client side: mem: +%d->%llx, io: +%d->%llx",
			mem_delta, reply.fpr_mem_flushed_pos,
			io_delta, reply.fpr_io_flushed_pos);

	if (!mem_delta && !io_delta) {
		if (state_out) {
			state_out->fbs_atomic_state = os_atomic_load2o(&fb->fb_header,
					fbh_bank.fbb_state.fbs_atomic_state, relaxed);
		}
		return;
	}

	bank_updates = ((uint64_t)mem_delta << FIREHOSE_BANK_SHIFT(0)) |
			((uint64_t)io_delta << FIREHOSE_BANK_SHIFT(1));
	state.fbs_atomic_state = os_atomic_sub2o(&fb->fb_header,
			fbh_bank.fbb_state.fbs_atomic_state, bank_updates, relaxed);
	if (state_out) *state_out = state;

	os_atomic_rmw_loop2o(&fb->fb_header, fbh_ring_tail.frp_atomic_tail,
			otail.frp_atomic_tail, ntail.frp_atomic_tail, relaxed, {
		ntail = otail;
		// overflow handles the generation wraps
		ntail.frp_io_flushed += io_delta;
		ntail.frp_mem_flushed += mem_delta;
	});
	if (async_notif) {
		if (io_delta) {
			os_atomic_inc2o(&fb->fb_header, fbh_bank.fbb_io_notifs, relaxed);
		}
		if (mem_delta) {
			os_atomic_inc2o(&fb->fb_header, fbh_bank.fbb_mem_notifs, relaxed);
		}
	}
}
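// Sketch of why a single atomic subtract is enough here (the exact field
// widths live in firehose_buffer_internal.h and are not assumed here): the
// mem and io bank counters occupy disjoint bit ranges of one 64-bit state
// word, so
//
//     bank_updates = ((uint64_t)mem_delta << FIREHOSE_BANK_SHIFT(0))
//                  | ((uint64_t)io_delta  << FIREHOSE_BANK_SHIFT(1));
//
// returns slots to both banks with one os_atomic_sub2o(), and no borrow can
// cross from one field into the other as long as each delta never exceeds
// the number of chunks actually outstanding in its bank, which the monotonic
// flushed-position maximum above guarantees.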
static void
firehose_client_send_push(firehose_buffer_t fb, bool for_io,
		firehose_bank_state_u *state_out)
{
	mach_port_t sendp = fb->fb_header.fbh_sendp;
	firehose_push_reply_t push_reply = { };
	qos_class_t qos = qos_class_self();
	kern_return_t kr;

	if (slowpath(sendp == MACH_PORT_DEAD)) {
		return;
	}
	if (fastpath(sendp)) {
		kr = firehose_send_push(sendp, qos, for_io, &push_reply);
		if (likely(kr == KERN_SUCCESS)) {
			goto success;
		}
		if (kr != MACH_SEND_INVALID_DEST) {
			DISPATCH_VERIFY_MIG(kr);
			dispatch_assume_zero(kr);
		}
	}

	sendp = firehose_client_reconnect(fb, sendp);
	if (fastpath(MACH_PORT_VALID(sendp))) {
		kr = firehose_send_push(sendp, qos, for_io, &push_reply);
		if (likely(kr == KERN_SUCCESS)) {
			goto success;
		}
		if (kr != MACH_SEND_INVALID_DEST) {
			DISPATCH_VERIFY_MIG(kr);
			dispatch_assume_zero(kr);
		}
	}

	if (state_out) {
		state_out->fbs_atomic_state = os_atomic_load2o(&fb->fb_header,
				fbh_bank.fbb_state.fbs_atomic_state, relaxed);
	}
	return;

success:
	if (memcmp(&push_reply, &FIREHOSE_PUSH_REPLY_CORRUPTED,
			sizeof(push_reply)) == 0) {
		// TODO: find out the actual cause and log it
		DISPATCH_CLIENT_CRASH(0, "Memory corruption in the logging buffers");
	}

	if (for_io) {
		os_atomic_inc2o(&fb->fb_header, fbh_bank.fbb_io_sync_pushes, relaxed);
	} else {
		os_atomic_inc2o(&fb->fb_header, fbh_bank.fbb_mem_sync_pushes, relaxed);
	}
	// TODO <rdar://problem/22963876>
	//
	// use fbb_*_flushes and fbb_*_sync_pushes to decide to dynamically
	// allow using more buffers, if not under memory pressure.
	//
	// There only is a point for multithreaded clients if:
	// - enough samples (total_flushes above some limits)
	// - the ratio is really bad (a push per cycle is definitely a problem)
	return firehose_client_merge_updates(fb, false, push_reply, state_out);
}
kern_return_t
firehose_client_push_reply(mach_port_t req_port OS_UNUSED,
		kern_return_t rtc, firehose_push_reply_t push_reply OS_UNUSED)
{
	DISPATCH_INTERNAL_CRASH(rtc, "firehose_push_reply should never be sent "
			"to the buffer receive port");
}
kern_return_t
firehose_client_push_notify_async(mach_port_t server_port OS_UNUSED,
		firehose_push_reply_t push_reply)
{
	// see _dispatch_source_merge_mach_msg_direct
	dispatch_queue_t dq = _dispatch_queue_get_current();
	firehose_buffer_t fb = dispatch_get_context(dq);
	firehose_client_merge_updates(fb, true, push_reply, NULL);
	return KERN_SUCCESS;
}
#pragma mark -
#pragma mark Buffer handling

static void
firehose_buffer_update_limits(firehose_buffer_t fb)
{
	dispatch_unfair_lock_t fbb_lock = &fb->fb_header.fbh_bank.fbb_lock;
	_dispatch_unfair_lock_lock(fbb_lock);
	firehose_buffer_update_limits_unlocked(fb);
	_dispatch_unfair_lock_unlock(fbb_lock);
}
static inline firehose_tracepoint_t
firehose_buffer_chunk_init(firehose_buffer_chunk_t fbc,
		firehose_tracepoint_query_t ask, uint8_t **privptr)
{
	const uint16_t ft_size = offsetof(struct firehose_tracepoint_s, ft_data);

	uint16_t pub_offs = offsetof(struct firehose_buffer_chunk_s, fbc_data);
	uint16_t priv_offs = FIREHOSE_BUFFER_CHUNK_SIZE;

	pub_offs += roundup(ft_size + ask->pubsize, 8);
	priv_offs -= ask->privsize;

	if (fbc->fbc_pos.fbc_atomic_pos) {
		// Needed for process death handling (recycle-reuse):
		// No atomic fences required, we merely want to make sure the observers
		// will see memory effects in program (asm) order.
		// 1. the payload part of the chunk is cleared completely
		// 2. the chunk is marked as reused
		// This ensures that if we don't see a reference to a chunk in the ring
		// and it is dirty, when crawling the chunk, we don't see remnants of
		// other tracepoints.
		//
		// We only do that when the fbc_pos is non zero, because zero means
		// we just faulted the chunk, and the kernel already bzero-ed it.
		bzero(fbc->fbc_data, sizeof(fbc->fbc_data));
	}
	dispatch_compiler_barrier();
	// <rdar://problem/23562733> boot starts mach absolute time at 0, and
	// wrapping around to values above UINT64_MAX - FIREHOSE_STAMP_SLOP
	// breaks firehose_buffer_stream_flush() assumptions
	if (ask->stamp > FIREHOSE_STAMP_SLOP) {
		fbc->fbc_timestamp = ask->stamp - FIREHOSE_STAMP_SLOP;
	} else {
		fbc->fbc_timestamp = 0;
	}
	fbc->fbc_pos = (firehose_buffer_pos_u){
		.fbc_next_entry_offs = pub_offs,
		.fbc_private_offs = priv_offs,
		.fbc_refcnt = 1,
		.fbc_qos_bits = firehose_buffer_qos_bits_propagate(),
		.fbc_stream = ask->stream,
		.fbc_flag_io = ask->for_io,
	};

	if (privptr) {
		*privptr = fbc->fbc_start + priv_offs;
	}
	return (firehose_tracepoint_t)fbc->fbc_data;
}
static firehose_tracepoint_t
firehose_buffer_stream_chunk_install(firehose_buffer_t fb,
		firehose_tracepoint_query_t ask, uint8_t **privptr, uint16_t ref)
{
	firehose_stream_state_u state, new_state;
	firehose_tracepoint_t ft;
	firehose_buffer_stream_t fbs = &fb->fb_header.fbh_stream[ask->stream];
	uint64_t stamp_and_len;

	if (fastpath(ref)) {
		firehose_buffer_chunk_t fbc = firehose_buffer_ref_to_chunk(fb, ref);
		ft = firehose_buffer_chunk_init(fbc, ask, privptr);
		// Needed for process death handling (tracepoint-begin):
		// write the length before making the chunk visible
		stamp_and_len = ask->stamp - fbc->fbc_timestamp;
		stamp_and_len |= (uint64_t)ask->pubsize << 48;
		os_atomic_store2o(ft, ft_stamp_and_length, stamp_and_len, relaxed);

		if (ask->stream == firehose_stream_metadata) {
			os_atomic_or2o(fb, fb_header.fbh_bank.fbb_metadata_bitmap,
					1ULL << ref, relaxed);
		}
		// release barrier to make the chunk init visible
		os_atomic_rmw_loop2o(fbs, fbs_state.fss_atomic_state,
				state.fss_atomic_state, new_state.fss_atomic_state, release, {
			// We use a generation counter to prevent a theoretical ABA problem:
			// a thread could try to acquire a tracepoint in a chunk, fail to
			// do so, mark it as to be pushed, enqueue it, and then be preempted.
			//
			// It then sleeps for a long time, and later tries to acquire the
			// allocator bit and uninstall the chunk. It succeeds in doing so,
			// but only because the chunk actually happened to have cycled all
			// the way back to being installed. That thread would effectively
			// hide that unflushed chunk and leak it.
			//
			// Having a generation counter prevents the uninstallation of the
			// chunk from spuriously succeeding when it was a re-incarnation
			// of it.
			new_state = (firehose_stream_state_u){
				.fss_current = ref,
				.fss_generation = state.fss_generation + 1,
			};
		});
	} else {
		// the allocator gave up, just clear the allocator + waiter bits
		firehose_stream_state_u mask = { .fss_allocator = ~0u, };
		state.fss_atomic_state = os_atomic_and_orig2o(fbs,
				fbs_state.fss_atomic_state, ~mask.fss_atomic_state, relaxed);
		ft = NULL;
	}

	if (unlikely(state.fss_gate.dgl_lock != _dispatch_tid_self())) {
		_dispatch_gate_broadcast_slow(&fbs->fbs_state.fss_gate,
				state.fss_gate.dgl_lock);
	}

	if (unlikely(state.fss_current == FIREHOSE_STREAM_STATE_PRISTINE)) {
		firehose_buffer_update_limits(fb);
	}

	// pairs with the one in firehose_buffer_tracepoint_reserve()
	__firehose_critical_region_leave();
	return ft;
}
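// Note on the gate handling above: the _Static_asserts at the top of this
// file guarantee that fss_gate and fss_allocator alias the same storage, so
// clearing the allocator bits (or installing the new stream state, whose
// allocator field is zero) also clears the gate's lock word. A waiter
// spinning in the kernel's _dispatch_gate_wait() simply observes the word
// dropping to 0; in userspace, _dispatch_gate_broadcast_slow() is called in
// addition whenever the observed lock word is not just this thread's own id,
// i.e. when it carries waiter state that needs an explicit wakeup.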
static inline uint16_t
firehose_buffer_ring_try_grow(firehose_buffer_bank_t fbb, uint16_t limit)
{
	uint16_t ref = 0;
	uint64_t bitmap;

	_dispatch_unfair_lock_lock(&fbb->fbb_lock);
	bitmap = ~(fbb->fbb_bitmap | (~0ULL << limit));
	if (bitmap) {
		ref = firehose_bitmap_first_set(bitmap);
		fbb->fbb_bitmap |= 1U << ref;
	}
	_dispatch_unfair_lock_unlock(&fbb->fbb_lock);
	return ref;
}
static inline uint16_t
firehose_buffer_ring_shrink(firehose_buffer_t fb, uint16_t ref)
{
	const size_t madv_size =
			FIREHOSE_BUFFER_CHUNK_SIZE * FIREHOSE_BUFFER_MADVISE_CHUNK_COUNT;
	const size_t madv_mask =
			(1ULL << FIREHOSE_BUFFER_MADVISE_CHUNK_COUNT) - 1;
	dispatch_unfair_lock_t fbb_lock = &fb->fb_header.fbh_bank.fbb_lock;
	uint64_t bitmap;

	_dispatch_unfair_lock_lock(fbb_lock);
	if (ref < fb->fb_header.fbh_bank.fbb_limits.fbs_max_ref) {
		goto done;
	}

	bitmap = (fb->fb_header.fbh_bank.fbb_bitmap &= ~(1UL << ref));
	ref &= ~madv_mask;
	if ((bitmap & (madv_mask << ref)) == 0) {
		// if MADVISE_WIDTH consecutive chunks are free, madvise them free
		madvise(firehose_buffer_ref_to_chunk(fb, ref), madv_size, MADV_FREE);
	}
	ref = 0;
done:
	_dispatch_unfair_lock_unlock(fbb_lock);
	return ref;
}
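// Illustration (assuming, hypothetically, FIREHOSE_BUFFER_MADVISE_CHUNK_COUNT
// == 4, i.e. madv_mask == 0xf): shrinking ref 13 clears bit 13 in the bitmap,
// aligns the ref down to 12, and, if bits 12-15 are now all clear, the whole
// 4-chunk span starting at chunk 12 is returned to the VM system with a
// single MADV_FREE call; otherwise the memory stays mapped and only the
// bookkeeping changes.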
void
firehose_buffer_ring_enqueue(firehose_buffer_t fb, uint16_t ref)
{
	firehose_buffer_chunk_t fbc = firehose_buffer_ref_to_chunk(fb, ref);
	uint16_t volatile *fbh_ring;
	uint16_t volatile *fbh_ring_head;
	uint16_t head, gen, dummy, idx;
	firehose_buffer_pos_u fbc_pos = fbc->fbc_pos;
	bool for_io = fbc_pos.fbc_flag_io;

	if (for_io) {
		fbh_ring = fb->fb_header.fbh_io_ring;
		fbh_ring_head = &fb->fb_header.fbh_ring_io_head;
	} else {
		fbh_ring = fb->fb_header.fbh_mem_ring;
		fbh_ring_head = &fb->fb_header.fbh_ring_mem_head;
	}

#ifdef KERNEL
	// The algorithm in the kernel is simpler:
	// 1. reserve a write position for the head
	// 2. store the new reference at that position
	// Enqueuers can't starve each other that way.
	//
	// However, the dequeuers now have to sometimes wait for the value written
	// in the ring to appear and have to spin, which is okay since the kernel
	// disables preemption around these two consecutive atomic operations.
	// See firehose_client_drain.
	__firehose_critical_region_enter();
	head = os_atomic_inc_orig(fbh_ring_head, relaxed);
	gen = head & FIREHOSE_RING_POS_GEN_MASK;
	idx = head & FIREHOSE_RING_POS_IDX_MASK;

	while (unlikely(!os_atomic_cmpxchgvw(&fbh_ring[idx], gen, gen | ref, &dummy,
			relaxed))) {
		// can only ever happen if a recycler is slow, this requires having
		// enough cores (>5 for I/O e.g.)
		_dispatch_wait_until(fbh_ring[idx] == gen);
	}
	__firehose_critical_region_leave();
	__firehose_buffer_push_to_logd(fb, for_io);
#else
	// The algorithm is:
	// 1. read the head position
	// 2. cmpxchg head.gen with the (head.gen | ref) at head.idx
	// 3. if it fails wait until either the head cursor moves,
	//    or the cell becomes free
	//
	// The most likely stall at (3) is because another enqueuer raced us
	// and made the cell non empty.
	//
	// The alternative is to reserve the enqueue slot with an atomic inc,
	// then write the ref into the ring. This would be much simpler as the
	// generation packing wouldn't be required (though setting the ring cell
	// would still need a cmpxchg loop to avoid clobbering values of slow
	// dequeuers).
	//
	// But then that means that flushers (logd) could be starved until that
	// finishes, and logd cannot be held forever (that could even be a logd
	// DoS from malicious programs). Meaning that logd would stop draining
	// buffer queues when encountering that issue, leading the program to be
	// stuck in firehose_client_push() apparently waiting on logd, while
	// really it's waiting on itself. It's better for the scheduler if we
	// make it clear that we're waiting on ourselves!

	head = os_atomic_load(fbh_ring_head, relaxed);
	for (;;) {
		gen = head & FIREHOSE_RING_POS_GEN_MASK;
		idx = head & FIREHOSE_RING_POS_IDX_MASK;

		// If a thread is preempted here for GEN_MASK worth of ring rotations,
		// the cmpxchg below could succeed and produce a bogus enqueue
		// (confused enqueuer).
		if (fastpath(os_atomic_cmpxchgvw(&fbh_ring[idx], gen, gen | ref, &dummy,
				relaxed))) {
			if (fastpath(os_atomic_cmpxchgv(fbh_ring_head, head, head + 1,
					&head, release))) {
				__firehose_critical_region_leave();
				break;
			}
			// this thread is a confused enqueuer, need to undo the enqueue
			os_atomic_store(&fbh_ring[idx], gen, relaxed);
			continue;
		}

		_dispatch_wait_until(({
			// wait until either the head moves (another enqueuer is done)
			// or (not very likely) a recycler is very slow
			// or (very unlikely) the confused thread undoes its enqueue
			uint16_t old_head = head;
			head = *fbh_ring_head;
			head != old_head || fbh_ring[idx] == gen;
		}));
	}

	pthread_priority_t pp = fbc_pos.fbc_qos_bits;
	pp <<= _PTHREAD_PRIORITY_QOS_CLASS_SHIFT;
	firehose_client_send_push_async(fb, _pthread_qos_class_decode(pp, NULL, NULL),
			for_io);
#endif
}
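// Worked example of the gen|idx packing used above (the mask values below
// are made up for illustration only): suppose FIREHOSE_RING_POS_IDX_MASK ==
// 0x00ff and FIREHOSE_RING_POS_GEN_MASK == 0xff00. Then for head == 0x0203:
//
//     gen = 0x0200   // how many times the head has wrapped around
//     idx = 0x03     // which ring slot to write
//
// An empty slot holds just its current generation (0x0200); an occupied one
// holds gen | ref (e.g. 0x0205 after enqueuing chunk ref 5). Because an
// empty cell encodes *which lap* it is empty for, the cmpxchg(gen -> gen|ref)
// both claims the slot and catches an enqueuer that slept through an entire
// generation wrap.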
static inline uint16_t
firehose_buffer_ring_try_recycle(firehose_buffer_t fb)
{
	firehose_ring_tail_u pos, old;
	uint16_t volatile *fbh_ring;
	uint16_t gen, ref, entry, tail;
	firehose_buffer_chunk_t fbc;
	bool for_io;

	os_atomic_rmw_loop2o(&fb->fb_header, fbh_ring_tail.frp_atomic_tail,
			old.frp_atomic_tail, pos.frp_atomic_tail, relaxed, {
		pos = old;
		if (fastpath(old.frp_mem_tail != old.frp_mem_flushed)) {
			pos.frp_mem_tail++;
		} else if (fastpath(old.frp_io_tail != old.frp_io_flushed)) {
			pos.frp_io_tail++;
		} else {
			os_atomic_rmw_loop_give_up(return 0);
		}
	});

	// there's virtually no chance that the lack of acquire barrier above
	// lets us read a value from the ring so stale that it's still an Empty
	// marker. For correctness purposes have a cheap loop that should never
	// really loop, instead of an acquire barrier in the cmpxchg above.
	for_io = (pos.frp_io_tail != old.frp_io_tail);
	if (for_io) {
		fbh_ring = fb->fb_header.fbh_io_ring;
		tail = old.frp_io_tail & FIREHOSE_RING_POS_IDX_MASK;
	} else {
		fbh_ring = fb->fb_header.fbh_mem_ring;
		tail = old.frp_mem_tail & FIREHOSE_RING_POS_IDX_MASK;
	}
	_dispatch_wait_until((entry = fbh_ring[tail]) & FIREHOSE_RING_POS_IDX_MASK);

	// Needed for process death handling (recycle-dequeue):
	// No atomic fences required, we merely want to make sure the observers
	// will see memory effects in program (asm) order.
	// 1. the chunk is marked as "void&full" (clobbering the pos with FULL_BIT)
	// 2. then we remove any reference to the chunk from the ring
	// This ensures that if we don't see a reference to a chunk in the ring
	// and it is dirty, it is a chunk being written to that needs a flush
	gen = (entry & FIREHOSE_RING_POS_GEN_MASK) + FIREHOSE_RING_POS_GEN_INC;
	ref = entry & FIREHOSE_RING_POS_IDX_MASK;
	fbc = firehose_buffer_ref_to_chunk(fb, ref);

	if (!for_io && fbc->fbc_pos.fbc_stream == firehose_stream_metadata) {
		os_atomic_and2o(fb, fb_header.fbh_bank.fbb_metadata_bitmap,
				~(1ULL << ref), relaxed);
	}
	os_atomic_store2o(fbc, fbc_pos.fbc_atomic_pos,
			FIREHOSE_BUFFER_POS_FULL_BIT, relaxed);
	dispatch_compiler_barrier();
	os_atomic_store(&fbh_ring[tail], gen | 0, relaxed);
	return ref;
}
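// In other words, a chunk reference can only be recycled once logd has
// acknowledged flushing it, i.e. while frp_*_tail trails frp_*_flushed.
// For example, with frp_mem_flushed == 7 and frp_mem_tail == 5, two memory
// chunks are eligible: two successive calls recycle them and advance the
// tail to 7, after which this function returns 0 until the next
// acknowledgement arrives through firehose_client_merge_updates() above.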
static firehose_tracepoint_t
firehose_buffer_tracepoint_reserve_slow2(firehose_buffer_t fb,
		firehose_tracepoint_query_t ask, uint8_t **privptr, uint16_t ref)
{
	const uint64_t bank_unavail_mask = FIREHOSE_BANK_UNAVAIL_MASK(ask->for_io);
	firehose_buffer_bank_t const fbb = &fb->fb_header.fbh_bank;
	firehose_bank_state_u state;
	uint16_t fbs_max_ref;

	// first wait for our bank to have space, if needed
	if (!fastpath(ask->is_bank_ok)) {
		state.fbs_atomic_state =
				os_atomic_load2o(fbb, fbb_state.fbs_atomic_state, relaxed);
		while (state.fbs_atomic_state & bank_unavail_mask) {
			firehose_client_send_push(fb, ask->for_io, &state);
			if (slowpath(fb->fb_header.fbh_sendp == MACH_PORT_DEAD)) {
				// logd was unloaded, give up
				break;
			}
		}
		ask->is_bank_ok = true;
		fbs_max_ref = state.fbs_max_ref;
	} else {
		fbs_max_ref = fbb->fbb_state.fbs_max_ref;
	}

	// second, if we were passed a chunk, we may need to shrink
	if (slowpath(ref)) {
		goto try_shrink;
	}

	// third, wait for a chunk to come up, and if not, wait on the daemon
	for (;;) {
		if (fastpath(ref = firehose_buffer_ring_try_recycle(fb))) {
try_shrink:
			if (slowpath(ref >= fbs_max_ref)) {
				ref = firehose_buffer_ring_shrink(fb, ref);
				if (!ref) {
					continue;
				}
			}
			break;
		}
		if (fastpath(ref = firehose_buffer_ring_try_grow(fbb, fbs_max_ref))) {
			break;
		}
		firehose_client_send_push(fb, ask->for_io, NULL);
		if (slowpath(fb->fb_header.fbh_sendp == MACH_PORT_DEAD)) {
			// logd was unloaded, give up
			break;
		}
	}

	return firehose_buffer_stream_chunk_install(fb, ask, privptr, ref);
}
static inline dispatch_lock
_dispatch_gate_lock_load_seq_cst(dispatch_gate_t l)
{
	return os_atomic_load(&l->dgl_lock, seq_cst);
}

static void
_dispatch_gate_wait(dispatch_gate_t l, uint32_t flags)
{
	(void)flags;
	_dispatch_wait_until(_dispatch_gate_lock_load_seq_cst(l) == 0);
}
firehose_tracepoint_t
firehose_buffer_tracepoint_reserve_slow(firehose_buffer_t fb,
		firehose_tracepoint_query_t ask, uint8_t **privptr)
{
	const unsigned for_io = ask->for_io;
	const firehose_buffer_bank_t fbb = &fb->fb_header.fbh_bank;
	firehose_bank_state_u state;
	uint16_t ref = 0;

	uint64_t unavail_mask = FIREHOSE_BANK_UNAVAIL_MASK(for_io);
#ifndef KERNEL
	state.fbs_atomic_state = os_atomic_add_orig2o(fbb,
			fbb_state.fbs_atomic_state, FIREHOSE_BANK_INC(for_io), relaxed);
	if (fastpath(!(state.fbs_atomic_state & unavail_mask))) {
		ask->is_bank_ok = true;
		if (fastpath(ref = firehose_buffer_ring_try_recycle(fb))) {
			if (fastpath(ref < state.fbs_max_ref)) {
				return firehose_buffer_stream_chunk_install(fb, ask,
						privptr, ref);
			}
		}
	}
	return firehose_buffer_tracepoint_reserve_slow2(fb, ask, privptr, ref);
#else
	firehose_bank_state_u value;
	ask->is_bank_ok = os_atomic_rmw_loop2o(fbb, fbb_state.fbs_atomic_state,
			state.fbs_atomic_state, value.fbs_atomic_state, relaxed, {
		value = state;
		if (slowpath((value.fbs_atomic_state & unavail_mask) != 0)) {
			os_atomic_rmw_loop_give_up(break);
		}
		value.fbs_atomic_state += FIREHOSE_BANK_INC(for_io);
	});
	if (ask->is_bank_ok) {
		ref = firehose_buffer_ring_try_recycle(fb);
		if (slowpath(ref == 0)) {
			// the kernel has no overlap between I/O and memory chunks,
			// having an available bank slot means we should be able to recycle
			DISPATCH_INTERNAL_CRASH(0, "Unable to recycle a chunk");
		}
	}
	// rdar://25137005 installing `0` unlocks the allocator
	return firehose_buffer_stream_chunk_install(fb, ask, privptr, ref);
#endif // KERNEL
}
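// Rough summary of the two paths above: in userspace the bank slot is taken
// speculatively with an atomic add, and if the unavailable bit was already
// set the surplus is sorted out by firehose_buffer_tracepoint_reserve_slow2(),
// which may block pushing to logd. In the kernel the rmw loop refuses to take
// a slot at all when the bank is exhausted, because there is no daemon to
// wait on at that point; the reservation simply fails and installing ref == 0
// only unlocks the stream allocator (see the rdar reference above).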
firehose_tracepoint_t
__firehose_buffer_tracepoint_reserve(uint64_t stamp, firehose_stream_t stream,
		uint16_t pubsize, uint16_t privsize, uint8_t **privptr)
{
	firehose_buffer_t fb = kernel_firehose_buffer;
	if (!fastpath(fb)) {
		return NULL;
	}
	return firehose_buffer_tracepoint_reserve(fb, stamp, stream, pubsize,
			privsize, privptr);
}
firehose_tracepoint_t
__firehose_buffer_tracepoint_reserve_with_chunk(firehose_buffer_chunk_t fbc,
		uint64_t stamp, firehose_stream_t stream,
		uint16_t pubsize, uint16_t privsize, uint8_t **privptr)
{
	firehose_tracepoint_t ft;
	long result;

	result = firehose_buffer_chunk_try_reserve(fbc, stamp, stream,
			pubsize, privsize, privptr);
	if (fastpath(result > 0)) {
		ft = (firehose_tracepoint_t)(fbc->fbc_start + result);
		stamp -= fbc->fbc_timestamp;
		stamp |= (uint64_t)pubsize << 48;
		// Needed for process death handling (tracepoint-begin)
		// see firehose_buffer_stream_chunk_install
		os_atomic_store2o(ft, ft_stamp_and_length, stamp, relaxed);
		dispatch_compiler_barrier();
		return ft;
	}
	return NULL;
}
)
1099 if (!kernel_firehose_buffer
) {
1100 kernel_firehose_buffer
= firehose_buffer_create(MACH_PORT_NULL
, 0, 0);
1104 *size
= FIREHOSE_BUFFER_KERNEL_CHUNK_COUNT
* FIREHOSE_BUFFER_CHUNK_SIZE
;
1106 return kernel_firehose_buffer
;
void
__firehose_buffer_tracepoint_flush(firehose_tracepoint_t ft,
		firehose_tracepoint_id_u ftid)
{
	return firehose_buffer_tracepoint_flush(kernel_firehose_buffer, ft, ftid);
}
void
__firehose_buffer_tracepoint_flush_chunk(firehose_buffer_chunk_t fbc,
		firehose_tracepoint_t ft, firehose_tracepoint_id_u ftid)
{
	firehose_buffer_pos_u pos;

	// Needed for process death handling (tracepoint-flush):
	// We want to make sure the observers
	// will see memory effects in program (asm) order.
	// 1. write all the data to the tracepoint
	// 2. write the tracepoint ID, so that seeing it means the tracepoint
	//    is valid
	ft->ft_thread = thread_tid(current_thread());

	// release barrier makes the log writes visible
	os_atomic_store2o(ft, ft_id.ftid_value, ftid.ftid_value, release);
	pos.fbc_atomic_pos = os_atomic_sub2o(fbc, fbc_pos.fbc_atomic_pos,
			FIREHOSE_BUFFER_POS_REFCNT_INC, relaxed);
}
void
__firehose_merge_updates(firehose_push_reply_t update)
{
	firehose_buffer_t fb = kernel_firehose_buffer;
	if (fastpath(fb)) {
		firehose_client_merge_updates(fb, true, update, NULL);
	}
}

#endif // OS_FIREHOSE_SPI