/*
 * Copyright (c) 2015 Apple Inc. All rights reserved.
 *
 * @APPLE_APACHE_LICENSE_HEADER_START@
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @APPLE_APACHE_LICENSE_HEADER_END@
 */
#include <mach/vm_statistics.h> // VM_MEMORY_GENEALOGY
#ifdef KERNEL

#define OS_VOUCHER_ACTIVITY_SPI_TYPES 1
#define OS_FIREHOSE_SPI 1
#define __OS_EXPOSE_INTERNALS_INDIRECT__ 1

#define DISPATCH_PURE_C 1
#define _safe_cast_to_long(x) \
		({ _Static_assert(sizeof(typeof(x)) <= sizeof(long), \
				"__builtin_expect doesn't support types wider than long"); \
				(long)(x); })
#define fastpath(x) ((typeof(x))__builtin_expect(_safe_cast_to_long(x), ~0l))
#define slowpath(x) ((typeof(x))__builtin_expect(_safe_cast_to_long(x), 0l))
#define os_likely(x) __builtin_expect(!!(x), 1)
#define os_unlikely(x) __builtin_expect(!!(x), 0)
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
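// Illustrative expansion (editor annotation, not in the original source):
// with these shims, `if (slowpath(kr)) { ... }` compiles down to
// `if (__builtin_expect((long)(kr), 0l)) { ... }`, telling the compiler the
// branch is cold so the error path is laid out out-of-line.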
#define DISPATCH_INTERNAL_CRASH(ac, msg) ({ panic(msg); __builtin_trap(); })
#if defined(__x86_64__) || defined(__i386__)
#define dispatch_hardware_pause() __asm__("pause")
#elif (defined(__arm__) && defined(_ARM_ARCH_7) && defined(__thumb__)) || \
		defined(__arm64__)
#define dispatch_hardware_pause() __asm__("yield")
#define dispatch_hardware_wfe()   __asm__("wfe")
#else
#define dispatch_hardware_pause() __asm__("")
#endif
#define _dispatch_wait_until(c) do { \
		while (!fastpath(c)) { \
			dispatch_hardware_pause(); \
		} } while (0)
#define dispatch_compiler_barrier() __asm__ __volatile__("" ::: "memory")
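// Editor annotation: _dispatch_wait_until() is a plain spin loop.
// dispatch_hardware_pause() (PAUSE/YIELD) only throttles the pipeline and
// provides no memory ordering, so the awaited condition must read memory the
// other side publishes with its own ordering, as the atomics below do.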
typedef uint32_t dispatch_lock;
typedef struct dispatch_gate_s {
	dispatch_lock dgl_lock;
} dispatch_gate_s, *dispatch_gate_t;
#define DLOCK_LOCK_DATA_CONTENTION 0
static void _dispatch_gate_wait(dispatch_gate_t l, uint32_t flags);
#include <kern/debug.h>
#include <machine/cpu_number.h>
#include <kern/thread.h>
#include <mach/port.h>
#include <sys/param.h>
#include <sys/types.h>
#include <vm/vm_kern.h>
#include <internal/atomic.h> // os/internal/atomic.h
#include <firehose_types_private.h> // <firehose/firehose_types_private.h>
#include <tracepoint_private.h> // <firehose/tracepoint_private.h>
#include <chunk_private.h> // <firehose/chunk_private.h>
#include "os/firehose_buffer_private.h"
#include "firehose_buffer_internal.h"
#include "firehose_inline_internal.h"
#else
#include "internal.h"
#include "firehose.h" // MiG
#include "firehose_replyServer.h" // MiG
#endif

#if OS_FIREHOSE_SPI
#if __has_feature(c_static_assert)
_Static_assert(sizeof(((firehose_stream_state_u *)NULL)->fss_gate) ==
		sizeof(((firehose_stream_state_u *)NULL)->fss_allocator),
		"fss_gate and fss_allocator alias");
_Static_assert(offsetof(firehose_stream_state_u, fss_gate) ==
		offsetof(firehose_stream_state_u, fss_allocator),
		"fss_gate and fss_allocator alias");
_Static_assert(sizeof(struct firehose_buffer_header_s) ==
		FIREHOSE_CHUNK_SIZE,
		"firehose buffer header must be 4k");
_Static_assert(offsetof(struct firehose_buffer_header_s, fbh_unused) <=
		FIREHOSE_CHUNK_SIZE - FIREHOSE_BUFFER_LIBTRACE_HEADER_SIZE,
		"we must have enough space for the libtrace header");
_Static_assert(powerof2(FIREHOSE_BUFFER_CHUNK_COUNT),
		"CHUNK_COUNT must be a power of two");
_Static_assert(FIREHOSE_BUFFER_CHUNK_COUNT <= 64,
		"CHUNK_COUNT must be at most 64 (bitmap in uint64_t)");
#ifdef FIREHOSE_BUFFER_MADVISE_CHUNK_COUNT
_Static_assert(powerof2(FIREHOSE_BUFFER_MADVISE_CHUNK_COUNT),
		"madvise chunk count must be a power of two");
#endif
_Static_assert(sizeof(struct firehose_buffer_stream_s) == 128,
		"firehose buffer stream must be small (single cacheline if possible)");
_Static_assert(sizeof(struct firehose_tracepoint_s) == 24,
		"tracepoint header should be exactly 24 bytes");
#endif
#ifdef KERNEL
static firehose_buffer_t kernel_firehose_buffer = NULL;
#endif
#pragma mark Client IPC to the log daemon
static mach_port_t
firehose_client_reconnect(firehose_buffer_t fb, mach_port_t oldsendp)
{
	mach_port_t sendp = MACH_PORT_NULL;
	mach_port_t mem_port = MACH_PORT_NULL, extra_info_port = MACH_PORT_NULL;
	mach_vm_size_t extra_info_size = 0;
	kern_return_t kr;

	dispatch_assert(fb->fb_header.fbh_logd_port);
	dispatch_assert(fb->fb_header.fbh_recvp);
	dispatch_assert(fb->fb_header.fbh_uniquepid != 0);

	_dispatch_unfair_lock_lock(&fb->fb_header.fbh_logd_lock);
	sendp = fb->fb_header.fbh_sendp;
	if (sendp != oldsendp || sendp == MACH_PORT_DEAD) {
		// someone beat us to reconnecting or logd was unloaded, just go away
		goto done;
	}

	if (oldsendp) {
		// same trick as _xpc_pipe_dispose: keeping a send right
		// maintains the name, so that we can destroy the receive right
		// in case we still have it.
		(void)firehose_mach_port_recv_dispose(oldsendp, fb);
		firehose_mach_port_send_release(oldsendp);
		fb->fb_header.fbh_sendp = MACH_PORT_NULL;
	}

	/* Create a memory port for the buffer VM region */
	vm_prot_t flags = VM_PROT_READ | MAP_MEM_VM_SHARE;
	memory_object_size_t size = sizeof(union firehose_buffer_u);
	mach_vm_address_t addr = (vm_address_t)fb;

	kr = mach_make_memory_entry_64(mach_task_self(), &size, addr,
			flags, &mem_port, MACH_PORT_NULL);
	if (size < sizeof(union firehose_buffer_u)) {
		DISPATCH_CLIENT_CRASH(size, "Invalid size for the firehose buffer");
	}
	if (kr) {
		// the client probably has some form of memory corruption
		// and/or a port leak
		DISPATCH_CLIENT_CRASH(kr, "Unable to make memory port");
	}

	/* Create a communication port to the logging daemon */
	uint32_t opts = MPO_CONTEXT_AS_GUARD | MPO_TEMPOWNER | MPO_INSERT_SEND_RIGHT;
	sendp = firehose_mach_port_allocate(opts, fb);

	if (oldsendp && _voucher_libtrace_hooks->vah_get_reconnect_info) {
		kr = _voucher_libtrace_hooks->vah_get_reconnect_info(&addr, &size);
		if (likely(kr == KERN_SUCCESS) && addr && size) {
			extra_info_size = size;
			kr = mach_make_memory_entry_64(mach_task_self(), &size, addr,
					flags, &extra_info_port, MACH_PORT_NULL);
			if (unlikely(kr)) {
				// the client probably has some form of memory corruption
				// and/or a port leak
				DISPATCH_CLIENT_CRASH(kr, "Unable to make memory port");
			}
			kr = mach_vm_deallocate(mach_task_self(), addr, size);
			(void)dispatch_assume_zero(kr);
		}
	}

	/* Call the firehose_register() MIG routine */
	kr = firehose_send_register(fb->fb_header.fbh_logd_port, mem_port,
			sizeof(union firehose_buffer_u), sendp, fb->fb_header.fbh_recvp,
			extra_info_port, extra_info_size);
	if (likely(kr == KERN_SUCCESS)) {
		fb->fb_header.fbh_sendp = sendp;
	} else if (unlikely(kr == MACH_SEND_INVALID_DEST)) {
		// MACH_SEND_INVALID_DEST here means that logd's bootstrap port
		// turned into a dead name, which in turn means that logd has been
		// unloaded. The only option here is to give up permanently.
		//
		// same trick as _xpc_pipe_dispose: keeping a send right
		// maintains the name, so that we can destroy the receive right
		// in case we still have it.
		(void)firehose_mach_port_recv_dispose(sendp, fb);
		firehose_mach_port_send_release(sendp);
		firehose_mach_port_send_release(mem_port);
		if (extra_info_port) firehose_mach_port_send_release(extra_info_port);
		sendp = fb->fb_header.fbh_sendp = MACH_PORT_DEAD;
	} else {
		// the client probably has some form of memory corruption
		// and/or a port leak
		DISPATCH_CLIENT_CRASH(kr, "Unable to register with logd");
	}

done:
	_dispatch_unfair_lock_unlock(&fb->fb_header.fbh_logd_lock);
	return sendp;
}
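// Editor annotation: the buffer itself is shared with logd as a Mach memory
// entry, so a reconnect only has to re-send ports (and optionally the
// extra-info region); no log data is ever copied.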
static void
firehose_buffer_update_limits_unlocked(firehose_buffer_t fb)
{
	firehose_bank_state_u old, new;
	firehose_buffer_bank_t fbb = &fb->fb_header.fbh_bank;
	unsigned long fbb_flags = fbb->fbb_flags;
	uint16_t io_streams = 0, mem_streams = 0;
	uint16_t total = 0;

	for (size_t i = 0; i < countof(fb->fb_header.fbh_stream); i++) {
		firehose_buffer_stream_t fbs = fb->fb_header.fbh_stream + i;

		if (fbs->fbs_state.fss_current == FIREHOSE_STREAM_STATE_PRISTINE) {
			continue;
		}
		if ((1UL << i) & firehose_stream_uses_io_bank) {
			io_streams++;
		} else {
			mem_streams++;
		}
	}

	if (fbb_flags & FIREHOSE_BUFFER_BANK_FLAG_LOW_MEMORY) {
		if (fbb_flags & FIREHOSE_BUFFER_BANK_FLAG_HIGH_RATE) {
			total = 1 + 4 * mem_streams + io_streams; // usually 10
		} else {
			total = 1 + 2 + mem_streams + io_streams; // usually 6
		}
	} else {
		if (fbb_flags & FIREHOSE_BUFFER_BANK_FLAG_HIGH_RATE) {
			total = 1 + 6 * mem_streams + 3 * io_streams; // usually 16
		} else {
			total = 1 + 2 * (mem_streams + io_streams); // usually 7
		}
	}

	uint16_t ratio = (uint16_t)(PAGE_SIZE / FIREHOSE_CHUNK_SIZE);
	if (ratio > 1) {
		total = roundup(total, ratio);
	}
	total = MAX(total, FIREHOSE_BUFFER_CHUNK_PREALLOCATED_COUNT);
	if (!(fbb_flags & FIREHOSE_BUFFER_BANK_FLAG_LOW_MEMORY)) {
		total = MAX(total, TARGET_OS_EMBEDDED ? 8 : 12);
	}

	new.fbs_max_ref = total;
	new.fbs_mem_bank = FIREHOSE_BANK_UNAVAIL_BIT - (total - 1);
	new.fbs_io_bank = FIREHOSE_BANK_UNAVAIL_BIT -
			MAX(3 * total / 8, 2 * io_streams);
	new.fbs_unused = 0;

	old = fbb->fbb_limits;
	fbb->fbb_limits = new;
	if (old.fbs_atomic_state == new.fbs_atomic_state) {
		return;
	}
	os_atomic_add2o(&fb->fb_header, fbh_bank.fbb_state.fbs_atomic_state,
			new.fbs_atomic_state - old.fbs_atomic_state, relaxed);
}
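// Worked example (editor annotation): with the default flags (neither
// LOW_MEMORY nor HIGH_RATE), 2 memory streams and 1 I/O stream give
// total = 1 + 2 * (2 + 1) = 7 -- the "usually 7" case -- before the
// roundup()/MAX() floors above are applied.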
firehose_buffer_t
firehose_buffer_create(mach_port_t logd_port, uint64_t unique_pid,
		unsigned long bank_flags)
{
	firehose_buffer_header_t fbh;
	firehose_buffer_t fb;

#ifndef KERNEL
	mach_vm_address_t vm_addr = 0;
	kern_return_t kr;

	vm_addr = vm_page_size;
	const size_t madvise_bytes = FIREHOSE_BUFFER_MADVISE_CHUNK_COUNT *
			FIREHOSE_CHUNK_SIZE;
	if (slowpath(madvise_bytes % PAGE_SIZE)) {
		DISPATCH_INTERNAL_CRASH(madvise_bytes,
				"Invalid values for MADVISE_CHUNK_COUNT / CHUNK_SIZE");
	}

	kr = mach_vm_map(mach_task_self(), &vm_addr, sizeof(*fb), 0,
			VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE |
			VM_MAKE_TAG(VM_MEMORY_GENEALOGY), MEMORY_OBJECT_NULL, 0, FALSE,
			VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_NONE);
	if (slowpath(kr)) {
		if (kr != KERN_NO_SPACE) dispatch_assume_zero(kr);
		firehose_mach_port_send_release(logd_port);
		return NULL;
	}

	uint32_t opts = MPO_CONTEXT_AS_GUARD | MPO_STRICT | MPO_INSERT_SEND_RIGHT;
#else
	vm_offset_t vm_addr = 0;
	vm_size_t size;

	size = FIREHOSE_BUFFER_KERNEL_CHUNK_COUNT * FIREHOSE_CHUNK_SIZE;
	__firehose_allocate(&vm_addr, size);

	(void)logd_port; (void)unique_pid;
#endif // KERNEL

	fb = (firehose_buffer_t)vm_addr;
	fbh = &fb->fb_header;
#ifndef KERNEL
	fbh->fbh_logd_port = logd_port;
	fbh->fbh_pid = getpid();
	fbh->fbh_uniquepid = unique_pid;
	fbh->fbh_recvp = firehose_mach_port_allocate(opts, fb);
#endif // !KERNEL
	fbh->fbh_spi_version = OS_FIREHOSE_SPI_VERSION;
	fbh->fbh_bank.fbb_flags = bank_flags;

#ifndef KERNEL
	for (size_t i = 0; i < countof(fbh->fbh_stream); i++) {
		firehose_buffer_stream_t fbs = fbh->fbh_stream + i;
		if (i != firehose_stream_metadata) {
			fbs->fbs_state.fss_current = FIREHOSE_STREAM_STATE_PRISTINE;
		}
	}
	firehose_buffer_update_limits_unlocked(fb);
#else
	uint16_t total = FIREHOSE_BUFFER_CHUNK_PREALLOCATED_COUNT + 1;
	const uint16_t num_kernel_io_pages = 8;
	uint16_t io_pages = num_kernel_io_pages;
	fbh->fbh_bank.fbb_state = (firehose_bank_state_u){
		.fbs_max_ref = total,
		.fbs_io_bank = FIREHOSE_BANK_UNAVAIL_BIT - io_pages,
		.fbs_mem_bank = FIREHOSE_BANK_UNAVAIL_BIT - (total - io_pages - 1),
	};
	fbh->fbh_bank.fbb_limits = fbh->fbh_bank.fbb_state;
#endif // KERNEL

	// now pre-allocate some chunks in the ring directly
#ifdef KERNEL
	const uint16_t pre_allocated = FIREHOSE_BUFFER_CHUNK_PREALLOCATED_COUNT - 1;
#else
	const uint16_t pre_allocated = FIREHOSE_BUFFER_CHUNK_PREALLOCATED_COUNT;
#endif

	fbh->fbh_bank.fbb_bitmap = (1U << (1 + pre_allocated)) - 1;

	for (uint16_t i = 0; i < pre_allocated; i++) {
		fbh->fbh_mem_ring[i] = i + 1;
	}
	fbh->fbh_bank.fbb_mem_flushed = pre_allocated;
	fbh->fbh_ring_mem_head = pre_allocated;

#ifdef KERNEL
	// install the early boot page as the current one for persist
	fbh->fbh_stream[firehose_stream_persist].fbs_state.fss_current =
			FIREHOSE_BUFFER_CHUNK_PREALLOCATED_COUNT;
	fbh->fbh_bank.fbb_state.fbs_io_bank += 1;
#endif

	fbh->fbh_ring_tail = (firehose_ring_tail_u){
		.frp_mem_flushed = pre_allocated,
	};
	return fb;
}
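// Editor annotation: chunk 0 of the buffer holds the header itself, so usable
// refs start at 1. The (1U << (1 + pre_allocated)) - 1 bitmap marks the header
// chunk plus each pre-allocated chunk busy, and fbh_mem_ring[i] = i + 1 seeds
// the memory ring with those refs.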
static void
firehose_notify_source_invoke(mach_msg_header_t *hdr)
{
	const size_t reply_size =
			sizeof(union __ReplyUnion__firehose_client_firehoseReply_subsystem);
	firehose_mig_server(firehoseReply_server, reply_size, hdr);
}
static void
firehose_client_register_for_notifications(firehose_buffer_t fb)
{
	static const struct dispatch_continuation_s dc = {
		.dc_func = (void *)firehose_notify_source_invoke,
	};
	firehose_buffer_header_t fbh = &fb->fb_header;

	dispatch_once(&fbh->fbh_notifs_pred, ^{
		dispatch_source_t ds = _dispatch_source_create_mach_msg_direct_recv(
				fbh->fbh_recvp, &dc);
		dispatch_set_context(ds, fb);
		dispatch_activate(ds);
		fbh->fbh_notifs_source = ds;
	});
}
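// Editor annotation: dispatch_once() guarantees the notification source is
// created exactly once per process, even when several threads race through
// firehose_client_send_push_async() below.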
static void
firehose_client_send_push_async(firehose_buffer_t fb, qos_class_t qos,
		bool for_io)
{
	bool ask_for_notifs = fb->fb_header.fbh_notifs_source != NULL;
	mach_port_t sendp = fb->fb_header.fbh_sendp;
	kern_return_t kr = KERN_FAILURE;

	if (!ask_for_notifs && _dispatch_is_multithreaded_inline()) {
		firehose_client_register_for_notifications(fb);
		ask_for_notifs = true;
	}

	if (slowpath(sendp == MACH_PORT_DEAD)) {
		return;
	}

	if (fastpath(sendp)) {
		kr = firehose_send_push_async(sendp, qos, for_io, ask_for_notifs);
		if (likely(kr == KERN_SUCCESS)) {
			return;
		}
		if (kr != MACH_SEND_INVALID_DEST) {
			DISPATCH_VERIFY_MIG(kr);
			dispatch_assume_zero(kr);
		}
	}

	sendp = firehose_client_reconnect(fb, sendp);
	if (fastpath(MACH_PORT_VALID(sendp))) {
		kr = firehose_send_push_async(sendp, qos, for_io, ask_for_notifs);
		if (likely(kr == KERN_SUCCESS)) {
			return;
		}
		if (kr != MACH_SEND_INVALID_DEST) {
			DISPATCH_VERIFY_MIG(kr);
			dispatch_assume_zero(kr);
		}
	}
}
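// Editor annotation: both push paths use the same retry shape -- one attempt
// on the cached send right, then a single firehose_client_reconnect() and one
// retry. MACH_SEND_INVALID_DEST is the only error treated as recoverable;
// anything else is asserted on.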
void
firehose_client_merge_updates(firehose_buffer_t fb, bool async_notif,
		firehose_push_reply_t reply, firehose_bank_state_u *state_out)
{
	firehose_bank_state_u state;
	firehose_ring_tail_u otail, ntail;
	uint64_t old_flushed_pos, bank_updates;
	uint16_t io_delta = 0;
	uint16_t mem_delta = 0;

	if (firehose_atomic_maxv2o(&fb->fb_header, fbh_bank.fbb_mem_flushed,
			reply.fpr_mem_flushed_pos, &old_flushed_pos, relaxed)) {
		mem_delta = (uint16_t)(reply.fpr_mem_flushed_pos - old_flushed_pos);
	}
	if (firehose_atomic_maxv2o(&fb->fb_header, fbh_bank.fbb_io_flushed,
			reply.fpr_io_flushed_pos, &old_flushed_pos, relaxed)) {
		io_delta = (uint16_t)(reply.fpr_io_flushed_pos - old_flushed_pos);
	}
	_dispatch_debug("client side: mem: +%d->%llx, io: +%d->%llx",
			mem_delta, reply.fpr_mem_flushed_pos,
			io_delta, reply.fpr_io_flushed_pos);

	if (!mem_delta && !io_delta) {
		if (state_out) {
			state_out->fbs_atomic_state = os_atomic_load2o(&fb->fb_header,
					fbh_bank.fbb_state.fbs_atomic_state, relaxed);
		}
		return;
	}

	__firehose_critical_region_enter();
	os_atomic_rmw_loop2o(&fb->fb_header, fbh_ring_tail.frp_atomic_tail,
			otail.frp_atomic_tail, ntail.frp_atomic_tail, relaxed, {
		ntail = otail;
		// overflow handles the generation wraps
		ntail.frp_io_flushed += io_delta;
		ntail.frp_mem_flushed += mem_delta;
	});

	bank_updates = ((uint64_t)mem_delta << FIREHOSE_BANK_SHIFT(0)) |
			((uint64_t)io_delta << FIREHOSE_BANK_SHIFT(1));
	state.fbs_atomic_state = os_atomic_sub2o(&fb->fb_header,
			fbh_bank.fbb_state.fbs_atomic_state, bank_updates, release);
	__firehose_critical_region_leave();

	if (state_out) *state_out = state;

	if (async_notif) {
		if (io_delta) {
			os_atomic_inc2o(&fb->fb_header, fbh_bank.fbb_io_notifs, relaxed);
		}
		if (mem_delta) {
			os_atomic_inc2o(&fb->fb_header, fbh_bank.fbb_mem_notifs, relaxed);
		}
	}
}
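// Editor annotation: fbs_atomic_state packs both bank counters into one
// 64-bit word (FIREHOSE_BANK_SHIFT(0) for memory, FIREHOSE_BANK_SHIFT(1) for
// I/O), so a single os_atomic_sub2o() with release ordering credits both
// banks atomically.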
void
firehose_client_send_push(firehose_buffer_t fb, bool for_io,
		firehose_bank_state_u *state_out)
{
	mach_port_t sendp = fb->fb_header.fbh_sendp;
	firehose_push_reply_t push_reply = { };
	qos_class_t qos = qos_class_self();
	kern_return_t kr;

	if (slowpath(sendp == MACH_PORT_DEAD)) {
		return;
	}
	if (fastpath(sendp)) {
		kr = firehose_send_push(sendp, qos, for_io, &push_reply);
		if (likely(kr == KERN_SUCCESS)) {
			goto success;
		}
		if (kr != MACH_SEND_INVALID_DEST) {
			DISPATCH_VERIFY_MIG(kr);
			dispatch_assume_zero(kr);
		}
	}

	sendp = firehose_client_reconnect(fb, sendp);
	if (fastpath(MACH_PORT_VALID(sendp))) {
		kr = firehose_send_push(sendp, qos, for_io, &push_reply);
		if (likely(kr == KERN_SUCCESS)) {
			goto success;
		}
		if (kr != MACH_SEND_INVALID_DEST) {
			DISPATCH_VERIFY_MIG(kr);
			dispatch_assume_zero(kr);
		}
	}

	if (state_out) {
		state_out->fbs_atomic_state = os_atomic_load2o(&fb->fb_header,
				fbh_bank.fbb_state.fbs_atomic_state, relaxed);
	}
	return;

success:
	if (memcmp(&push_reply, &FIREHOSE_PUSH_REPLY_CORRUPTED,
			sizeof(push_reply)) == 0) {
		// TODO: find out the actual cause and log it
		DISPATCH_CLIENT_CRASH(0, "Memory corruption in the logging buffers");
	}

	if (for_io) {
		os_atomic_inc2o(&fb->fb_header, fbh_bank.fbb_io_sync_pushes, relaxed);
	} else {
		os_atomic_inc2o(&fb->fb_header, fbh_bank.fbb_mem_sync_pushes, relaxed);
	}
	// TODO <rdar://problem/22963876>
	//
	// use fbb_*_flushes and fbb_*_sync_pushes to decide to dynamically
	// allow using more buffers, if not under memory pressure.
	//
	// There is only a point for multithreaded clients if:
	// - enough samples (total_flushes above some limits)
	// - the ratio is really bad (a push per cycle is definitely a problem)
	return firehose_client_merge_updates(fb, false, push_reply, state_out);
}
kern_return_t
firehose_client_push_reply(mach_port_t req_port OS_UNUSED,
		kern_return_t rtc, firehose_push_reply_t push_reply OS_UNUSED)
{
	DISPATCH_INTERNAL_CRASH(rtc, "firehose_push_reply should never be sent "
			"to the buffer receive port");
}
kern_return_t
firehose_client_push_notify_async(mach_port_t server_port OS_UNUSED,
		firehose_push_reply_t push_reply)
{
	// see _dispatch_source_merge_mach_msg_direct
	dispatch_queue_t dq = _dispatch_queue_get_current();
	firehose_buffer_t fb = dispatch_get_context(dq);
	firehose_client_merge_updates(fb, true, push_reply, NULL);
	return KERN_SUCCESS;
}
#pragma mark Buffer handling
void
firehose_buffer_update_limits(firehose_buffer_t fb)
{
	dispatch_unfair_lock_t fbb_lock = &fb->fb_header.fbh_bank.fbb_lock;
	_dispatch_unfair_lock_lock(fbb_lock);
	firehose_buffer_update_limits_unlocked(fb);
	_dispatch_unfair_lock_unlock(fbb_lock);
}
static inline firehose_tracepoint_t
firehose_buffer_chunk_init(firehose_chunk_t fc,
		firehose_tracepoint_query_t ask, uint8_t **privptr)
{
	const uint16_t ft_size = offsetof(struct firehose_tracepoint_s, ft_data);

	uint16_t pub_offs = offsetof(struct firehose_chunk_s, fc_data);
	uint16_t priv_offs = FIREHOSE_CHUNK_SIZE;

	pub_offs += roundup(ft_size + ask->pubsize, 8);
	priv_offs -= ask->privsize;

	if (fc->fc_pos.fcp_atomic_pos) {
		// Needed for process death handling (recycle-reuse):
		// No atomic fences required, we merely want to make sure the observers
		// will see memory effects in program (asm) order.
		// 1. the payload part of the chunk is cleared completely
		// 2. the chunk is marked as reused
		// This ensures that if we don't see a reference to a chunk in the ring
		// and it is dirty, when crawling the chunk, we don't see remnants of
		// other tracepoints
		//
		// We only do that when the fc_pos is non zero, because zero means
		// we just faulted the chunk, and the kernel already bzero-ed it.
		bzero(fc->fc_data, sizeof(fc->fc_data));
	}
	dispatch_compiler_barrier();
	// <rdar://problem/23562733> boot starts mach absolute time at 0, and
	// wrapping around to values above UINT64_MAX - FIREHOSE_STAMP_SLOP
	// breaks firehose_buffer_stream_flush() assumptions
	if (ask->stamp > FIREHOSE_STAMP_SLOP) {
		fc->fc_timestamp = ask->stamp - FIREHOSE_STAMP_SLOP;
	} else {
		fc->fc_timestamp = 0;
	}
	fc->fc_pos = (firehose_chunk_pos_u){
		.fcp_next_entry_offs = pub_offs,
		.fcp_private_offs = priv_offs,
		.fcp_refcnt = 1,
		.fcp_qos = firehose_buffer_qos_bits_propagate(),
		.fcp_stream = ask->stream,
		.fcp_flag_io = ask->for_io,
	};

	if (privptr) {
		*privptr = fc->fc_start + priv_offs;
	}
	return (firehose_tracepoint_t)fc->fc_data;
}
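// Worked example (editor annotation, illustrative numbers): with pubsize 40
// and privsize 16 in a 4kB chunk, fcp_next_entry_offs becomes
// offsetof(fc_data) + roundup(24 + 40, 8) and fcp_private_offs becomes
// 4096 - 16; the private area grows downward toward the public one.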
static firehose_tracepoint_t
firehose_buffer_stream_chunk_install(firehose_buffer_t fb,
		firehose_tracepoint_query_t ask, uint8_t **privptr, uint16_t ref)
{
	firehose_stream_state_u state, new_state;
	firehose_tracepoint_t ft;
	firehose_buffer_stream_t fbs = &fb->fb_header.fbh_stream[ask->stream];
	uint64_t stamp_and_len;

	if (fastpath(ref)) {
		firehose_chunk_t fc = firehose_buffer_ref_to_chunk(fb, ref);
		ft = firehose_buffer_chunk_init(fc, ask, privptr);
		// Needed for process death handling (tracepoint-begin):
		// write the length before making the chunk visible
		stamp_and_len = ask->stamp - fc->fc_timestamp;
		stamp_and_len |= (uint64_t)ask->pubsize << 48;
		os_atomic_store2o(ft, ft_stamp_and_length, stamp_and_len, relaxed);
#ifdef KERNEL
		ft->ft_thread = thread_tid(current_thread());
#else
		ft->ft_thread = _pthread_threadid_self_np_direct();
#endif
		if (ask->stream == firehose_stream_metadata) {
			os_atomic_or2o(fb, fb_header.fbh_bank.fbb_metadata_bitmap,
					1ULL << ref, relaxed);
		}
		// release barrier to make the chunk init visible
		os_atomic_rmw_loop2o(fbs, fbs_state.fss_atomic_state,
				state.fss_atomic_state, new_state.fss_atomic_state, release, {
			// We use a generation counter to prevent a theoretical ABA problem:
			// a thread could try to acquire a tracepoint in a chunk, fail to
			// do so, mark it as to be pushed, enqueue it, and then be preempted
			//
			// It sleeps for a long time, and then tries to acquire the
			// allocator bit and uninstalling the chunk. Succeeds in doing so,
			// but because the chunk actually happened to have cycled all the
			// way back to being installed. That thread would effectively hide
			// that unflushed chunk and leak it.
			//
			// Having a generation counter prevents the uninstallation of the
			// chunk from spuriously succeeding when it was a re-incarnation of it.
			new_state = (firehose_stream_state_u){
				.fss_current = ref,
				.fss_generation = state.fss_generation + 1,
			};
		});
	} else {
		// the allocator gave up, just clear the allocator + waiter bits
		firehose_stream_state_u mask = { .fss_allocator = ~0u, };
		state.fss_atomic_state = os_atomic_and_orig2o(fbs,
				fbs_state.fss_atomic_state, ~mask.fss_atomic_state, relaxed);
		ft = NULL;
	}

	if (unlikely(state.fss_gate.dgl_lock != _dispatch_tid_self())) {
		_dispatch_gate_broadcast_slow(&fbs->fbs_state.fss_gate,
				state.fss_gate.dgl_lock);
	}

	if (unlikely(state.fss_current == FIREHOSE_STREAM_STATE_PRISTINE)) {
		firehose_buffer_update_limits(fb);
	}

#ifndef KERNEL
	// pairs with the one in firehose_buffer_tracepoint_reserve()
	__firehose_critical_region_leave();
#endif
	return ft;
}
static inline uint16_t
firehose_buffer_ring_try_grow(firehose_buffer_bank_t fbb, uint16_t limit)
{
	uint16_t ref = 0;
	uint64_t bitmap;

	_dispatch_unfair_lock_lock(&fbb->fbb_lock);
	bitmap = ~(fbb->fbb_bitmap | (~0ULL << limit));
	if (bitmap) {
		ref = firehose_bitmap_first_set(bitmap);
		fbb->fbb_bitmap |= 1U << ref;
	}
	_dispatch_unfair_lock_unlock(&fbb->fbb_lock);
	return ref;
}
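// Editor annotation: ~(fbb_bitmap | (~0ULL << limit)) has a 1 exactly for
// each free chunk whose ref is below `limit`, so firehose_bitmap_first_set()
// picks the lowest free ref; 0 signals failure, since ref 0 (the header
// chunk) is permanently marked busy.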
static inline uint16_t
firehose_buffer_ring_shrink(firehose_buffer_t fb, uint16_t ref)
{
	const size_t madv_size =
			FIREHOSE_CHUNK_SIZE * FIREHOSE_BUFFER_MADVISE_CHUNK_COUNT;
	const size_t madv_mask =
			(1ULL << FIREHOSE_BUFFER_MADVISE_CHUNK_COUNT) - 1;
	dispatch_unfair_lock_t fbb_lock = &fb->fb_header.fbh_bank.fbb_lock;
	uint64_t bitmap;

	_dispatch_unfair_lock_lock(fbb_lock);
	if (ref < fb->fb_header.fbh_bank.fbb_limits.fbs_max_ref) {
		goto done;
	}

	bitmap = (fb->fb_header.fbh_bank.fbb_bitmap &= ~(1UL << ref));
	ref &= ~madv_mask;
	if ((bitmap & (madv_mask << ref)) == 0) {
		// if MADVISE_WIDTH consecutive chunks are free, madvise them free
		madvise(firehose_buffer_ref_to_chunk(fb, ref), madv_size, MADV_FREE);
	}
	ref = 0;
done:
	_dispatch_unfair_lock_unlock(fbb_lock);
	return ref;
}
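// Editor annotation: MADV_FREE is only issued when a whole aligned group of
// FIREHOSE_BUFFER_MADVISE_CHUNK_COUNT chunks is free, which is why `ref` is
// aligned down before the group's bits are tested; a partially-busy group
// keeps its pages.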
void
firehose_buffer_ring_enqueue(firehose_buffer_t fb, uint16_t ref)
{
	firehose_chunk_t fc = firehose_buffer_ref_to_chunk(fb, ref);
	uint16_t volatile *fbh_ring;
	uint16_t volatile *fbh_ring_head;
	uint16_t head, gen, dummy, idx;
	firehose_chunk_pos_u fc_pos = fc->fc_pos;
	bool for_io = fc_pos.fcp_flag_io;

	if (for_io) {
		fbh_ring = fb->fb_header.fbh_io_ring;
		fbh_ring_head = &fb->fb_header.fbh_ring_io_head;
	} else {
		fbh_ring = fb->fb_header.fbh_mem_ring;
		fbh_ring_head = &fb->fb_header.fbh_ring_mem_head;
	}

#ifdef KERNEL
	// The algorithm in the kernel is simpler:
	// 1. reserve a write position for the head
	// 2. store the new reference at that position
	// Enqueuers can't starve each other that way.
	//
	// However, the dequeuers now have to sometimes wait for the value written
	// in the ring to appear and have to spin, which is okay since the kernel
	// disables preemption around these two consecutive atomic operations.
	// See firehose_client_drain.
	__firehose_critical_region_enter();
	head = os_atomic_inc_orig(fbh_ring_head, relaxed);
	gen = head & FIREHOSE_RING_POS_GEN_MASK;
	idx = head & FIREHOSE_RING_POS_IDX_MASK;

	while (unlikely(!os_atomic_cmpxchgvw(&fbh_ring[idx], gen, gen | ref, &dummy,
			relaxed))) {
		// can only ever happen if a recycler is slow, this requires having
		// enough cores (>5 for I/O e.g.)
		_dispatch_wait_until(fbh_ring[idx] == gen);
	}
	__firehose_critical_region_leave();
	__firehose_buffer_push_to_logd(fb, for_io);
#else
	// The algorithm is:
	// 1. read the head position
	// 2. cmpxchg head.gen with the (head.gen | ref) at head.idx
	// 3. if it fails wait until either the head cursor moves,
	//    or the cell becomes free
	//
	// The most likely stall at (3) is because another enqueuer raced us
	// and made the cell non empty.
	//
	// The alternative is to reserve the enqueue slot with an atomic inc.
	// Then write the ref into the ring. This would be much simpler as the
	// generation packing wouldn't be required (though setting the ring cell
	// would still need a cmpxchg loop to avoid clobbering values of slow
	// dequeuers)
	//
	// But then that means that flushers (logd) could be starved until that
	// finishes, and logd cannot be held forever (that could even be a logd
	// DoS from malicious programs). Meaning that logd would stop draining
	// buffer queues when encountering that issue, leading the program to be
	// stuck in firehose_client_push() apparently waiting on logd, while
	// really it's waiting on itself. It's better for the scheduler if we
	// make it clear that we're waiting on ourselves!

	head = os_atomic_load(fbh_ring_head, relaxed);
	for (;;) {
		gen = head & FIREHOSE_RING_POS_GEN_MASK;
		idx = head & FIREHOSE_RING_POS_IDX_MASK;

		// a thread being preempted here for GEN_MASK worth of ring rotations
		// could lead to the cmpxchg succeeding, and a bogus enqueue
		// (confused enqueuer)
		if (fastpath(os_atomic_cmpxchgvw(&fbh_ring[idx], gen, gen | ref, &dummy,
				relaxed))) {
			if (fastpath(os_atomic_cmpxchgv(fbh_ring_head, head, head + 1,
					&head, release))) {
				__firehose_critical_region_leave();
				break;
			}
			// this thread is a confused enqueuer, need to undo enqueue
			os_atomic_store(&fbh_ring[idx], gen, relaxed);
			continue;
		}

		_dispatch_wait_until(({
			// wait until either the head moves (another enqueuer is done)
			// or (not very likely) a recycler is very slow
			// or (very unlikely) the confused thread undoes its enqueue
			uint16_t old_head = head;
			head = *fbh_ring_head;
			head != old_head || fbh_ring[idx] == gen;
		}));
	}

	pthread_priority_t pp = fc_pos.fcp_qos;
	pp <<= _PTHREAD_PRIORITY_QOS_CLASS_SHIFT;
	firehose_client_send_push_async(fb, _pthread_qos_class_decode(pp, NULL, NULL),
			for_io);
#endif
}
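// Editor annotation: both variants publish `gen | ref` into the ring cell.
// Userspace moves the head with a separate release cmpxchg so a preempted
// (confused) enqueuer can detect the race and undo its store, while the
// kernel instead relies on preemption being disabled across the two atomics.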
void
firehose_buffer_force_connect(firehose_buffer_t fb)
{
	mach_port_t sendp = fb->fb_header.fbh_sendp;
	if (sendp == MACH_PORT_NULL) firehose_client_reconnect(fb, MACH_PORT_NULL);
}
static inline uint16_t
firehose_buffer_ring_try_recycle(firehose_buffer_t fb)
{
	firehose_ring_tail_u pos, old;
	uint16_t volatile *fbh_ring;
	uint16_t gen, ref, entry, tail;
	firehose_chunk_t fc;
	bool for_io;

	os_atomic_rmw_loop2o(&fb->fb_header, fbh_ring_tail.frp_atomic_tail,
			old.frp_atomic_tail, pos.frp_atomic_tail, relaxed, {
		pos = old;
		if (fastpath(old.frp_mem_tail != old.frp_mem_flushed)) {
			pos.frp_mem_tail++;
		} else if (fastpath(old.frp_io_tail != old.frp_io_flushed)) {
			pos.frp_io_tail++;
		} else {
			os_atomic_rmw_loop_give_up(return 0);
		}
	});

	// there's virtually no chance that the lack of acquire barrier above
	// lets us read a value from the ring so stale that it's still an Empty
	// marker. For correctness purposes have a cheap loop that should never
	// really loop, instead of an acquire barrier in the cmpxchg above.
	for_io = (pos.frp_io_tail != old.frp_io_tail);
	if (for_io) {
		fbh_ring = fb->fb_header.fbh_io_ring;
		tail = old.frp_io_tail & FIREHOSE_RING_POS_IDX_MASK;
	} else {
		fbh_ring = fb->fb_header.fbh_mem_ring;
		tail = old.frp_mem_tail & FIREHOSE_RING_POS_IDX_MASK;
	}
	_dispatch_wait_until((entry = fbh_ring[tail]) & FIREHOSE_RING_POS_IDX_MASK);
	// Needed for process death handling (recycle-dequeue):
	// No atomic fences required, we merely want to make sure the observers
	// will see memory effects in program (asm) order.
	// 1. the chunk is marked as "void&full" (clobbering the pos with FULL_BIT)
	// 2. then we remove any reference to the chunk from the ring
	// This ensures that if we don't see a reference to a chunk in the ring
	// and it is dirty, it is a chunk being written to that needs a flush
	gen = (entry & FIREHOSE_RING_POS_GEN_MASK) + FIREHOSE_RING_POS_GEN_INC;
	ref = entry & FIREHOSE_RING_POS_IDX_MASK;
	fc = firehose_buffer_ref_to_chunk(fb, ref);

	if (!for_io && fc->fc_pos.fcp_stream == firehose_stream_metadata) {
		os_atomic_and2o(fb, fb_header.fbh_bank.fbb_metadata_bitmap,
				~(1ULL << ref), relaxed);
	}
	os_atomic_store2o(fc, fc_pos.fcp_atomic_pos,
			FIREHOSE_CHUNK_POS_FULL_BIT, relaxed);
	dispatch_compiler_barrier();
	os_atomic_store(&fbh_ring[tail], gen | 0, relaxed);
	return ref;
}
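// Editor annotation: recycling publishes in the reverse order of enqueue --
// the chunk is first clobbered to "void&full", then the ring cell is bumped
// to the next generation (`gen | 0` reads as an empty marker), which is what
// lets crash triage tell a recycled chunk apart from one still needing a flush.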
static firehose_tracepoint_t
firehose_buffer_tracepoint_reserve_slow2(firehose_buffer_t fb,
		firehose_tracepoint_query_t ask, uint8_t **privptr, uint16_t ref)
{
	const uint64_t bank_unavail_mask = FIREHOSE_BANK_UNAVAIL_MASK(ask->for_io);
	const uint64_t bank_inc = FIREHOSE_BANK_INC(ask->for_io);
	firehose_buffer_bank_t const fbb = &fb->fb_header.fbh_bank;
	firehose_bank_state_u state;
	uint16_t fbs_max_ref;

	// first wait for our bank to have space, if needed
	if (!fastpath(ask->is_bank_ok)) {
		state.fbs_atomic_state =
				os_atomic_load2o(fbb, fbb_state.fbs_atomic_state, relaxed);
		while ((state.fbs_atomic_state - bank_inc) & bank_unavail_mask) {
			firehose_client_send_push(fb, ask->for_io, &state);
			if (slowpath(fb->fb_header.fbh_sendp == MACH_PORT_DEAD)) {
				// logd was unloaded, give up
				return NULL;
			}
		}
		ask->is_bank_ok = true;
		fbs_max_ref = state.fbs_max_ref;
	} else {
		fbs_max_ref = fbb->fbb_state.fbs_max_ref;
	}

	// second, if we were passed a chunk, we may need to shrink
	if (slowpath(ref)) {
		goto try_shrink;
	}

	// third, wait for a chunk to come up, and if not, wait on the daemon
	for (;;) {
		if (fastpath(ref = firehose_buffer_ring_try_recycle(fb))) {
try_shrink:
			if (slowpath(ref >= fbs_max_ref)) {
				ref = firehose_buffer_ring_shrink(fb, ref);
				if (!ref) {
					continue;
				}
			}
			break;
		}
		if (fastpath(ref = firehose_buffer_ring_try_grow(fbb, fbs_max_ref))) {
			break;
		}
		firehose_client_send_push(fb, ask->for_io, NULL);
		if (slowpath(fb->fb_header.fbh_sendp == MACH_PORT_DEAD)) {
			// logd was unloaded, give up
			break;
		}
	}

	return firehose_buffer_stream_chunk_install(fb, ask, privptr, ref);
}
#ifdef KERNEL
static inline dispatch_lock
_dispatch_gate_lock_load_seq_cst(dispatch_gate_t l)
{
	return os_atomic_load(&l->dgl_lock, seq_cst);
}
static void
_dispatch_gate_wait(dispatch_gate_t l, uint32_t flags)
{
	(void)flags;
	_dispatch_wait_until(_dispatch_gate_lock_load_seq_cst(l) == 0);
}
#endif
firehose_tracepoint_t
firehose_buffer_tracepoint_reserve_slow(firehose_buffer_t fb,
		firehose_tracepoint_query_t ask, uint8_t **privptr)
{
	const unsigned for_io = ask->for_io;
	const firehose_buffer_bank_t fbb = &fb->fb_header.fbh_bank;
	firehose_bank_state_u state;
	uint16_t ref = 0;

	uint64_t unavail_mask = FIREHOSE_BANK_UNAVAIL_MASK(for_io);
#ifndef KERNEL
	state.fbs_atomic_state = os_atomic_add_orig2o(fbb,
			fbb_state.fbs_atomic_state, FIREHOSE_BANK_INC(for_io), acquire);
	if (fastpath(!(state.fbs_atomic_state & unavail_mask))) {
		ask->is_bank_ok = true;
		if (fastpath(ref = firehose_buffer_ring_try_recycle(fb))) {
			if (fastpath(ref < state.fbs_max_ref)) {
				return firehose_buffer_stream_chunk_install(fb, ask,
						privptr, ref);
			}
		}
	}
	return firehose_buffer_tracepoint_reserve_slow2(fb, ask, privptr, ref);
#else
	firehose_bank_state_u value;
	ask->is_bank_ok = os_atomic_rmw_loop2o(fbb, fbb_state.fbs_atomic_state,
			state.fbs_atomic_state, value.fbs_atomic_state, acquire, {
		value = state;
		if (slowpath((value.fbs_atomic_state & unavail_mask) != 0)) {
			os_atomic_rmw_loop_give_up(break);
		}
		value.fbs_atomic_state += FIREHOSE_BANK_INC(for_io);
	});
	if (ask->is_bank_ok) {
		ref = firehose_buffer_ring_try_recycle(fb);
		if (slowpath(ref == 0)) {
			// the kernel has no overlap between I/O and memory chunks,
			// having an available bank slot means we should be able to recycle
			DISPATCH_INTERNAL_CRASH(0, "Unable to recycle a chunk");
		}
	}
	// rdar://25137005 installing `0` unlocks the allocator
	return firehose_buffer_stream_chunk_install(fb, ask, privptr, ref);
#endif // KERNEL
}
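// Editor annotation: userspace optimistically charges the bank with a single
// atomic add and repairs any over-subscription in reserve_slow2(); the kernel
// variant must never over-subscribe, hence the rmw loop above that gives up
// instead of moving past the unavailable bit.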
#ifdef KERNEL
firehose_tracepoint_t
__firehose_buffer_tracepoint_reserve(uint64_t stamp, firehose_stream_t stream,
		uint16_t pubsize, uint16_t privsize, uint8_t **privptr)
{
	firehose_buffer_t fb = kernel_firehose_buffer;
	if (!fastpath(fb)) {
		return NULL;
	}
	return firehose_buffer_tracepoint_reserve(fb, stamp, stream, pubsize,
			privsize, privptr);
}
firehose_buffer_t
__firehose_buffer_create(size_t *size)
{
	if (!kernel_firehose_buffer) {
		kernel_firehose_buffer = firehose_buffer_create(MACH_PORT_NULL, 0, 0);
	}

	if (size) {
		*size = FIREHOSE_BUFFER_KERNEL_CHUNK_COUNT * FIREHOSE_CHUNK_SIZE;
	}
	return kernel_firehose_buffer;
}
void
__firehose_buffer_tracepoint_flush(firehose_tracepoint_t ft,
		firehose_tracepoint_id_u ftid)
{
	return firehose_buffer_tracepoint_flush(kernel_firehose_buffer, ft, ftid);
}
void
__firehose_merge_updates(firehose_push_reply_t update)
{
	firehose_buffer_t fb = kernel_firehose_buffer;
	if (fastpath(fb)) {
		firehose_client_merge_updates(fb, true, update, NULL);
	}
}
#endif // KERNEL

#endif // OS_FIREHOSE_SPI