/*
 * Copyright (c) 2015 Apple Inc. All rights reserved.
 *
 * @APPLE_APACHE_LICENSE_HEADER_START@
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @APPLE_APACHE_LICENSE_HEADER_END@
 */
#include <mach/vm_statistics.h> // VM_MEMORY_GENEALOGY
#ifdef KERNEL

#define OS_VOUCHER_ACTIVITY_SPI_TYPES 1
#define OS_FIREHOSE_SPI 1
#define __OS_EXPOSE_INTERNALS_INDIRECT__ 1

#define DISPATCH_PURE_C 1
#define _safe_cast_to_long(x) \
		({ _Static_assert(sizeof(typeof(x)) <= sizeof(long), \
				"__builtin_expect doesn't support types wider than long"); \
				(long)(x); })
#define fastpath(x) ((typeof(x))__builtin_expect(_safe_cast_to_long(x), ~0l))
#define slowpath(x) ((typeof(x))__builtin_expect(_safe_cast_to_long(x), 0l))
#define os_likely(x) __builtin_expect(!!(x), 1)
#define os_unlikely(x) __builtin_expect(!!(x), 0)
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
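// Illustrative expansion (editor annotation, not in the original source):
// with these shims, `if (slowpath(kr)) { ... }` compiles down to
// `if (__builtin_expect((long)(kr), 0l)) { ... }`, telling the compiler the
// branch is cold so the error path is laid out out-of-line.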
#define DISPATCH_INTERNAL_CRASH(ac, msg) ({ panic(msg); __builtin_trap(); })
#if defined(__x86_64__) || defined(__i386__)
#define dispatch_hardware_pause() __asm__("pause")
#elif (defined(__arm__) && defined(_ARM_ARCH_7) && defined(__thumb__)) || \
		defined(__arm64__)
#define dispatch_hardware_pause() __asm__("yield")
#define dispatch_hardware_wfe()   __asm__("wfe")
#else
#define dispatch_hardware_pause() __asm__("")
#endif
#define _dispatch_wait_until(c) do { \
		while (!fastpath(c)) { \
			dispatch_hardware_pause(); \
		} } while (0)
#define dispatch_compiler_barrier() __asm__ __volatile__("" ::: "memory")
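// Editor annotation: _dispatch_wait_until() is a plain spin loop.
// dispatch_hardware_pause() (PAUSE/YIELD) only throttles the pipeline and
// provides no memory ordering, so the awaited condition must read memory the
// other side publishes with its own ordering, as the atomics below do.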
typedef uint32_t dispatch_lock;
typedef struct dispatch_gate_s {
	dispatch_lock dgl_lock;
} dispatch_gate_s, *dispatch_gate_t;
#define DLOCK_LOCK_DATA_CONTENTION 0
static void _dispatch_gate_wait(dispatch_gate_t l, uint32_t flags);
#include <kern/debug.h>
#include <machine/cpu_number.h>
#include <kern/thread.h>
#include <mach/port.h>
#include <sys/param.h>
#include <sys/types.h>
#include <vm/vm_kern.h>
#include <internal/atomic.h> // os/internal/atomic.h
#include <firehose_types_private.h> // <firehose/firehose_types_private.h>
#include <tracepoint_private.h> // <firehose/tracepoint_private.h>
#include <chunk_private.h> // <firehose/chunk_private.h>
#include "os/firehose_buffer_private.h"
#include "firehose_buffer_internal.h"
#include "firehose_inline_internal.h"
#else
#include "internal.h"
#include "firehose.h" // MiG
#include "firehose_replyServer.h" // MiG
#endif

#if OS_FIREHOSE_SPI
#if __has_feature(c_static_assert)
_Static_assert(sizeof(((firehose_stream_state_u *)NULL)->fss_gate) ==
		sizeof(((firehose_stream_state_u *)NULL)->fss_allocator),
		"fss_gate and fss_allocator alias");
_Static_assert(offsetof(firehose_stream_state_u, fss_gate) ==
		offsetof(firehose_stream_state_u, fss_allocator),
		"fss_gate and fss_allocator alias");
_Static_assert(sizeof(struct firehose_buffer_header_s) ==
		FIREHOSE_CHUNK_SIZE,
		"firehose buffer header must be 4k");
_Static_assert(offsetof(struct firehose_buffer_header_s, fbh_unused) <=
		FIREHOSE_CHUNK_SIZE - FIREHOSE_BUFFER_LIBTRACE_HEADER_SIZE,
		"we must have enough space for the libtrace header");
_Static_assert(powerof2(FIREHOSE_BUFFER_CHUNK_COUNT),
		"CHUNK_COUNT must be a power of two");
_Static_assert(FIREHOSE_BUFFER_CHUNK_COUNT <= 64,
		"CHUNK_COUNT must be at most 64 (bitmap in uint64_t)");
#ifdef FIREHOSE_BUFFER_MADVISE_CHUNK_COUNT
_Static_assert(powerof2(FIREHOSE_BUFFER_MADVISE_CHUNK_COUNT),
		"madvise chunk count must be a power of two");
#endif
_Static_assert(sizeof(struct firehose_buffer_stream_s) == 128,
		"firehose buffer stream must be small (single cacheline if possible)");
_Static_assert(sizeof(struct firehose_tracepoint_s) == 24,
		"tracepoint header should be exactly 24 bytes");
#endif
#ifdef KERNEL
static firehose_buffer_t kernel_firehose_buffer = NULL;
#endif
#pragma mark Client IPC to the log daemon
static mach_port_t
firehose_client_reconnect(firehose_buffer_t fb, mach_port_t oldsendp)
{
	mach_port_t sendp = MACH_PORT_NULL;
	mach_port_t mem_port = MACH_PORT_NULL, extra_info_port = MACH_PORT_NULL;
	mach_vm_size_t extra_info_size = 0;
	kern_return_t kr;

	dispatch_assert(fb->fb_header.fbh_logd_port);
	dispatch_assert(fb->fb_header.fbh_recvp);
	dispatch_assert(fb->fb_header.fbh_uniquepid != 0);

	_dispatch_unfair_lock_lock(&fb->fb_header.fbh_logd_lock);
	sendp = fb->fb_header.fbh_sendp;
	if (sendp != oldsendp || sendp == MACH_PORT_DEAD) {
		// someone beat us to reconnecting or logd was unloaded, just go away
		goto done;
	}

	if (oldsendp) {
		// same trick as _xpc_pipe_dispose: keeping a send right
		// maintains the name, so that we can destroy the receive right
		// in case we still have it.
		(void)firehose_mach_port_recv_dispose(oldsendp, fb);
		firehose_mach_port_send_release(oldsendp);
		fb->fb_header.fbh_sendp = MACH_PORT_NULL;
	}

	/* Create a memory port for the buffer VM region */
	vm_prot_t flags = VM_PROT_READ | MAP_MEM_VM_SHARE;
	memory_object_size_t size = sizeof(union firehose_buffer_u);
	mach_vm_address_t addr = (vm_address_t)fb;

	kr = mach_make_memory_entry_64(mach_task_self(), &size, addr,
			flags, &mem_port, MACH_PORT_NULL);
	if (size < sizeof(union firehose_buffer_u)) {
		DISPATCH_CLIENT_CRASH(size, "Invalid size for the firehose buffer");
	}
	if (kr) {
		// the client probably has some form of memory corruption
		// and/or a port leak
		DISPATCH_CLIENT_CRASH(kr, "Unable to make memory port");
	}

	/* Create a communication port to the logging daemon */
	uint32_t opts = MPO_CONTEXT_AS_GUARD | MPO_TEMPOWNER | MPO_INSERT_SEND_RIGHT;
	sendp = firehose_mach_port_allocate(opts, fb);

	if (oldsendp && _voucher_libtrace_hooks->vah_get_reconnect_info) {
		kr = _voucher_libtrace_hooks->vah_get_reconnect_info(&addr, &size);
		if (likely(kr == KERN_SUCCESS) && addr && size) {
			extra_info_size = size;
			kr = mach_make_memory_entry_64(mach_task_self(), &size, addr,
					flags, &extra_info_port, MACH_PORT_NULL);
			if (unlikely(kr)) {
				// the client probably has some form of memory corruption
				// and/or a port leak
				DISPATCH_CLIENT_CRASH(kr, "Unable to make memory port");
			}
			kr = mach_vm_deallocate(mach_task_self(), addr, size);
			(void)dispatch_assume_zero(kr);
		}
	}

	/* Call the firehose_register() MIG routine */
	kr = firehose_send_register(fb->fb_header.fbh_logd_port, mem_port,
			sizeof(union firehose_buffer_u), sendp, fb->fb_header.fbh_recvp,
			extra_info_port, extra_info_size);
	if (likely(kr == KERN_SUCCESS)) {
		fb->fb_header.fbh_sendp = sendp;
	} else if (unlikely(kr == MACH_SEND_INVALID_DEST)) {
		// MACH_SEND_INVALID_DEST here means that logd's bootstrap port
		// turned into a dead name, which in turn means that logd has been
		// unloaded. The only option here is to give up permanently.
		//
		// same trick as _xpc_pipe_dispose: keeping a send right
		// maintains the name, so that we can destroy the receive right
		// in case we still have it.
		(void)firehose_mach_port_recv_dispose(sendp, fb);
		firehose_mach_port_send_release(sendp);
		firehose_mach_port_send_release(mem_port);
		if (extra_info_port) firehose_mach_port_send_release(extra_info_port);
		sendp = fb->fb_header.fbh_sendp = MACH_PORT_DEAD;
	} else {
		// the client probably has some form of memory corruption
		// and/or a port leak
		DISPATCH_CLIENT_CRASH(kr, "Unable to register with logd");
	}

done:
	_dispatch_unfair_lock_unlock(&fb->fb_header.fbh_logd_lock);
	return sendp;
}
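// Editor annotation: the buffer itself is shared with logd as a Mach memory
// entry, so a reconnect only has to re-send ports (and optionally the
// extra-info region); no log data is ever copied.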
static void
firehose_buffer_update_limits_unlocked(firehose_buffer_t fb)
{
	firehose_bank_state_u old, new;
	firehose_buffer_bank_t fbb = &fb->fb_header.fbh_bank;
	unsigned long fbb_flags = fbb->fbb_flags;
	uint16_t io_streams = 0, mem_streams = 0;
	uint16_t total = 0;

	for (size_t i = 0; i < countof(fb->fb_header.fbh_stream); i++) {
		firehose_buffer_stream_t fbs = fb->fb_header.fbh_stream + i;

		if (fbs->fbs_state.fss_current == FIREHOSE_STREAM_STATE_PRISTINE) {
			continue;
		}
		if ((1UL << i) & firehose_stream_uses_io_bank) {
			io_streams++;
		} else {
			mem_streams++;
		}
	}

	if (fbb_flags & FIREHOSE_BUFFER_BANK_FLAG_LOW_MEMORY) {
		if (fbb_flags & FIREHOSE_BUFFER_BANK_FLAG_HIGH_RATE) {
			total = 1 + 4 * mem_streams + io_streams; // usually 10
		} else {
			total = 1 + 2 + mem_streams + io_streams; // usually 6
		}
	} else {
		if (fbb_flags & FIREHOSE_BUFFER_BANK_FLAG_HIGH_RATE) {
			total = 1 + 6 * mem_streams + 3 * io_streams; // usually 16
		} else {
			total = 1 + 2 * (mem_streams + io_streams); // usually 7
		}
	}

	uint16_t ratio = (uint16_t)(PAGE_SIZE / FIREHOSE_CHUNK_SIZE);
	if (ratio > 1) {
		total = roundup(total, ratio);
	}
	total = MAX(total, FIREHOSE_BUFFER_CHUNK_PREALLOCATED_COUNT);
	if (!(fbb_flags & FIREHOSE_BUFFER_BANK_FLAG_LOW_MEMORY)) {
		total = MAX(total, TARGET_OS_EMBEDDED ? 8 : 12);
	}

	new.fbs_max_ref = total;
	new.fbs_mem_bank = FIREHOSE_BANK_UNAVAIL_BIT - (total - 1);
	new.fbs_io_bank = FIREHOSE_BANK_UNAVAIL_BIT -
			MAX(3 * total / 8, 2 * io_streams);
	new.fbs_unused = 0;

	old = fbb->fbb_limits;
	fbb->fbb_limits = new;
	if (old.fbs_atomic_state == new.fbs_atomic_state) {
		return;
	}
	os_atomic_add2o(&fb->fb_header, fbh_bank.fbb_state.fbs_atomic_state,
			new.fbs_atomic_state - old.fbs_atomic_state, relaxed);
}
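// Worked example (editor annotation): with the default flags (neither
// LOW_MEMORY nor HIGH_RATE), 2 memory streams and 1 I/O stream give
// total = 1 + 2 * (2 + 1) = 7 -- the "usually 7" case -- before the
// roundup()/MAX() floors above are applied.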
firehose_buffer_t
firehose_buffer_create(mach_port_t logd_port, uint64_t unique_pid,
		unsigned long bank_flags)
{
	firehose_buffer_header_t fbh;
	firehose_buffer_t fb;

#ifndef KERNEL
	mach_vm_address_t vm_addr = 0;
	kern_return_t kr;

	vm_addr = vm_page_size;
	const size_t madvise_bytes = FIREHOSE_BUFFER_MADVISE_CHUNK_COUNT *
			FIREHOSE_CHUNK_SIZE;
	if (slowpath(madvise_bytes % PAGE_SIZE)) {
		DISPATCH_INTERNAL_CRASH(madvise_bytes,
				"Invalid values for MADVISE_CHUNK_COUNT / CHUNK_SIZE");
	}

	kr = mach_vm_map(mach_task_self(), &vm_addr, sizeof(*fb), 0,
			VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE |
			VM_MAKE_TAG(VM_MEMORY_GENEALOGY), MEMORY_OBJECT_NULL, 0, FALSE,
			VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_NONE);
	if (slowpath(kr)) {
		if (kr != KERN_NO_SPACE) dispatch_assume_zero(kr);
		firehose_mach_port_send_release(logd_port);
		return NULL;
	}

	uint32_t opts = MPO_CONTEXT_AS_GUARD | MPO_STRICT | MPO_INSERT_SEND_RIGHT;
#else
	vm_offset_t vm_addr = 0;
	vm_size_t size;

	size = FIREHOSE_BUFFER_KERNEL_CHUNK_COUNT * FIREHOSE_CHUNK_SIZE;
	__firehose_allocate(&vm_addr, size);

	(void)logd_port; (void)unique_pid;
#endif // KERNEL

	fb = (firehose_buffer_t)vm_addr;
	fbh = &fb->fb_header;
#ifndef KERNEL
	fbh->fbh_logd_port = logd_port;
	fbh->fbh_pid = getpid();
	fbh->fbh_uniquepid = unique_pid;
	fbh->fbh_recvp = firehose_mach_port_allocate(opts, fb);
#endif // !KERNEL
	fbh->fbh_spi_version = OS_FIREHOSE_SPI_VERSION;
	fbh->fbh_bank.fbb_flags = bank_flags;

#ifndef KERNEL
	for (size_t i = 0; i < countof(fbh->fbh_stream); i++) {
		firehose_buffer_stream_t fbs = fbh->fbh_stream + i;
		if (i != firehose_stream_metadata) {
			fbs->fbs_state.fss_current = FIREHOSE_STREAM_STATE_PRISTINE;
		}
	}
	firehose_buffer_update_limits_unlocked(fb);
#else
	uint16_t total = FIREHOSE_BUFFER_CHUNK_PREALLOCATED_COUNT + 1;
	const uint16_t num_kernel_io_pages = 8;
	uint16_t io_pages = num_kernel_io_pages;
	fbh->fbh_bank.fbb_state = (firehose_bank_state_u){
		.fbs_max_ref = total,
		.fbs_io_bank = FIREHOSE_BANK_UNAVAIL_BIT - io_pages,
		.fbs_mem_bank = FIREHOSE_BANK_UNAVAIL_BIT - (total - io_pages - 1),
	};
	fbh->fbh_bank.fbb_limits = fbh->fbh_bank.fbb_state;
#endif // KERNEL

	// now pre-allocate some chunks in the ring directly
#ifdef KERNEL
	const uint16_t pre_allocated = FIREHOSE_BUFFER_CHUNK_PREALLOCATED_COUNT - 1;
#else
	const uint16_t pre_allocated = FIREHOSE_BUFFER_CHUNK_PREALLOCATED_COUNT;
#endif

	fbh->fbh_bank.fbb_bitmap = (1U << (1 + pre_allocated)) - 1;

	for (uint16_t i = 0; i < pre_allocated; i++) {
		fbh->fbh_mem_ring[i] = i + 1;
	}
	fbh->fbh_bank.fbb_mem_flushed = pre_allocated;
	fbh->fbh_ring_mem_head = pre_allocated;

#ifdef KERNEL
	// install the early boot page as the current one for persist
	fbh->fbh_stream[firehose_stream_persist].fbs_state.fss_current =
			FIREHOSE_BUFFER_CHUNK_PREALLOCATED_COUNT;
	fbh->fbh_bank.fbb_state.fbs_io_bank += 1;
#endif

	fbh->fbh_ring_tail = (firehose_ring_tail_u){
		.frp_mem_flushed = pre_allocated,
	};
	return fb;
}
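// Editor annotation: chunk 0 of the buffer holds the header itself, so usable
// refs start at 1. The (1U << (1 + pre_allocated)) - 1 bitmap marks the header
// chunk plus each pre-allocated chunk busy, and fbh_mem_ring[i] = i + 1 seeds
// the memory ring with those refs.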
static void
firehose_notify_source_invoke(mach_msg_header_t *hdr)
{
	const size_t reply_size =
			sizeof(union __ReplyUnion__firehose_client_firehoseReply_subsystem);
	firehose_mig_server(firehoseReply_server, reply_size, hdr);
}
static void
firehose_client_register_for_notifications(firehose_buffer_t fb)
{
	static const struct dispatch_continuation_s dc = {
		.dc_func = (void *)firehose_notify_source_invoke,
	};
	firehose_buffer_header_t fbh = &fb->fb_header;

	dispatch_once(&fbh->fbh_notifs_pred, ^{
		dispatch_source_t ds = _dispatch_source_create_mach_msg_direct_recv(
				fbh->fbh_recvp, &dc);
		dispatch_set_context(ds, fb);
		dispatch_activate(ds);
		fbh->fbh_notifs_source = ds;
	});
}
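// Editor annotation: dispatch_once() guarantees the notification source is
// created exactly once per process, even when several threads race through
// firehose_client_send_push_async() below.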
static void
firehose_client_send_push_async(firehose_buffer_t fb, qos_class_t qos,
		bool for_io)
{
	bool ask_for_notifs = fb->fb_header.fbh_notifs_source != NULL;
	mach_port_t sendp = fb->fb_header.fbh_sendp;
	kern_return_t kr = KERN_FAILURE;

	if (!ask_for_notifs && _dispatch_is_multithreaded_inline()) {
		firehose_client_register_for_notifications(fb);
		ask_for_notifs = true;
	}

	if (slowpath(sendp == MACH_PORT_DEAD)) {
		return;
	}

	if (fastpath(sendp)) {
		kr = firehose_send_push_async(sendp, qos, for_io, ask_for_notifs);
		if (likely(kr == KERN_SUCCESS)) {
			return;
		}
		if (kr != MACH_SEND_INVALID_DEST) {
			DISPATCH_VERIFY_MIG(kr);
			dispatch_assume_zero(kr);
		}
	}

	sendp = firehose_client_reconnect(fb, sendp);
	if (fastpath(MACH_PORT_VALID(sendp))) {
		kr = firehose_send_push_async(sendp, qos, for_io, ask_for_notifs);
		if (likely(kr == KERN_SUCCESS)) {
			return;
		}
		if (kr != MACH_SEND_INVALID_DEST) {
			DISPATCH_VERIFY_MIG(kr);
			dispatch_assume_zero(kr);
		}
	}
}
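// Editor annotation: both push paths use the same retry shape -- one attempt
// on the cached send right, then a single firehose_client_reconnect() and one
// retry. MACH_SEND_INVALID_DEST is the only error treated as recoverable;
// anything else is asserted on.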
void
firehose_client_merge_updates(firehose_buffer_t fb, bool async_notif,
		firehose_push_reply_t reply, firehose_bank_state_u *state_out)
{
	firehose_bank_state_u state;
	firehose_ring_tail_u otail, ntail;
	uint64_t old_flushed_pos, bank_updates;
	uint16_t io_delta = 0;
	uint16_t mem_delta = 0;

	if (firehose_atomic_maxv2o(&fb->fb_header, fbh_bank.fbb_mem_flushed,
			reply.fpr_mem_flushed_pos, &old_flushed_pos, relaxed)) {
		mem_delta = (uint16_t)(reply.fpr_mem_flushed_pos - old_flushed_pos);
	}
	if (firehose_atomic_maxv2o(&fb->fb_header, fbh_bank.fbb_io_flushed,
			reply.fpr_io_flushed_pos, &old_flushed_pos, relaxed)) {
		io_delta = (uint16_t)(reply.fpr_io_flushed_pos - old_flushed_pos);
	}
	_dispatch_debug("client side: mem: +%d->%llx, io: +%d->%llx",
			mem_delta, reply.fpr_mem_flushed_pos,
			io_delta, reply.fpr_io_flushed_pos);

	if (!mem_delta && !io_delta) {
		if (state_out) {
			state_out->fbs_atomic_state = os_atomic_load2o(&fb->fb_header,
					fbh_bank.fbb_state.fbs_atomic_state, relaxed);
		}
		return;
	}

	__firehose_critical_region_enter();
	os_atomic_rmw_loop2o(&fb->fb_header, fbh_ring_tail.frp_atomic_tail,
			otail.frp_atomic_tail, ntail.frp_atomic_tail, relaxed, {
		ntail = otail;
		// overflow handles the generation wraps
		ntail.frp_io_flushed += io_delta;
		ntail.frp_mem_flushed += mem_delta;
	});

	bank_updates = ((uint64_t)mem_delta << FIREHOSE_BANK_SHIFT(0)) |
			((uint64_t)io_delta << FIREHOSE_BANK_SHIFT(1));
	state.fbs_atomic_state = os_atomic_sub2o(&fb->fb_header,
			fbh_bank.fbb_state.fbs_atomic_state, bank_updates, release);
	__firehose_critical_region_leave();

	if (state_out) *state_out = state;

	if (async_notif) {
		if (io_delta) {
			os_atomic_inc2o(&fb->fb_header, fbh_bank.fbb_io_notifs, relaxed);
		}
		if (mem_delta) {
			os_atomic_inc2o(&fb->fb_header, fbh_bank.fbb_mem_notifs, relaxed);
		}
	}
}
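// Editor annotation: fbs_atomic_state packs both bank counters into one
// 64-bit word (FIREHOSE_BANK_SHIFT(0) for memory, FIREHOSE_BANK_SHIFT(1) for
// I/O), so a single os_atomic_sub2o() with release ordering credits both
// banks atomically.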
void
firehose_client_send_push(firehose_buffer_t fb, bool for_io,
		firehose_bank_state_u *state_out)
{
	mach_port_t sendp = fb->fb_header.fbh_sendp;
	firehose_push_reply_t push_reply = { };
	qos_class_t qos = qos_class_self();
	kern_return_t kr;

	if (slowpath(sendp == MACH_PORT_DEAD)) {
		return;
	}
	if (fastpath(sendp)) {
		kr = firehose_send_push(sendp, qos, for_io, &push_reply);
		if (likely(kr == KERN_SUCCESS)) {
			goto success;
		}
		if (kr != MACH_SEND_INVALID_DEST) {
			DISPATCH_VERIFY_MIG(kr);
			dispatch_assume_zero(kr);
		}
	}

	sendp = firehose_client_reconnect(fb, sendp);
	if (fastpath(MACH_PORT_VALID(sendp))) {
		kr = firehose_send_push(sendp, qos, for_io, &push_reply);
		if (likely(kr == KERN_SUCCESS)) {
			goto success;
		}
		if (kr != MACH_SEND_INVALID_DEST) {
			DISPATCH_VERIFY_MIG(kr);
			dispatch_assume_zero(kr);
		}
	}

	if (state_out) {
		state_out->fbs_atomic_state = os_atomic_load2o(&fb->fb_header,
				fbh_bank.fbb_state.fbs_atomic_state, relaxed);
	}
	return;

success:
	if (memcmp(&push_reply, &FIREHOSE_PUSH_REPLY_CORRUPTED,
			sizeof(push_reply)) == 0) {
		// TODO: find out the actual cause and log it
		DISPATCH_CLIENT_CRASH(0, "Memory corruption in the logging buffers");
	}

	if (for_io) {
		os_atomic_inc2o(&fb->fb_header, fbh_bank.fbb_io_sync_pushes, relaxed);
	} else {
		os_atomic_inc2o(&fb->fb_header, fbh_bank.fbb_mem_sync_pushes, relaxed);
	}
	// TODO <rdar://problem/22963876>
	//
	// use fbb_*_flushes and fbb_*_sync_pushes to decide to dynamically
	// allow using more buffers, if not under memory pressure.
	//
	// There is only a point for multithreaded clients if:
	// - enough samples (total_flushes above some limits)
	// - the ratio is really bad (a push per cycle is definitely a problem)
	return firehose_client_merge_updates(fb, false, push_reply, state_out);
}
kern_return_t
firehose_client_push_reply(mach_port_t req_port OS_UNUSED,
		kern_return_t rtc, firehose_push_reply_t push_reply OS_UNUSED)
{
	DISPATCH_INTERNAL_CRASH(rtc, "firehose_push_reply should never be sent "
			"to the buffer receive port");
}
kern_return_t
firehose_client_push_notify_async(mach_port_t server_port OS_UNUSED,
		firehose_push_reply_t push_reply)
{
	// see _dispatch_source_merge_mach_msg_direct
	dispatch_queue_t dq = _dispatch_queue_get_current();
	firehose_buffer_t fb = dispatch_get_context(dq);
	firehose_client_merge_updates(fb, true, push_reply, NULL);
	return KERN_SUCCESS;
}
#pragma mark Buffer handling
void
firehose_buffer_update_limits(firehose_buffer_t fb)
{
	dispatch_unfair_lock_t fbb_lock = &fb->fb_header.fbh_bank.fbb_lock;
	_dispatch_unfair_lock_lock(fbb_lock);
	firehose_buffer_update_limits_unlocked(fb);
	_dispatch_unfair_lock_unlock(fbb_lock);
}
static inline firehose_tracepoint_t
firehose_buffer_chunk_init(firehose_chunk_t fc,
		firehose_tracepoint_query_t ask, uint8_t **privptr)
{
	const uint16_t ft_size = offsetof(struct firehose_tracepoint_s, ft_data);

	uint16_t pub_offs = offsetof(struct firehose_chunk_s, fc_data);
	uint16_t priv_offs = FIREHOSE_CHUNK_SIZE;

	pub_offs += roundup(ft_size + ask->pubsize, 8);
	priv_offs -= ask->privsize;

	if (fc->fc_pos.fcp_atomic_pos) {
		// Needed for process death handling (recycle-reuse):
		// No atomic fences required, we merely want to make sure the observers
		// will see memory effects in program (asm) order.
		// 1. the payload part of the chunk is cleared completely
		// 2. the chunk is marked as reused
		// This ensures that if we don't see a reference to a chunk in the ring
		// and it is dirty, when crawling the chunk, we don't see remnants of
		// other tracepoints
		//
		// We only do that when the fc_pos is non zero, because zero means
		// we just faulted the chunk, and the kernel already bzero-ed it.
		bzero(fc->fc_data, sizeof(fc->fc_data));
	}
	dispatch_compiler_barrier();
	// <rdar://problem/23562733> boot starts mach absolute time at 0, and
	// wrapping around to values above UINT64_MAX - FIREHOSE_STAMP_SLOP
	// breaks firehose_buffer_stream_flush() assumptions
	if (ask->stamp > FIREHOSE_STAMP_SLOP) {
		fc->fc_timestamp = ask->stamp - FIREHOSE_STAMP_SLOP;
	} else {
		fc->fc_timestamp = 0;
	}
	fc->fc_pos = (firehose_chunk_pos_u){
		.fcp_next_entry_offs = pub_offs,
		.fcp_private_offs = priv_offs,
		.fcp_refcnt = 1,
		.fcp_qos = firehose_buffer_qos_bits_propagate(),
		.fcp_stream = ask->stream,
		.fcp_flag_io = ask->for_io,
	};

	if (privptr) {
		*privptr = fc->fc_start + priv_offs;
	}
	return (firehose_tracepoint_t)fc->fc_data;
}
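// Worked example (editor annotation, illustrative numbers): with pubsize 40
// and privsize 16 in a 4kB chunk, fcp_next_entry_offs becomes
// offsetof(fc_data) + roundup(24 + 40, 8) and fcp_private_offs becomes
// 4096 - 16; the private area grows downward toward the public one.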
static firehose_tracepoint_t
firehose_buffer_stream_chunk_install(firehose_buffer_t fb,
		firehose_tracepoint_query_t ask, uint8_t **privptr, uint16_t ref)
{
	firehose_stream_state_u state, new_state;
	firehose_tracepoint_t ft;
	firehose_buffer_stream_t fbs = &fb->fb_header.fbh_stream[ask->stream];
	uint64_t stamp_and_len;

	if (fastpath(ref)) {
		firehose_chunk_t fc = firehose_buffer_ref_to_chunk(fb, ref);
		ft = firehose_buffer_chunk_init(fc, ask, privptr);
		// Needed for process death handling (tracepoint-begin):
		// write the length before making the chunk visible
		stamp_and_len = ask->stamp - fc->fc_timestamp;
		stamp_and_len |= (uint64_t)ask->pubsize << 48;
		os_atomic_store2o(ft, ft_stamp_and_length, stamp_and_len, relaxed);
#ifdef KERNEL
		ft->ft_thread = thread_tid(current_thread());
#else
		ft->ft_thread = _pthread_threadid_self_np_direct();
#endif
		if (ask->stream == firehose_stream_metadata) {
			os_atomic_or2o(fb, fb_header.fbh_bank.fbb_metadata_bitmap,
					1ULL << ref, relaxed);
		}
		// release barrier to make the chunk init visible
		os_atomic_rmw_loop2o(fbs, fbs_state.fss_atomic_state,
				state.fss_atomic_state, new_state.fss_atomic_state, release, {
			// We use a generation counter to prevent a theoretical ABA problem:
			// a thread could try to acquire a tracepoint in a chunk, fail to
			// do so, mark it as to be pushed, enqueue it, and then be preempted
			//
			// It sleeps for a long time, and then tries to acquire the
			// allocator bit and uninstalling the chunk. Succeeds in doing so,
			// but because the chunk actually happened to have cycled all the
			// way back to being installed. That thread would effectively hide
			// that unflushed chunk and leak it.
			//
			// Having a generation counter prevents the uninstallation of the
			// chunk from spuriously succeeding when it was a re-incarnation of it.
			new_state = (firehose_stream_state_u){
				.fss_current = ref,
				.fss_generation = state.fss_generation + 1,
			};
		});
	} else {
		// the allocator gave up, just clear the allocator + waiter bits
		firehose_stream_state_u mask = { .fss_allocator = ~0u, };
		state.fss_atomic_state = os_atomic_and_orig2o(fbs,
				fbs_state.fss_atomic_state, ~mask.fss_atomic_state, relaxed);
		ft = NULL;
	}

	if (unlikely(state.fss_gate.dgl_lock != _dispatch_tid_self())) {
		_dispatch_gate_broadcast_slow(&fbs->fbs_state.fss_gate,
				state.fss_gate.dgl_lock);
	}

	if (unlikely(state.fss_current == FIREHOSE_STREAM_STATE_PRISTINE)) {
		firehose_buffer_update_limits(fb);
	}

#ifndef KERNEL
	// pairs with the one in firehose_buffer_tracepoint_reserve()
	__firehose_critical_region_leave();
#endif
	return ft;
}
static inline uint16_t
firehose_buffer_ring_try_grow(firehose_buffer_bank_t fbb, uint16_t limit)
{
	uint16_t ref = 0;
	uint64_t bitmap;

	_dispatch_unfair_lock_lock(&fbb->fbb_lock);
	bitmap = ~(fbb->fbb_bitmap | (~0ULL << limit));
	if (bitmap) {
		ref = firehose_bitmap_first_set(bitmap);
		fbb->fbb_bitmap |= 1U << ref;
	}
	_dispatch_unfair_lock_unlock(&fbb->fbb_lock);
	return ref;
}
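// Editor annotation: ~(fbb_bitmap | (~0ULL << limit)) has a 1 exactly for
// each free chunk whose ref is below `limit`, so firehose_bitmap_first_set()
// picks the lowest free ref; 0 signals failure, since ref 0 (the header
// chunk) is permanently marked busy.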
static inline uint16_t
firehose_buffer_ring_shrink(firehose_buffer_t fb, uint16_t ref)
{
	const size_t madv_size =
			FIREHOSE_CHUNK_SIZE * FIREHOSE_BUFFER_MADVISE_CHUNK_COUNT;
	const size_t madv_mask =
			(1ULL << FIREHOSE_BUFFER_MADVISE_CHUNK_COUNT) - 1;
	dispatch_unfair_lock_t fbb_lock = &fb->fb_header.fbh_bank.fbb_lock;
	uint64_t bitmap;

	_dispatch_unfair_lock_lock(fbb_lock);
	if (ref < fb->fb_header.fbh_bank.fbb_limits.fbs_max_ref) {
		goto done;
	}

	bitmap = (fb->fb_header.fbh_bank.fbb_bitmap &= ~(1UL << ref));
	ref &= ~madv_mask;
	if ((bitmap & (madv_mask << ref)) == 0) {
		// if MADVISE_WIDTH consecutive chunks are free, madvise them free
		madvise(firehose_buffer_ref_to_chunk(fb, ref), madv_size, MADV_FREE);
	}
	ref = 0;
done:
	_dispatch_unfair_lock_unlock(fbb_lock);
	return ref;
}
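// Editor annotation: MADV_FREE is only issued when a whole aligned group of
// FIREHOSE_BUFFER_MADVISE_CHUNK_COUNT chunks is free, which is why `ref` is
// aligned down before the group's bits are tested; a partially-busy group
// keeps its pages.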
void
firehose_buffer_ring_enqueue(firehose_buffer_t fb, uint16_t ref)
{
	firehose_chunk_t fc = firehose_buffer_ref_to_chunk(fb, ref);
	uint16_t volatile *fbh_ring;
	uint16_t volatile *fbh_ring_head;
	uint16_t head, gen, dummy, idx;
	firehose_chunk_pos_u fc_pos = fc->fc_pos;
	bool for_io = fc_pos.fcp_flag_io;

	if (for_io) {
		fbh_ring = fb->fb_header.fbh_io_ring;
		fbh_ring_head = &fb->fb_header.fbh_ring_io_head;
	} else {
		fbh_ring = fb->fb_header.fbh_mem_ring;
		fbh_ring_head = &fb->fb_header.fbh_ring_mem_head;
	}

#ifdef KERNEL
	// The algorithm in the kernel is simpler:
	// 1. reserve a write position for the head
	// 2. store the new reference at that position
	// Enqueuers can't starve each other that way.
	//
	// However, the dequeuers now have to sometimes wait for the value written
	// in the ring to appear and have to spin, which is okay since the kernel
	// disables preemption around these two consecutive atomic operations.
	// See firehose_client_drain.
	__firehose_critical_region_enter();
	head = os_atomic_inc_orig(fbh_ring_head, relaxed);
	gen = head & FIREHOSE_RING_POS_GEN_MASK;
	idx = head & FIREHOSE_RING_POS_IDX_MASK;

	while (unlikely(!os_atomic_cmpxchgvw(&fbh_ring[idx], gen, gen | ref, &dummy,
			relaxed))) {
		// can only ever happen if a recycler is slow, this requires having
		// enough cores (>5 for I/O e.g.)
		_dispatch_wait_until(fbh_ring[idx] == gen);
	}
	__firehose_critical_region_leave();
	__firehose_buffer_push_to_logd(fb, for_io);
#else
	// The algorithm is:
	// 1. read the head position
	// 2. cmpxchg head.gen with the (head.gen | ref) at head.idx
	// 3. if it fails wait until either the head cursor moves,
	//    or the cell becomes free
	//
	// The most likely stall at (3) is because another enqueuer raced us
	// and made the cell non empty.
	//
	// The alternative is to reserve the enqueue slot with an atomic inc.
	// Then write the ref into the ring. This would be much simpler as the
	// generation packing wouldn't be required (though setting the ring cell
	// would still need a cmpxchg loop to avoid clobbering values of slow
	// dequeuers)
	//
	// But then that means that flushers (logd) could be starved until that
	// finishes, and logd cannot be held forever (that could even be a logd
	// DoS from malicious programs). Meaning that logd would stop draining
	// buffer queues when encountering that issue, leading the program to be
	// stuck in firehose_client_push() apparently waiting on logd, while
	// really it's waiting on itself. It's better for the scheduler if we
	// make it clear that we're waiting on ourselves!

	head = os_atomic_load(fbh_ring_head, relaxed);
	for (;;) {
		gen = head & FIREHOSE_RING_POS_GEN_MASK;
		idx = head & FIREHOSE_RING_POS_IDX_MASK;

		// a thread being preempted here for GEN_MASK worth of ring rotations
		// could lead to the cmpxchg succeeding, and a bogus enqueue
		// (confused enqueuer)
		if (fastpath(os_atomic_cmpxchgvw(&fbh_ring[idx], gen, gen | ref, &dummy,
				relaxed))) {
			if (fastpath(os_atomic_cmpxchgv(fbh_ring_head, head, head + 1,
					&head, release))) {
				__firehose_critical_region_leave();
				break;
			}
			// this thread is a confused enqueuer, need to undo enqueue
			os_atomic_store(&fbh_ring[idx], gen, relaxed);
			continue;
		}

		_dispatch_wait_until(({
			// wait until either the head moves (another enqueuer is done)
			// or (not very likely) a recycler is very slow
			// or (very unlikely) the confused thread undoes its enqueue
			uint16_t old_head = head;
			head = *fbh_ring_head;
			head != old_head || fbh_ring[idx] == gen;
		}));
	}

	pthread_priority_t pp = fc_pos.fcp_qos;
	pp <<= _PTHREAD_PRIORITY_QOS_CLASS_SHIFT;
	firehose_client_send_push_async(fb, _pthread_qos_class_decode(pp, NULL, NULL),
			for_io);
#endif
}
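// Editor annotation: both variants publish `gen | ref` into the ring cell.
// Userspace moves the head with a separate release cmpxchg so a preempted
// (confused) enqueuer can detect the race and undo its store, while the
// kernel instead relies on preemption being disabled across the two atomics.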
void
firehose_buffer_force_connect(firehose_buffer_t fb)
{
	mach_port_t sendp = fb->fb_header.fbh_sendp;
	if (sendp == MACH_PORT_NULL) firehose_client_reconnect(fb, MACH_PORT_NULL);
}
static inline uint16_t
firehose_buffer_ring_try_recycle(firehose_buffer_t fb)
{
	firehose_ring_tail_u pos, old;
	uint16_t volatile *fbh_ring;
	uint16_t gen, ref, entry, tail;
	firehose_chunk_t fc;
	bool for_io;

	os_atomic_rmw_loop2o(&fb->fb_header, fbh_ring_tail.frp_atomic_tail,
			old.frp_atomic_tail, pos.frp_atomic_tail, relaxed, {
		pos = old;
		if (fastpath(old.frp_mem_tail != old.frp_mem_flushed)) {
			pos.frp_mem_tail++;
		} else if (fastpath(old.frp_io_tail != old.frp_io_flushed)) {
			pos.frp_io_tail++;
		} else {
			os_atomic_rmw_loop_give_up(return 0);
		}
	});

	// there's virtually no chance that the lack of acquire barrier above
	// lets us read a value from the ring so stale that it's still an Empty
	// marker. For correctness purposes have a cheap loop that should never
	// really loop, instead of an acquire barrier in the cmpxchg above.
	for_io = (pos.frp_io_tail != old.frp_io_tail);
	if (for_io) {
		fbh_ring = fb->fb_header.fbh_io_ring;
		tail = old.frp_io_tail & FIREHOSE_RING_POS_IDX_MASK;
	} else {
		fbh_ring = fb->fb_header.fbh_mem_ring;
		tail = old.frp_mem_tail & FIREHOSE_RING_POS_IDX_MASK;
	}
	_dispatch_wait_until((entry = fbh_ring[tail]) & FIREHOSE_RING_POS_IDX_MASK);
	// Needed for process death handling (recycle-dequeue):
	// No atomic fences required, we merely want to make sure the observers
	// will see memory effects in program (asm) order.
	// 1. the chunk is marked as "void&full" (clobbering the pos with FULL_BIT)
	// 2. then we remove any reference to the chunk from the ring
	// This ensures that if we don't see a reference to a chunk in the ring
	// and it is dirty, it is a chunk being written to that needs a flush
	gen = (entry & FIREHOSE_RING_POS_GEN_MASK) + FIREHOSE_RING_POS_GEN_INC;
	ref = entry & FIREHOSE_RING_POS_IDX_MASK;
	fc = firehose_buffer_ref_to_chunk(fb, ref);

	if (!for_io && fc->fc_pos.fcp_stream == firehose_stream_metadata) {
		os_atomic_and2o(fb, fb_header.fbh_bank.fbb_metadata_bitmap,
				~(1ULL << ref), relaxed);
	}
	os_atomic_store2o(fc, fc_pos.fcp_atomic_pos,
			FIREHOSE_CHUNK_POS_FULL_BIT, relaxed);
	dispatch_compiler_barrier();
	os_atomic_store(&fbh_ring[tail], gen | 0, relaxed);
	return ref;
}
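// Editor annotation: recycling publishes in the reverse order of enqueue --
// the chunk is first clobbered to "void&full", then the ring cell is bumped
// to the next generation (`gen | 0` reads as an empty marker), which is what
// lets crash triage tell a recycled chunk apart from one still needing a flush.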
static firehose_tracepoint_t
firehose_buffer_tracepoint_reserve_slow2(firehose_buffer_t fb,
		firehose_tracepoint_query_t ask, uint8_t **privptr, uint16_t ref)
{
	const uint64_t bank_unavail_mask = FIREHOSE_BANK_UNAVAIL_MASK(ask->for_io);
	const uint64_t bank_inc = FIREHOSE_BANK_INC(ask->for_io);
	firehose_buffer_bank_t const fbb = &fb->fb_header.fbh_bank;
	firehose_bank_state_u state;
	uint16_t fbs_max_ref;

	// first wait for our bank to have space, if needed
	if (!fastpath(ask->is_bank_ok)) {
		state.fbs_atomic_state =
				os_atomic_load2o(fbb, fbb_state.fbs_atomic_state, relaxed);
		while ((state.fbs_atomic_state - bank_inc) & bank_unavail_mask) {
			firehose_client_send_push(fb, ask->for_io, &state);
			if (slowpath(fb->fb_header.fbh_sendp == MACH_PORT_DEAD)) {
				// logd was unloaded, give up
				return NULL;
			}
		}
		ask->is_bank_ok = true;
		fbs_max_ref = state.fbs_max_ref;
	} else {
		fbs_max_ref = fbb->fbb_state.fbs_max_ref;
	}

	// second, if we were passed a chunk, we may need to shrink
	if (slowpath(ref)) {
		goto try_shrink;
	}

	// third, wait for a chunk to come up, and if not, wait on the daemon
	for (;;) {
		if (fastpath(ref = firehose_buffer_ring_try_recycle(fb))) {
try_shrink:
			if (slowpath(ref >= fbs_max_ref)) {
				ref = firehose_buffer_ring_shrink(fb, ref);
				if (!ref) {
					continue;
				}
			}
			break;
		}
		if (fastpath(ref = firehose_buffer_ring_try_grow(fbb, fbs_max_ref))) {
			break;
		}
		firehose_client_send_push(fb, ask->for_io, NULL);
		if (slowpath(fb->fb_header.fbh_sendp == MACH_PORT_DEAD)) {
			// logd was unloaded, give up
			break;
		}
	}

	return firehose_buffer_stream_chunk_install(fb, ask, privptr, ref);
}
#ifdef KERNEL
static inline dispatch_lock
_dispatch_gate_lock_load_seq_cst(dispatch_gate_t l)
{
	return os_atomic_load(&l->dgl_lock, seq_cst);
}
static void
_dispatch_gate_wait(dispatch_gate_t l, uint32_t flags)
{
	(void)flags;
	_dispatch_wait_until(_dispatch_gate_lock_load_seq_cst(l) == 0);
}
#endif
firehose_tracepoint_t
firehose_buffer_tracepoint_reserve_slow(firehose_buffer_t fb,
		firehose_tracepoint_query_t ask, uint8_t **privptr)
{
	const unsigned for_io = ask->for_io;
	const firehose_buffer_bank_t fbb = &fb->fb_header.fbh_bank;
	firehose_bank_state_u state;
	uint16_t ref = 0;

	uint64_t unavail_mask = FIREHOSE_BANK_UNAVAIL_MASK(for_io);
#ifndef KERNEL
	state.fbs_atomic_state = os_atomic_add_orig2o(fbb,
			fbb_state.fbs_atomic_state, FIREHOSE_BANK_INC(for_io), acquire);
	if (fastpath(!(state.fbs_atomic_state & unavail_mask))) {
		ask->is_bank_ok = true;
		if (fastpath(ref = firehose_buffer_ring_try_recycle(fb))) {
			if (fastpath(ref < state.fbs_max_ref)) {
				return firehose_buffer_stream_chunk_install(fb, ask,
						privptr, ref);
			}
		}
	}
	return firehose_buffer_tracepoint_reserve_slow2(fb, ask, privptr, ref);
#else
	firehose_bank_state_u value;
	ask->is_bank_ok = os_atomic_rmw_loop2o(fbb, fbb_state.fbs_atomic_state,
			state.fbs_atomic_state, value.fbs_atomic_state, acquire, {
		value = state;
		if (slowpath((value.fbs_atomic_state & unavail_mask) != 0)) {
			os_atomic_rmw_loop_give_up(break);
		}
		value.fbs_atomic_state += FIREHOSE_BANK_INC(for_io);
	});
	if (ask->is_bank_ok) {
		ref = firehose_buffer_ring_try_recycle(fb);
		if (slowpath(ref == 0)) {
			// the kernel has no overlap between I/O and memory chunks,
			// having an available bank slot means we should be able to recycle
			DISPATCH_INTERNAL_CRASH(0, "Unable to recycle a chunk");
		}
	}
	// rdar://25137005 installing `0` unlocks the allocator
	return firehose_buffer_stream_chunk_install(fb, ask, privptr, ref);
#endif // KERNEL
}
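// Editor annotation: userspace optimistically charges the bank with a single
// atomic add and repairs any over-subscription in reserve_slow2(); the kernel
// variant must never over-subscribe, hence the rmw loop above that gives up
// instead of moving past the unavailable bit.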
#ifdef KERNEL
firehose_tracepoint_t
__firehose_buffer_tracepoint_reserve(uint64_t stamp, firehose_stream_t stream,
		uint16_t pubsize, uint16_t privsize, uint8_t **privptr)
{
	firehose_buffer_t fb = kernel_firehose_buffer;
	if (!fastpath(fb)) {
		return NULL;
	}
	return firehose_buffer_tracepoint_reserve(fb, stamp, stream, pubsize,
			privsize, privptr);
}
firehose_buffer_t
__firehose_buffer_create(size_t *size)
{
	if (!kernel_firehose_buffer) {
		kernel_firehose_buffer = firehose_buffer_create(MACH_PORT_NULL, 0, 0);
	}

	if (size) {
		*size = FIREHOSE_BUFFER_KERNEL_CHUNK_COUNT * FIREHOSE_CHUNK_SIZE;
	}
	return kernel_firehose_buffer;
}
void
__firehose_buffer_tracepoint_flush(firehose_tracepoint_t ft,
		firehose_tracepoint_id_u ftid)
{
	return firehose_buffer_tracepoint_flush(kernel_firehose_buffer, ft, ftid);
}
void
__firehose_merge_updates(firehose_push_reply_t update)
{
	firehose_buffer_t fb = kernel_firehose_buffer;
	if (fastpath(fb)) {
		firehose_client_merge_updates(fb, true, update, NULL);
	}
}
#endif // KERNEL

#endif // OS_FIREHOSE_SPI