apple/libdispatch (libdispatch-703.30.5): src/firehose/firehose_buffer.c
1 /*
2 * Copyright (c) 2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_APACHE_LICENSE_HEADER_START@
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *
18 * @APPLE_APACHE_LICENSE_HEADER_END@
19 */
20
21 #include <mach/vm_statistics.h> // VM_MEMORY_GENEALOGY
22 #ifdef KERNEL
23
24 #define OS_VOUCHER_ACTIVITY_SPI_TYPES 1
25 #define OS_FIREHOSE_SPI 1
26 #define __OS_EXPOSE_INTERNALS_INDIRECT__ 1
27
28 #define DISPATCH_PURE_C 1
29 #define _safe_cast_to_long(x) \
30 ({ _Static_assert(sizeof(typeof(x)) <= sizeof(long), \
31 "__builtin_expect doesn't support types wider than long"); \
32 (long)(x); })
33 #define fastpath(x) ((typeof(x))__builtin_expect(_safe_cast_to_long(x), ~0l))
34 #define slowpath(x) ((typeof(x))__builtin_expect(_safe_cast_to_long(x), 0l))
35 #define os_likely(x) __builtin_expect(!!(x), 1)
36 #define os_unlikely(x) __builtin_expect(!!(x), 0)
37 #define likely(x) __builtin_expect(!!(x), 1)
38 #define unlikely(x) __builtin_expect(!!(x), 0)
39
40 #define DISPATCH_INTERNAL_CRASH(ac, msg) ({ panic(msg); __builtin_trap(); })
41
42 #if defined(__x86_64__) || defined(__i386__)
43 #define dispatch_hardware_pause() __asm__("pause")
44 #elif (defined(__arm__) && defined(_ARM_ARCH_7) && defined(__thumb__)) || \
45 defined(__arm64__)
46 #define dispatch_hardware_pause() __asm__("yield")
47 #define dispatch_hardware_wfe() __asm__("wfe")
48 #else
49 #define dispatch_hardware_pause() __asm__("")
50 #endif
51
52 #define _dispatch_wait_until(c) do { \
53 while (!fastpath(c)) { \
54 dispatch_hardware_pause(); \
55 } } while (0)
56 #define dispatch_compiler_barrier() __asm__ __volatile__("" ::: "memory")
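// These two definitions stand in for the userspace internal headers on the
// kernel side: _dispatch_wait_until() spins with a hardware pause hint
// until its condition becomes true, and dispatch_compiler_barrier() only
// prevents compiler reordering, not CPU reordering. Both are relied upon
// by the ring enqueue/recycle code below.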
57
58 typedef uint32_t dispatch_lock;
59 typedef struct dispatch_gate_s {
60 dispatch_lock dgl_lock;
61 } dispatch_gate_s, *dispatch_gate_t;
62 #define DLOCK_LOCK_DATA_CONTENTION 0
63 static void _dispatch_gate_wait(dispatch_gate_t l, uint32_t flags);
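// In the kernel build the gate is a bare lock word: _dispatch_gate_wait(),
// defined near the end of this file, simply spins until the word reads 0.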
64
65 #include <kern/debug.h>
66 #include <machine/cpu_number.h>
67 #include <kern/thread.h>
68 #include <mach/port.h>
69 #include <stdbool.h>
70 #include <string.h>
71 #include <sys/param.h>
72 #include <sys/types.h>
73 #include <vm/vm_kern.h>
74 #include <firehose_types_private.h> // <firehose/firehose_types_private.h>
75 #include <tracepoint_private.h> // <firehose/tracepoint_private.h>
76 #include <internal/atomic.h> // os/internal/atomic.h
77 #include "os/firehose_buffer_private.h"
78 #include "firehose_buffer_internal.h"
79 #include "firehose_inline_internal.h"
80 #else
81 #include "internal.h"
82 #include "firehose.h" // MiG
83 #include "firehose_replyServer.h" // MiG
84 #endif
85
86 #if OS_FIREHOSE_SPI
87
88 #if __has_feature(c_static_assert)
89 _Static_assert(sizeof(((firehose_stream_state_u *)NULL)->fss_gate) ==
90 sizeof(((firehose_stream_state_u *)NULL)->fss_allocator),
91 "fss_gate and fss_allocator alias");
92 _Static_assert(offsetof(firehose_stream_state_u, fss_gate) ==
93 offsetof(firehose_stream_state_u, fss_allocator),
94 "fss_gate and fss_allocator alias");
95 _Static_assert(sizeof(struct firehose_buffer_header_s) ==
96 FIREHOSE_BUFFER_CHUNK_SIZE,
97 "firehose buffer header must be 4k");
98 _Static_assert(offsetof(struct firehose_buffer_header_s, fbh_unused) <=
99 FIREHOSE_BUFFER_CHUNK_SIZE - FIREHOSE_BUFFER_LIBTRACE_HEADER_SIZE,
100 "we must have enough space for the libtrace header");
101 _Static_assert(sizeof(struct firehose_buffer_chunk_s) ==
102 FIREHOSE_BUFFER_CHUNK_SIZE,
103 "firehose buffer chunks must be 4k");
104 _Static_assert(powerof2(FIREHOSE_BUFFER_CHUNK_COUNT),
105 "CHUNK_COUNT Must be a power of two");
106 _Static_assert(FIREHOSE_BUFFER_CHUNK_COUNT <= 64,
107 "CHUNK_COUNT must be less than 64 (bitmap in uint64_t)");
108 #ifdef FIREHOSE_BUFFER_MADVISE_CHUNK_COUNT
109 _Static_assert(powerof2(FIREHOSE_BUFFER_MADVISE_CHUNK_COUNT),
110 "madvise chunk count must be a power of two");
111 #endif
112 _Static_assert(howmany(sizeof(struct firehose_tracepoint_s),
113 sizeof(struct firehose_buffer_chunk_s)) < 255,
114 "refcount assumes that you cannot have more than 255 tracepoints");
115 // FIXME: we should have an event-count instead here
116 _Static_assert(sizeof(struct firehose_buffer_stream_s) == 128,
117 "firehose buffer stream must be small (single cacheline if possible)");
118 _Static_assert(offsetof(struct firehose_buffer_chunk_s, fbc_data) % 8 == 0,
119 "Page header is 8 byte aligned");
120 _Static_assert(sizeof(struct firehose_tracepoint_s) == 24,
121 "tracepoint header should be exactly 24 bytes");
122 #endif
123
124 #ifdef KERNEL
125 static firehose_buffer_t kernel_firehose_buffer = NULL;
126 #endif
127
128 #pragma mark -
129 #pragma mark Client IPC to the log daemon
130 #ifndef KERNEL
131
132 static mach_port_t
133 firehose_client_reconnect(firehose_buffer_t fb, mach_port_t oldsendp)
134 {
135 mach_port_t sendp = MACH_PORT_NULL;
136 mach_port_t mem_port = MACH_PORT_NULL, extra_info_port = MACH_PORT_NULL;
137 mach_vm_size_t extra_info_size = 0;
138 kern_return_t kr;
139
140 dispatch_assert(fb->fb_header.fbh_logd_port);
141 dispatch_assert(fb->fb_header.fbh_recvp);
142 dispatch_assert(fb->fb_header.fbh_uniquepid != 0);
143
144 _dispatch_unfair_lock_lock(&fb->fb_header.fbh_logd_lock);
145 sendp = fb->fb_header.fbh_sendp;
146 if (sendp != oldsendp || sendp == MACH_PORT_DEAD) {
147 // someone beat us to reconnecting or logd was unloaded, just go away
148 goto unlock;
149 }
150
151 if (oldsendp) {
152 // same trick as _xpc_pipe_dispose: keeping a send right
153 // maintains the name, so that we can destroy the receive right
154 // in case we still have it.
155 (void)firehose_mach_port_recv_dispose(oldsendp, fb);
156 firehose_mach_port_send_release(oldsendp);
157 fb->fb_header.fbh_sendp = MACH_PORT_NULL;
158 }
159
160 /* Create a memory port for the buffer VM region */
161 vm_prot_t flags = VM_PROT_READ | MAP_MEM_VM_SHARE;
162 memory_object_size_t size = sizeof(union firehose_buffer_u);
163 mach_vm_address_t addr = (vm_address_t)fb;
164
165 kr = mach_make_memory_entry_64(mach_task_self(), &size, addr,
166 flags, &mem_port, MACH_PORT_NULL);
167 if (size < sizeof(union firehose_buffer_u)) {
168 DISPATCH_CLIENT_CRASH(size, "Invalid size for the firehose buffer");
169 }
170 if (unlikely(kr)) {
171 // the client probably has some form of memory corruption
172 // and/or a port leak
173 DISPATCH_CLIENT_CRASH(kr, "Unable to make memory port");
174 }
175
176 /* Create a communication port to the logging daemon */
177 uint32_t opts = MPO_CONTEXT_AS_GUARD | MPO_TEMPOWNER | MPO_INSERT_SEND_RIGHT;
178 sendp = firehose_mach_port_allocate(opts, fb);
179
180 if (oldsendp && _voucher_libtrace_hooks->vah_version >= 3) {
181 if (_voucher_libtrace_hooks->vah_get_reconnect_info) {
182 kr = _voucher_libtrace_hooks->vah_get_reconnect_info(&addr, &size);
183 if (likely(kr == KERN_SUCCESS) && addr && size) {
184 extra_info_size = size;
185 kr = mach_make_memory_entry_64(mach_task_self(), &size, addr,
186 flags, &extra_info_port, MACH_PORT_NULL);
187 if (unlikely(kr)) {
188 // the client probably has some form of memory corruption
189 // and/or a port leak
190 DISPATCH_CLIENT_CRASH(kr, "Unable to make memory port");
191 }
192 kr = mach_vm_deallocate(mach_task_self(), addr, size);
193 (void)dispatch_assume_zero(kr);
194 }
195 }
196 }
197
198 /* Call the firehose_register() MIG routine */
199 kr = firehose_send_register(fb->fb_header.fbh_logd_port, mem_port,
200 sizeof(union firehose_buffer_u), sendp, fb->fb_header.fbh_recvp,
201 extra_info_port, extra_info_size);
202 if (likely(kr == KERN_SUCCESS)) {
203 fb->fb_header.fbh_sendp = sendp;
204 } else if (unlikely(kr == MACH_SEND_INVALID_DEST)) {
205                 // MACH_SEND_INVALID_DEST here means that logd's bootstrap port
206                 // turned into a dead name, which in turn means that logd has been
207                 // unloaded. The only option here is to give up permanently.
208 //
209 // same trick as _xpc_pipe_dispose: keeping a send right
210 // maintains the name, so that we can destroy the receive right
211 // in case we still have it.
212 (void)firehose_mach_port_recv_dispose(sendp, fb);
213 firehose_mach_port_send_release(sendp);
214 firehose_mach_port_send_release(mem_port);
215 if (extra_info_port) firehose_mach_port_send_release(extra_info_port);
216 sendp = fb->fb_header.fbh_sendp = MACH_PORT_DEAD;
217 } else {
218 // the client probably has some form of memory corruption
219 // and/or a port leak
220 DISPATCH_CLIENT_CRASH(kr, "Unable to register with logd");
221 }
222
223 unlock:
224 _dispatch_unfair_lock_unlock(&fb->fb_header.fbh_logd_lock);
225 return sendp;
226 }
227
228 static void
229 firehose_buffer_update_limits_unlocked(firehose_buffer_t fb)
230 {
231 firehose_bank_state_u old, new;
232 firehose_buffer_bank_t fbb = &fb->fb_header.fbh_bank;
233 unsigned long fbb_flags = fbb->fbb_flags;
234 uint16_t io_streams = 0, mem_streams = 0;
235 uint16_t total = 0;
236
237 for (size_t i = 0; i < countof(fb->fb_header.fbh_stream); i++) {
238 firehose_buffer_stream_t fbs = fb->fb_header.fbh_stream + i;
239
240 if (fbs->fbs_state.fss_current == FIREHOSE_STREAM_STATE_PRISTINE) {
241 continue;
242 }
243 if ((1UL << i) & firehose_stream_uses_io_bank) {
244 io_streams++;
245 } else {
246 mem_streams++;
247 }
248 }
249
250 if (fbb_flags & FIREHOSE_BUFFER_BANK_FLAG_LOW_MEMORY) {
251 if (fbb_flags & FIREHOSE_BUFFER_BANK_FLAG_HIGH_RATE) {
252 total = 1 + 4 * mem_streams + io_streams; // usually 10
253 } else {
254 total = 1 + 2 + mem_streams + io_streams; // usually 6
255 }
256 } else {
257 if (fbb_flags & FIREHOSE_BUFFER_BANK_FLAG_HIGH_RATE) {
258 total = 1 + 6 * mem_streams + 3 * io_streams; // usually 16
259 } else {
260 total = 1 + 2 * (mem_streams + io_streams); // usually 7
261 }
262 }
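        // For example, with two memory streams and one I/O stream in use and
        // neither LOW_MEMORY nor HIGH_RATE set, total = 1 + 2 * (2 + 1) = 7;
        // with HIGH_RATE it would be 1 + 6 * 2 + 3 * 1 = 16. These counts match
        // the "usually" figures noted above.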
263
264 uint16_t ratio = (uint16_t)(PAGE_SIZE / FIREHOSE_BUFFER_CHUNK_SIZE);
265 if (ratio > 1) {
266 total = roundup(total, ratio);
267 }
268 total = MAX(total, FIREHOSE_BUFFER_CHUNK_PREALLOCATED_COUNT);
269 if (!(fbb_flags & FIREHOSE_BUFFER_BANK_FLAG_LOW_MEMORY)) {
270 total = MAX(total, TARGET_OS_EMBEDDED ? 8 : 12);
271 }
272
273 new.fbs_max_ref = total;
274 new.fbs_mem_bank = FIREHOSE_BANK_UNAVAIL_BIT - (total - 1);
275 new.fbs_io_bank = FIREHOSE_BANK_UNAVAIL_BIT -
276 MAX(3 * total / 8, 2 * io_streams);
277 new.fbs_unused = 0;
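        // Continuing the example above with total = 16 and one I/O stream (and
        // assuming the roundup/MAX adjustments just performed left the value at
        // 16): fbs_max_ref = 16, fbs_mem_bank = FIREHOSE_BANK_UNAVAIL_BIT - 15,
        // and fbs_io_bank = FIREHOSE_BANK_UNAVAIL_BIT - MAX(3 * 16 / 8, 2 * 1) =
        // FIREHOSE_BANK_UNAVAIL_BIT - 6, effectively sizing the I/O bank at 6 of
        // the 16 chunks.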
278
279 old = fbb->fbb_limits;
280 fbb->fbb_limits = new;
281 if (old.fbs_atomic_state == new.fbs_atomic_state) {
282 return;
283 }
284 os_atomic_add2o(&fb->fb_header, fbh_bank.fbb_state.fbs_atomic_state,
285 new.fbs_atomic_state - old.fbs_atomic_state, relaxed);
286 }
287 #endif // !KERNEL
288
289 firehose_buffer_t
290 firehose_buffer_create(mach_port_t logd_port, uint64_t unique_pid,
291 unsigned long bank_flags)
292 {
293 firehose_buffer_header_t fbh;
294 firehose_buffer_t fb;
295
296 #ifndef KERNEL
297 mach_vm_address_t vm_addr = 0;
298 kern_return_t kr;
299
300 vm_addr = vm_page_size;
301 const size_t madvise_bytes = FIREHOSE_BUFFER_MADVISE_CHUNK_COUNT *
302 FIREHOSE_BUFFER_CHUNK_SIZE;
303 if (slowpath(madvise_bytes % PAGE_SIZE)) {
304 DISPATCH_INTERNAL_CRASH(madvise_bytes,
305 "Invalid values for MADVISE_CHUNK_COUNT / CHUNK_SIZE");
306 }
307
308 kr = mach_vm_map(mach_task_self(), &vm_addr, sizeof(*fb), 0,
309 VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE |
310 VM_MAKE_TAG(VM_MEMORY_GENEALOGY), MEMORY_OBJECT_NULL, 0, FALSE,
311 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_NONE);
312 if (slowpath(kr)) {
313 if (kr != KERN_NO_SPACE) dispatch_assume_zero(kr);
314 firehose_mach_port_send_release(logd_port);
315 return NULL;
316 }
317
318 uint32_t opts = MPO_CONTEXT_AS_GUARD | MPO_STRICT | MPO_INSERT_SEND_RIGHT;
319 #else
320 vm_offset_t vm_addr = 0;
321 vm_size_t size;
322
323 size = FIREHOSE_BUFFER_KERNEL_CHUNK_COUNT * FIREHOSE_BUFFER_CHUNK_SIZE;
324 __firehose_allocate(&vm_addr, size);
325
326 (void)logd_port; (void)unique_pid;
327 #endif // KERNEL
328
329 fb = (firehose_buffer_t)vm_addr;
330 fbh = &fb->fb_header;
331 #ifndef KERNEL
332 fbh->fbh_logd_port = logd_port;
333 fbh->fbh_pid = getpid();
334 fbh->fbh_uniquepid = unique_pid;
335 fbh->fbh_recvp = firehose_mach_port_allocate(opts, fb);
336 #endif // !KERNEL
337 fbh->fbh_spi_version = OS_FIREHOSE_SPI_VERSION;
338 fbh->fbh_bank.fbb_flags = bank_flags;
339
340 #ifndef KERNEL
341 for (size_t i = 0; i < countof(fbh->fbh_stream); i++) {
342 firehose_buffer_stream_t fbs = fbh->fbh_stream + i;
343 if (i != firehose_stream_metadata) {
344 fbs->fbs_state.fss_current = FIREHOSE_STREAM_STATE_PRISTINE;
345 }
346 }
347 firehose_buffer_update_limits_unlocked(fb);
348 #else
349 uint16_t total = FIREHOSE_BUFFER_CHUNK_PREALLOCATED_COUNT + 1;
350 const uint16_t num_kernel_io_pages = 8;
351 uint16_t io_pages = num_kernel_io_pages;
352 fbh->fbh_bank.fbb_state = (firehose_bank_state_u){
353 .fbs_max_ref = total,
354 .fbs_io_bank = FIREHOSE_BANK_UNAVAIL_BIT - io_pages,
355 .fbs_mem_bank = FIREHOSE_BANK_UNAVAIL_BIT - (total - io_pages - 1),
356 };
357 fbh->fbh_bank.fbb_limits = fbh->fbh_bank.fbb_state;
358 #endif // KERNEL
359
360 // now pre-allocate some chunks in the ring directly
361 #ifdef KERNEL
362 const uint16_t pre_allocated = FIREHOSE_BUFFER_CHUNK_PREALLOCATED_COUNT - 1;
363 #else
364 const uint16_t pre_allocated = FIREHOSE_BUFFER_CHUNK_PREALLOCATED_COUNT;
365 #endif
366
367 fbh->fbh_bank.fbb_bitmap = (1U << (1 + pre_allocated)) - 1;
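        // Chunk ref 0 covers the buffer header itself (a ref of 0 also doubles
        // as the "no chunk" value elsewhere), so the bitmap marks bit 0 plus one
        // bit per pre-allocated chunk; the loop below threads refs
        // 1..pre_allocated into the memory ring.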
368
369 for (uint16_t i = 0; i < pre_allocated; i++) {
370 fbh->fbh_mem_ring[i] = i + 1;
371 }
372 fbh->fbh_bank.fbb_mem_flushed = pre_allocated;
373 fbh->fbh_ring_mem_head = pre_allocated;
374
375
376 #ifdef KERNEL
377 // install the early boot page as the current one for persist
378 fbh->fbh_stream[firehose_stream_persist].fbs_state.fss_current =
379 FIREHOSE_BUFFER_CHUNK_PREALLOCATED_COUNT;
380 fbh->fbh_bank.fbb_state.fbs_io_bank += 1;
381 #endif
382
383 fbh->fbh_ring_tail = (firehose_ring_tail_u){
384 .frp_mem_flushed = pre_allocated,
385 };
386 return fb;
387 }
388
389 #ifndef KERNEL
390 static void
391 firehose_notify_source_invoke(mach_msg_header_t *hdr)
392 {
393 const size_t reply_size =
394 sizeof(union __ReplyUnion__firehose_client_firehoseReply_subsystem);
395
396 firehose_mig_server(firehoseReply_server, reply_size, hdr);
397 }
398
399 static void
400 firehose_client_register_for_notifications(firehose_buffer_t fb)
401 {
402 static const struct dispatch_continuation_s dc = {
403 .dc_func = (void *)firehose_notify_source_invoke,
404 };
405 firehose_buffer_header_t fbh = &fb->fb_header;
406
407 dispatch_once(&fbh->fbh_notifs_pred, ^{
408 dispatch_source_t ds = _dispatch_source_create_mach_msg_direct_recv(
409 fbh->fbh_recvp, &dc);
410 dispatch_set_context(ds, fb);
411 dispatch_activate(ds);
412 fbh->fbh_notifs_source = ds;
413 });
414 }
415
416 static void
417 firehose_client_send_push_async(firehose_buffer_t fb, qos_class_t qos,
418 bool for_io)
419 {
420 bool ask_for_notifs = fb->fb_header.fbh_notifs_source != NULL;
421 mach_port_t sendp = fb->fb_header.fbh_sendp;
422 kern_return_t kr = KERN_FAILURE;
423
424 if (!ask_for_notifs && _dispatch_is_multithreaded_inline()) {
425 firehose_client_register_for_notifications(fb);
426 ask_for_notifs = true;
427 }
428
429 if (slowpath(sendp == MACH_PORT_DEAD)) {
430 return;
431 }
432
433 if (fastpath(sendp)) {
434 kr = firehose_send_push_async(sendp, qos, for_io, ask_for_notifs);
435 if (likely(kr == KERN_SUCCESS)) {
436 return;
437 }
438 if (kr != MACH_SEND_INVALID_DEST) {
439 DISPATCH_VERIFY_MIG(kr);
440 dispatch_assume_zero(kr);
441 }
442 }
443
444 sendp = firehose_client_reconnect(fb, sendp);
445 if (fastpath(MACH_PORT_VALID(sendp))) {
446 kr = firehose_send_push_async(sendp, qos, for_io, ask_for_notifs);
447 if (likely(kr == KERN_SUCCESS)) {
448 return;
449 }
450 if (kr != MACH_SEND_INVALID_DEST) {
451 DISPATCH_VERIFY_MIG(kr);
452 dispatch_assume_zero(kr);
453 }
454 }
455 }
456 #endif // !KERNEL
457
458 static void
459 firehose_client_merge_updates(firehose_buffer_t fb, bool async_notif,
460 firehose_push_reply_t reply, firehose_bank_state_u *state_out)
461 {
462 firehose_bank_state_u state;
463 firehose_ring_tail_u otail, ntail;
464 uint64_t old_flushed_pos, bank_updates;
465 uint16_t io_delta = 0;
466 uint16_t mem_delta = 0;
467
468 if (firehose_atomic_maxv2o(&fb->fb_header, fbh_bank.fbb_mem_flushed,
469 reply.fpr_mem_flushed_pos, &old_flushed_pos, relaxed)) {
470 mem_delta = (uint16_t)(reply.fpr_mem_flushed_pos - old_flushed_pos);
471 }
472 if (firehose_atomic_maxv2o(&fb->fb_header, fbh_bank.fbb_io_flushed,
473 reply.fpr_io_flushed_pos, &old_flushed_pos, relaxed)) {
474 io_delta = (uint16_t)(reply.fpr_io_flushed_pos - old_flushed_pos);
475 }
476 #ifndef KERNEL
477 _dispatch_debug("client side: mem: +%d->%llx, io: +%d->%llx",
478 mem_delta, reply.fpr_mem_flushed_pos,
479 io_delta, reply.fpr_io_flushed_pos);
480 #endif
481
482 if (!mem_delta && !io_delta) {
483 if (state_out) {
484 state_out->fbs_atomic_state = os_atomic_load2o(&fb->fb_header,
485 fbh_bank.fbb_state.fbs_atomic_state, relaxed);
486 }
487 return;
488 }
489
490 bank_updates = ((uint64_t)mem_delta << FIREHOSE_BANK_SHIFT(0)) |
491 ((uint64_t)io_delta << FIREHOSE_BANK_SHIFT(1));
492 state.fbs_atomic_state = os_atomic_sub2o(&fb->fb_header,
493 fbh_bank.fbb_state.fbs_atomic_state, bank_updates, relaxed);
494 if (state_out) *state_out = state;
495
496 os_atomic_rmw_loop2o(&fb->fb_header, fbh_ring_tail.frp_atomic_tail,
497 otail.frp_atomic_tail, ntail.frp_atomic_tail, relaxed, {
498 ntail = otail;
499 // overflow handles the generation wraps
500 ntail.frp_io_flushed += io_delta;
501 ntail.frp_mem_flushed += mem_delta;
502 });
503 if (async_notif) {
504 if (io_delta) {
505 os_atomic_inc2o(&fb->fb_header, fbh_bank.fbb_io_notifs, relaxed);
506 }
507 if (mem_delta) {
508 os_atomic_inc2o(&fb->fb_header, fbh_bank.fbb_mem_notifs, relaxed);
509 }
510 }
511 }
512
513 #ifndef KERNEL
514 static void
515 firehose_client_send_push(firehose_buffer_t fb, bool for_io,
516 firehose_bank_state_u *state_out)
517 {
518 mach_port_t sendp = fb->fb_header.fbh_sendp;
519 firehose_push_reply_t push_reply = { };
520 qos_class_t qos = qos_class_self();
521 kern_return_t kr;
522
523 if (slowpath(sendp == MACH_PORT_DEAD)) {
524 return;
525 }
526 if (fastpath(sendp)) {
527 kr = firehose_send_push(sendp, qos, for_io, &push_reply);
528 if (likely(kr == KERN_SUCCESS)) {
529 goto success;
530 }
531 if (kr != MACH_SEND_INVALID_DEST) {
532 DISPATCH_VERIFY_MIG(kr);
533 dispatch_assume_zero(kr);
534 }
535 }
536
537 sendp = firehose_client_reconnect(fb, sendp);
538 if (fastpath(MACH_PORT_VALID(sendp))) {
539 kr = firehose_send_push(sendp, qos, for_io, &push_reply);
540 if (likely(kr == KERN_SUCCESS)) {
541 goto success;
542 }
543 if (kr != MACH_SEND_INVALID_DEST) {
544 DISPATCH_VERIFY_MIG(kr);
545 dispatch_assume_zero(kr);
546 }
547 }
548
549 if (state_out) {
550 state_out->fbs_atomic_state = os_atomic_load2o(&fb->fb_header,
551 fbh_bank.fbb_state.fbs_atomic_state, relaxed);
552 }
553 return;
554
555 success:
556 if (memcmp(&push_reply, &FIREHOSE_PUSH_REPLY_CORRUPTED,
557 sizeof(push_reply)) == 0) {
558 // TODO: find out the actual cause and log it
559 DISPATCH_CLIENT_CRASH(0, "Memory corruption in the logging buffers");
560 }
561
562 if (for_io) {
563 os_atomic_inc2o(&fb->fb_header, fbh_bank.fbb_io_sync_pushes, relaxed);
564 } else {
565 os_atomic_inc2o(&fb->fb_header, fbh_bank.fbb_mem_sync_pushes, relaxed);
566 }
567 // TODO <rdar://problem/22963876>
568 //
569 // use fbb_*_flushes and fbb_*_sync_pushes to decide to dynamically
570 // allow using more buffers, if not under memory pressure.
571 //
572         // There is only a point for multithreaded clients if:
573 // - enough samples (total_flushes above some limits)
574 // - the ratio is really bad (a push per cycle is definitely a problem)
575 return firehose_client_merge_updates(fb, false, push_reply, state_out);
576 }
577
578 kern_return_t
579 firehose_client_push_reply(mach_port_t req_port OS_UNUSED,
580 kern_return_t rtc, firehose_push_reply_t push_reply OS_UNUSED)
581 {
582 DISPATCH_INTERNAL_CRASH(rtc, "firehose_push_reply should never be sent "
583 "to the buffer receive port");
584 }
585
586 kern_return_t
587 firehose_client_push_notify_async(mach_port_t server_port OS_UNUSED,
588 firehose_push_reply_t push_reply)
589 {
590 // see _dispatch_source_merge_mach_msg_direct
591 dispatch_queue_t dq = _dispatch_queue_get_current();
592 firehose_buffer_t fb = dispatch_get_context(dq);
593 firehose_client_merge_updates(fb, true, push_reply, NULL);
594 return KERN_SUCCESS;
595 }
596
597 #endif // !KERNEL
598 #pragma mark -
599 #pragma mark Buffer handling
600
601 #ifndef KERNEL
602 void
603 firehose_buffer_update_limits(firehose_buffer_t fb)
604 {
605 dispatch_unfair_lock_t fbb_lock = &fb->fb_header.fbh_bank.fbb_lock;
606 _dispatch_unfair_lock_lock(fbb_lock);
607 firehose_buffer_update_limits_unlocked(fb);
608 _dispatch_unfair_lock_unlock(fbb_lock);
609 }
610 #endif // !KERNEL
611
612 OS_ALWAYS_INLINE
613 static inline firehose_tracepoint_t
614 firehose_buffer_chunk_init(firehose_buffer_chunk_t fbc,
615 firehose_tracepoint_query_t ask, uint8_t **privptr)
616 {
617 const uint16_t ft_size = offsetof(struct firehose_tracepoint_s, ft_data);
618
619 uint16_t pub_offs = offsetof(struct firehose_buffer_chunk_s, fbc_data);
620 uint16_t priv_offs = FIREHOSE_BUFFER_CHUNK_SIZE;
621
622 pub_offs += roundup(ft_size + ask->pubsize, 8);
623 priv_offs -= ask->privsize;
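        // Worked example (illustrative; assumes ft_data starts right after the
        // 24-byte tracepoint header asserted above): for pubsize = 16 and
        // privsize = 32, pub_offs ends up at offsetof(..., fbc_data) +
        // roundup(24 + 16, 8) = offsetof(..., fbc_data) + 40, while priv_offs
        // becomes 4096 - 32 = 4064: tracepoints grow up from the bottom of the
        // chunk and private data is carved off the top.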
624
625 if (fbc->fbc_pos.fbc_atomic_pos) {
626 // Needed for process death handling (recycle-reuse):
627 // No atomic fences required, we merely want to make sure the observers
628 // will see memory effects in program (asm) order.
629 // 1. the payload part of the chunk is cleared completely
630 // 2. the chunk is marked as reused
631 // This ensures that if we don't see a reference to a chunk in the ring
632 // and it is dirty, when crawling the chunk, we don't see remnants of
633 // other tracepoints
634 //
635 // We only do that when the fbc_pos is non zero, because zero means
636 // we just faulted the chunk, and the kernel already bzero-ed it.
637 bzero(fbc->fbc_data, sizeof(fbc->fbc_data));
638 }
639 dispatch_compiler_barrier();
640 // <rdar://problem/23562733> boot starts mach absolute time at 0, and
641 // wrapping around to values above UINT64_MAX - FIREHOSE_STAMP_SLOP
642 // breaks firehose_buffer_stream_flush() assumptions
643 if (ask->stamp > FIREHOSE_STAMP_SLOP) {
644 fbc->fbc_timestamp = ask->stamp - FIREHOSE_STAMP_SLOP;
645 } else {
646 fbc->fbc_timestamp = 0;
647 }
648 fbc->fbc_pos = (firehose_buffer_pos_u){
649 .fbc_next_entry_offs = pub_offs,
650 .fbc_private_offs = priv_offs,
651 .fbc_refcnt = 1,
652 .fbc_qos_bits = firehose_buffer_qos_bits_propagate(),
653 .fbc_stream = ask->stream,
654 .fbc_flag_io = ask->for_io,
655 };
656
657 if (privptr) {
658 *privptr = fbc->fbc_start + priv_offs;
659 }
660 return (firehose_tracepoint_t)fbc->fbc_data;
661 }
662
663 OS_NOINLINE
664 static firehose_tracepoint_t
665 firehose_buffer_stream_chunk_install(firehose_buffer_t fb,
666 firehose_tracepoint_query_t ask, uint8_t **privptr, uint16_t ref)
667 {
668 firehose_stream_state_u state, new_state;
669 firehose_tracepoint_t ft;
670 firehose_buffer_stream_t fbs = &fb->fb_header.fbh_stream[ask->stream];
671 uint64_t stamp_and_len;
672
673 if (fastpath(ref)) {
674 firehose_buffer_chunk_t fbc = firehose_buffer_ref_to_chunk(fb, ref);
675 ft = firehose_buffer_chunk_init(fbc, ask, privptr);
676 // Needed for process death handling (tracepoint-begin):
677 // write the length before making the chunk visible
678 stamp_and_len = ask->stamp - fbc->fbc_timestamp;
679 stamp_and_len |= (uint64_t)ask->pubsize << 48;
680 os_atomic_store2o(ft, ft_stamp_and_length, stamp_and_len, relaxed);
681
682 if (ask->stream == firehose_stream_metadata) {
683 os_atomic_or2o(fb, fb_header.fbh_bank.fbb_metadata_bitmap,
684 1ULL << ref, relaxed);
685 }
686 // release barrier to make the chunk init visible
687 os_atomic_rmw_loop2o(fbs, fbs_state.fss_atomic_state,
688 state.fss_atomic_state, new_state.fss_atomic_state, release, {
689                 // We use a generation counter to prevent a theoretical ABA problem:
690                 // a thread could try to acquire a tracepoint in a chunk, fail to do
691                 // so, mark the chunk as to-be-pushed, enqueue it, and then be
692                 // preempted.
693                 //
694                 // If that thread then sleeps for a long time before acquiring the
695                 // allocator bit and uninstalling the chunk, it may succeed even
696                 // though the chunk has meanwhile cycled all the way back to being
697                 // installed, effectively hiding that unflushed chunk and leaking it.
698                 //
699                 // The generation counter prevents the uninstallation from spuriously
700                 // succeeding when the installed chunk is merely a re-incarnation.
701 new_state = (firehose_stream_state_u){
702 .fss_current = ref,
703 .fss_generation = state.fss_generation + 1,
704 };
705 });
706 } else {
707         // the allocator gave up, just clear the allocator + waiter bits
708 firehose_stream_state_u mask = { .fss_allocator = ~0u, };
709 state.fss_atomic_state = os_atomic_and_orig2o(fbs,
710 fbs_state.fss_atomic_state, ~mask.fss_atomic_state, relaxed);
711 ft = NULL;
712 }
713
714 #ifndef KERNEL
715 if (unlikely(state.fss_gate.dgl_lock != _dispatch_tid_self())) {
716 _dispatch_gate_broadcast_slow(&fbs->fbs_state.fss_gate,
717 state.fss_gate.dgl_lock);
718 }
719
720 if (unlikely(state.fss_current == FIREHOSE_STREAM_STATE_PRISTINE)) {
721 firehose_buffer_update_limits(fb);
722 }
723 #endif // !KERNEL
724
725 // pairs with the one in firehose_buffer_tracepoint_reserve()
726 __firehose_critical_region_leave();
727 return ft;
728 }
729
730 #ifndef KERNEL
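// Grow path: under the bank lock, find the first chunk ref below `limit`
// that is not yet marked used in the bitmap, claim it, and return it;
// returns 0 when every chunk below the limit is already in use.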
731 OS_ALWAYS_INLINE
732 static inline uint16_t
733 firehose_buffer_ring_try_grow(firehose_buffer_bank_t fbb, uint16_t limit)
734 {
735 uint16_t ref = 0;
736 uint64_t bitmap;
737
738 _dispatch_unfair_lock_lock(&fbb->fbb_lock);
739 bitmap = ~(fbb->fbb_bitmap | (~0ULL << limit));
740 if (bitmap) {
741 ref = firehose_bitmap_first_set(bitmap);
742 fbb->fbb_bitmap |= 1U << ref;
743 }
744 _dispatch_unfair_lock_unlock(&fbb->fbb_lock);
745 return ref;
746 }
747
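// Shrink path: a ref at or above the current fbs_max_ref limit is given
// back to the bitmap rather than reused, and once a whole madvise-aligned
// group of chunks is free, their pages are returned with MADV_FREE;
// returns 0 in that case, or the ref unchanged when it is still below the
// limit.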
748 OS_ALWAYS_INLINE
749 static inline uint16_t
750 firehose_buffer_ring_shrink(firehose_buffer_t fb, uint16_t ref)
751 {
752 const size_t madv_size =
753 FIREHOSE_BUFFER_CHUNK_SIZE * FIREHOSE_BUFFER_MADVISE_CHUNK_COUNT;
754 const size_t madv_mask =
755 (1ULL << FIREHOSE_BUFFER_MADVISE_CHUNK_COUNT) - 1;
756
757 dispatch_unfair_lock_t fbb_lock = &fb->fb_header.fbh_bank.fbb_lock;
758 uint64_t bitmap;
759
760 _dispatch_unfair_lock_lock(fbb_lock);
761 if (ref < fb->fb_header.fbh_bank.fbb_limits.fbs_max_ref) {
762 goto done;
763 }
764
765 bitmap = (fb->fb_header.fbh_bank.fbb_bitmap &= ~(1UL << ref));
766 ref &= ~madv_mask;
767 if ((bitmap & (madv_mask << ref)) == 0) {
768 // if MADVISE_WIDTH consecutive chunks are free, madvise them free
769 madvise(firehose_buffer_ref_to_chunk(fb, ref), madv_size, MADV_FREE);
770 }
771 ref = 0;
772 done:
773 _dispatch_unfair_lock_unlock(fbb_lock);
774 return ref;
775 }
776 #endif // !KERNEL
777
778 OS_NOINLINE
779 void
780 firehose_buffer_ring_enqueue(firehose_buffer_t fb, uint16_t ref)
781 {
782 firehose_buffer_chunk_t fbc = firehose_buffer_ref_to_chunk(fb, ref);
783 uint16_t volatile *fbh_ring;
784 uint16_t volatile *fbh_ring_head;
785 uint16_t head, gen, dummy, idx;
786 firehose_buffer_pos_u fbc_pos = fbc->fbc_pos;
787 bool for_io = fbc_pos.fbc_flag_io;
788
789 if (for_io) {
790 fbh_ring = fb->fb_header.fbh_io_ring;
791 fbh_ring_head = &fb->fb_header.fbh_ring_io_head;
792 } else {
793 fbh_ring = fb->fb_header.fbh_mem_ring;
794 fbh_ring_head = &fb->fb_header.fbh_ring_mem_head;
795 }
796
797 #ifdef KERNEL
798 // The algorithm in the kernel is simpler:
799 // 1. reserve a write position for the head
800 // 2. store the new reference at that position
801 // Enqueuers can't starve each other that way.
802 //
803 // However, the dequeuers now have to sometimes wait for the value written
804 // in the ring to appear and have to spin, which is okay since the kernel
805 // disables preemption around these two consecutive atomic operations.
806 // See firehose_client_drain.
807 __firehose_critical_region_enter();
808 head = os_atomic_inc_orig(fbh_ring_head, relaxed);
809 gen = head & FIREHOSE_RING_POS_GEN_MASK;
810 idx = head & FIREHOSE_RING_POS_IDX_MASK;
811
812 while (unlikely(!os_atomic_cmpxchgvw(&fbh_ring[idx], gen, gen | ref, &dummy,
813 relaxed))) {
814                 // this can only ever happen if a recycler is slow, which requires
815                 // having enough cores (>5 for I/O, e.g.)
816 _dispatch_wait_until(fbh_ring[idx] == gen);
817 }
818 __firehose_critical_region_leave();
819 __firehose_buffer_push_to_logd(fb, for_io);
820 #else
821 // The algorithm is:
822 // 1. read the head position
823 // 2. cmpxchg head.gen with the (head.gen | ref) at head.idx
824 // 3. if it fails wait until either the head cursor moves,
825 // or the cell becomes free
826 //
827 // The most likely stall at (3) is because another enqueuer raced us
828         // and made the cell non-empty.
829 //
830 // The alternative is to reserve the enqueue slot with an atomic inc.
831 // Then write the ref into the ring. This would be much simpler as the
832 // generation packing wouldn't be required (though setting the ring cell
833 // would still need a cmpxchg loop to avoid clobbering values of slow
834 // dequeuers)
835 //
836 // But then that means that flushers (logd) could be starved until that
837 // finishes, and logd cannot be held forever (that could even be a logd
838 // DoS from malicious programs). Meaning that logd would stop draining
839 // buffer queues when encountering that issue, leading the program to be
840 // stuck in firehose_client_push() apparently waiting on logd, while
841 // really it's waiting on itself. It's better for the scheduler if we
842 // make it clear that we're waiting on ourselves!
843
844 head = os_atomic_load(fbh_ring_head, relaxed);
845 for (;;) {
846 gen = head & FIREHOSE_RING_POS_GEN_MASK;
847 idx = head & FIREHOSE_RING_POS_IDX_MASK;
848
849                 // if a thread is preempted here for GEN_MASK worth of ring rotations,
850                 // its cmpxchg below could still succeed and produce a bogus enqueue
851                 // (a confused enqueuer)
852 if (fastpath(os_atomic_cmpxchgvw(&fbh_ring[idx], gen, gen | ref, &dummy,
853 relaxed))) {
854 if (fastpath(os_atomic_cmpxchgv(fbh_ring_head, head, head + 1,
855 &head, release))) {
856 __firehose_critical_region_leave();
857 break;
858 }
859 // this thread is a confused enqueuer, need to undo enqueue
860 os_atomic_store(&fbh_ring[idx], gen, relaxed);
861 continue;
862 }
863
864 _dispatch_wait_until(({
865 // wait until either the head moves (another enqueuer is done)
866 // or (not very likely) a recycler is very slow
867 // or (very unlikely) the confused thread undoes its enqueue
868 uint16_t old_head = head;
869 head = *fbh_ring_head;
870 head != old_head || fbh_ring[idx] == gen;
871 }));
872 }
873
874 pthread_priority_t pp = fbc_pos.fbc_qos_bits;
875 pp <<= _PTHREAD_PRIORITY_QOS_CLASS_SHIFT;
876 firehose_client_send_push_async(fb, _pthread_qos_class_decode(pp, NULL, NULL),
877 for_io);
878 #endif
879 }
880
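// Recycle path: advance the ring tail past one flushed entry (memory ring
// preferred over I/O), wait for the corresponding ring cell to be
// populated, mark the chunk as "void & full" so crawlers ignore it, then
// clear the cell with a bumped generation and return the ref; returns 0
// when nothing has been flushed yet.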
881 OS_ALWAYS_INLINE
882 static inline uint16_t
883 firehose_buffer_ring_try_recycle(firehose_buffer_t fb)
884 {
885 firehose_ring_tail_u pos, old;
886 uint16_t volatile *fbh_ring;
887 uint16_t gen, ref, entry, tail;
888 firehose_buffer_chunk_t fbc;
889 bool for_io;
890
891 os_atomic_rmw_loop2o(&fb->fb_header, fbh_ring_tail.frp_atomic_tail,
892 old.frp_atomic_tail, pos.frp_atomic_tail, relaxed, {
893 pos = old;
894 if (fastpath(old.frp_mem_tail != old.frp_mem_flushed)) {
895 pos.frp_mem_tail++;
896 } else if (fastpath(old.frp_io_tail != old.frp_io_flushed)) {
897 pos.frp_io_tail++;
898 } else {
899 os_atomic_rmw_loop_give_up(return 0);
900 }
901 });
902
903         // there's virtually no chance that the lack of an acquire barrier above
904         // lets us read a value from the ring so stale that it's still an Empty
905         // marker. For correctness purposes, use a cheap loop that should never
906         // really loop, instead of an acquire barrier in the cmpxchg above.
907 for_io = (pos.frp_io_tail != old.frp_io_tail);
908 if (for_io) {
909 fbh_ring = fb->fb_header.fbh_io_ring;
910 tail = old.frp_io_tail & FIREHOSE_RING_POS_IDX_MASK;
911 } else {
912 fbh_ring = fb->fb_header.fbh_mem_ring;
913 tail = old.frp_mem_tail & FIREHOSE_RING_POS_IDX_MASK;
914 }
915 _dispatch_wait_until((entry = fbh_ring[tail]) & FIREHOSE_RING_POS_IDX_MASK);
916
917 // Needed for process death handling (recycle-dequeue):
918 // No atomic fences required, we merely want to make sure the observers
919 // will see memory effects in program (asm) order.
920 // 1. the chunk is marked as "void&full" (clobbering the pos with FULL_BIT)
921 // 2. then we remove any reference to the chunk from the ring
922 // This ensures that if we don't see a reference to a chunk in the ring
923 // and it is dirty, it is a chunk being written to that needs a flush
924 gen = (entry & FIREHOSE_RING_POS_GEN_MASK) + FIREHOSE_RING_POS_GEN_INC;
925 ref = entry & FIREHOSE_RING_POS_IDX_MASK;
926 fbc = firehose_buffer_ref_to_chunk(fb, ref);
927
928 if (!for_io && fbc->fbc_pos.fbc_stream == firehose_stream_metadata) {
929 os_atomic_and2o(fb, fb_header.fbh_bank.fbb_metadata_bitmap,
930 ~(1ULL << ref), relaxed);
931 }
932 os_atomic_store2o(fbc, fbc_pos.fbc_atomic_pos,
933 FIREHOSE_BUFFER_POS_FULL_BIT, relaxed);
934 dispatch_compiler_barrier();
935 os_atomic_store(&fbh_ring[tail], gen | 0, relaxed);
936 return ref;
937 }
938
939 #ifndef KERNEL
940 OS_NOINLINE
941 static firehose_tracepoint_t
942 firehose_buffer_tracepoint_reserve_slow2(firehose_buffer_t fb,
943 firehose_tracepoint_query_t ask, uint8_t **privptr, uint16_t ref)
944 {
945 const uint64_t bank_unavail_mask = FIREHOSE_BANK_UNAVAIL_MASK(ask->for_io);
946 firehose_buffer_bank_t const fbb = &fb->fb_header.fbh_bank;
947 firehose_bank_state_u state;
948 uint16_t fbs_max_ref;
949
950 // first wait for our bank to have space, if needed
951 if (!fastpath(ask->is_bank_ok)) {
952 state.fbs_atomic_state =
953 os_atomic_load2o(fbb, fbb_state.fbs_atomic_state, relaxed);
954 while (state.fbs_atomic_state & bank_unavail_mask) {
955 firehose_client_send_push(fb, ask->for_io, &state);
956 if (slowpath(fb->fb_header.fbh_sendp == MACH_PORT_DEAD)) {
957 // logd was unloaded, give up
958 return NULL;
959 }
960 }
961 ask->is_bank_ok = true;
962 fbs_max_ref = state.fbs_max_ref;
963 } else {
964 fbs_max_ref = fbb->fbb_state.fbs_max_ref;
965 }
966
967 // second, if we were passed a chunk, we may need to shrink
968 if (slowpath(ref)) {
969 goto try_shrink;
970 }
971
972 // third, wait for a chunk to come up, and if not, wait on the daemon
973 for (;;) {
974 if (fastpath(ref = firehose_buffer_ring_try_recycle(fb))) {
975 try_shrink:
976 if (slowpath(ref >= fbs_max_ref)) {
977 ref = firehose_buffer_ring_shrink(fb, ref);
978 if (!ref) {
979 continue;
980 }
981 }
982 break;
983 }
984 if (fastpath(ref = firehose_buffer_ring_try_grow(fbb, fbs_max_ref))) {
985 break;
986 }
987 firehose_client_send_push(fb, ask->for_io, NULL);
988 if (slowpath(fb->fb_header.fbh_sendp == MACH_PORT_DEAD)) {
989 // logd was unloaded, give up
990 break;
991 }
992 }
993
994 return firehose_buffer_stream_chunk_install(fb, ask, privptr, ref);
995 }
996 #else
997 static inline dispatch_lock
998 _dispatch_gate_lock_load_seq_cst(dispatch_gate_t l)
999 {
1000 return os_atomic_load(&l->dgl_lock, seq_cst);
1001 }
1002 OS_NOINLINE
1003 static void
1004 _dispatch_gate_wait(dispatch_gate_t l, uint32_t flags)
1005 {
1006 (void)flags;
1007 _dispatch_wait_until(_dispatch_gate_lock_load_seq_cst(l) == 0);
1008 }
1009 #endif // KERNEL
1010
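// Slow path of firehose_buffer_tracepoint_reserve(): acquire a slot in the
// memory or I/O bank, then obtain a chunk (recycling a flushed one, growing
// into an unused one, or, in userspace, synchronously pushing to logd until
// one frees up) and install it as the stream's current chunk.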
1011 firehose_tracepoint_t
1012 firehose_buffer_tracepoint_reserve_slow(firehose_buffer_t fb,
1013 firehose_tracepoint_query_t ask, uint8_t **privptr)
1014 {
1015 const unsigned for_io = ask->for_io;
1016 const firehose_buffer_bank_t fbb = &fb->fb_header.fbh_bank;
1017 firehose_bank_state_u state;
1018 uint16_t ref = 0;
1019
1020 uint64_t unavail_mask = FIREHOSE_BANK_UNAVAIL_MASK(for_io);
1021 #ifndef KERNEL
1022 state.fbs_atomic_state = os_atomic_add_orig2o(fbb,
1023 fbb_state.fbs_atomic_state, FIREHOSE_BANK_INC(for_io), relaxed);
1024 if (fastpath(!(state.fbs_atomic_state & unavail_mask))) {
1025 ask->is_bank_ok = true;
1026 if (fastpath(ref = firehose_buffer_ring_try_recycle(fb))) {
1027 if (fastpath(ref < state.fbs_max_ref)) {
1028 return firehose_buffer_stream_chunk_install(fb, ask,
1029 privptr, ref);
1030 }
1031 }
1032 }
1033 return firehose_buffer_tracepoint_reserve_slow2(fb, ask, privptr, ref);
1034 #else
1035 firehose_bank_state_u value;
1036 ask->is_bank_ok = os_atomic_rmw_loop2o(fbb, fbb_state.fbs_atomic_state,
1037 state.fbs_atomic_state, value.fbs_atomic_state, relaxed, {
1038 value = state;
1039 if (slowpath((value.fbs_atomic_state & unavail_mask) != 0)) {
1040 os_atomic_rmw_loop_give_up(break);
1041 }
1042 value.fbs_atomic_state += FIREHOSE_BANK_INC(for_io);
1043 });
1044 if (ask->is_bank_ok) {
1045 ref = firehose_buffer_ring_try_recycle(fb);
1046 if (slowpath(ref == 0)) {
1047 // the kernel has no overlap between I/O and memory chunks,
1048                         // so having an available bank slot means we should be able to recycle
1049 DISPATCH_INTERNAL_CRASH(0, "Unable to recycle a chunk");
1050 }
1051 }
1052 // rdar://25137005 installing `0` unlocks the allocator
1053 return firehose_buffer_stream_chunk_install(fb, ask, privptr, ref);
1054 #endif // KERNEL
1055 }
1056
1057 #ifdef KERNEL
1058 firehose_tracepoint_t
1059 __firehose_buffer_tracepoint_reserve(uint64_t stamp, firehose_stream_t stream,
1060 uint16_t pubsize, uint16_t privsize, uint8_t **privptr)
1061 {
1062 firehose_buffer_t fb = kernel_firehose_buffer;
1063 if (!fastpath(fb)) {
1064 return NULL;
1065 }
1066 return firehose_buffer_tracepoint_reserve(fb, stamp, stream, pubsize,
1067 privsize, privptr);
1068 }
1069
1070 firehose_tracepoint_t
1071 __firehose_buffer_tracepoint_reserve_with_chunk(firehose_buffer_chunk_t fbc,
1072 uint64_t stamp, firehose_stream_t stream,
1073 uint16_t pubsize, uint16_t privsize, uint8_t **privptr)
1074 {
1075
1076 firehose_tracepoint_t ft;
1077 long result;
1078
1079 result = firehose_buffer_chunk_try_reserve(fbc, stamp, stream,
1080 pubsize, privsize, privptr);
1081 if (fastpath(result > 0)) {
1082 ft = (firehose_tracepoint_t)(fbc->fbc_start + result);
1083 stamp -= fbc->fbc_timestamp;
1084 stamp |= (uint64_t)pubsize << 48;
1085 // Needed for process death handling (tracepoint-begin)
1086 // see firehose_buffer_stream_chunk_install
1087 os_atomic_store2o(ft, ft_stamp_and_length, stamp, relaxed);
1088 dispatch_compiler_barrier();
1089 return ft;
1090 }
1091 else {
1092 return NULL;
1093 }
1094 }
1095
1096 firehose_buffer_t
1097 __firehose_buffer_create(size_t *size)
1098 {
1099 if (!kernel_firehose_buffer) {
1100 kernel_firehose_buffer = firehose_buffer_create(MACH_PORT_NULL, 0, 0);
1101 }
1102
1103 if (size) {
1104 *size = FIREHOSE_BUFFER_KERNEL_CHUNK_COUNT * FIREHOSE_BUFFER_CHUNK_SIZE;
1105 }
1106 return kernel_firehose_buffer;
1107 }
1108
1109 void
1110 __firehose_buffer_tracepoint_flush(firehose_tracepoint_t ft,
1111 firehose_tracepoint_id_u ftid)
1112 {
1113 return firehose_buffer_tracepoint_flush(kernel_firehose_buffer, ft, ftid);
1114 }
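// A minimal sketch of how a kernel-side caller might pair the reserve and
// flush SPIs above (illustrative only; `stamp`, `stream`, `ftid` and the
// payload buffers are assumed to be supplied by the caller):
//
//	uint8_t *priv = NULL;
//	firehose_tracepoint_t ft = __firehose_buffer_tracepoint_reserve(stamp,
//			stream, pubsize, privsize, &priv);
//	if (ft) {
//		memcpy(ft->ft_data, pub_payload, pubsize);
//		if (privsize) memcpy(priv, priv_payload, privsize);
//		__firehose_buffer_tracepoint_flush(ft, ftid);
//	}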
1115
1116 void
1117 __firehose_buffer_tracepoint_flush_chunk(firehose_buffer_chunk_t fbc,
1118 firehose_tracepoint_t ft, firehose_tracepoint_id_u ftid)
1119 {
1120 firehose_buffer_pos_u pos;
1121
1122 // Needed for process death handling (tracepoint-flush):
1123 // We want to make sure the observers
1124 // will see memory effects in program (asm) order.
1125 // 1. write all the data to the tracepoint
1126 // 2. write the tracepoint ID, so that seeing it means the tracepoint
1127 // is valid
1128 ft->ft_thread = thread_tid(current_thread());
1129
1130 // release barrier makes the log writes visible
1131 os_atomic_store2o(ft, ft_id.ftid_value, ftid.ftid_value, release);
1132 pos.fbc_atomic_pos = os_atomic_sub2o(fbc, fbc_pos.fbc_atomic_pos,
1133 FIREHOSE_BUFFER_POS_REFCNT_INC, relaxed);
1134 return;
1135 }
1136
1137 void
1138 __firehose_merge_updates(firehose_push_reply_t update)
1139 {
1140 firehose_buffer_t fb = kernel_firehose_buffer;
1141 if (fastpath(fb)) {
1142 firehose_client_merge_updates(fb, true, update, NULL);
1143 }
1144 }
1145 #endif // KERNEL
1146
1147 #endif // OS_FIREHOSE_SPI