osfmk/kern/thread_call.c

   1 /*
   2  * Copyright (c) 1993-1995, 1999-2008 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28
  29 #include <mach/mach_types.h>
  30 #include <mach/thread_act.h>
  31
  32 #include <kern/kern_types.h>
  33 #include <kern/zalloc.h>
  34 #include <kern/sched_prim.h>
  35 #include <kern/clock.h>
  36 #include <kern/task.h>
  37 #include <kern/thread.h>
  38 #include <kern/waitq.h>
  39 #include <kern/ledger.h>
  40 #include <kern/policy_internal.h>
  41
  42 #include <vm/vm_pageout.h>
  43
  44 #include <kern/thread_call.h>
  45 #include <kern/call_entry.h>
  46 #include <kern/timer_call.h>
  47
  48 #include <libkern/OSAtomic.h>
  49 #include <kern/timer_queue.h>
  50
  51 #include <sys/kdebug.h>
  52 #if CONFIG_DTRACE
  53 #include <mach/sdt.h>
  54 #endif
  55 #include <machine/machine_routines.h>
  56
  57 static zone_t                   thread_call_zone;
  58 static struct waitq             daemon_waitq;
  59
/*
 * Time base of a delayed thread call.  Each group keeps one delayed queue
 * and one hard timer per flavor.  thread_call_get_flavor() maps calls that
 * carry THREAD_CALL_CONTINUOUS to TCF_CONTINUOUS and all others to
 * TCF_ABSOLUTE.
 */
typedef enum {
        /* Soft deadline is handed to the hard timer unchanged */
        TCF_ABSOLUTE    = 0,
        /* Soft deadline is converted with continuoustime_to_absolutetime() before arming */
        TCF_CONTINUOUS  = 1,
        /* Number of flavors; sizes delayed_queues[] and delayed_timers[] */
        TCF_COUNT       = 2,
} thread_call_flavor_t;
  65
/* Bits stored in thread_call_group.flags. */
typedef enum {
        /* No special behavior */
        TCG_NONE                = 0x0,
        /*
         * Tested by group_isparallel().  Parallel groups may grow beyond one
         * active thread (see thread_call_group_should_add_thread()).
         */
        TCG_PARALLEL            = 0x1,
        /* NOTE(review): presumably tracks whether dealloc_timer is armed -- not used in this part of the file, confirm */
        TCG_DEALLOC_ACTIVE      = 0x2,
} thread_call_group_flags_t;
  71
/*
 * Per-group state for the thread call subsystem, plus the static table of
 * groups indexed by thread_call_index_t.  Only the name, thread priority,
 * target thread count and flags are initialized statically here; queues,
 * timers and the idle waitq are set up by thread_call_group_setup().
 */
static struct thread_call_group {
        /* Short name, used in worker thread names and in panic messages */
        const char *            tcg_name;

        /* Calls ready to run, and the number of entries on that queue */
        queue_head_t            pending_queue;
        uint32_t                pending_count;

        /* Calls waiting for a deadline and the hard timers driving them, one per flavor */
        queue_head_t            delayed_queues[TCF_COUNT];
        timer_call_data_t       delayed_timers[TCF_COUNT];

        /* Timer whose callback is thread_call_dealloc_timer() */
        timer_call_data_t       dealloc_timer;

        /* Where idle worker threads park (initialized with SYNC_POLICY_REVERSED) */
        struct waitq            idle_waitq;
        /* Worker thread counts by state; their sum is the group's total thread count */
        uint32_t                idle_count, active_count, blocked_count;

        /* Priority passed to kernel_thread_start_priority() for this group's workers */
        uint32_t                tcg_thread_pri;
        /* Parallel groups add threads while active_count is below this value */
        uint32_t                target_thread_count;
        /* NOTE(review): not referenced in this part of the file -- presumably when the group last had an idle thread; confirm */
        uint64_t                idle_timestamp;

        /* TCG_* bits */
        thread_call_group_flags_t flags;

} thread_call_groups[THREAD_CALL_INDEX_MAX] = {
        [THREAD_CALL_INDEX_HIGH] = {
                .tcg_name               = "high",
                .tcg_thread_pri         = BASEPRI_PREEMPT_HIGH,
                .target_thread_count    = 4,
                .flags                  = TCG_NONE,
        },
        [THREAD_CALL_INDEX_KERNEL] = {
                .tcg_name               = "kernel",
                .tcg_thread_pri         = BASEPRI_KERNEL,
                .target_thread_count    = 1,
                .flags                  = TCG_PARALLEL,
        },
        [THREAD_CALL_INDEX_USER] = {
                .tcg_name               = "user",
                .tcg_thread_pri         = BASEPRI_DEFAULT,
                .target_thread_count    = 1,
                .flags                  = TCG_PARALLEL,
        },
        [THREAD_CALL_INDEX_LOW] = {
                .tcg_name               = "low",
                .tcg_thread_pri         = MAXPRI_THROTTLE,
                .target_thread_count    = 1,
                .flags                  = TCG_PARALLEL,
        },
        [THREAD_CALL_INDEX_KERNEL_HIGH] = {
                .tcg_name               = "kernel-high",
                .tcg_thread_pri         = BASEPRI_PREEMPT,
                .target_thread_count    = 2,
                .flags                  = TCG_NONE,
        },
        [THREAD_CALL_INDEX_QOS_UI] = {
                .tcg_name               = "qos-ui",
                .tcg_thread_pri         = BASEPRI_FOREGROUND,
                .target_thread_count    = 1,
                .flags                  = TCG_NONE,
        },
        [THREAD_CALL_INDEX_QOS_IN] = {
                .tcg_name               = "qos-in",
                .tcg_thread_pri         = BASEPRI_USER_INITIATED,
                .target_thread_count    = 1,
                .flags                  = TCG_NONE,
        },
        [THREAD_CALL_INDEX_QOS_UT] = {
                .tcg_name               = "qos-ut",
                .tcg_thread_pri         = BASEPRI_UTILITY,
                .target_thread_count    = 1,
                .flags                  = TCG_NONE,
        },
};
 142
/* Handle type used throughout this file for a thread call group */
typedef struct thread_call_group        *thread_call_group_t;

/* Number of entries in the static internal_call_storage[] pool */
#define INTERNAL_CALL_COUNT             768
/* Converted to absolute-time units in thread_call_initialize() */
#define THREAD_CALL_DEALLOC_INTERVAL_NS (5 * NSEC_PER_MSEC) /* 5 ms */
/* A parallel group grows when pending_count exceeds this many calls per active thread ... */
#define THREAD_CALL_ADD_RATIO           4
/* ... and only while sched_mach_factor is below this cap */
#define THREAD_CALL_MACH_FACTOR_CAP     3
/* thread_call_group_should_add_thread() panics once a group has this many threads */
#define THREAD_CALL_GROUP_MAX_THREADS   500

/* Set TRUE in thread_call_initialize() before the daemon thread is started */
static boolean_t                thread_call_daemon_awake;
/* Backing storage for internal (function-only) thread calls */
static thread_call_data_t       internal_call_storage[INTERNAL_CALL_COUNT];
/* Free list of internal_call_storage[] entries, and how many are on it */
static queue_head_t             thread_call_internal_queue;
int                                             thread_call_internal_queue_count = 0;
/* THREAD_CALL_DEALLOC_INTERVAL_NS in absolute-time units */
static uint64_t                 thread_call_dealloc_interval_abs;
 156
/*
 * Forward declarations for the file-local helpers.  The leading-underscore
 * routines manipulate the queues and are documented at their definitions
 * as being called with the thread call lock held.
 */
static __inline__ thread_call_t _internal_call_allocate(thread_call_func_t func, thread_call_param_t param0);
static __inline__ void          _internal_call_release(thread_call_t call);
static __inline__ boolean_t     _pending_call_enqueue(thread_call_t call, thread_call_group_t group);
static boolean_t                _delayed_call_enqueue(thread_call_t call, thread_call_group_t group,
                                                      uint64_t deadline, thread_call_flavor_t flavor);
static __inline__ boolean_t     _call_dequeue(thread_call_t call, thread_call_group_t group);
static __inline__ void          thread_call_wake(thread_call_group_t group);
static void                     thread_call_daemon(void *arg);
static void                     thread_call_thread(thread_call_group_t group, wait_result_t wres);
static void                     thread_call_dealloc_timer(timer_call_param_t p0, timer_call_param_t p1);
static void                     thread_call_group_setup(thread_call_group_t group);
static void                     sched_call_thread(int type, thread_t thread);
static void                     thread_call_start_deallocate_timer(thread_call_group_t group);
static void                     thread_call_wait_locked(thread_call_t call, spl_t s);
static boolean_t                thread_call_wait_once_locked(thread_call_t call, spl_t s);

/*
 * Common back end of the delayed-enter entry points.  The function-only
 * wrappers pass call == NULL and supply alt_func/alt_param0 instead.
 */
static boolean_t                thread_call_enter_delayed_internal(thread_call_t call,
                                                thread_call_func_t alt_func, thread_call_param_t alt_param0,
                                                thread_call_param_t param1, uint64_t deadline,
                                                uint64_t leeway, unsigned int flags);

/* non-static so dtrace can find it rdar://problem/31156135&31379348 */
extern void thread_call_delayed_timer(timer_call_param_t p0, timer_call_param_t p1);
 180
/* Lock group and the single mutex protecting thread call state; both initialized in thread_call_initialize() */
lck_grp_t               thread_call_lck_grp;
lck_mtx_t               thread_call_lock_data;

/* Take the thread call lock via lck_mtx_lock_spin_always() */
#define thread_call_lock_spin()                 \
        lck_mtx_lock_spin_always(&thread_call_lock_data)

/* Release the thread call lock */
#define thread_call_unlock()                    \
        lck_mtx_unlock_always(&thread_call_lock_data)

/* Shorthand for the deadline stored in a thread call's embedded call entry */
#define tc_deadline tc_call.deadline

/* Defined elsewhere; not referenced in this part of the file */
extern boolean_t        mach_timer_coalescing_enabled;
 193
 194 static inline spl_t
 195 disable_ints_and_lock(void)
 196 {
 197         spl_t s = splsched();
 198         thread_call_lock_spin();
 199
 200         return s;
 201 }
 202
 203 static inline void
 204 enable_ints_and_unlock(spl_t s)
 205 {
 206         thread_call_unlock();
 207         splx(s);
 208 }
 209
 210 static inline boolean_t
 211 group_isparallel(thread_call_group_t group)
 212 {
 213         return ((group->flags & TCG_PARALLEL) != 0);
 214 }
 215
 216 static boolean_t
 217 thread_call_group_should_add_thread(thread_call_group_t group)
 218 {
 219         if ((group->active_count + group->blocked_count + group->idle_count) >= THREAD_CALL_GROUP_MAX_THREADS) {
 220                 panic("thread_call group '%s' reached max thread cap (%d): active: %d, blocked: %d, idle: %d",
 221                       group->tcg_name, THREAD_CALL_GROUP_MAX_THREADS,
 222                       group->active_count, group->blocked_count, group->idle_count);
 223         }
 224
 225         if (group_isparallel(group) == FALSE) {
 226                 if (group->pending_count > 0 && group->active_count == 0) {
 227                         return TRUE;
 228                 }
 229
 230                 return FALSE;
 231         }
 232
 233         if (group->pending_count > 0) {
 234                 if (group->idle_count > 0) {
 235                         return FALSE;
 236                 }
 237
 238                 uint32_t thread_count = group->active_count;
 239
 240                 /*
 241                  * Add a thread if either there are no threads,
 242                  * the group has fewer than its target number of
 243                  * threads, or the amount of work is large relative
 244                  * to the number of threads.  In the last case, pay attention
 245                  * to the total load on the system, and back off if
 246                  * it's high.
 247                  */
 248                 if ((thread_count == 0) ||
 249                         (thread_count < group->target_thread_count) ||
 250                         ((group->pending_count > THREAD_CALL_ADD_RATIO * thread_count) &&
 251                          (sched_mach_factor < THREAD_CALL_MACH_FACTOR_CAP))) {
 252                         return TRUE;
 253                 }
 254         }
 255
 256         return FALSE;
 257 }
 258
 259 /* Lock held */
 260 static inline thread_call_group_t
 261 thread_call_get_group(thread_call_t call)
 262 {
 263         thread_call_index_t index = call->tc_index;
 264
 265         assert(index >= 0 && index < THREAD_CALL_INDEX_MAX);
 266
 267         return &thread_call_groups[index];
 268 }
 269
 270 /* Lock held */
 271 static inline thread_call_flavor_t
 272 thread_call_get_flavor(thread_call_t call)
 273 {
 274         return (call->tc_flags & THREAD_CALL_CONTINUOUS) ? TCF_CONTINUOUS : TCF_ABSOLUTE;
 275 }
 276
 277 static void
 278 thread_call_group_setup(thread_call_group_t group)
 279 {
 280         queue_init(&group->pending_queue);
 281         queue_init(&group->delayed_queues[TCF_ABSOLUTE]);
 282         queue_init(&group->delayed_queues[TCF_CONTINUOUS]);
 283
 284         /* TODO: Consolidate to one hard timer for each group */
 285         timer_call_setup(&group->delayed_timers[TCF_ABSOLUTE],   thread_call_delayed_timer, group);
 286         timer_call_setup(&group->delayed_timers[TCF_CONTINUOUS], thread_call_delayed_timer, group);
 287         timer_call_setup(&group->dealloc_timer, thread_call_dealloc_timer, group);
 288
 289         /* Reverse the wait order so we re-use the most recently parked thread from the pool */
 290         waitq_init(&group->idle_waitq, SYNC_POLICY_REVERSED|SYNC_POLICY_DISABLE_IRQ);
 291 }
 292
 293 /*
 294  * Simple wrapper for creating threads bound to
 295  * thread call groups.
 296  */
 297 static kern_return_t
 298 thread_call_thread_create(
 299                 thread_call_group_t             group)
 300 {
 301         thread_t thread;
 302         kern_return_t result;
 303
 304         int thread_pri = group->tcg_thread_pri;
 305
 306         result = kernel_thread_start_priority((thread_continue_t)thread_call_thread,
 307                                               group, thread_pri, &thread);
 308         if (result != KERN_SUCCESS) {
 309                 return result;
 310         }
 311
 312         if (thread_pri <= BASEPRI_KERNEL) {
 313                 /*
 314                  * THREAD_CALL_PRIORITY_KERNEL and lower don't get to run to completion
 315                  * in kernel if there are higher priority threads available.
 316                  */
 317                 thread_set_eager_preempt(thread);
 318         }
 319
 320         char name[MAXTHREADNAMESIZE] = "";
 321
 322         int group_thread_count = group->idle_count + group->active_count + group->blocked_count;
 323
 324         snprintf(name, sizeof(name), "thread call %s #%d", group->tcg_name, group_thread_count);
 325         thread_set_thread_name(thread, name);
 326
 327         thread_deallocate(thread);
 328         return KERN_SUCCESS;
 329 }
 330
 331 /*
 332  *      thread_call_initialize:
 333  *
 334  *      Initialize this module, called
 335  *      early during system initialization.
 336  */
 337 void
 338 thread_call_initialize(void)
 339 {
 340         int tc_size = sizeof (thread_call_data_t);
 341         thread_call_zone = zinit(tc_size, 4096 * tc_size, 16 * tc_size, "thread_call");
 342         zone_change(thread_call_zone, Z_CALLERACCT, FALSE);
 343         zone_change(thread_call_zone, Z_NOENCRYPT, TRUE);
 344
 345         lck_grp_init(&thread_call_lck_grp, "thread_call", LCK_GRP_ATTR_NULL);
 346         lck_mtx_init(&thread_call_lock_data, &thread_call_lck_grp, LCK_ATTR_NULL);
 347
 348         nanotime_to_absolutetime(0, THREAD_CALL_DEALLOC_INTERVAL_NS, &thread_call_dealloc_interval_abs);
 349         waitq_init(&daemon_waitq, SYNC_POLICY_DISABLE_IRQ | SYNC_POLICY_FIFO);
 350
 351         for (uint32_t i = 0; i < THREAD_CALL_INDEX_MAX; i++)
 352                 thread_call_group_setup(&thread_call_groups[i]);
 353
 354         spl_t s = disable_ints_and_lock();
 355
 356         queue_init(&thread_call_internal_queue);
 357         for (
 358                         thread_call_t call = internal_call_storage;
 359                         call < &internal_call_storage[INTERNAL_CALL_COUNT];
 360                         call++) {
 361
 362                 enqueue_tail(&thread_call_internal_queue, &call->tc_call.q_link);
 363                 thread_call_internal_queue_count++;
 364         }
 365
 366         thread_call_daemon_awake = TRUE;
 367
 368         enable_ints_and_unlock(s);
 369
 370         thread_t thread;
 371         kern_return_t result;
 372
 373         result = kernel_thread_start_priority((thread_continue_t)thread_call_daemon,
 374                                               NULL, BASEPRI_PREEMPT_HIGH + 1, &thread);
 375         if (result != KERN_SUCCESS)
 376                 panic("thread_call_initialize");
 377
 378         thread_deallocate(thread);
 379 }
 380
 381 void
 382 thread_call_setup(
 383         thread_call_t                   call,
 384         thread_call_func_t              func,
 385         thread_call_param_t             param0)
 386 {
 387         bzero(call, sizeof(*call));
 388         call_entry_setup((call_entry_t)call, func, param0);
 389
 390         /* Thread calls default to the HIGH group unless otherwise specified */
 391         call->tc_index = THREAD_CALL_INDEX_HIGH;
 392
 393         /* THREAD_CALL_ALLOC not set, memory owned by caller */
 394 }
 395
 396 /*
 397  *      _internal_call_allocate:
 398  *
 399  *      Allocate an internal callout entry.
 400  *
 401  *      Called with thread_call_lock held.
 402  */
 403 static __inline__ thread_call_t
 404 _internal_call_allocate(thread_call_func_t func, thread_call_param_t param0)
 405 {
 406     thread_call_t               call;
 407
 408     if (queue_empty(&thread_call_internal_queue))
 409         panic("_internal_call_allocate");
 410
 411         call = qe_dequeue_head(&thread_call_internal_queue, struct thread_call, tc_call.q_link);
 412
 413     thread_call_internal_queue_count--;
 414
 415     thread_call_setup(call, func, param0);
 416     call->tc_refs = 0;
 417     call->tc_flags = 0; /* THREAD_CALL_ALLOC not set, do not free back to zone */
 418
 419     return (call);
 420 }
 421
 422 /*
 423  *      _internal_call_release:
 424  *
 425  *      Release an internal callout entry which
 426  *      is no longer pending (or delayed). This is
 427  *      safe to call on a non-internal entry, in which
 428  *      case nothing happens.
 429  *
 430  *      Called with thread_call_lock held.
 431  */
 432 static __inline__ void
 433 _internal_call_release(thread_call_t call)
 434 {
 435         if (call >= internal_call_storage &&
 436             call < &internal_call_storage[INTERNAL_CALL_COUNT]) {
 437                 assert((call->tc_flags & THREAD_CALL_ALLOC) == 0);
 438                 enqueue_head(&thread_call_internal_queue, &call->tc_call.q_link);
 439                 thread_call_internal_queue_count++;
 440         }
 441 }
 442
 443 /*
 444  *      _pending_call_enqueue:
 445  *
 446  *      Place an entry at the end of the
 447  *      pending queue, to be executed soon.
 448  *
 449  *      Returns TRUE if the entry was already
 450  *      on a queue.
 451  *
 452  *      Called with thread_call_lock held.
 453  */
 454 static __inline__ boolean_t
 455 _pending_call_enqueue(thread_call_t             call,
 456                       thread_call_group_t       group)
 457 {
 458         if ((THREAD_CALL_ONCE | THREAD_CALL_RUNNING)
 459           == (call->tc_flags & (THREAD_CALL_ONCE | THREAD_CALL_RUNNING))) {
 460                 call->tc_deadline = 0;
 461
 462                 uint32_t flags = call->tc_flags;
 463                 call->tc_flags |= THREAD_CALL_RESCHEDULE;
 464
 465                 if ((flags & THREAD_CALL_RESCHEDULE) != 0)
 466                         return (TRUE);
 467                 else
 468                         return (FALSE);
 469         }
 470
 471         queue_head_t *old_queue = call_entry_enqueue_tail(CE(call), &group->pending_queue);
 472
 473         if (old_queue == NULL) {
 474                 call->tc_submit_count++;
 475         } else if (old_queue != &group->pending_queue &&
 476                    old_queue != &group->delayed_queues[TCF_ABSOLUTE] &&
 477                    old_queue != &group->delayed_queues[TCF_CONTINUOUS]) {
 478                 panic("tried to move a thread call (%p) between groups (old_queue: %p)", call, old_queue);
 479         }
 480
 481         group->pending_count++;
 482
 483         thread_call_wake(group);
 484
 485         return (old_queue != NULL);
 486 }
 487
 488 /*
 489  *      _delayed_call_enqueue:
 490  *
 491  *      Place an entry on the delayed queue,
 492  *      after existing entries with an earlier
 493  *      (or identical) deadline.
 494  *
 495  *      Returns TRUE if the entry was already
 496  *      on a queue.
 497  *
 498  *      Called with thread_call_lock held.
 499  */
 500 static boolean_t
 501 _delayed_call_enqueue(
 502         thread_call_t           call,
 503         thread_call_group_t     group,
 504         uint64_t                deadline,
 505         thread_call_flavor_t    flavor)
 506 {
 507         if ((THREAD_CALL_ONCE | THREAD_CALL_RUNNING)
 508           == (call->tc_flags & (THREAD_CALL_ONCE | THREAD_CALL_RUNNING))) {
 509                 call->tc_deadline = deadline;
 510
 511                 uint32_t flags = call->tc_flags;
 512                 call->tc_flags |= THREAD_CALL_RESCHEDULE;
 513
 514                 if ((flags & THREAD_CALL_RESCHEDULE) != 0)
 515                         return (TRUE);
 516                 else
 517                         return (FALSE);
 518         }
 519
 520         queue_head_t *old_queue = call_entry_enqueue_deadline(CE(call),
 521                                                               &group->delayed_queues[flavor],
 522                                                               deadline);
 523
 524         if (old_queue == &group->pending_queue) {
 525                 group->pending_count--;
 526         } else if (old_queue == NULL) {
 527                 call->tc_submit_count++;
 528         } else if (old_queue == &group->delayed_queues[TCF_ABSOLUTE] ||
 529                    old_queue == &group->delayed_queues[TCF_CONTINUOUS]) {
 530                 /* TODO: if it's in the other delayed queue, that might not be OK */
 531                 // we did nothing, and that's fine
 532         } else {
 533                 panic("tried to move a thread call (%p) between groups (old_queue: %p)", call, old_queue);
 534         }
 535
 536         return (old_queue != NULL);
 537 }
 538
 539 /*
 540  *      _call_dequeue:
 541  *
 542  *      Remove an entry from a queue.
 543  *
 544  *      Returns TRUE if the entry was on a queue.
 545  *
 546  *      Called with thread_call_lock held.
 547  */
 548 static __inline__ boolean_t
 549 _call_dequeue(
 550         thread_call_t           call,
 551         thread_call_group_t     group)
 552 {
 553         queue_head_t            *old_queue;
 554
 555         old_queue = call_entry_dequeue(CE(call));
 556
 557         if (old_queue != NULL) {
 558                 assert(old_queue == &group->pending_queue ||
 559                        old_queue == &group->delayed_queues[TCF_ABSOLUTE] ||
 560                        old_queue == &group->delayed_queues[TCF_CONTINUOUS]);
 561
 562                 call->tc_finish_count++;
 563                 if (old_queue == &group->pending_queue)
 564                         group->pending_count--;
 565         }
 566
 567         return (old_queue != NULL);
 568 }
 569
 570 /*
 571  * _arm_delayed_call_timer:
 572  *
 573  * Check if the timer needs to be armed for this flavor,
 574  * and if so, arm it.
 575  *
 576  * If call is non-NULL, only re-arm the timer if the specified call
 577  * is the first in the queue.
 578  *
 579  * Returns true if the timer was armed/re-armed, false if it was left unset
 580  * Caller should cancel the timer if need be.
 581  *
 582  * Called with thread_call_lock held.
 583  */
 584 static bool
 585 _arm_delayed_call_timer(thread_call_t           new_call,
 586                         thread_call_group_t     group,
 587                         thread_call_flavor_t    flavor)
 588 {
 589         /* No calls implies no timer needed */
 590         if (queue_empty(&group->delayed_queues[flavor]))
 591                 return false;
 592
 593         thread_call_t call = qe_queue_first(&group->delayed_queues[flavor], struct thread_call, tc_call.q_link);
 594
 595         /* We only need to change the hard timer if this new call is the first in the list */
 596         if (new_call != NULL && new_call != call)
 597                 return false;
 598
 599         assert((call->tc_soft_deadline != 0) && ((call->tc_soft_deadline <= call->tc_call.deadline)));
 600
 601         uint64_t fire_at = call->tc_soft_deadline;
 602
 603         if (flavor == TCF_CONTINUOUS) {
 604                 assert((call->tc_flags & THREAD_CALL_CONTINUOUS) == THREAD_CALL_CONTINUOUS);
 605                 fire_at = continuoustime_to_absolutetime(fire_at);
 606         } else {
 607                 assert((call->tc_flags & THREAD_CALL_CONTINUOUS) == 0);
 608         }
 609
 610         /*
 611          * Note: This picks the soonest-deadline call's leeway as the hard timer's leeway,
 612          * which does not take into account later-deadline timers with a larger leeway.
 613          * This is a valid coalescing behavior, but masks a possible window to
 614          * fire a timer instead of going idle.
 615          */
 616         uint64_t leeway = call->tc_call.deadline - call->tc_soft_deadline;
 617
 618         timer_call_enter_with_leeway(&group->delayed_timers[flavor], (timer_call_param_t)flavor,
 619             fire_at, leeway,
 620             TIMER_CALL_SYS_CRITICAL|TIMER_CALL_LEEWAY,
 621             ((call->tc_flags & THREAD_CALL_RATELIMITED) == THREAD_CALL_RATELIMITED));
 622
 623         return true;
 624 }
 625
 626 /*
 627  *      _cancel_func_from_queue:
 628  *
 629  *      Remove the first (or all) matching
 630  *      entries from the specified queue.
 631  *
 632  *      Returns TRUE if any matching entries
 633  *      were found.
 634  *
 635  *      Called with thread_call_lock held.
 636  */
 637 static boolean_t
 638 _cancel_func_from_queue(thread_call_func_t      func,
 639                         thread_call_param_t     param0,
 640                         thread_call_group_t     group,
 641                         boolean_t               remove_all,
 642                         queue_head_t            *queue)
 643 {
 644         boolean_t call_removed = FALSE;
 645         thread_call_t call;
 646
 647         qe_foreach_element_safe(call, queue, tc_call.q_link) {
 648                 if (call->tc_call.func   != func ||
 649                     call->tc_call.param0 != param0) {
 650                         continue;
 651                 }
 652
 653                 _call_dequeue(call, group);
 654
 655                 _internal_call_release(call);
 656
 657                 call_removed = TRUE;
 658                 if (!remove_all)
 659                         break;
 660         }
 661
 662         return (call_removed);
 663 }
 664
 665 /*
 666  *      thread_call_func_delayed:
 667  *
 668  *      Enqueue a function callout to
 669  *      occur at the stated time.
 670  */
 671 void
 672 thread_call_func_delayed(
 673                 thread_call_func_t              func,
 674                 thread_call_param_t             param,
 675                 uint64_t                        deadline)
 676 {
 677         (void)thread_call_enter_delayed_internal(NULL, func, param, 0, deadline, 0, 0);
 678 }
 679
 680 /*
 681  * thread_call_func_delayed_with_leeway:
 682  *
 683  * Same as thread_call_func_delayed(), but with
 684  * leeway/flags threaded through.
 685  */
 686
 687 void
 688 thread_call_func_delayed_with_leeway(
 689         thread_call_func_t              func,
 690         thread_call_param_t             param,
 691         uint64_t                deadline,
 692         uint64_t                leeway,
 693         uint32_t                flags)
 694 {
 695         (void)thread_call_enter_delayed_internal(NULL, func, param, 0, deadline, leeway, flags);
 696 }
 697
 698 /*
 699  *      thread_call_func_cancel:
 700  *
 701  *      Dequeue a function callout.
 702  *
 703  *      Removes one (or all) { function, argument }
 704  *      instance(s) from either (or both)
 705  *      the pending and the delayed queue,
 706  *      in that order.
 707  *
 708  *      Returns TRUE if any calls were cancelled.
 709  *
 710  *      This iterates all of the pending or delayed thread calls in the group,
 711  *      which is really inefficient.  Switch to an allocated thread call instead.
 712  */
 713 boolean_t
 714 thread_call_func_cancel(
 715                 thread_call_func_t              func,
 716                 thread_call_param_t             param,
 717                 boolean_t                       cancel_all)
 718 {
 719         boolean_t       result;
 720
 721         assert(func != NULL);
 722
 723         spl_t s = disable_ints_and_lock();
 724
 725         /* Function-only thread calls are only kept in the default HIGH group */
 726         thread_call_group_t group = &thread_call_groups[THREAD_CALL_INDEX_HIGH];
 727
 728         if (cancel_all) {
 729                 /* exhaustively search every queue, and return true if any search found something */
 730                 result = _cancel_func_from_queue(func, param, group, cancel_all, &group->pending_queue) |
 731                          _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_ABSOLUTE])  |
 732                          _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_CONTINUOUS]);
 733         } else {
 734                 /* early-exit as soon as we find something, don't search other queues */
 735                 result = _cancel_func_from_queue(func, param, group, cancel_all, &group->pending_queue) ||
 736                          _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_ABSOLUTE]) ||
 737                          _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_CONTINUOUS]);
 738         }
 739
 740         enable_ints_and_unlock(s);
 741
 742         return (result);
 743 }
 744
 745 /*
 746  * Allocate a thread call with a given priority.  Importances other than
 747  * THREAD_CALL_PRIORITY_HIGH or THREAD_CALL_PRIORITY_KERNEL_HIGH will be run in threads
 748  * with eager preemption enabled (i.e. may be aggressively preempted by higher-priority
 749  * threads which are not in the normal "urgent" bands).
 750  */
 751 thread_call_t
 752 thread_call_allocate_with_priority(
 753                 thread_call_func_t              func,
 754                 thread_call_param_t             param0,
 755                 thread_call_priority_t          pri)
 756 {
 757         return thread_call_allocate_with_options(func, param0, pri, 0);
 758 }
 759
 760 thread_call_t
 761 thread_call_allocate_with_options(
 762                 thread_call_func_t              func,
 763                 thread_call_param_t             param0,
 764                 thread_call_priority_t          pri,
 765                 thread_call_options_t           options)
 766 {
 767         thread_call_t call = thread_call_allocate(func, param0);
 768
 769         switch (pri) {
 770                 case THREAD_CALL_PRIORITY_HIGH:
 771                         call->tc_index = THREAD_CALL_INDEX_HIGH;
 772                         break;
 773                 case THREAD_CALL_PRIORITY_KERNEL:
 774                         call->tc_index = THREAD_CALL_INDEX_KERNEL;
 775                         break;
 776                 case THREAD_CALL_PRIORITY_USER:
 777                         call->tc_index = THREAD_CALL_INDEX_USER;
 778                         break;
 779                 case THREAD_CALL_PRIORITY_LOW:
 780                         call->tc_index = THREAD_CALL_INDEX_LOW;
 781                         break;
 782                 case THREAD_CALL_PRIORITY_KERNEL_HIGH:
 783                         call->tc_index = THREAD_CALL_INDEX_KERNEL_HIGH;
 784                         break;
 785                 default:
 786                         panic("Invalid thread call pri value: %d", pri);
 787                         break;
 788         }
 789
 790         if (options & THREAD_CALL_OPTIONS_ONCE) {
 791             call->tc_flags |= THREAD_CALL_ONCE;
 792         }
 793         if (options & THREAD_CALL_OPTIONS_SIGNAL) {
 794             call->tc_flags |= THREAD_CALL_SIGNAL | THREAD_CALL_ONCE;
 795         }
 796
 797         return call;
 798 }
 799
 800 thread_call_t
 801 thread_call_allocate_with_qos(thread_call_func_t        func,
 802                               thread_call_param_t       param0,
 803                               int                       qos_tier,
 804                               thread_call_options_t     options)
 805 {
 806         thread_call_t call = thread_call_allocate(func, param0);
 807
 808         switch (qos_tier) {
 809                 case THREAD_QOS_UNSPECIFIED:
 810                         call->tc_index = THREAD_CALL_INDEX_HIGH;
 811                         break;
 812                 case THREAD_QOS_LEGACY:
 813                         call->tc_index = THREAD_CALL_INDEX_USER;
 814                         break;
 815                 case THREAD_QOS_MAINTENANCE:
 816                 case THREAD_QOS_BACKGROUND:
 817                         call->tc_index = THREAD_CALL_INDEX_LOW;
 818                         break;
 819                 case THREAD_QOS_UTILITY:
 820                         call->tc_index = THREAD_CALL_INDEX_QOS_UT;
 821                         break;
 822                 case THREAD_QOS_USER_INITIATED:
 823                         call->tc_index = THREAD_CALL_INDEX_QOS_IN;
 824                         break;
 825                 case THREAD_QOS_USER_INTERACTIVE:
 826                         call->tc_index = THREAD_CALL_INDEX_QOS_UI;
 827                         break;
 828                 default:
 829                         panic("Invalid thread call qos value: %d", qos_tier);
 830                         break;
 831         }
 832
 833         if (options & THREAD_CALL_OPTIONS_ONCE)
 834                 call->tc_flags |= THREAD_CALL_ONCE;
 835
 836         /* does not support THREAD_CALL_OPTIONS_SIGNAL */
 837
 838         return call;
 839 }
 840
 841
 842 /*
 843  *      thread_call_allocate:
 844  *
 845  *      Allocate a callout entry.
 846  */
 847 thread_call_t
 848 thread_call_allocate(
 849                 thread_call_func_t              func,
 850                 thread_call_param_t             param0)
 851 {
 852         thread_call_t   call = zalloc(thread_call_zone);
 853
 854         thread_call_setup(call, func, param0);
 855         call->tc_refs = 1;
 856         call->tc_flags = THREAD_CALL_ALLOC;
 857
 858         return (call);
 859 }
 860
 861 /*
 862  *      thread_call_free:
 863  *
 864  *      Release a callout.  If the callout is currently
 865  *      executing, it will be freed when all invocations
 866  *      finish.
 867  *
 868  *      If the callout is currently armed to fire again, then
 869  *      freeing is not allowed and returns FALSE.  The
 870  *      client must have canceled the pending invocation before freeing.
 871  */
 872 boolean_t
 873 thread_call_free(
 874                 thread_call_t           call)
 875 {
 876         spl_t s = disable_ints_and_lock();
 877
 878         if (call->tc_call.queue != NULL ||
 879            ((call->tc_flags & THREAD_CALL_RESCHEDULE) != 0)) {
 880                 thread_call_unlock();
 881                 splx(s);
 882
 883                 return (FALSE);
 884         }
 885
 886         int32_t refs = --call->tc_refs;
 887         if (refs < 0) {
 888                 panic("Refcount negative: %d\n", refs);
 889         }
 890
 891         if ((THREAD_CALL_SIGNAL | THREAD_CALL_RUNNING)
 892           == ((THREAD_CALL_SIGNAL | THREAD_CALL_RUNNING) & call->tc_flags)) {
 893                 thread_call_wait_once_locked(call, s);
 894                 /* thread call lock has been unlocked */
 895         } else {
 896                 enable_ints_and_unlock(s);
 897         }
 898
 899         if (refs == 0) {
 900                 assert(call->tc_finish_count == call->tc_submit_count);
 901                 zfree(thread_call_zone, call);
 902         }
 903
 904         return (TRUE);
 905 }
 906
 907 /*
 908  *      thread_call_enter:
 909  *
 910  *      Enqueue a callout entry to occur "soon".
 911  *
 912  *      Returns TRUE if the call was
 913  *      already on a queue.
 914  */
 915 boolean_t
 916 thread_call_enter(
 917                 thread_call_t           call)
 918 {
 919         return thread_call_enter1(call, 0);
 920 }
 921
 922 boolean_t
 923 thread_call_enter1(
 924                 thread_call_t                   call,
 925                 thread_call_param_t             param1)
 926 {
 927         boolean_t               result = TRUE;
 928         thread_call_group_t     group;
 929
 930         assert(call->tc_call.func != NULL);
 931
 932         assert((call->tc_flags & THREAD_CALL_SIGNAL) == 0);
 933
 934         group = thread_call_get_group(call);
 935
 936         spl_t s = disable_ints_and_lock();
 937
 938         if (call->tc_call.queue != &group->pending_queue) {
 939                 result = _pending_call_enqueue(call, group);
 940         }
 941
 942         call->tc_call.param1 = param1;
 943
 944         enable_ints_and_unlock(s);
 945
 946         return (result);
 947 }
 948
 949 /*
 950  *      thread_call_enter_delayed:
 951  *
 952  *      Enqueue a callout entry to occur
 953  *      at the stated time.
 954  *
 955  *      Returns TRUE if the call was
 956  *      already on a queue.
 957  */
 958 boolean_t
 959 thread_call_enter_delayed(
 960                 thread_call_t           call,
 961                 uint64_t                deadline)
 962 {
 963         assert(call != NULL);
 964         return thread_call_enter_delayed_internal(call, NULL, 0, 0, deadline, 0, 0);
 965 }
 966
 967 boolean_t
 968 thread_call_enter1_delayed(
 969                 thread_call_t                   call,
 970                 thread_call_param_t             param1,
 971                 uint64_t                        deadline)
 972 {
 973         assert(call != NULL);
 974         return thread_call_enter_delayed_internal(call, NULL, 0, param1, deadline, 0, 0);
 975 }
 976
 977 boolean_t
 978 thread_call_enter_delayed_with_leeway(
 979                 thread_call_t           call,
 980                 thread_call_param_t     param1,
 981                 uint64_t                deadline,
 982                 uint64_t                leeway,
 983                 unsigned int            flags)
 984 {
 985         assert(call != NULL);
 986         return thread_call_enter_delayed_internal(call, NULL, 0, param1, deadline, leeway, flags);
 987 }
 988
 989
 990 /*
 991  * thread_call_enter_delayed_internal:
 992  * enqueue a callout entry to occur at the stated time
 993  *
 994  * Returns True if the call was already on a queue
 995  * params:
 996  * call     - structure encapsulating state of the callout
 997  * alt_func/alt_param0 - if call is NULL, allocate temporary storage using these parameters
 998  * deadline - time deadline in nanoseconds
 999  * leeway   - timer slack represented as delta of deadline.
1000  * flags    - THREAD_CALL_DELAY_XXX : classification of caller's desires wrt timer coalescing.
1001  *            THREAD_CALL_DELAY_LEEWAY : value in leeway is used for timer coalescing.
1002  *            THREAD_CALL_CONTINUOUS: thread call will be called according to mach_continuous_time rather
1003  *                                                                        than mach_absolute_time
1004  */
1005 boolean_t
1006 thread_call_enter_delayed_internal(
1007                 thread_call_t           call,
1008                 thread_call_func_t      alt_func,
1009                 thread_call_param_t     alt_param0,
1010                 thread_call_param_t     param1,
1011                 uint64_t                deadline,
1012                 uint64_t                leeway,
1013                 unsigned int            flags)
1014 {
1015         boolean_t               result = TRUE;
1016         thread_call_group_t     group;
1017         uint64_t                now, sdeadline, slop;
1018         uint32_t                urgency;
1019
1020         thread_call_flavor_t flavor = (flags & THREAD_CALL_CONTINUOUS) ? TCF_CONTINUOUS : TCF_ABSOLUTE;
1021
1022         /* direct mapping between thread_call, timer_call, and timeout_urgency values */
1023         urgency = (flags & TIMEOUT_URGENCY_MASK);
1024
1025         spl_t s = disable_ints_and_lock();
1026
1027         if (call == NULL) {
1028                 /* allocate a structure out of internal storage, as a convenience for BSD callers */
1029                 call = _internal_call_allocate(alt_func, alt_param0);
1030         }
1031
1032         assert(call->tc_call.func != NULL);
1033         group = thread_call_get_group(call);
1034
1035         /* TODO: assert that call is not enqueued before flipping the flag */
1036         if (flavor == TCF_CONTINUOUS) {
1037                 now = mach_continuous_time();
1038                 call->tc_flags |= THREAD_CALL_CONTINUOUS;
1039         } else {
1040                 now = mach_absolute_time();
1041                 call->tc_flags &= ~THREAD_CALL_CONTINUOUS;
1042         }
1043
1044         call->tc_flags |= THREAD_CALL_DELAYED;
1045
1046         call->tc_soft_deadline = sdeadline = deadline;
1047
1048         boolean_t ratelimited = FALSE;
1049         slop = timer_call_slop(deadline, now, urgency, current_thread(), &ratelimited);
1050
1051         if ((flags & THREAD_CALL_DELAY_LEEWAY) != 0 && leeway > slop)
1052                 slop = leeway;
1053
1054         if (UINT64_MAX - deadline <= slop)
1055                 deadline = UINT64_MAX;
1056         else
1057                 deadline += slop;
1058
1059         if (ratelimited) {
1060                 call->tc_flags |= TIMER_CALL_RATELIMITED;
1061         } else {
1062                 call->tc_flags &= ~TIMER_CALL_RATELIMITED;
1063         }
1064
1065         call->tc_call.param1 = param1;
1066
1067         call->tc_ttd = (sdeadline > now) ? (sdeadline - now) : 0;
1068
1069         result = _delayed_call_enqueue(call, group, deadline, flavor);
1070
1071         _arm_delayed_call_timer(call, group, flavor);
1072
1073 #if CONFIG_DTRACE
1074         DTRACE_TMR5(thread_callout__create, thread_call_func_t, call->tc_call.func,
1075                     uint64_t, (deadline - sdeadline), uint64_t, (call->tc_ttd >> 32),
1076                     (unsigned) (call->tc_ttd & 0xFFFFFFFF), call);
1077 #endif
1078
1079         enable_ints_and_unlock(s);
1080
1081         return (result);
1082 }
1083
1084 /*
1085  * Remove a callout entry from the queue
1086  * Called with thread_call_lock held
1087  */
1088 static boolean_t
1089 thread_call_cancel_locked(thread_call_t call)
1090 {
1091         boolean_t canceled = (0 != (THREAD_CALL_RESCHEDULE & call->tc_flags));
1092         call->tc_flags &= ~THREAD_CALL_RESCHEDULE;
1093
1094         if (canceled) {
1095                 /* if reschedule was set, it must not have been queued */
1096                 assert(call->tc_call.queue == NULL);
1097         } else {
1098                 boolean_t do_cancel_callout = FALSE;
1099
1100                 thread_call_flavor_t flavor = thread_call_get_flavor(call);
1101                 thread_call_group_t  group  = thread_call_get_group(call);
1102
1103                 if ((call->tc_call.deadline != 0) &&
1104                     (call == qe_queue_first(&group->delayed_queues[flavor], struct thread_call, tc_call.q_link))) {
1105                         assert(call->tc_call.queue == &group->delayed_queues[flavor]);
1106                         do_cancel_callout = TRUE;
1107                 }
1108
1109                 canceled = _call_dequeue(call, group);
1110
1111                 if (do_cancel_callout) {
1112                         if (_arm_delayed_call_timer(NULL, group, flavor) == false)
1113                                 timer_call_cancel(&group->delayed_timers[flavor]);
1114                 }
1115         }
1116
1117 #if CONFIG_DTRACE
1118         DTRACE_TMR4(thread_callout__cancel, thread_call_func_t, call->tc_call.func,
1119                     0, (call->tc_ttd >> 32), (unsigned) (call->tc_ttd & 0xFFFFFFFF));
1120 #endif
1121
1122         return canceled;
1123 }
1124
1125 /*
1126  *      thread_call_cancel:
1127  *
1128  *      Dequeue a callout entry.
1129  *
1130  *      Returns TRUE if the call was
1131  *      on a queue.
1132  */
1133 boolean_t
1134 thread_call_cancel(thread_call_t call)
1135 {
1136         spl_t s = disable_ints_and_lock();
1137
1138         boolean_t result = thread_call_cancel_locked(call);
1139
1140         enable_ints_and_unlock(s);
1141
1142         return result;
1143 }
1144
1145 /*
1146  * Cancel a thread call.  If it cannot be cancelled (i.e.
1147  * is already in flight), waits for the most recent invocation
1148  * to finish.  Note that if clients re-submit this thread call,
1149  * it may still be pending or in flight when thread_call_cancel_wait
1150  * returns, but all requests to execute this work item prior
1151  * to the call to thread_call_cancel_wait will have finished.
1152  */
1153 boolean_t
1154 thread_call_cancel_wait(thread_call_t call)
1155 {
1156         if ((call->tc_flags & THREAD_CALL_ALLOC) == 0)
1157                 panic("thread_call_cancel_wait: can't wait on thread call whose storage I don't own");
1158
1159         if (!ml_get_interrupts_enabled())
1160                 panic("unsafe thread_call_cancel_wait");
1161
1162         if (current_thread()->thc_state.thc_call == call)
1163                 panic("thread_call_cancel_wait: deadlock waiting on self from inside call: %p to function %p",
1164                       call, call->tc_call.func);
1165
1166         spl_t s = disable_ints_and_lock();
1167
1168         boolean_t canceled = thread_call_cancel_locked(call);
1169
1170         if ((call->tc_flags & THREAD_CALL_ONCE) == THREAD_CALL_ONCE) {
1171                 /*
1172                  * A cancel-wait on a 'once' call will both cancel
1173                  * the pending call and wait for the in-flight call
1174                  */
1175
1176                 thread_call_wait_once_locked(call, s);
1177                 /* thread call lock unlocked */
1178         } else {
1179                 /*
1180                  * A cancel-wait on a normal call will only wait for the in-flight calls
1181                  * if it did not cancel the pending call.
1182                  *
1183                  * TODO: This seems less than useful - shouldn't it do the wait as well?
1184                  */
1185
1186                 if (canceled == FALSE) {
1187                         thread_call_wait_locked(call, s);
1188                         /* thread call lock unlocked */
1189                 } else {
1190                         enable_ints_and_unlock(s);
1191                 }
1192         }
1193
1194         return canceled;
1195 }
1196
1197
1198 /*
1199  *      thread_call_wake:
1200  *
1201  *      Wake a call thread to service
1202  *      pending call entries.  May wake
1203  *      the daemon thread in order to
1204  *      create additional call threads.
1205  *
1206  *      Called with thread_call_lock held.
1207  *
1208  *      For high-priority group, only does wakeup/creation if there are no threads
1209  *      running.
1210  */
1211 static __inline__ void
1212 thread_call_wake(
1213         thread_call_group_t             group)
1214 {
1215         /*
1216          * New behavior: use threads if you've got 'em.
1217          * Traditional behavior: wake only if no threads running.
1218          */
1219         if (group_isparallel(group) || group->active_count == 0) {
1220                 if (waitq_wakeup64_one(&group->idle_waitq, NO_EVENT64,
1221                                        THREAD_AWAKENED, WAITQ_ALL_PRIORITIES) == KERN_SUCCESS) {
1222                         group->idle_count--; group->active_count++;
1223
1224                         if (group->idle_count == 0) {
1225                                 timer_call_cancel(&group->dealloc_timer);
1226                                 group->flags &= ~TCG_DEALLOC_ACTIVE;
1227                         }
1228                 } else {
1229                         if (!thread_call_daemon_awake && thread_call_group_should_add_thread(group)) {
1230                                 thread_call_daemon_awake = TRUE;
1231                                 waitq_wakeup64_one(&daemon_waitq, NO_EVENT64,
1232                                                    THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
1233                         }
1234                 }
1235         }
1236 }
1237
1238 /*
1239  *      sched_call_thread:
1240  *
1241  *      Call out invoked by the scheduler.
1242  */
1243 static void
1244 sched_call_thread(
1245                 int                             type,
1246                 thread_t                thread)
1247 {
1248         thread_call_group_t             group;
1249
1250         group = thread->thc_state.thc_group;
1251         assert((group - &thread_call_groups[0]) < THREAD_CALL_INDEX_MAX);
1252
1253         thread_call_lock_spin();
1254
1255         switch (type) {
1256
1257                 case SCHED_CALL_BLOCK:
1258                         assert(group->active_count);
1259                         --group->active_count;
1260                         group->blocked_count++;
1261                         if (group->pending_count > 0)
1262                                 thread_call_wake(group);
1263                         break;
1264
1265                 case SCHED_CALL_UNBLOCK:
1266                         assert(group->blocked_count);
1267                         --group->blocked_count;
1268                         group->active_count++;
1269                         break;
1270         }
1271
1272         thread_call_unlock();
1273 }
1274
1275 /*
      * thread_call_finish:
      *
      * Interrupts disabled, lock held; returns the same way.
      * Only called on thread calls whose storage we own.  Wakes up
      * anyone who might be waiting on this work item and frees it
      * if the client has so requested.
      *
      * call  - the callout whose invocation just completed
      * group - the thread call group used when the callout is re-queued
      * s     - saved interrupt state; overwritten if the lock has to be
      *         dropped and retaken in order to free the callout
      *
      * Returns TRUE only for a THREAD_CALL_SIGNAL callout that was
      * flagged for reschedule with no deadline left (its submit count
      * is bumped here); FALSE otherwise.
      */
1281 static boolean_t
1282 thread_call_finish(thread_call_t call, thread_call_group_t group, spl_t *s)
1283 {
1284         uint64_t  time;
1285         uint32_t  flags;
1286         boolean_t signal;
1287         boolean_t dowake = FALSE;
1288         boolean_t repend = FALSE;
1289
1290         call->tc_finish_count++;
             /* Snapshot the flags: the decisions below use the state from before the bits are cleared. */
1291         flags = call->tc_flags;
1292         signal = ((THREAD_CALL_SIGNAL & flags) != 0);
1293
1294     if (!signal) {
1295                 /* The thread call thread owns a ref until the call is finished */
1296                 if (call->tc_refs <= 0)
1297                         panic("thread_call_finish: detected over-released thread call: %p", call);
1298                 call->tc_refs--;
1299     }
1300
1301         call->tc_flags &= ~(THREAD_CALL_RESCHEDULE | THREAD_CALL_RUNNING | THREAD_CALL_WAIT);
1302
             /* Re-queue only if references remain and a reschedule was requested while running. */
1303         if ((call->tc_refs != 0) && ((flags & THREAD_CALL_RESCHEDULE) != 0)) {
1304                 assert(flags & THREAD_CALL_ONCE);
1305                 thread_call_flavor_t flavor = thread_call_get_flavor(call);
1306
1307                 if (THREAD_CALL_DELAYED & flags) {
1308                         time =  mach_absolute_time();
1309                         if (flavor == TCF_CONTINUOUS) {
1310                                 time =  absolutetime_to_continuoustime(time);
1311                         }
                             /* Soft deadline already passed: treat the call as no longer delayed. */
                             /*
                              * NOTE(review): this clears TIMER_CALL_RATELIMITED, while
                              * thread_call_delayed_timer() tests THREAD_CALL_RATELIMITED on
                              * tc_flags - presumably the same bit; confirm in the headers.
                              */
1312                         if (call->tc_soft_deadline <= time) {
1313                                 call->tc_flags &= ~(THREAD_CALL_DELAYED | TIMER_CALL_RATELIMITED);
1314                                 call->tc_deadline = 0;
1315                         }
1316                 }
                     /* Deadline still set: back onto the delayed queue; signal calls do not re-arm the timer here. */
1317                 if (call->tc_deadline) {
1318                         _delayed_call_enqueue(call, group, call->tc_deadline, flavor);
1319                         if (!signal) {
1320                                 _arm_delayed_call_timer(call, group, flavor);
1321                         }
1322                 } else if (signal) {
                             /* Signal call with no deadline: count a new submission and report it to the caller. */
1323                         call->tc_submit_count++;
1324                         repend = TRUE;
1325                 } else {
1326                         _pending_call_enqueue(call, group);
1327                 }
1328         }
1329
1330         if ((flags & THREAD_CALL_WAIT) != 0) {
1331                 dowake = TRUE;
1332
1333                 /*
1334                  * Dropping lock here because the sched call for the
1335                  * high-pri group can take the big lock from under
1336                  * a thread lock.
1337                  */
1338                 thread_call_unlock();
1339                 thread_wakeup((event_t)call);
1340                 thread_call_lock_spin();
1341                 /* THREAD_CALL_SIGNAL call may have been freed */
1342         }
1343
             /* 'signal' is tested first so a possibly-freed signal call is not dereferenced. */
1344         if (!signal && (call->tc_refs == 0)) {
1345                 if (dowake) {
1346                         panic("Someone waiting on a thread call that is scheduled for free: %p\n", call->tc_call.func);
1347                 }
1348
1349                 assert(call->tc_finish_count == call->tc_submit_count);
1350
                     /* The lock is released around zfree() and retaken; *s receives the new saved state. */
1351                 enable_ints_and_unlock(*s);
1352
1353                 zfree(thread_call_zone, call);
1354
1355                 *s = disable_ints_and_lock();
1356         }
1357
1358         return (repend);
1359 }
1360
1361 /*
1362  * thread_call_invoke
1363  *
1364  * Invoke the function provided for this thread call
1365  *
1366  * Note that the thread call object can be deallocated by the function if we do not control its storage.
1367  */
1368 static void __attribute__((noinline))
1369 thread_call_invoke(thread_call_func_t func, thread_call_param_t param0, thread_call_param_t param1, thread_call_t call)
1370 {
1371         current_thread()->thc_state.thc_call = call;
1372
1373 #if DEVELOPMENT || DEBUG
1374         KERNEL_DEBUG_CONSTANT(
1375                               MACHDBG_CODE(DBG_MACH_SCHED,MACH_CALLOUT) | DBG_FUNC_START,
1376                               VM_KERNEL_UNSLIDE(func), VM_KERNEL_ADDRHIDE(param0), VM_KERNEL_ADDRHIDE(param1), 0, 0);
1377 #endif /* DEVELOPMENT || DEBUG */
1378
1379 #if CONFIG_DTRACE
1380         uint64_t tc_ttd = call->tc_ttd;
1381         boolean_t is_delayed = call->tc_flags & THREAD_CALL_DELAYED;
1382         DTRACE_TMR6(thread_callout__start, thread_call_func_t, func, int, 0, int, (tc_ttd >> 32),
1383                     (unsigned) (tc_ttd & 0xFFFFFFFF), is_delayed, call);
1384 #endif
1385
1386         (*func)(param0, param1);
1387
1388 #if CONFIG_DTRACE
1389         DTRACE_TMR6(thread_callout__end, thread_call_func_t, func, int, 0, int, (tc_ttd >> 32),
1390                     (unsigned) (tc_ttd & 0xFFFFFFFF), is_delayed, call);
1391 #endif
1392
1393 #if DEVELOPMENT || DEBUG
1394         KERNEL_DEBUG_CONSTANT(
1395                               MACHDBG_CODE(DBG_MACH_SCHED,MACH_CALLOUT) | DBG_FUNC_END,
1396                               VM_KERNEL_UNSLIDE(func), 0, 0, 0, 0);
1397 #endif /* DEVELOPMENT || DEBUG */
1398
1399         current_thread()->thc_state.thc_call = NULL;
1400 }
1401
1402 /*
      *      thread_call_thread:
      *
      *      Body of a call thread for 'group'; also used as the
      *      continuation the thread resumes in after blocking.
      *
      *      Drains the group's pending queue, invoking each callout
      *      with the thread call lock dropped, then either waits on
      *      the group's idle wait queue for more work or terminates.
      *
      *      group - thread call group this thread services
      *      wres  - wait result; THREAD_INTERRUPTED means terminate
      */
1405 static void
1406 thread_call_thread(
1407                 thread_call_group_t             group,
1408                 wait_result_t                   wres)
1409 {
1410         thread_t        self = current_thread();
1411         boolean_t       canwait;
1412
             /* Tag the thread as a callout thread if it is not tagged yet. */
1413         if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) == 0)
1414                 (void)thread_set_tag_internal(self, THREAD_TAG_CALLOUT);
1415
1416         /*
1417          * A wakeup with THREAD_INTERRUPTED indicates that
1418          * we should terminate.
1419          */
1420         if (wres == THREAD_INTERRUPTED) {
1421                 thread_terminate(self);
1422
1423                 /* NOTREACHED */
1424                 panic("thread_terminate() returned?");
1425         }
1426
1427         spl_t s = disable_ints_and_lock();
1428
             /* Register the scheduler callout that maintains the group's active/blocked counts. */
1429         self->thc_state.thc_group = group;
1430         thread_sched_call(self, sched_call_thread);
1431
1432         while (group->pending_count > 0) {
1433                 thread_call_t                   call;
1434                 thread_call_func_t              func;
1435                 thread_call_param_t             param0, param1;
1436
1437                 call = qe_dequeue_head(&group->pending_queue, struct thread_call, tc_call.q_link);
1438                 assert(call != NULL);
1439                 group->pending_count--;
1440
                     /* Copy out what the invocation needs while the lock is still held. */
1441                 func = call->tc_call.func;
1442                 param0 = call->tc_call.param0;
1443                 param1 = call->tc_call.param1;
1444
1445                 call->tc_call.queue = NULL;
1446
                     /* NOTE(review): presumably only affects entries from internal storage - confirm in _internal_call_release(). */
1447                 _internal_call_release(call);
1448
1449                 /*
1450                  * Can only do wakeups for thread calls whose storage
1451                  * we control.
1452                  */
1453                 if ((call->tc_flags & THREAD_CALL_ALLOC) != 0) {
1454                         canwait = TRUE;
1455                         call->tc_flags |= THREAD_CALL_RUNNING;
1456                         call->tc_refs++;        /* Delay free until we're done */
1457                 } else
1458                         canwait = FALSE;
1459
                     /* The callout itself runs with the lock dropped and interrupts restored. */
1460                 enable_ints_and_unlock(s);
1461
1462                 thread_call_invoke(func, param0, param1, call);
1463
                     /* A callout must return with the preemption level back at zero. */
1464                 if (get_preemption_level() != 0) {
1465                         int pl = get_preemption_level();
1466                         panic("thread_call_thread: preemption_level %d, last callout %p(%p, %p)",
1467                                         pl, (void *)VM_KERNEL_UNSLIDE(func), param0, param1);
1468                 }
1469
1470                 s = disable_ints_and_lock();
1471
1472                 if (canwait) {
1473                         /* Frees if so desired */
1474                         thread_call_finish(call, group, &s);
1475                 }
1476         }
1477
             /* Queue drained: unregister the scheduler callout and stop counting as active. */
1478         thread_sched_call(self, NULL);
1479         group->active_count--;
1480
             /*
              * Credit the wakeup ledgers when callout_woken_from_icontext is set
              * and callout_woke_thread is not, then reset the per-thread flags.
              */
1481         if (self->callout_woken_from_icontext && !self->callout_woke_thread) {
1482                 ledger_credit(self->t_ledger, task_ledgers.interrupt_wakeups, 1);
1483                 if (self->callout_woken_from_platform_idle)
1484                         ledger_credit(self->t_ledger, task_ledgers.platform_idle_wakeups, 1);
1485         }
1486
1487         self->callout_woken_from_icontext = FALSE;
1488         self->callout_woken_from_platform_idle = FALSE;
1489         self->callout_woke_thread = FALSE;
1490
1491         if (group_isparallel(group)) {
1492                 /*
1493                  * For new style of thread group, thread always blocks.
1494                  * If we have more than the target number of threads,
1495                  * and this is the first to block, and it isn't active
1496                  * already, set a timer for deallocating a thread if we
1497                  * continue to have a surplus.
1498                  */
1499                 group->idle_count++;
1500
                     /* First thread to go idle records when the idle period began. */
1501                 if (group->idle_count == 1) {
1502                         group->idle_timestamp = mach_absolute_time();
1503                 }
1504
1505                 if (((group->flags & TCG_DEALLOC_ACTIVE) == 0) &&
1506                                 ((group->active_count + group->idle_count) > group->target_thread_count)) {
1507                         group->flags |= TCG_DEALLOC_ACTIVE;
1508                         thread_call_start_deallocate_timer(group);
1509                 }
1510
1511                 /* Wait for more work (or termination) */
1512                 wres = waitq_assert_wait64(&group->idle_waitq, NO_EVENT64, THREAD_INTERRUPTIBLE, 0);
1513                 if (wres != THREAD_WAITING) {
1514                         panic("kcall worker unable to assert wait?");
1515                 }
1516
1517                 enable_ints_and_unlock(s);
1518
                     /* Resumes in thread_call_thread as a continuation; presumably never returns here - confirm. */
1519                 thread_block_parameter((thread_continue_t)thread_call_thread, group);
1520         } else {
                     /* Non-parallel group: stay around only while fewer than the target number are idle. */
1521                 if (group->idle_count < group->target_thread_count) {
1522                         group->idle_count++;
1523
1524                         waitq_assert_wait64(&group->idle_waitq, NO_EVENT64, THREAD_UNINT, 0); /* Interrupted means to exit */
1525
1526                         enable_ints_and_unlock(s);
1527
1528                         thread_block_parameter((thread_continue_t)thread_call_thread, group);
1529                         /* NOTREACHED */
1530                 }
1531         }
1532
             /* Reached when a non-parallel group already has enough idle threads: this thread exits. */
1533         enable_ints_and_unlock(s);
1534
1535         thread_terminate(self);
1536         /* NOTREACHED */
1537 }
1538
1539 /*
1540  *      thread_call_daemon: walk list of groups, allocating
1541  *      threads if appropriate (as determined by
1542  *      thread_call_group_should_add_thread()).
1543  */
1544 static void
1545 thread_call_daemon_continue(__unused void *arg)
1546 {
1547         spl_t s = disable_ints_and_lock();
1548
1549         /* Starting at zero happens to be high-priority first. */
1550         for (int i = 0; i < THREAD_CALL_INDEX_MAX; i++) {
1551                 thread_call_group_t group = &thread_call_groups[i];
1552                 while (thread_call_group_should_add_thread(group)) {
1553                         group->active_count++;
1554
1555                         enable_ints_and_unlock(s);
1556
1557                         kern_return_t kr = thread_call_thread_create(group);
1558                         if (kr != KERN_SUCCESS) {
1559                                 /*
1560                                  * On failure, just pause for a moment and give up.
1561                                  * We can try again later.
1562                                  */
1563                                 delay(10000); /* 10 ms */
1564                                 s = disable_ints_and_lock();
1565                                 goto out;
1566                         }
1567
1568                         s = disable_ints_and_lock();
1569                 }
1570         }
1571
1572 out:
1573         thread_call_daemon_awake = FALSE;
1574         waitq_assert_wait64(&daemon_waitq, NO_EVENT64, THREAD_UNINT, 0);
1575
1576         enable_ints_and_unlock(s);
1577
1578         thread_block_parameter((thread_continue_t)thread_call_daemon_continue, NULL);
1579         /* NOTREACHED */
1580 }
1581
1582 static void
1583 thread_call_daemon(
1584                 __unused void    *arg)
1585 {
1586         thread_t        self = current_thread();
1587
1588         self->options |= TH_OPT_VMPRIV;
1589         vm_page_free_reserve(2);        /* XXX */
1590
1591         thread_set_thread_name(self, "thread_call_daemon");
1592
1593         thread_call_daemon_continue(NULL);
1594         /* NOTREACHED */
1595 }
1596
1597 /*
1598  * Schedule timer to deallocate a worker thread if we have a surplus
1599  * of threads (in excess of the group's target) and at least one thread
1600  * is idle the whole time.
1601  */
1602 static void
1603 thread_call_start_deallocate_timer(
1604                 thread_call_group_t group)
1605 {
1606         uint64_t deadline;
1607         boolean_t onqueue;
1608
1609         assert(group->idle_count > 0);
1610
1611         group->flags |= TCG_DEALLOC_ACTIVE;
1612         deadline = group->idle_timestamp + thread_call_dealloc_interval_abs;
1613         onqueue = timer_call_enter(&group->dealloc_timer, deadline, 0);
1614
1615         if (onqueue) {
1616                 panic("Deallocate timer already active?");
1617         }
1618 }
1619
1620 /* non-static so dtrace can find it rdar://problem/31156135&31379348 */
1621 void
1622 thread_call_delayed_timer(timer_call_param_t p0, timer_call_param_t p1)
1623 {
1624         thread_call_group_t  group  = (thread_call_group_t)  p0;
1625         thread_call_flavor_t flavor = (thread_call_flavor_t) p1;
1626
1627         thread_call_t   call;
1628         uint64_t        now;
1629         boolean_t       restart;
1630         boolean_t       repend;
1631
1632         thread_call_lock_spin();
1633
1634         if (flavor == TCF_CONTINUOUS)
1635                 now = mach_continuous_time();
1636         else if (flavor == TCF_ABSOLUTE)
1637                 now = mach_absolute_time();
1638         else
1639                 panic("invalid timer flavor: %d", flavor);
1640
1641     do {
1642                 restart = FALSE;
1643                 qe_foreach_element_safe(call, &group->delayed_queues[flavor], tc_call.q_link) {
1644                         if (flavor == TCF_CONTINUOUS)
1645                                 assert((call->tc_flags & THREAD_CALL_CONTINUOUS) == THREAD_CALL_CONTINUOUS);
1646                         else
1647                                 assert((call->tc_flags & THREAD_CALL_CONTINUOUS) == 0);
1648
1649                         /*
1650                          * if we hit a call that isn't yet ready to expire,
1651                          * then we're done for now
1652                          * TODO: The next timer in the list could have a larger leeway
1653                          *       and therefore be ready to expire.
1654                          *       Sort by deadline then by soft deadline to avoid this
1655                          */
1656                         if (call->tc_soft_deadline > now)
1657                                 break;
1658
1659                         /*
1660                          * If we hit a rate-limited timer, don't eagerly wake it up.
1661                          * Wait until it reaches the end of the leeway window.
1662                          *
1663                          * TODO: What if the next timer is not rate-limited?
1664                          *       Have a separate rate-limited queue to avoid this
1665                          */
1666                         if ((call->tc_flags & THREAD_CALL_RATELIMITED) &&
1667                             (call->tc_call.deadline > now) &&
1668                             (ml_timer_forced_evaluation() == FALSE)) {
1669                                 break;
1670                         }
1671
1672                         if (THREAD_CALL_SIGNAL & call->tc_flags) {
1673                                 __assert_only queue_head_t *old_queue;
1674                                 old_queue = call_entry_dequeue(&call->tc_call);
1675                                 assert(old_queue == &group->delayed_queues[flavor]);
1676
1677                 do {
1678                                         thread_call_func_t  func   = call->tc_call.func;
1679                                         thread_call_param_t param0 = call->tc_call.param0;
1680                                         thread_call_param_t param1 = call->tc_call.param1;
1681
1682                                         call->tc_flags |= THREAD_CALL_RUNNING;
1683                                         thread_call_unlock();
1684                                         thread_call_invoke(func, param0, param1, call);
1685                                         thread_call_lock_spin();
1686
1687                                         repend = thread_call_finish(call, group, NULL);
1688                 } while (repend);
1689
1690                                 /* call may have been freed */
1691                                 restart = TRUE;
1692                                 break;
1693                         } else {
1694                                 _pending_call_enqueue(call, group);
1695                         }
1696                 }
1697         } while (restart);
1698
1699         _arm_delayed_call_timer(call, group, flavor);
1700
1701         thread_call_unlock();
1702 }
1703
1704 static void
1705 thread_call_delayed_timer_rescan(thread_call_group_t group,
1706                                  thread_call_flavor_t flavor)
1707 {
1708         thread_call_t call;
1709         uint64_t now;
1710
1711         spl_t s = disable_ints_and_lock();
1712
1713         assert(ml_timer_forced_evaluation() == TRUE);
1714
1715         if (flavor == TCF_CONTINUOUS) {
1716                 now = mach_continuous_time();
1717         } else {
1718                 now = mach_absolute_time();
1719         }
1720
1721         qe_foreach_element_safe(call, &group->delayed_queues[flavor], tc_call.q_link) {
1722                 if (call->tc_soft_deadline <= now) {
1723                         _pending_call_enqueue(call, group);
1724                 } else {
1725                         uint64_t skew = call->tc_call.deadline - call->tc_soft_deadline;
1726                         assert (call->tc_call.deadline >= call->tc_soft_deadline);
1727                         /*
1728                          * On a latency quality-of-service level change,
1729                          * re-sort potentially rate-limited callout. The platform
1730                          * layer determines which timers require this.
1731                          */
1732                         if (timer_resort_threshold(skew)) {
1733                                 _call_dequeue(call, group);
1734                                 _delayed_call_enqueue(call, group, call->tc_soft_deadline, flavor);
1735                         }
1736                 }
1737         }
1738
1739         _arm_delayed_call_timer(NULL, group, flavor);
1740
1741         enable_ints_and_unlock(s);
1742 }
1743
1744 void
1745 thread_call_delayed_timer_rescan_all(void) {
1746         for (int i = 0; i < THREAD_CALL_INDEX_MAX; i++) {
1747                 thread_call_delayed_timer_rescan(&thread_call_groups[i], TCF_ABSOLUTE);
1748                 thread_call_delayed_timer_rescan(&thread_call_groups[i], TCF_CONTINUOUS);
1749         }
1750 }
1751
1752 /*
1753  * Timer callback to tell a thread to terminate if
1754  * we have an excess of threads and at least one has been
1755  * idle for a long time.
1756  */
1757 static void
1758 thread_call_dealloc_timer(
1759                 timer_call_param_t              p0,
1760                 __unused timer_call_param_t     p1)
1761 {
1762         thread_call_group_t group = (thread_call_group_t)p0;
1763         uint64_t now;
1764         kern_return_t res;
1765         boolean_t terminated = FALSE;
1766
1767         thread_call_lock_spin();
1768
1769         now = mach_absolute_time();
1770         if (group->idle_count > 0) {
1771                 if (now > group->idle_timestamp + thread_call_dealloc_interval_abs) {
1772                         terminated = TRUE;
1773                         group->idle_count--;
1774                         res = waitq_wakeup64_one(&group->idle_waitq, NO_EVENT64,
1775                                                  THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES);
1776                         if (res != KERN_SUCCESS) {
1777                                 panic("Unable to wake up idle thread for termination?");
1778                         }
1779                 }
1780
1781         }
1782
1783         /*
1784          * If we still have an excess of threads, schedule another
1785          * invocation of this function.
1786          */
1787         if (group->idle_count > 0 && (group->idle_count + group->active_count > group->target_thread_count)) {
1788                 /*
1789                  * If we killed someone just now, push out the
1790                  * next deadline.
1791                  */
1792                 if (terminated) {
1793                         group->idle_timestamp = now;
1794                 }
1795
1796                 thread_call_start_deallocate_timer(group);
1797         } else {
1798                 group->flags &= ~TCG_DEALLOC_ACTIVE;
1799         }
1800
1801         thread_call_unlock();
1802 }
1803
1804 /*
1805  * Wait for the invocation of the thread call to complete
1806  * We know there's only one in flight because of the 'once' flag.
1807  *
1808  * If a subsequent invocation comes in before we wake up, that's OK
1809  *
1810  * TODO: Here is where we will add priority inheritance to the thread executing
1811  * the thread call in case it's lower priority than the current thread
1812  *      <rdar://problem/30321792> Priority inheritance for thread_call_wait_once
1813  *
1814  * Takes the thread call lock locked, returns unlocked
1815  *      This lets us avoid a spurious take/drop after waking up from thread_block
1816  */
1817 static boolean_t
1818 thread_call_wait_once_locked(thread_call_t call, spl_t s)
1819 {
1820         assert(call->tc_flags & THREAD_CALL_ALLOC);
1821         assert(call->tc_flags & THREAD_CALL_ONCE);
1822
1823         if ((call->tc_flags & THREAD_CALL_RUNNING) == 0) {
1824                 enable_ints_and_unlock(s);
1825                 return FALSE;
1826         }
1827
1828         /* call is running, so we have to wait for it */
1829         call->tc_flags |= THREAD_CALL_WAIT;
1830
1831         wait_result_t res = assert_wait(call, THREAD_UNINT);
1832         if (res != THREAD_WAITING)
1833                 panic("Unable to assert wait: %d", res);
1834
1835         enable_ints_and_unlock(s);
1836
1837         res = thread_block(THREAD_CONTINUE_NULL);
1838         if (res != THREAD_AWAKENED)
1839                 panic("Awoken with %d?", res);
1840
1841         /* returns unlocked */
1842         return TRUE;
1843 }
1844
1845 /*
1846  * Wait for an in-flight invocation to complete
1847  * Does NOT try to cancel, so the client doesn't need to hold their
1848  * lock while calling this function.
1849  *
1850  * Returns whether or not it had to wait.
1851  *
1852  * Only works for THREAD_CALL_ONCE calls.
1853  */
1854 boolean_t
1855 thread_call_wait_once(thread_call_t call)
1856 {
1857         if ((call->tc_flags & THREAD_CALL_ALLOC) == 0)
1858                 panic("thread_call_wait_once: can't wait on thread call whose storage I don't own");
1859
1860         if ((call->tc_flags & THREAD_CALL_ONCE) == 0)
1861                 panic("thread_call_wait_once: can't wait_once on a non-once call");
1862
1863         if (!ml_get_interrupts_enabled())
1864                 panic("unsafe thread_call_wait_once");
1865
1866         if (current_thread()->thc_state.thc_call == call)
1867                 panic("thread_call_wait_once: deadlock waiting on self from inside call: %p to function %p",
1868                       call, call->tc_call.func);
1869
1870         spl_t s = disable_ints_and_lock();
1871
1872         boolean_t waited = thread_call_wait_once_locked(call, s);
1873         /* thread call lock unlocked */
1874
1875         return waited;
1876 }
1877
1878
1879 /*
1880  * Wait for all requested invocations of a thread call prior to now
1881  * to finish.  Can only be invoked on thread calls whose storage we manage.
1882  * Just waits for the finish count to catch up to the submit count we find
1883  * at the beginning of our wait.
1884  *
1885  * Called with thread_call_lock held.  Returns with lock released.
1886  */
1887 static void
1888 thread_call_wait_locked(thread_call_t call, spl_t s)
1889 {
1890         uint64_t submit_count;
1891         wait_result_t res;
1892
1893         assert(call->tc_flags & THREAD_CALL_ALLOC);
1894
1895         submit_count = call->tc_submit_count;
1896
1897         while (call->tc_finish_count < submit_count) {
1898                 call->tc_flags |= THREAD_CALL_WAIT;
1899
1900                 res = assert_wait(call, THREAD_UNINT);
1901                 if (res != THREAD_WAITING)
1902                         panic("Unable to assert wait: %d", res);
1903
1904                 enable_ints_and_unlock(s);
1905
1906                 res = thread_block(THREAD_CONTINUE_NULL);
1907                 if (res != THREAD_AWAKENED)
1908                         panic("Awoken with %d?", res);
1909
1910                 s = disable_ints_and_lock();
1911         }
1912
1913         enable_ints_and_unlock(s);
1914 }
1915
1916 /*
1917  * Determine whether a thread call is either on a queue or
1918  * currently being executed.
1919  */
1920 boolean_t
1921 thread_call_isactive(thread_call_t call)
1922 {
1923         boolean_t active;
1924
1925         spl_t s = disable_ints_and_lock();
1926         active = (call->tc_submit_count > call->tc_finish_count);
1927         enable_ints_and_unlock(s);
1928
1929         return active;
1930 }
1931
1932 /*
1933  * adjust_cont_time_thread_calls
1934  * on wake, reenqueue delayed call timer for continuous time thread call groups
1935  */
1936 void
1937 adjust_cont_time_thread_calls(void)
1938 {
1939         spl_t s = disable_ints_and_lock();
1940
1941         for (int i = 0; i < THREAD_CALL_INDEX_MAX; i++) {
1942                 thread_call_group_t group = &thread_call_groups[i];
1943
1944                 /* only the continuous timers need to be re-armed */
1945
1946                 _arm_delayed_call_timer(NULL, group, TCF_CONTINUOUS);
1947         }
1948
1949         enable_ints_and_unlock(s);
1950 }
1951