/*
 * Copyright (c) 1993-1995, 1999-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <mach/mach_types.h>
#include <mach/thread_act.h>

#include <kern/kern_types.h>
#include <kern/zalloc.h>
#include <kern/sched_prim.h>
#include <kern/clock.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/waitq.h>
#include <kern/ledger.h>
#include <kern/policy_internal.h>

#include <vm/vm_pageout.h>

#include <kern/thread_call.h>
#include <kern/timer_call.h>

#include <libkern/OSAtomic.h>
#include <kern/timer_queue.h>

#include <sys/kdebug.h>

#include <machine/machine_routines.h>
static ZONE_DECLARE(thread_call_zone, "thread_call",
    sizeof(thread_call_data_t), ZC_NOENCRYPT);

static struct waitq daemon_waitq;

typedef enum {
    TCF_ABSOLUTE    = 0,
    TCF_CONTINUOUS  = 1,
    TCF_COUNT       = 2,
} thread_call_flavor_t;

__options_decl(thread_call_group_flags_t, uint32_t, {
    TCG_NONE                = 0x0,
    TCG_PARALLEL            = 0x1,
    TCG_DEALLOC_ACTIVE      = 0x2,
});
static struct thread_call_group {
    __attribute__((aligned(128))) lck_ticket_t tcg_lock;

    const char *            tcg_name;

    queue_head_t            pending_queue;
    uint32_t                pending_count;

    queue_head_t            delayed_queues[TCF_COUNT];
    struct priority_queue_deadline_min delayed_pqueues[TCF_COUNT];
    timer_call_data_t       delayed_timers[TCF_COUNT];

    timer_call_data_t       dealloc_timer;

    struct waitq            idle_waitq;
    uint64_t                idle_timestamp;
    uint32_t                idle_count, active_count, blocked_count;

    uint32_t                tcg_thread_pri;
    uint32_t                target_thread_count;

    thread_call_group_flags_t tcg_flags;
} thread_call_groups[THREAD_CALL_INDEX_MAX] = {
    [THREAD_CALL_INDEX_HIGH] = {
        .tcg_thread_pri         = BASEPRI_PREEMPT_HIGH,
        .target_thread_count    = 4,
        .tcg_flags              = TCG_NONE,
    },
    [THREAD_CALL_INDEX_KERNEL] = {
        .tcg_name               = "kernel",
        .tcg_thread_pri         = BASEPRI_KERNEL,
        .target_thread_count    = 1,
        .tcg_flags              = TCG_PARALLEL,
    },
    [THREAD_CALL_INDEX_USER] = {
        .tcg_thread_pri         = BASEPRI_DEFAULT,
        .target_thread_count    = 1,
        .tcg_flags              = TCG_PARALLEL,
    },
    [THREAD_CALL_INDEX_LOW] = {
        .tcg_thread_pri         = MAXPRI_THROTTLE,
        .target_thread_count    = 1,
        .tcg_flags              = TCG_PARALLEL,
    },
    [THREAD_CALL_INDEX_KERNEL_HIGH] = {
        .tcg_name               = "kernel-high",
        .tcg_thread_pri         = BASEPRI_PREEMPT,
        .target_thread_count    = 2,
        .tcg_flags              = TCG_NONE,
    },
    [THREAD_CALL_INDEX_QOS_UI] = {
        .tcg_name               = "qos-ui",
        .tcg_thread_pri         = BASEPRI_FOREGROUND,
        .target_thread_count    = 1,
        .tcg_flags              = TCG_NONE,
    },
    [THREAD_CALL_INDEX_QOS_IN] = {
        .tcg_name               = "qos-in",
        .tcg_thread_pri         = BASEPRI_USER_INITIATED,
        .target_thread_count    = 1,
        .tcg_flags              = TCG_NONE,
    },
    [THREAD_CALL_INDEX_QOS_UT] = {
        .tcg_name               = "qos-ut",
        .tcg_thread_pri         = BASEPRI_UTILITY,
        .target_thread_count    = 1,
        .tcg_flags              = TCG_NONE,
    },
};

typedef struct thread_call_group *thread_call_group_t;
#define INTERNAL_CALL_COUNT             768
#define THREAD_CALL_DEALLOC_INTERVAL_NS (5 * NSEC_PER_MSEC) /* 5 ms */
#define THREAD_CALL_ADD_RATIO           4
#define THREAD_CALL_MACH_FACTOR_CAP     3
#define THREAD_CALL_GROUP_MAX_THREADS   500
struct thread_call_thread_state {
    struct thread_call_group * thc_group;
    struct thread_call *       thc_call;   /* debug only, may be deallocated */

    uint64_t thc_call_start;
    uint64_t thc_call_soft_deadline;
    uint64_t thc_call_hard_deadline;
    uint64_t thc_call_pending_timestamp;
    uint64_t thc_IOTES_invocation_timestamp;

    thread_call_func_t  thc_func;
    thread_call_param_t thc_param0;
    thread_call_param_t thc_param1;
};

static bool thread_call_daemon_awake = true;

/*
 * This special waitq exists because the daemon thread
 * might need to be woken while already holding a global waitq locked.
 */
static struct waitq daemon_waitq;

static thread_call_data_t internal_call_storage[INTERNAL_CALL_COUNT];
static queue_head_t       thread_call_internal_queue;
int                       thread_call_internal_queue_count = 0;
static uint64_t           thread_call_dealloc_interval_abs;
static void      _internal_call_init(void);

static thread_call_t _internal_call_allocate(thread_call_func_t func, thread_call_param_t param0);
static bool      _is_internal_call(thread_call_t call);
static void      _internal_call_release(thread_call_t call);
static bool      _pending_call_enqueue(thread_call_t call, thread_call_group_t group, uint64_t now);
static bool      _delayed_call_enqueue(thread_call_t call, thread_call_group_t group,
    uint64_t deadline, thread_call_flavor_t flavor);
static bool      _call_dequeue(thread_call_t call, thread_call_group_t group);
static void      thread_call_wake(thread_call_group_t group);
static void      thread_call_daemon(void *arg);
static void      thread_call_thread(thread_call_group_t group, wait_result_t wres);
static void      thread_call_dealloc_timer(timer_call_param_t p0, timer_call_param_t p1);
static void      thread_call_group_setup(thread_call_group_t group);
static void      sched_call_thread(int type, thread_t thread);
static void      thread_call_start_deallocate_timer(thread_call_group_t group);
static void      thread_call_wait_locked(thread_call_t call, spl_t s);
static bool      thread_call_wait_once_locked(thread_call_t call, spl_t s);

static boolean_t thread_call_enter_delayed_internal(thread_call_t call,
    thread_call_func_t alt_func, thread_call_param_t alt_param0,
    thread_call_param_t param1, uint64_t deadline,
    uint64_t leeway, unsigned int flags);

/* non-static so dtrace can find it rdar://problem/31156135&31379348 */
extern void thread_call_delayed_timer(timer_call_param_t p0, timer_call_param_t p1);

LCK_GRP_DECLARE(thread_call_lck_grp, "thread_call");
static void
thread_call_lock_spin(thread_call_group_t group)
{
    lck_ticket_lock(&group->tcg_lock, &thread_call_lck_grp);
}

static void
thread_call_unlock(thread_call_group_t group)
{
    lck_ticket_unlock(&group->tcg_lock);
}

static void __assert_only
thread_call_assert_locked(thread_call_group_t group)
{
    lck_ticket_assert_owned(&group->tcg_lock);
}

static spl_t
disable_ints_and_lock(thread_call_group_t group)
{
    spl_t s = splsched();
    thread_call_lock_spin(group);

    return s;
}

static void
enable_ints_and_unlock(thread_call_group_t group, spl_t s)
{
    thread_call_unlock(group);
    splx(s);
}
static thread_call_group_t
thread_call_get_group(thread_call_t call)
{
    thread_call_index_t index = call->tc_index;

    assert(index >= 0 && index < THREAD_CALL_INDEX_MAX);

    return &thread_call_groups[index];
}

static thread_call_flavor_t
thread_call_get_flavor(thread_call_t call)
{
    return (call->tc_flags & THREAD_CALL_FLAG_CONTINUOUS) ? TCF_CONTINUOUS : TCF_ABSOLUTE;
}
static thread_call_flavor_t
thread_call_set_flavor(thread_call_t call, thread_call_flavor_t flavor)
{
    assert(flavor == TCF_CONTINUOUS || flavor == TCF_ABSOLUTE);
    thread_call_flavor_t old_flavor = thread_call_get_flavor(call);

    if (old_flavor != flavor) {
        if (flavor == TCF_CONTINUOUS) {
            call->tc_flags |= THREAD_CALL_FLAG_CONTINUOUS;
        } else {
            call->tc_flags &= ~THREAD_CALL_FLAG_CONTINUOUS;
        }
    }

    return old_flavor;
}
/* returns true if it was on a queue */
static bool
thread_call_enqueue_tail(
    thread_call_t call,
    queue_t       new_queue)
{
    queue_t old_queue = call->tc_queue;

    thread_call_group_t  group  = thread_call_get_group(call);
    thread_call_flavor_t flavor = thread_call_get_flavor(call);

    if (old_queue != NULL &&
        old_queue != &group->delayed_queues[flavor]) {
        panic("thread call (%p) on bad queue (old_queue: %p)", call, old_queue);
    }

    if (old_queue == &group->delayed_queues[flavor]) {
        priority_queue_remove(&group->delayed_pqueues[flavor], &call->tc_pqlink);
    }

    if (old_queue == NULL) {
        enqueue_tail(new_queue, &call->tc_qlink);
    } else {
        re_queue_tail(new_queue, &call->tc_qlink);
    }

    call->tc_queue = new_queue;

    return old_queue != NULL;
}
static queue_head_t *
thread_call_dequeue(thread_call_t call)
{
    queue_t old_queue = call->tc_queue;

    thread_call_group_t  group  = thread_call_get_group(call);
    thread_call_flavor_t flavor = thread_call_get_flavor(call);

    if (old_queue != NULL &&
        old_queue != &group->pending_queue &&
        old_queue != &group->delayed_queues[flavor]) {
        panic("thread call (%p) on bad queue (old_queue: %p)", call, old_queue);
    }

    if (old_queue == &group->delayed_queues[flavor]) {
        priority_queue_remove(&group->delayed_pqueues[flavor], &call->tc_pqlink);
    }

    if (old_queue != NULL) {
        remqueue(&call->tc_qlink);

        call->tc_queue = NULL;
    }

    return old_queue;
}
static queue_head_t *
thread_call_enqueue_deadline(
    thread_call_t        call,
    thread_call_group_t  group,
    thread_call_flavor_t flavor,
    uint64_t             deadline)
{
    queue_t old_queue = call->tc_queue;
    queue_t new_queue = &group->delayed_queues[flavor];

    thread_call_flavor_t old_flavor = thread_call_set_flavor(call, flavor);

    if (old_queue != NULL &&
        old_queue != &group->pending_queue &&
        old_queue != &group->delayed_queues[old_flavor]) {
        panic("thread call (%p) on bad queue (old_queue: %p)", call, old_queue);
    }

    if (old_queue == new_queue) {
        /* optimize the same-queue case to avoid a full re-insert */
        uint64_t old_deadline = call->tc_pqlink.deadline;
        call->tc_pqlink.deadline = deadline;

        if (old_deadline < deadline) {
            priority_queue_entry_increased(&group->delayed_pqueues[flavor],
                &call->tc_pqlink);
        } else {
            priority_queue_entry_decreased(&group->delayed_pqueues[flavor],
                &call->tc_pqlink);
        }
    } else {
        if (old_queue == &group->delayed_queues[old_flavor]) {
            priority_queue_remove(&group->delayed_pqueues[old_flavor],
                &call->tc_pqlink);
        }

        call->tc_pqlink.deadline = deadline;

        priority_queue_insert(&group->delayed_pqueues[flavor], &call->tc_pqlink);
    }

    if (old_queue == NULL) {
        enqueue_tail(new_queue, &call->tc_qlink);
    } else if (old_queue != new_queue) {
        re_queue_tail(new_queue, &call->tc_qlink);
    }

    call->tc_queue = new_queue;

    return old_queue;
}

uint64_t
thread_call_get_armed_deadline(thread_call_t call)
{
    return call->tc_pqlink.deadline;
}
static bool
group_isparallel(thread_call_group_t group)
{
    return (group->tcg_flags & TCG_PARALLEL) != 0;
}

static bool
thread_call_group_should_add_thread(thread_call_group_t group)
{
    if ((group->active_count + group->blocked_count + group->idle_count) >= THREAD_CALL_GROUP_MAX_THREADS) {
        panic("thread_call group '%s' reached max thread cap (%d): active: %d, blocked: %d, idle: %d",
            group->tcg_name, THREAD_CALL_GROUP_MAX_THREADS,
            group->active_count, group->blocked_count, group->idle_count);
    }

    if (group_isparallel(group) == false) {
        if (group->pending_count > 0 && group->active_count == 0) {
            return true;
        }

        return false;
    }

    if (group->pending_count > 0) {
        if (group->idle_count > 0) {
            return false;
        }

        uint32_t thread_count = group->active_count;

        /*
         * Add a thread if either there are no threads,
         * the group has fewer than its target number of
         * threads, or the amount of work is large relative
         * to the number of threads. In the last case, pay attention
         * to the total load on the system, and back off if
         * it is high.
         */
        if ((thread_count == 0) ||
            (thread_count < group->target_thread_count) ||
            ((group->pending_count > THREAD_CALL_ADD_RATIO * thread_count) &&
            (sched_mach_factor < THREAD_CALL_MACH_FACTOR_CAP))) {
            return true;
        }
    }

    return false;
}
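/*
 * Worked example of the growth heuristic above (illustrative numbers, not
 * from the original source): with THREAD_CALL_ADD_RATIO == 4, a parallel
 * group already running 2 active threads only requests a third once more
 * than 8 calls are pending, and even then only while sched_mach_factor
 * stays below THREAD_CALL_MACH_FACTOR_CAP (3).
 */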
static void
thread_call_group_setup(thread_call_group_t group)
{
    lck_ticket_init(&group->tcg_lock, &thread_call_lck_grp);

    queue_init(&group->pending_queue);

    for (thread_call_flavor_t flavor = 0; flavor < TCF_COUNT; flavor++) {
        queue_init(&group->delayed_queues[flavor]);
        priority_queue_init(&group->delayed_pqueues[flavor]);
        timer_call_setup(&group->delayed_timers[flavor], thread_call_delayed_timer, group);
    }

    timer_call_setup(&group->dealloc_timer, thread_call_dealloc_timer, group);

    /* Reverse the wait order so we re-use the most recently parked thread from the pool */
    waitq_init(&group->idle_waitq, SYNC_POLICY_REVERSED | SYNC_POLICY_DISABLE_IRQ);
}
/*
 * Simple wrapper for creating threads bound to
 * thread call groups.
 */
static void
thread_call_thread_create(
    thread_call_group_t group)
{
    thread_t      thread;
    kern_return_t result;

    int thread_pri = group->tcg_thread_pri;

    result = kernel_thread_start_priority((thread_continue_t)thread_call_thread,
        group, thread_pri, &thread);
    if (result != KERN_SUCCESS) {
        panic("cannot create new thread call thread %d", result);
    }

    if (thread_pri <= BASEPRI_KERNEL) {
        /*
         * THREAD_CALL_PRIORITY_KERNEL and lower don't get to run to completion
         * in kernel if there are higher priority threads available.
         */
        thread_set_eager_preempt(thread);
    }

    char name[MAXTHREADNAMESIZE] = "";

    int group_thread_count = group->idle_count + group->active_count + group->blocked_count;

    snprintf(name, sizeof(name), "thread call %s #%d", group->tcg_name, group_thread_count);
    thread_set_thread_name(thread, name);

    thread_deallocate(thread);
}
/*
 * thread_call_initialize:
 *
 * Initialize this module, called
 * early during system initialization.
 */
void
thread_call_initialize(void)
{
    nanotime_to_absolutetime(0, THREAD_CALL_DEALLOC_INTERVAL_NS, &thread_call_dealloc_interval_abs);
    waitq_init(&daemon_waitq, SYNC_POLICY_DISABLE_IRQ | SYNC_POLICY_FIFO);

    for (uint32_t i = 0; i < THREAD_CALL_INDEX_MAX; i++) {
        thread_call_group_setup(&thread_call_groups[i]);
    }

    _internal_call_init();

    thread_t      thread;
    kern_return_t result;

    result = kernel_thread_start_priority((thread_continue_t)thread_call_daemon,
        NULL, BASEPRI_PREEMPT_HIGH + 1, &thread);
    if (result != KERN_SUCCESS) {
        panic("thread_call_initialize");
    }

    thread_deallocate(thread);
}
void
thread_call_setup(
    thread_call_t       call,
    thread_call_func_t  func,
    thread_call_param_t param0)
{
    bzero(call, sizeof(*call));

    *call = (struct thread_call) {
        .tc_func = func,
        .tc_param0 = param0,

        /*
         * Thread calls default to the HIGH group
         * unless otherwise specified.
         */
        .tc_index = THREAD_CALL_INDEX_HIGH,
    };
}
static void
_internal_call_init(void)
{
    /* Function-only thread calls are only kept in the default HIGH group */
    thread_call_group_t group = &thread_call_groups[THREAD_CALL_INDEX_HIGH];

    spl_t s = disable_ints_and_lock(group);

    queue_init(&thread_call_internal_queue);

    for (unsigned i = 0; i < INTERNAL_CALL_COUNT; i++) {
        enqueue_tail(&thread_call_internal_queue, &internal_call_storage[i].tc_qlink);
        thread_call_internal_queue_count++;
    }

    enable_ints_and_unlock(group, s);
}
/*
 * _internal_call_allocate:
 *
 * Allocate an internal callout entry.
 *
 * Called with thread_call_lock held.
 */
static thread_call_t
_internal_call_allocate(thread_call_func_t func, thread_call_param_t param0)
{
    /* Function-only thread calls are only kept in the default HIGH group */
    thread_call_group_t group = &thread_call_groups[THREAD_CALL_INDEX_HIGH];

    spl_t s = disable_ints_and_lock(group);

    thread_call_t call = qe_dequeue_head(&thread_call_internal_queue,
        struct thread_call, tc_qlink);

    if (call == NULL) {
        panic("_internal_call_allocate: thread_call_internal_queue empty");
    }

    thread_call_internal_queue_count--;

    thread_call_setup(call, func, param0);
    call->tc_flags = 0; /* THREAD_CALL_ALLOC not set, do not free back to zone */
    enable_ints_and_unlock(group, s);

    return call;
}
/* Check if a call is internal and needs to be returned to the internal pool. */
static bool
_is_internal_call(thread_call_t call)
{
    if (call >= internal_call_storage &&
        call < &internal_call_storage[INTERNAL_CALL_COUNT]) {
        assert((call->tc_flags & THREAD_CALL_ALLOC) == 0);
        return true;
    }

    return false;
}
/*
 * _internal_call_release:
 *
 * Release an internal callout entry which
 * is no longer pending (or delayed).
 *
 * Called with thread_call_lock held.
 */
static void
_internal_call_release(thread_call_t call)
{
    assert(_is_internal_call(call));

    thread_call_group_t group = thread_call_get_group(call);

    assert(group == &thread_call_groups[THREAD_CALL_INDEX_HIGH]);
    thread_call_assert_locked(group);

    enqueue_head(&thread_call_internal_queue, &call->tc_qlink);
    thread_call_internal_queue_count++;
}
/*
 * _pending_call_enqueue:
 *
 * Place an entry at the end of the
 * pending queue, to be executed soon.
 *
 * Returns TRUE if the entry was already
 * on a queue.
 *
 * Called with thread_call_lock held.
 */
static bool
_pending_call_enqueue(thread_call_t call,
    thread_call_group_t group,
    uint64_t now)
{
    if ((THREAD_CALL_ONCE | THREAD_CALL_RUNNING)
        == (call->tc_flags & (THREAD_CALL_ONCE | THREAD_CALL_RUNNING))) {
        call->tc_pqlink.deadline = 0;

        thread_call_flags_t flags = call->tc_flags;
        call->tc_flags |= THREAD_CALL_RESCHEDULE;

        assert(call->tc_queue == NULL);

        return flags & THREAD_CALL_RESCHEDULE;
    }

    call->tc_pending_timestamp = now;

    bool was_on_queue = thread_call_enqueue_tail(call, &group->pending_queue);

    if (!was_on_queue) {
        call->tc_submit_count++;
    }

    group->pending_count++;

    thread_call_wake(group);

    return was_on_queue;
}
/*
 * _delayed_call_enqueue:
 *
 * Place an entry on the delayed queue,
 * after existing entries with an earlier
 * (or identical) deadline.
 *
 * Returns TRUE if the entry was already
 * on a queue.
 *
 * Called with thread_call_lock held.
 */
static bool
_delayed_call_enqueue(
    thread_call_t        call,
    thread_call_group_t  group,
    uint64_t             deadline,
    thread_call_flavor_t flavor)
{
    if ((THREAD_CALL_ONCE | THREAD_CALL_RUNNING)
        == (call->tc_flags & (THREAD_CALL_ONCE | THREAD_CALL_RUNNING))) {
        call->tc_pqlink.deadline = deadline;

        thread_call_flags_t flags = call->tc_flags;
        call->tc_flags |= THREAD_CALL_RESCHEDULE;

        assert(call->tc_queue == NULL);
        thread_call_set_flavor(call, flavor);

        return flags & THREAD_CALL_RESCHEDULE;
    }

    queue_head_t *old_queue = thread_call_enqueue_deadline(call, group, flavor, deadline);

    if (old_queue == &group->pending_queue) {
        group->pending_count--;
    } else if (old_queue == NULL) {
        call->tc_submit_count++;
    }

    return old_queue != NULL;
}
/*
 * _call_dequeue:
 *
 * Remove an entry from a queue.
 *
 * Returns TRUE if the entry was on a queue.
 *
 * Called with thread_call_lock held.
 */
static bool
_call_dequeue(
    thread_call_t       call,
    thread_call_group_t group)
{
    queue_head_t *old_queue = thread_call_dequeue(call);

    if (old_queue == NULL) {
        return false;
    }

    call->tc_finish_count++;

    if (old_queue == &group->pending_queue) {
        group->pending_count--;
    }

    return true;
}
/*
 * _arm_delayed_call_timer:
 *
 * Check if the timer needs to be armed for this flavor,
 * and if so, arm it.
 *
 * If call is non-NULL, only re-arm the timer if the specified call
 * is the first in the queue.
 *
 * Returns true if the timer was armed/re-armed, false if it was left unset
 * Caller should cancel the timer if need be.
 *
 * Called with thread_call_lock held.
 */
static bool
_arm_delayed_call_timer(thread_call_t new_call,
    thread_call_group_t group,
    thread_call_flavor_t flavor)
{
    /* No calls implies no timer needed */
    if (queue_empty(&group->delayed_queues[flavor])) {
        return false;
    }

    thread_call_t call = priority_queue_min(&group->delayed_pqueues[flavor], struct thread_call, tc_pqlink);

    /* We only need to change the hard timer if this new call is the first in the list */
    if (new_call != NULL && new_call != call) {
        return false;
    }

    assert((call->tc_soft_deadline != 0) && ((call->tc_soft_deadline <= call->tc_pqlink.deadline)));

    uint64_t fire_at = call->tc_soft_deadline;

    if (flavor == TCF_CONTINUOUS) {
        assert(call->tc_flags & THREAD_CALL_FLAG_CONTINUOUS);
        fire_at = continuoustime_to_absolutetime(fire_at);
    } else {
        assert((call->tc_flags & THREAD_CALL_FLAG_CONTINUOUS) == 0);
    }

    /*
     * Note: This picks the soonest-deadline call's leeway as the hard timer's leeway,
     * which does not take into account later-deadline timers with a larger leeway.
     * This is a valid coalescing behavior, but masks a possible window to
     * fire a timer instead of going idle.
     */
    uint64_t leeway = call->tc_pqlink.deadline - call->tc_soft_deadline;

    timer_call_enter_with_leeway(&group->delayed_timers[flavor], (timer_call_param_t)flavor,
        fire_at, leeway,
        TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LEEWAY,
        ((call->tc_flags & THREAD_CALL_RATELIMITED) == THREAD_CALL_RATELIMITED));

    return true;
}
/*
 * _cancel_func_from_queue:
 *
 * Remove the first (or all) matching
 * entries from the specified queue.
 *
 * Returns TRUE if any matching entries
 * were found.
 *
 * Called with thread_call_lock held.
 */
static boolean_t
_cancel_func_from_queue(thread_call_func_t func,
    thread_call_param_t param0,
    thread_call_group_t group,
    boolean_t remove_all,
    queue_head_t *queue)
{
    boolean_t call_removed = FALSE;
    thread_call_t call;

    qe_foreach_element_safe(call, queue, tc_qlink) {
        if (call->tc_func != func ||
            call->tc_param0 != param0) {
            continue;
        }

        _call_dequeue(call, group);

        if (_is_internal_call(call)) {
            _internal_call_release(call);
        }

        call_removed = TRUE;
        if (!remove_all) {
            break;
        }
    }

    return call_removed;
}
/*
 * thread_call_func_delayed:
 *
 * Enqueue a function callout to
 * occur at the stated time.
 */
void
thread_call_func_delayed(
    thread_call_func_t  func,
    thread_call_param_t param,
    uint64_t            deadline)
{
    (void)thread_call_enter_delayed_internal(NULL, func, param, 0, deadline, 0, 0);
}

/*
 * thread_call_func_delayed_with_leeway:
 *
 * Same as thread_call_func_delayed(), but with
 * leeway/flags threaded through.
 */
void
thread_call_func_delayed_with_leeway(
    thread_call_func_t  func,
    thread_call_param_t param,
    uint64_t            deadline,
    uint64_t            leeway,
    uint32_t            flags)
{
    (void)thread_call_enter_delayed_internal(NULL, func, param, 0, deadline, leeway, flags);
}
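/*
 * Illustrative sketch (not part of the original source): a typical use of
 * the function-only API above. 'example_expire' and 'example_schedule' are
 * hypothetical; the deadline is expressed in mach_absolute_time() units.
 */
#if 0 /* example only */
static void
example_expire(thread_call_param_t p0, thread_call_param_t p1)
{
    /* runs on a worker thread from the default HIGH group */
}

static void
example_schedule(void)
{
    uint64_t deadline;

    /* fire roughly 100 ms from now */
    clock_interval_to_deadline(100, NSEC_PER_MSEC, &deadline);
    thread_call_func_delayed(example_expire, NULL, deadline);
}
#endif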
/*
 * thread_call_func_cancel:
 *
 * Dequeue a function callout.
 *
 * Removes one (or all) { function, argument }
 * instance(s) from either (or both)
 * the pending and the delayed queue,
 * in that order.
 *
 * Returns TRUE if any calls were cancelled.
 *
 * This iterates all of the pending or delayed thread calls in the group,
 * which is really inefficient. Switch to an allocated thread call instead.
 *
 * TODO: Give 'func' thread calls their own group, so this silliness doesn't
 * affect the main 'high' group.
 */
boolean_t
thread_call_func_cancel(
    thread_call_func_t  func,
    thread_call_param_t param,
    boolean_t           cancel_all)
{
    boolean_t result;

    assert(func != NULL);

    /* Function-only thread calls are only kept in the default HIGH group */
    thread_call_group_t group = &thread_call_groups[THREAD_CALL_INDEX_HIGH];

    spl_t s = disable_ints_and_lock(group);

    if (cancel_all) {
        /* exhaustively search every queue, and return true if any search found something */
        result = _cancel_func_from_queue(func, param, group, cancel_all, &group->pending_queue) |
            _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_ABSOLUTE]) |
            _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_CONTINUOUS]);
    } else {
        /* early-exit as soon as we find something, don't search other queues */
        result = _cancel_func_from_queue(func, param, group, cancel_all, &group->pending_queue) ||
            _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_ABSOLUTE]) ||
            _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_CONTINUOUS]);
    }

    enable_ints_and_unlock(group, s);

    return result;
}
/*
 * Allocate a thread call with a given priority. Importances other than
 * THREAD_CALL_PRIORITY_HIGH or THREAD_CALL_PRIORITY_KERNEL_HIGH will be run in threads
 * with eager preemption enabled (i.e. may be aggressively preempted by higher-priority
 * threads which are not in the normal "urgent" bands).
 */
thread_call_t
thread_call_allocate_with_priority(
    thread_call_func_t     func,
    thread_call_param_t    param0,
    thread_call_priority_t pri)
{
    return thread_call_allocate_with_options(func, param0, pri, 0);
}

thread_call_t
thread_call_allocate_with_options(
    thread_call_func_t     func,
    thread_call_param_t    param0,
    thread_call_priority_t pri,
    thread_call_options_t  options)
{
    thread_call_t call = thread_call_allocate(func, param0);

    switch (pri) {
    case THREAD_CALL_PRIORITY_HIGH:
        call->tc_index = THREAD_CALL_INDEX_HIGH;
        break;
    case THREAD_CALL_PRIORITY_KERNEL:
        call->tc_index = THREAD_CALL_INDEX_KERNEL;
        break;
    case THREAD_CALL_PRIORITY_USER:
        call->tc_index = THREAD_CALL_INDEX_USER;
        break;
    case THREAD_CALL_PRIORITY_LOW:
        call->tc_index = THREAD_CALL_INDEX_LOW;
        break;
    case THREAD_CALL_PRIORITY_KERNEL_HIGH:
        call->tc_index = THREAD_CALL_INDEX_KERNEL_HIGH;
        break;
    default:
        panic("Invalid thread call pri value: %d", pri);
        break;
    }

    if (options & THREAD_CALL_OPTIONS_ONCE) {
        call->tc_flags |= THREAD_CALL_ONCE;
    }
    if (options & THREAD_CALL_OPTIONS_SIGNAL) {
        call->tc_flags |= THREAD_CALL_SIGNAL | THREAD_CALL_ONCE;
    }

    return call;
}
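/*
 * Illustrative sketch (not part of the original source): allocating a
 * one-shot, signalling thread call in the kernel-high group. The callout
 * function 'example_cb' and wrapper 'example_alloc' are hypothetical.
 */
#if 0 /* example only */
static void example_cb(thread_call_param_t p0, thread_call_param_t p1);

static thread_call_t
example_alloc(void *owner)
{
    return thread_call_allocate_with_options(example_cb, owner,
        THREAD_CALL_PRIORITY_KERNEL_HIGH,
        THREAD_CALL_OPTIONS_ONCE | THREAD_CALL_OPTIONS_SIGNAL);
}
#endif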
thread_call_t
thread_call_allocate_with_qos(thread_call_func_t func,
    thread_call_param_t param0,
    int qos_tier,
    thread_call_options_t options)
{
    thread_call_t call = thread_call_allocate(func, param0);

    switch (qos_tier) {
    case THREAD_QOS_UNSPECIFIED:
        call->tc_index = THREAD_CALL_INDEX_HIGH;
        break;
    case THREAD_QOS_LEGACY:
        call->tc_index = THREAD_CALL_INDEX_USER;
        break;
    case THREAD_QOS_MAINTENANCE:
    case THREAD_QOS_BACKGROUND:
        call->tc_index = THREAD_CALL_INDEX_LOW;
        break;
    case THREAD_QOS_UTILITY:
        call->tc_index = THREAD_CALL_INDEX_QOS_UT;
        break;
    case THREAD_QOS_USER_INITIATED:
        call->tc_index = THREAD_CALL_INDEX_QOS_IN;
        break;
    case THREAD_QOS_USER_INTERACTIVE:
        call->tc_index = THREAD_CALL_INDEX_QOS_UI;
        break;
    default:
        panic("Invalid thread call qos value: %d", qos_tier);
        break;
    }

    if (options & THREAD_CALL_OPTIONS_ONCE) {
        call->tc_flags |= THREAD_CALL_ONCE;
    }

    /* does not support THREAD_CALL_OPTIONS_SIGNAL */

    return call;
}
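/*
 * Illustrative sketch (not part of the original source): selecting a group
 * by QoS tier instead of a legacy priority. 'example_cb' and
 * 'example_alloc_utility' are hypothetical.
 */
#if 0 /* example only */
static void example_cb(thread_call_param_t p0, thread_call_param_t p1);

static thread_call_t
example_alloc_utility(void *owner)
{
    /* lands in the "qos-ut" group, serviced at BASEPRI_UTILITY */
    return thread_call_allocate_with_qos(example_cb, owner,
        THREAD_QOS_UTILITY, THREAD_CALL_OPTIONS_ONCE);
}
#endif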
/*
 * thread_call_allocate:
 *
 * Allocate a callout entry.
 */
thread_call_t
thread_call_allocate(
    thread_call_func_t  func,
    thread_call_param_t param0)
{
    thread_call_t call = zalloc(thread_call_zone);

    thread_call_setup(call, func, param0);
    call->tc_refs = 1;
    call->tc_flags = THREAD_CALL_ALLOC;

    return call;
}
/*
 * thread_call_free:
 *
 * Release a callout. If the callout is currently
 * executing, it will be freed when all invocations
 * finish.
 *
 * If the callout is currently armed to fire again, then
 * freeing is not allowed and returns FALSE. The
 * client must have canceled the pending invocation before freeing.
 */
boolean_t
thread_call_free(
    thread_call_t call)
{
    thread_call_group_t group = thread_call_get_group(call);

    spl_t s = disable_ints_and_lock(group);

    if (call->tc_queue != NULL ||
        ((call->tc_flags & THREAD_CALL_RESCHEDULE) != 0)) {
        thread_call_unlock(group);
        splx(s);

        return FALSE;
    }

    int32_t refs = --call->tc_refs;
    if (refs < 0) {
        panic("Refcount negative: %d\n", refs);
    }

    if ((THREAD_CALL_SIGNAL | THREAD_CALL_RUNNING)
        == ((THREAD_CALL_SIGNAL | THREAD_CALL_RUNNING) & call->tc_flags)) {
        thread_call_wait_once_locked(call, s);
        /* thread call lock has been unlocked */
    } else {
        enable_ints_and_unlock(group, s);
    }

    if (refs == 0) {
        assert(call->tc_finish_count == call->tc_submit_count);
        zfree(thread_call_zone, call);
    }

    return TRUE;
}
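/*
 * Illustrative sketch (not part of the original source): the usual
 * allocate / enter / cancel / free lifecycle for a caller-owned thread
 * call. 'example_cb' and 'example_lifecycle' are hypothetical.
 */
#if 0 /* example only */
static void example_cb(thread_call_param_t p0, thread_call_param_t p1);

static void
example_lifecycle(void *owner)
{
    thread_call_t tc = thread_call_allocate(example_cb, owner);

    thread_call_enter(tc);          /* run "soon" on a worker thread */

    /* ... later, during teardown ... */
    thread_call_cancel(tc);         /* returns TRUE iff it was still queued */

    if (!thread_call_free(tc)) {
        /* still armed or marked for re-schedule; cancel before freeing */
    }
}
#endif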
/*
 * thread_call_enter:
 *
 * Enqueue a callout entry to occur "soon".
 *
 * Returns TRUE if the call was
 * already on a queue.
 */
boolean_t
thread_call_enter(
    thread_call_t call)
{
    return thread_call_enter1(call, 0);
}

boolean_t
thread_call_enter1(
    thread_call_t       call,
    thread_call_param_t param1)
{
    assert(call->tc_func != NULL);
    assert((call->tc_flags & THREAD_CALL_SIGNAL) == 0);

    thread_call_group_t group = thread_call_get_group(call);
    bool result = true;

    spl_t s = disable_ints_and_lock(group);

    if (call->tc_queue != &group->pending_queue) {
        result = _pending_call_enqueue(call, group, mach_absolute_time());
    }

    call->tc_param1 = param1;

    enable_ints_and_unlock(group, s);

    return result;
}
/*
 * thread_call_enter_delayed:
 *
 * Enqueue a callout entry to occur
 * at the stated time.
 *
 * Returns TRUE if the call was
 * already on a queue.
 */
boolean_t
thread_call_enter_delayed(
    thread_call_t call,
    uint64_t      deadline)
{
    assert(call != NULL);
    return thread_call_enter_delayed_internal(call, NULL, 0, 0, deadline, 0, 0);
}

boolean_t
thread_call_enter1_delayed(
    thread_call_t       call,
    thread_call_param_t param1,
    uint64_t            deadline)
{
    assert(call != NULL);
    return thread_call_enter_delayed_internal(call, NULL, 0, param1, deadline, 0, 0);
}

boolean_t
thread_call_enter_delayed_with_leeway(
    thread_call_t       call,
    thread_call_param_t param1,
    uint64_t            deadline,
    uint64_t            leeway,
    unsigned int        flags)
{
    assert(call != NULL);
    return thread_call_enter_delayed_internal(call, NULL, 0, param1, deadline, leeway, flags);
}
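/*
 * Illustrative sketch (not part of the original source): arming a delayed
 * call with an explicit leeway so the hard timer can be coalesced with
 * others. 'tc' is assumed to come from thread_call_allocate();
 * 'example_arm_delayed' is hypothetical.
 */
#if 0 /* example only */
static void
example_arm_delayed(thread_call_t tc)
{
    uint64_t deadline, leeway;

    clock_interval_to_deadline(50, NSEC_PER_MSEC, &deadline);
    nanoseconds_to_absolutetime(5 * NSEC_PER_MSEC, &leeway);

    thread_call_enter_delayed_with_leeway(tc, NULL, deadline, leeway,
        THREAD_CALL_DELAY_LEEWAY);
}
#endif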
/*
 * thread_call_enter_delayed_internal:
 * enqueue a callout entry to occur at the stated time
 *
 * Returns True if the call was already on a queue
 * call     - structure encapsulating state of the callout
 * alt_func/alt_param0 - if call is NULL, allocate temporary storage using these parameters
 * deadline - time deadline in nanoseconds
 * leeway   - timer slack represented as delta of deadline.
 * flags    - THREAD_CALL_DELAY_XXX : classification of caller's desires wrt timer coalescing.
 *            THREAD_CALL_DELAY_LEEWAY : value in leeway is used for timer coalescing.
 *            THREAD_CALL_CONTINUOUS: thread call will be called according to mach_continuous_time rather
 *                                    than mach_absolute_time
 */
boolean_t
thread_call_enter_delayed_internal(
    thread_call_t       call,
    thread_call_func_t  alt_func,
    thread_call_param_t alt_param0,
    thread_call_param_t param1,
    uint64_t            deadline,
    uint64_t            leeway,
    unsigned int        flags)
{
    uint64_t now, sdeadline;

    thread_call_flavor_t flavor = (flags & THREAD_CALL_CONTINUOUS) ? TCF_CONTINUOUS : TCF_ABSOLUTE;

    /* direct mapping between thread_call, timer_call, and timeout_urgency values */
    uint32_t urgency = (flags & TIMEOUT_URGENCY_MASK);

    if (call == NULL) {
        /* allocate a structure out of internal storage, as a convenience for BSD callers */
        call = _internal_call_allocate(alt_func, alt_param0);
    }

    assert(call->tc_func != NULL);
    thread_call_group_t group = thread_call_get_group(call);

    spl_t s = disable_ints_and_lock(group);

    /*
     * kevent and IOTES let you change flavor for an existing timer, so we have to
     * support flipping flavors for enqueued thread calls.
     */
    if (flavor == TCF_CONTINUOUS) {
        now = mach_continuous_time();
    } else {
        now = mach_absolute_time();
    }

    call->tc_flags |= THREAD_CALL_DELAYED;

    call->tc_soft_deadline = sdeadline = deadline;

    boolean_t ratelimited = FALSE;
    uint64_t slop = timer_call_slop(deadline, now, urgency, current_thread(), &ratelimited);

    if ((flags & THREAD_CALL_DELAY_LEEWAY) != 0 && leeway > slop) {
        slop = leeway;
    }

    if (UINT64_MAX - deadline <= slop) {
        deadline = UINT64_MAX;
    } else {
        deadline += slop;
    }

    if (ratelimited) {
        call->tc_flags |= THREAD_CALL_RATELIMITED;
    } else {
        call->tc_flags &= ~THREAD_CALL_RATELIMITED;
    }

    call->tc_param1 = param1;

    call->tc_ttd = (sdeadline > now) ? (sdeadline - now) : 0;

    bool result = _delayed_call_enqueue(call, group, deadline, flavor);

    _arm_delayed_call_timer(call, group, flavor);

    DTRACE_TMR5(thread_callout__create, thread_call_func_t, call->tc_func,
        uint64_t, (deadline - sdeadline), uint64_t, (call->tc_ttd >> 32),
        (unsigned) (call->tc_ttd & 0xFFFFFFFF), call);

    enable_ints_and_unlock(group, s);

    return result;
}
/*
 * Remove a callout entry from the queue
 * Called with thread_call_lock held
 */
static boolean_t
thread_call_cancel_locked(thread_call_t call)
{
    boolean_t canceled;

    if (call->tc_flags & THREAD_CALL_RESCHEDULE) {
        call->tc_flags &= ~THREAD_CALL_RESCHEDULE;
        canceled = TRUE;

        /* if reschedule was set, it must not have been queued */
        assert(call->tc_queue == NULL);
    } else {
        bool queue_head_changed = false;

        thread_call_flavor_t flavor = thread_call_get_flavor(call);
        thread_call_group_t  group  = thread_call_get_group(call);

        if (call->tc_pqlink.deadline != 0 &&
            call == priority_queue_min(&group->delayed_pqueues[flavor], struct thread_call, tc_pqlink)) {
            assert(call->tc_queue == &group->delayed_queues[flavor]);
            queue_head_changed = true;
        }

        canceled = _call_dequeue(call, group);

        if (queue_head_changed) {
            if (_arm_delayed_call_timer(NULL, group, flavor) == false) {
                timer_call_cancel(&group->delayed_timers[flavor]);
            }
        }
    }

    DTRACE_TMR4(thread_callout__cancel, thread_call_func_t, call->tc_func,
        0, (call->tc_ttd >> 32), (unsigned) (call->tc_ttd & 0xFFFFFFFF));

    return canceled;
}
/*
 * thread_call_cancel:
 *
 * Dequeue a callout entry.
 *
 * Returns TRUE if the call was
 * on a queue.
 */
boolean_t
thread_call_cancel(thread_call_t call)
{
    thread_call_group_t group = thread_call_get_group(call);

    spl_t s = disable_ints_and_lock(group);

    boolean_t result = thread_call_cancel_locked(call);

    enable_ints_and_unlock(group, s);

    return result;
}
/*
 * Cancel a thread call. If it cannot be cancelled (i.e.
 * is already in flight), waits for the most recent invocation
 * to finish. Note that if clients re-submit this thread call,
 * it may still be pending or in flight when thread_call_cancel_wait
 * returns, but all requests to execute this work item prior
 * to the call to thread_call_cancel_wait will have finished.
 */
boolean_t
thread_call_cancel_wait(thread_call_t call)
{
    thread_call_group_t group = thread_call_get_group(call);

    if ((call->tc_flags & THREAD_CALL_ALLOC) == 0) {
        panic("thread_call_cancel_wait: can't wait on thread call whose storage I don't own");
    }

    if (!ml_get_interrupts_enabled()) {
        panic("unsafe thread_call_cancel_wait");
    }

    thread_t self = current_thread();

    if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) &&
        self->thc_state && self->thc_state->thc_call == call) {
        panic("thread_call_cancel_wait: deadlock waiting on self from inside call: %p to function %p",
            call, call->tc_func);
    }

    spl_t s = disable_ints_and_lock(group);

    boolean_t canceled = thread_call_cancel_locked(call);

    if ((call->tc_flags & THREAD_CALL_ONCE) == THREAD_CALL_ONCE) {
        /*
         * A cancel-wait on a 'once' call will both cancel
         * the pending call and wait for the in-flight call
         */
        thread_call_wait_once_locked(call, s);
        /* thread call lock unlocked */
    } else {
        /*
         * A cancel-wait on a normal call will only wait for the in-flight calls
         * if it did not cancel the pending call.
         *
         * TODO: This seems less than useful - shouldn't it do the wait as well?
         */
        if (canceled == FALSE) {
            thread_call_wait_locked(call, s);
            /* thread call lock unlocked */
        } else {
            enable_ints_and_unlock(group, s);
        }
    }

    return canceled;
}
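/*
 * Illustrative sketch (not part of the original source): safe teardown of a
 * 'once' thread call. cancel_wait both dequeues any pending invocation and
 * waits out an in-flight one, so the callout can no longer touch its
 * parameters after this returns. 'example_teardown' is hypothetical.
 */
#if 0 /* example only */
static void
example_teardown(thread_call_t tc_once)
{
    /* tc_once was created with THREAD_CALL_OPTIONS_ONCE */
    thread_call_cancel_wait(tc_once);
    thread_call_free(tc_once);
}
#endif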
/*
 * thread_call_wake:
 *
 * Wake a call thread to service
 * pending call entries. May wake
 * the daemon thread in order to
 * create additional call threads.
 *
 * Called with thread_call_lock held.
 *
 * For high-priority group, only does wakeup/creation if there are no threads
 * running.
 */
static void
thread_call_wake(
    thread_call_group_t group)
{
    /*
     * New behavior: use threads if you've got 'em.
     * Traditional behavior: wake only if no threads running.
     */
    if (group_isparallel(group) || group->active_count == 0) {
        if (group->idle_count) {
            __assert_only kern_return_t kr;

            kr = waitq_wakeup64_one(&group->idle_waitq, NO_EVENT64,
                THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
            assert(kr == KERN_SUCCESS);

            group->idle_count--;
            group->active_count++;

            if (group->idle_count == 0 && (group->tcg_flags & TCG_DEALLOC_ACTIVE) == TCG_DEALLOC_ACTIVE) {
                if (timer_call_cancel(&group->dealloc_timer) == TRUE) {
                    group->tcg_flags &= ~TCG_DEALLOC_ACTIVE;
                }
            }
        } else {
            if (thread_call_group_should_add_thread(group) &&
                os_atomic_cmpxchg(&thread_call_daemon_awake,
                false, true, relaxed)) {
                waitq_wakeup64_all(&daemon_waitq, NO_EVENT64,
                    THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
            }
        }
    }
}
/*
 * sched_call_thread:
 *
 * Call out invoked by the scheduler.
 */
static void
sched_call_thread(
    int      type,
    thread_t thread)
{
    thread_call_group_t group;

    assert(thread_get_tag_internal(thread) & THREAD_TAG_CALLOUT);
    assert(thread->thc_state != NULL);

    group = thread->thc_state->thc_group;
    assert((group - &thread_call_groups[0]) < THREAD_CALL_INDEX_MAX);

    thread_call_lock_spin(group);

    switch (type) {
    case SCHED_CALL_BLOCK:
        assert(group->active_count);
        --group->active_count;
        group->blocked_count++;
        if (group->pending_count > 0) {
            thread_call_wake(group);
        }
        break;

    case SCHED_CALL_UNBLOCK:
        assert(group->blocked_count);
        --group->blocked_count;
        group->active_count++;
        break;
    }

    thread_call_unlock(group);
}
/*
 * Interrupts disabled, lock held; returns the same way.
 * Only called on thread calls whose storage we own. Wakes up
 * anyone who might be waiting on this work item and frees it
 * if the client has so requested.
 */
static bool
thread_call_finish(thread_call_t call, thread_call_group_t group, spl_t *s)
{
    assert(thread_call_get_group(call) == group);

    bool repend = false;
    bool signal = call->tc_flags & THREAD_CALL_SIGNAL;

    call->tc_finish_count++;

    if (!signal) {
        /* The thread call thread owns a ref until the call is finished */
        if (call->tc_refs <= 0) {
            panic("thread_call_finish: detected over-released thread call: %p", call);
        }
        call->tc_refs--;
    }

    thread_call_flags_t old_flags = call->tc_flags;
    call->tc_flags &= ~(THREAD_CALL_RESCHEDULE | THREAD_CALL_RUNNING | THREAD_CALL_WAIT);

    if (call->tc_refs != 0 && (old_flags & THREAD_CALL_RESCHEDULE) != 0) {
        assert(old_flags & THREAD_CALL_ONCE);
        thread_call_flavor_t flavor = thread_call_get_flavor(call);

        if (old_flags & THREAD_CALL_DELAYED) {
            uint64_t now = mach_absolute_time();
            if (flavor == TCF_CONTINUOUS) {
                now = absolutetime_to_continuoustime(now);
            }
            if (call->tc_soft_deadline <= now) {
                /* The deadline has already expired, go straight to pending */
                call->tc_flags &= ~(THREAD_CALL_DELAYED | THREAD_CALL_RATELIMITED);
                call->tc_pqlink.deadline = 0;
            }
        }

        if (call->tc_pqlink.deadline) {
            _delayed_call_enqueue(call, group, call->tc_pqlink.deadline, flavor);

            if (!signal) {
                _arm_delayed_call_timer(call, group, flavor);
            }
        } else if (signal) {
            call->tc_submit_count++;
            repend = true;
        } else {
            _pending_call_enqueue(call, group, mach_absolute_time());
        }
    }

    if (!signal && (call->tc_refs == 0)) {
        if ((old_flags & THREAD_CALL_WAIT) != 0) {
            panic("Someone waiting on a thread call that is scheduled for free: %p\n", call->tc_func);
        }

        assert(call->tc_finish_count == call->tc_submit_count);

        enable_ints_and_unlock(group, *s);

        zfree(thread_call_zone, call);

        *s = disable_ints_and_lock(group);
    }

    if ((old_flags & THREAD_CALL_WAIT) != 0) {
        /*
         * Dropping lock here because the sched call for the
         * high-pri group can take the big lock from under
         * a thread lock.
         */
        thread_call_unlock(group);
        thread_wakeup((event_t)call);
        thread_call_lock_spin(group);
        /* THREAD_CALL_SIGNAL call may have been freed */
    }

    return repend;
}
/*
 * thread_call_invoke
 *
 * Invoke the function provided for this thread call
 *
 * Note that the thread call object can be deallocated by the function if we do not control its storage.
 */
static void __attribute__((noinline))
thread_call_invoke(thread_call_func_t func,
    thread_call_param_t param0,
    thread_call_param_t param1,
    __unused thread_call_t call)
{
#if DEVELOPMENT || DEBUG
    KERNEL_DEBUG_CONSTANT(
        MACHDBG_CODE(DBG_MACH_SCHED, MACH_CALLOUT) | DBG_FUNC_START,
        VM_KERNEL_UNSLIDE(func), VM_KERNEL_ADDRHIDE(param0), VM_KERNEL_ADDRHIDE(param1), 0, 0);
#endif /* DEVELOPMENT || DEBUG */

    uint64_t tc_ttd = call->tc_ttd;
    boolean_t is_delayed = call->tc_flags & THREAD_CALL_DELAYED;
    DTRACE_TMR6(thread_callout__start, thread_call_func_t, func, int, 0, int, (tc_ttd >> 32),
        (unsigned) (tc_ttd & 0xFFFFFFFF), is_delayed, call);

    (*func)(param0, param1);

    DTRACE_TMR6(thread_callout__end, thread_call_func_t, func, int, 0, int, (tc_ttd >> 32),
        (unsigned) (tc_ttd & 0xFFFFFFFF), is_delayed, call);

#if DEVELOPMENT || DEBUG
    KERNEL_DEBUG_CONSTANT(
        MACHDBG_CODE(DBG_MACH_SCHED, MACH_CALLOUT) | DBG_FUNC_END,
        VM_KERNEL_UNSLIDE(func), 0, 0, 0, 0);
#endif /* DEVELOPMENT || DEBUG */
}
/*
 * thread_call_thread:
 */
static void
thread_call_thread(
    thread_call_group_t group,
    wait_result_t       wres)
{
    thread_t self = current_thread();

    if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) == 0) {
        (void)thread_set_tag_internal(self, THREAD_TAG_CALLOUT);
    }

    /*
     * A wakeup with THREAD_INTERRUPTED indicates that
     * we should terminate.
     */
    if (wres == THREAD_INTERRUPTED) {
        thread_terminate(self);

        /* NOTREACHED */
        panic("thread_terminate() returned?");
    }

    spl_t s = disable_ints_and_lock(group);

    struct thread_call_thread_state thc_state = { .thc_group = group };
    self->thc_state = &thc_state;

    thread_sched_call(self, sched_call_thread);

    while (group->pending_count > 0) {
        thread_call_t call = qe_dequeue_head(&group->pending_queue,
            struct thread_call, tc_qlink);
        assert(call != NULL);

        group->pending_count--;
        if (group->pending_count == 0) {
            assert(queue_empty(&group->pending_queue));
        }

        thread_call_func_t  func   = call->tc_func;
        thread_call_param_t param0 = call->tc_param0;
        thread_call_param_t param1 = call->tc_param1;

        call->tc_queue = NULL;

        if (_is_internal_call(call)) {
            _internal_call_release(call);
        }

        /*
         * Can only do wakeups for thread calls whose storage
         * we control.
         */
        bool needs_finish = false;
        if (call->tc_flags & THREAD_CALL_ALLOC) {
            needs_finish = true;
            call->tc_flags |= THREAD_CALL_RUNNING;
            call->tc_refs++;        /* Delay free until we're done */
        }

        thc_state.thc_call = call;
        thc_state.thc_call_pending_timestamp = call->tc_pending_timestamp;
        thc_state.thc_call_soft_deadline = call->tc_soft_deadline;
        thc_state.thc_call_hard_deadline = call->tc_pqlink.deadline;
        thc_state.thc_func = func;
        thc_state.thc_param0 = param0;
        thc_state.thc_param1 = param1;
        thc_state.thc_IOTES_invocation_timestamp = 0;

        enable_ints_and_unlock(group, s);

        thc_state.thc_call_start = mach_absolute_time();

        thread_call_invoke(func, param0, param1, call);

        thc_state.thc_call = NULL;

        if (get_preemption_level() != 0) {
            int pl = get_preemption_level();
            panic("thread_call_thread: preemption_level %d, last callout %p(%p, %p)",
                pl, (void *)VM_KERNEL_UNSLIDE(func), param0, param1);
        }

        s = disable_ints_and_lock(group);

        if (needs_finish) {
            /* Release refcount, may free */
            thread_call_finish(call, group, &s);
        }
    }

    thread_sched_call(self, NULL);
    group->active_count--;

    if (self->callout_woken_from_icontext && !self->callout_woke_thread) {
        ledger_credit(self->t_ledger, task_ledgers.interrupt_wakeups, 1);
        if (self->callout_woken_from_platform_idle) {
            ledger_credit(self->t_ledger, task_ledgers.platform_idle_wakeups, 1);
        }
    }

    self->callout_woken_from_icontext = FALSE;
    self->callout_woken_from_platform_idle = FALSE;
    self->callout_woke_thread = FALSE;

    self->thc_state = NULL;

    if (group_isparallel(group)) {
        /*
         * For new style of thread group, thread always blocks.
         * If we have more than the target number of threads,
         * and this is the first to block, and it isn't active
         * already, set a timer for deallocating a thread if we
         * continue to have a surplus.
         */
        group->idle_count++;

        if (group->idle_count == 1) {
            group->idle_timestamp = mach_absolute_time();
        }

        if (((group->tcg_flags & TCG_DEALLOC_ACTIVE) == 0) &&
            ((group->active_count + group->idle_count) > group->target_thread_count)) {
            thread_call_start_deallocate_timer(group);
        }

        /* Wait for more work (or termination) */
        wres = waitq_assert_wait64(&group->idle_waitq, NO_EVENT64, THREAD_INTERRUPTIBLE, 0);
        if (wres != THREAD_WAITING) {
            panic("kcall worker unable to assert wait?");
        }

        enable_ints_and_unlock(group, s);

        thread_block_parameter((thread_continue_t)thread_call_thread, group);
        /* NOTREACHED */
    } else {
        if (group->idle_count < group->target_thread_count) {
            group->idle_count++;

            waitq_assert_wait64(&group->idle_waitq, NO_EVENT64, THREAD_UNINT, 0); /* Interrupted means to exit */

            enable_ints_and_unlock(group, s);

            thread_block_parameter((thread_continue_t)thread_call_thread, group);
            /* NOTREACHED */
        }
    }

    enable_ints_and_unlock(group, s);

    thread_terminate(self);
    /* NOTREACHED */
}
void
thread_call_start_iotes_invocation(__assert_only thread_call_t call)
{
    thread_t self = current_thread();

    if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) == 0) {
        /* not a thread call thread, might be a workloop IOTES */
        return;
    }

    assert(self->thc_state);
    assert(self->thc_state->thc_call == call);

    self->thc_state->thc_IOTES_invocation_timestamp = mach_absolute_time();
}
/*
 * thread_call_daemon: walk list of groups, allocating
 * threads if appropriate (as determined by
 * thread_call_group_should_add_thread()).
 */
static void
thread_call_daemon_continue(__unused void *arg)
{
    do {
        os_atomic_store(&thread_call_daemon_awake, false, relaxed);

        /* Starting at zero happens to be high-priority first. */
        for (int i = 0; i < THREAD_CALL_INDEX_MAX; i++) {
            thread_call_group_t group = &thread_call_groups[i];

            spl_t s = disable_ints_and_lock(group);

            while (thread_call_group_should_add_thread(group)) {
                group->active_count++;

                enable_ints_and_unlock(group, s);

                thread_call_thread_create(group);

                s = disable_ints_and_lock(group);
            }

            enable_ints_and_unlock(group, s);
        }
    } while (os_atomic_load(&thread_call_daemon_awake, relaxed));

    waitq_assert_wait64(&daemon_waitq, NO_EVENT64, THREAD_UNINT, 0);

    if (os_atomic_load(&thread_call_daemon_awake, relaxed)) {
        clear_wait(current_thread(), THREAD_AWAKENED);
    }

    thread_block_parameter((thread_continue_t)thread_call_daemon_continue, NULL);
    /* NOTREACHED */
}

static void
thread_call_daemon(__unused void *arg)
{
    thread_t self = current_thread();

    self->options |= TH_OPT_VMPRIV;
    vm_page_free_reserve(2);        /* XXX */

    thread_set_thread_name(self, "thread_call_daemon");

    thread_call_daemon_continue(NULL);
    /* NOTREACHED */
}
/*
 * Schedule timer to deallocate a worker thread if we have a surplus
 * of threads (in excess of the group's target) and at least one thread
 * is idle the whole time.
 */
static void
thread_call_start_deallocate_timer(thread_call_group_t group)
{
    __assert_only bool already_enqueued;

    assert(group->idle_count > 0);
    assert((group->tcg_flags & TCG_DEALLOC_ACTIVE) == 0);

    group->tcg_flags |= TCG_DEALLOC_ACTIVE;

    uint64_t deadline = group->idle_timestamp + thread_call_dealloc_interval_abs;

    already_enqueued = timer_call_enter(&group->dealloc_timer, deadline, 0);

    assert(already_enqueued == false);
}
/* non-static so dtrace can find it rdar://problem/31156135&31379348 */
void
thread_call_delayed_timer(timer_call_param_t p0, timer_call_param_t p1)
{
    thread_call_group_t  group  = (thread_call_group_t)  p0;
    thread_call_flavor_t flavor = (thread_call_flavor_t) p1;

    thread_call_t call;
    uint64_t      now;

    thread_call_lock_spin(group);

    if (flavor == TCF_CONTINUOUS) {
        now = mach_continuous_time();
    } else if (flavor == TCF_ABSOLUTE) {
        now = mach_absolute_time();
    } else {
        panic("invalid timer flavor: %d", flavor);
    }

    while ((call = priority_queue_min(&group->delayed_pqueues[flavor],
        struct thread_call, tc_pqlink)) != NULL) {
        assert(thread_call_get_group(call) == group);
        assert(thread_call_get_flavor(call) == flavor);

        /*
         * if we hit a call that isn't yet ready to expire,
         * then we're done for now
         * TODO: The next timer in the list could have a larger leeway
         *       and therefore be ready to expire.
         */
        if (call->tc_soft_deadline > now) {
            break;
        }

        /*
         * If we hit a rate-limited timer, don't eagerly wake it up.
         * Wait until it reaches the end of the leeway window.
         *
         * TODO: What if the next timer is not rate-limited?
         *       Have a separate rate-limited queue to avoid this
         */
        if ((call->tc_flags & THREAD_CALL_RATELIMITED) &&
            (call->tc_pqlink.deadline > now) &&
            (ml_timer_forced_evaluation() == FALSE)) {
            break;
        }

        if (THREAD_CALL_SIGNAL & call->tc_flags) {
            __assert_only queue_head_t *old_queue;
            old_queue = thread_call_dequeue(call);
            assert(old_queue == &group->delayed_queues[flavor]);

            do {
                thread_call_func_t  func   = call->tc_func;
                thread_call_param_t param0 = call->tc_param0;
                thread_call_param_t param1 = call->tc_param1;

                call->tc_flags |= THREAD_CALL_RUNNING;

                thread_call_unlock(group);
                thread_call_invoke(func, param0, param1, call);
                thread_call_lock_spin(group);

                /* finish may detect that the call has been re-pended */
            } while (thread_call_finish(call, group, NULL));
            /* call may have been freed by the finish */
        } else {
            _pending_call_enqueue(call, group, now);
        }
    }

    _arm_delayed_call_timer(call, group, flavor);

    thread_call_unlock(group);
}
static void
thread_call_delayed_timer_rescan(thread_call_group_t group,
    thread_call_flavor_t flavor)
{
    thread_call_t call;
    uint64_t      now;

    spl_t s = disable_ints_and_lock(group);

    assert(ml_timer_forced_evaluation() == TRUE);

    if (flavor == TCF_CONTINUOUS) {
        now = mach_continuous_time();
    } else {
        now = mach_absolute_time();
    }

    qe_foreach_element_safe(call, &group->delayed_queues[flavor], tc_qlink) {
        if (call->tc_soft_deadline <= now) {
            _pending_call_enqueue(call, group, now);
        } else {
            uint64_t skew = call->tc_pqlink.deadline - call->tc_soft_deadline;
            assert(call->tc_pqlink.deadline >= call->tc_soft_deadline);
            /*
             * On a latency quality-of-service level change,
             * re-sort potentially rate-limited callout. The platform
             * layer determines which timers require this.
             *
             * This trick works by updating the deadline value to
             * equal soft-deadline, effectively crushing away
             * timer coalescing slop values for any armed
             * timer in the queue.
             *
             * TODO: keep a hint on the timer to tell whether its inputs changed, so we
             *       only have to crush coalescing for timers that need it.
             *
             * TODO: Keep a separate queue of timers above the re-sort
             *       threshold, so we only have to look at those.
             */
            if (timer_resort_threshold(skew)) {
                _call_dequeue(call, group);
                _delayed_call_enqueue(call, group, call->tc_soft_deadline, flavor);
            }
        }
    }

    _arm_delayed_call_timer(NULL, group, flavor);

    enable_ints_and_unlock(group, s);
}
void
thread_call_delayed_timer_rescan_all(void)
{
    for (int i = 0; i < THREAD_CALL_INDEX_MAX; i++) {
        for (thread_call_flavor_t flavor = 0; flavor < TCF_COUNT; flavor++) {
            thread_call_delayed_timer_rescan(&thread_call_groups[i], flavor);
        }
    }
}
/*
 * Timer callback to tell a thread to terminate if
 * we have an excess of threads and at least one has been
 * idle for a long time.
 */
static void
thread_call_dealloc_timer(
    timer_call_param_t          p0,
    __unused timer_call_param_t p1)
{
    thread_call_group_t group = (thread_call_group_t)p0;

    uint64_t      now;
    kern_return_t res;
    bool terminated = false;

    thread_call_lock_spin(group);

    assert(group->tcg_flags & TCG_DEALLOC_ACTIVE);

    now = mach_absolute_time();

    if (group->idle_count > 0) {
        if (now > group->idle_timestamp + thread_call_dealloc_interval_abs) {
            terminated = true;
            group->idle_count--;
            res = waitq_wakeup64_one(&group->idle_waitq, NO_EVENT64,
                THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES);
            if (res != KERN_SUCCESS) {
                panic("Unable to wake up idle thread for termination?");
            }
        }
    }

    group->tcg_flags &= ~TCG_DEALLOC_ACTIVE;

    /*
     * If we still have an excess of threads, schedule another
     * invocation of this function.
     */
    if (group->idle_count > 0 && (group->idle_count + group->active_count > group->target_thread_count)) {
        /*
         * If we killed someone just now, push out the
         * next deadline.
         */
        if (terminated) {
            group->idle_timestamp = now;
        }

        thread_call_start_deallocate_timer(group);
    }

    thread_call_unlock(group);
}
/*
 * Wait for the invocation of the thread call to complete
 * We know there's only one in flight because of the 'once' flag.
 *
 * If a subsequent invocation comes in before we wake up, that's OK
 *
 * TODO: Here is where we will add priority inheritance to the thread executing
 *       the thread call in case it's lower priority than the current thread
 *       <rdar://problem/30321792> Priority inheritance for thread_call_wait_once
 *
 * Takes the thread call lock locked, returns unlocked
 * This lets us avoid a spurious take/drop after waking up from thread_block
 */
static bool
thread_call_wait_once_locked(thread_call_t call, spl_t s)
{
    assert(call->tc_flags & THREAD_CALL_ALLOC);
    assert(call->tc_flags & THREAD_CALL_ONCE);

    thread_call_group_t group = thread_call_get_group(call);

    if ((call->tc_flags & THREAD_CALL_RUNNING) == 0) {
        enable_ints_and_unlock(group, s);
        return false;
    }

    /* call is running, so we have to wait for it */
    call->tc_flags |= THREAD_CALL_WAIT;

    wait_result_t res = assert_wait(call, THREAD_UNINT);
    if (res != THREAD_WAITING) {
        panic("Unable to assert wait: %d", res);
    }

    enable_ints_and_unlock(group, s);

    res = thread_block(THREAD_CONTINUE_NULL);
    if (res != THREAD_AWAKENED) {
        panic("Awoken with %d?", res);
    }

    /* returns unlocked */
    return true;
}
/*
 * Wait for an in-flight invocation to complete
 * Does NOT try to cancel, so the client doesn't need to hold their
 * lock while calling this function.
 *
 * Returns whether or not it had to wait.
 *
 * Only works for THREAD_CALL_ONCE calls.
 */
boolean_t
thread_call_wait_once(thread_call_t call)
{
    if ((call->tc_flags & THREAD_CALL_ALLOC) == 0) {
        panic("thread_call_wait_once: can't wait on thread call whose storage I don't own");
    }

    if ((call->tc_flags & THREAD_CALL_ONCE) == 0) {
        panic("thread_call_wait_once: can't wait_once on a non-once call");
    }

    if (!ml_get_interrupts_enabled()) {
        panic("unsafe thread_call_wait_once");
    }

    thread_t self = current_thread();

    if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) &&
        self->thc_state && self->thc_state->thc_call == call) {
        panic("thread_call_wait_once: deadlock waiting on self from inside call: %p to function %p",
            call, call->tc_func);
    }

    thread_call_group_t group = thread_call_get_group(call);

    spl_t s = disable_ints_and_lock(group);

    bool waited = thread_call_wait_once_locked(call, s);
    /* thread call lock unlocked */

    return waited;
}
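/*
 * Illustrative sketch (not part of the original source): quiescing a 'once'
 * thread call without holding the caller's lock. If the call can't be
 * dequeued it may be in flight, so wait out that invocation.
 * 'example_quiesce' is hypothetical.
 */
#if 0 /* example only */
static void
example_quiesce(thread_call_t tc_once)
{
    if (!thread_call_cancel(tc_once)) {
        thread_call_wait_once(tc_once);
    }
}
#endif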
/*
 * Wait for all requested invocations of a thread call prior to now
 * to finish. Can only be invoked on thread calls whose storage we manage.
 * Just waits for the finish count to catch up to the submit count we find
 * at the beginning of our wait.
 *
 * Called with thread_call_lock held. Returns with lock released.
 */
static void
thread_call_wait_locked(thread_call_t call, spl_t s)
{
    thread_call_group_t group = thread_call_get_group(call);

    assert(call->tc_flags & THREAD_CALL_ALLOC);

    uint64_t submit_count = call->tc_submit_count;

    while (call->tc_finish_count < submit_count) {
        call->tc_flags |= THREAD_CALL_WAIT;

        wait_result_t res = assert_wait(call, THREAD_UNINT);
        if (res != THREAD_WAITING) {
            panic("Unable to assert wait: %d", res);
        }

        enable_ints_and_unlock(group, s);

        res = thread_block(THREAD_CONTINUE_NULL);
        if (res != THREAD_AWAKENED) {
            panic("Awoken with %d?", res);
        }

        s = disable_ints_and_lock(group);
    }

    enable_ints_and_unlock(group, s);
}
/*
 * Determine whether a thread call is either on a queue or
 * currently being executed.
 */
boolean_t
thread_call_isactive(thread_call_t call)
{
    thread_call_group_t group = thread_call_get_group(call);

    spl_t s = disable_ints_and_lock(group);
    boolean_t active = (call->tc_submit_count > call->tc_finish_count);
    enable_ints_and_unlock(group, s);

    return active;
}
/*
 * adjust_cont_time_thread_calls
 * on wake, reenqueue delayed call timer for continuous time thread call groups
 */
void
adjust_cont_time_thread_calls(void)
{
    for (int i = 0; i < THREAD_CALL_INDEX_MAX; i++) {
        thread_call_group_t group = &thread_call_groups[i];
        spl_t s = disable_ints_and_lock(group);

        /* only the continuous timers need to be re-armed */
        _arm_delayed_call_timer(NULL, group, TCF_CONTINUOUS);
        enable_ints_and_unlock(group, s);
    }
}