osfmk/kern/thread_call.c

   1 /*
   2  * Copyright (c) 1993-1995, 1999-2008 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28
  29 #include <mach/mach_types.h>
  30 #include <mach/thread_act.h>
  31
  32 #include <kern/kern_types.h>
  33 #include <kern/zalloc.h>
  34 #include <kern/sched_prim.h>
  35 #include <kern/clock.h>
  36 #include <kern/task.h>
  37 #include <kern/thread.h>
  38 #include <kern/wait_queue.h>
  39 #include <kern/ledger.h>
  40
  41 #include <vm/vm_pageout.h>
  42
  43 #include <kern/thread_call.h>
  44 #include <kern/call_entry.h>
  45 #include <kern/timer_call.h>
  46
  47 #include <libkern/OSAtomic.h>
  48 #include <kern/timer_queue.h>
  49
  50 #include <sys/kdebug.h>
  51 #if CONFIG_DTRACE
  52 #include <mach/sdt.h>
  53 #endif
  54 #include <machine/machine_routines.h>
  55
  56 static zone_t                   thread_call_zone;
  57 static struct wait_queue        daemon_wqueue;
  58
  59 struct thread_call_group {
  60         queue_head_t            pending_queue;
  61         uint32_t                pending_count;
  62
  63         queue_head_t            delayed_queue;
  64         uint32_t                delayed_count;
  65
  66         timer_call_data_t       delayed_timer;
  67         timer_call_data_t       dealloc_timer;
  68
  69         struct wait_queue       idle_wqueue;
  70         uint32_t                idle_count, active_count;
  71
  72         integer_t               pri;
  73         uint32_t                target_thread_count;
  74         uint64_t                idle_timestamp;
  75
  76         uint32_t                flags;
  77         sched_call_t            sched_call;
  78 };
  79
  80 typedef struct thread_call_group        *thread_call_group_t;
  81
  82 #define TCG_PARALLEL            0x01
  83 #define TCG_DEALLOC_ACTIVE      0x02
  84
  85 #define THREAD_CALL_GROUP_COUNT         4
  86 #define THREAD_CALL_THREAD_MIN          4
  87 #define INTERNAL_CALL_COUNT             768
  88 #define THREAD_CALL_DEALLOC_INTERVAL_NS (5 * 1000 * 1000) /* 5 ms */
  89 #define THREAD_CALL_ADD_RATIO           4
  90 #define THREAD_CALL_MACH_FACTOR_CAP     3
  91
  92 static struct thread_call_group thread_call_groups[THREAD_CALL_GROUP_COUNT];
  93 static boolean_t                thread_call_daemon_awake;
  94 static thread_call_data_t       internal_call_storage[INTERNAL_CALL_COUNT];
  95 static queue_head_t             thread_call_internal_queue;
  96 int                                             thread_call_internal_queue_count = 0;
  97 static uint64_t                 thread_call_dealloc_interval_abs;
  98
  99 static __inline__ thread_call_t _internal_call_allocate(thread_call_func_t func, thread_call_param_t param0);
 100 static __inline__ void          _internal_call_release(thread_call_t call);
 101 static __inline__ boolean_t     _pending_call_enqueue(thread_call_t call, thread_call_group_t group);
 102 static __inline__ boolean_t     _delayed_call_enqueue(thread_call_t call, thread_call_group_t group, uint64_t deadline);
 103 static __inline__ boolean_t     _call_dequeue(thread_call_t call, thread_call_group_t group);
 104 static __inline__ void          thread_call_wake(thread_call_group_t group);
 105 static __inline__ void          _set_delayed_call_timer(thread_call_t call, thread_call_group_t group);
 106 static boolean_t                _remove_from_pending_queue(thread_call_func_t func, thread_call_param_t param0, boolean_t remove_all);
 107 static boolean_t                _remove_from_delayed_queue(thread_call_func_t func, thread_call_param_t param0, boolean_t remove_all);
 108 static void                     thread_call_daemon(void *arg);
 109 static void                     thread_call_thread(thread_call_group_t group, wait_result_t wres);
 110 extern void                     thread_call_delayed_timer(timer_call_param_t p0, timer_call_param_t p1);
 111 static void                     thread_call_dealloc_timer(timer_call_param_t p0, timer_call_param_t p1);
 112 static void                     thread_call_group_setup(thread_call_group_t group, thread_call_priority_t pri, uint32_t target_thread_count, boolean_t parallel);
 113 static void                     sched_call_thread(int type, thread_t thread);
 114 static void                     thread_call_start_deallocate_timer(thread_call_group_t group);
 115 static void                     thread_call_wait_locked(thread_call_t call);
 116 static boolean_t                thread_call_enter_delayed_internal(thread_call_t call,
 117                                                 thread_call_func_t alt_func, thread_call_param_t alt_param0,
 118                                                 thread_call_param_t param1, uint64_t deadline,
 119                                                 uint64_t leeway, unsigned int flags);
 120
 121 #define qe(x)           ((queue_entry_t)(x))
 122 #define TC(x)           ((thread_call_t)(x))
 123
 124
 125 lck_grp_t               thread_call_queues_lck_grp;
 126 lck_grp_t               thread_call_lck_grp;
 127 lck_attr_t              thread_call_lck_attr;
 128 lck_grp_attr_t          thread_call_lck_grp_attr;
 129
 130 #if defined(__i386__) || defined(__x86_64__)
 131 lck_mtx_t               thread_call_lock_data;
 132 #else
 133 lck_spin_t              thread_call_lock_data;
 134 #endif
 135
 136
 137 #define thread_call_lock_spin()                 \
 138         lck_mtx_lock_spin_always(&thread_call_lock_data)
 139
 140 #define thread_call_unlock()                    \
 141         lck_mtx_unlock_always(&thread_call_lock_data)
 142
 143 extern boolean_t        mach_timer_coalescing_enabled;
 144
 145 static inline spl_t
 146 disable_ints_and_lock(void)
 147 {
 148         spl_t s;
 149
 150         s = splsched();
 151         thread_call_lock_spin();
 152
 153         return s;
 154 }
 155
 156 static inline void
 157 enable_ints_and_unlock(void)
 158 {
 159         thread_call_unlock();
 160         (void)spllo();
 161 }
 162
 163
 164 static inline boolean_t
 165 group_isparallel(thread_call_group_t group)
 166 {
 167         return ((group->flags & TCG_PARALLEL) != 0);
 168 }
 169
 170 static boolean_t
 171 thread_call_group_should_add_thread(thread_call_group_t group)
 172 {
 173         uint32_t thread_count;
 174
 175         if (!group_isparallel(group)) {
 176                 if (group->pending_count > 0 && group->active_count == 0) {
 177                         return TRUE;
 178                 }
 179
 180                 return FALSE;
 181         }
 182
 183         if (group->pending_count > 0) {
 184                 if (group->idle_count > 0) {
 185                         panic("Pending work, but threads are idle?");
 186                 }
 187
 188                 thread_count = group->active_count;
 189
 190                 /*
 191                  * Add a thread if either there are no threads,
 192                  * the group has fewer than its target number of
 193                  * threads, or the amount of work is large relative
 194                  * to the number of threads.  In the last case, pay attention
 195                  * to the total load on the system, and back off if
 196          * it's high.
 197                  */
 198                 if ((thread_count == 0) ||
 199                         (thread_count < group->target_thread_count) ||
 200                         ((group->pending_count > THREAD_CALL_ADD_RATIO * thread_count) &&
 201                          (sched_mach_factor < THREAD_CALL_MACH_FACTOR_CAP))) {
 202                         return TRUE;
 203                 }
 204         }
 205
 206         return FALSE;
 207 }
 208
 209 static inline integer_t
 210 thread_call_priority_to_sched_pri(thread_call_priority_t pri)
 211 {
 212         switch (pri) {
 213         case THREAD_CALL_PRIORITY_HIGH:
 214                 return BASEPRI_PREEMPT;
 215         case THREAD_CALL_PRIORITY_KERNEL:
 216                 return BASEPRI_KERNEL;
 217         case THREAD_CALL_PRIORITY_USER:
 218                 return BASEPRI_DEFAULT;
 219         case THREAD_CALL_PRIORITY_LOW:
 220                 return MAXPRI_THROTTLE;
 221         default:
 222                 panic("Invalid priority.");
 223         }
 224
 225         return 0;
 226 }
 227
 228 /* Lock held */
 229 static inline thread_call_group_t
 230 thread_call_get_group(
 231                 thread_call_t call)
 232 {
 233         thread_call_priority_t  pri = call->tc_pri;
 234
 235         assert(pri == THREAD_CALL_PRIORITY_LOW ||
 236                         pri == THREAD_CALL_PRIORITY_USER ||
 237                         pri == THREAD_CALL_PRIORITY_KERNEL ||
 238                         pri == THREAD_CALL_PRIORITY_HIGH);
 239
 240         return &thread_call_groups[pri];
 241 }
 242
 243 static void
 244 thread_call_group_setup(
 245                 thread_call_group_t             group,
 246                 thread_call_priority_t          pri,
 247                 uint32_t                        target_thread_count,
 248                 boolean_t                       parallel)
 249 {
 250         queue_init(&group->pending_queue);
 251         queue_init(&group->delayed_queue);
 252
 253         timer_call_setup(&group->delayed_timer, thread_call_delayed_timer, group);
 254         timer_call_setup(&group->dealloc_timer, thread_call_dealloc_timer, group);
 255
 256         wait_queue_init(&group->idle_wqueue, SYNC_POLICY_FIFO);
 257
 258         group->target_thread_count = target_thread_count;
 259         group->pri = thread_call_priority_to_sched_pri(pri);
 260
 261         group->sched_call = sched_call_thread;
 262         if (parallel) {
 263                 group->flags |= TCG_PARALLEL;
 264                 group->sched_call = NULL;
 265         }
 266 }
 267
 268 /*
 269  * Simple wrapper for creating threads bound to
 270  * thread call groups.
 271  */
 272 static kern_return_t
 273 thread_call_thread_create(
 274                 thread_call_group_t             group)
 275 {
 276         thread_t thread;
 277         kern_return_t result;
 278
 279         result = kernel_thread_start_priority((thread_continue_t)thread_call_thread, group, group->pri, &thread);
 280         if (result != KERN_SUCCESS) {
 281                 return result;
 282         }
 283
 284         if (group->pri < BASEPRI_PREEMPT) {
 285                 /*
 286                  * New style doesn't get to run to completion in
 287                  * kernel if there are higher priority threads
 288                  * available.
 289                  */
 290                 thread_set_eager_preempt(thread);
 291         }
 292
 293         thread_deallocate(thread);
 294         return KERN_SUCCESS;
 295 }
 296
 297 /*
 298  *      thread_call_initialize:
 299  *
 300  *      Initialize this module, called
 301  *      early during system initialization.
 302  */
 303 void
 304 thread_call_initialize(void)
 305 {
 306         thread_call_t                   call;
 307         kern_return_t                   result;
 308         thread_t                        thread;
 309         int                             i;
 310
 311         i = sizeof (thread_call_data_t);
 312         thread_call_zone = zinit(i, 4096 * i, 16 * i, "thread_call");
 313         zone_change(thread_call_zone, Z_CALLERACCT, FALSE);
 314         zone_change(thread_call_zone, Z_NOENCRYPT, TRUE);
 315
 316         lck_attr_setdefault(&thread_call_lck_attr);
 317         lck_grp_attr_setdefault(&thread_call_lck_grp_attr);
 318         lck_grp_init(&thread_call_queues_lck_grp, "thread_call_queues", &thread_call_lck_grp_attr);
 319         lck_grp_init(&thread_call_lck_grp, "thread_call", &thread_call_lck_grp_attr);
 320
 321 #if defined(__i386__) || defined(__x86_64__)
 322         lck_mtx_init(&thread_call_lock_data, &thread_call_lck_grp, &thread_call_lck_attr);
 323 #else
 324         lck_spin_init(&thread_call_lock_data, &thread_call_lck_grp, &thread_call_lck_attr);
 325 #endif
 326
 327         nanotime_to_absolutetime(0, THREAD_CALL_DEALLOC_INTERVAL_NS, &thread_call_dealloc_interval_abs);
 328         wait_queue_init(&daemon_wqueue, SYNC_POLICY_FIFO);
 329
 330         thread_call_group_setup(&thread_call_groups[THREAD_CALL_PRIORITY_LOW], THREAD_CALL_PRIORITY_LOW, 0, TRUE);
 331         thread_call_group_setup(&thread_call_groups[THREAD_CALL_PRIORITY_USER], THREAD_CALL_PRIORITY_USER, 0, TRUE);
 332         thread_call_group_setup(&thread_call_groups[THREAD_CALL_PRIORITY_KERNEL], THREAD_CALL_PRIORITY_KERNEL, 1, TRUE);
 333         thread_call_group_setup(&thread_call_groups[THREAD_CALL_PRIORITY_HIGH], THREAD_CALL_PRIORITY_HIGH, THREAD_CALL_THREAD_MIN, FALSE);
 334
 335         disable_ints_and_lock();
 336
 337         queue_init(&thread_call_internal_queue);
 338         for (
 339                         call = internal_call_storage;
 340                         call < &internal_call_storage[INTERNAL_CALL_COUNT];
 341                         call++) {
 342
 343                 enqueue_tail(&thread_call_internal_queue, qe(call));
 344                 thread_call_internal_queue_count++;
 345         }
 346
 347         thread_call_daemon_awake = TRUE;
 348
 349         enable_ints_and_unlock();
 350
 351         result = kernel_thread_start_priority((thread_continue_t)thread_call_daemon, NULL, BASEPRI_PREEMPT + 1, &thread);
 352         if (result != KERN_SUCCESS)
 353                 panic("thread_call_initialize");
 354
 355         thread_deallocate(thread);
 356 }
 357
 358 void
 359 thread_call_setup(
 360         thread_call_t                   call,
 361         thread_call_func_t              func,
 362         thread_call_param_t             param0)
 363 {
 364         bzero(call, sizeof(*call));
 365         call_entry_setup((call_entry_t)call, func, param0);
 366         call->tc_pri = THREAD_CALL_PRIORITY_HIGH; /* Default priority */
 367 }
 368
 369 /*
 370  *      _internal_call_allocate:
 371  *
 372  *      Allocate an internal callout entry.
 373  *
 374  *      Called with thread_call_lock held.
 375  */
 376 static __inline__ thread_call_t
 377 _internal_call_allocate(thread_call_func_t func, thread_call_param_t param0)
 378 {
 379     thread_call_t               call;
 380
 381     if (queue_empty(&thread_call_internal_queue))
 382         panic("_internal_call_allocate");
 383
 384     call = TC(dequeue_head(&thread_call_internal_queue));
 385     thread_call_internal_queue_count--;
 386
 387     thread_call_setup(call, func, param0);
 388     call->tc_refs = 0;
 389     call->tc_flags = 0; /* THREAD_CALL_ALLOC not set, do not free back to zone */
 390
 391     return (call);
 392 }
 393
 394 /*
 395  *      _internal_call_release:
 396  *
 397  *      Release an internal callout entry which
 398  *      is no longer pending (or delayed). This is
 399  *      safe to call on a non-internal entry, in which
 400  *      case nothing happens.
 401  *
 402  *      Called with thread_call_lock held.
 403  */
 404 static __inline__ void
 405 _internal_call_release(
 406     thread_call_t               call)
 407 {
 408     if (    call >= internal_call_storage                                               &&
 409                     call < &internal_call_storage[INTERNAL_CALL_COUNT]          ) {
 410                 assert((call->tc_flags & THREAD_CALL_ALLOC) == 0);
 411                 enqueue_head(&thread_call_internal_queue, qe(call));
 412                 thread_call_internal_queue_count++;
 413         }
 414 }
 415
 416 /*
 417  *      _pending_call_enqueue:
 418  *
 419  *      Place an entry at the end of the
 420  *      pending queue, to be executed soon.
 421  *
 422  *      Returns TRUE if the entry was already
 423  *      on a queue.
 424  *
 425  *      Called with thread_call_lock held.
 426  */
 427 static __inline__ boolean_t
 428 _pending_call_enqueue(
 429     thread_call_t               call,
 430         thread_call_group_t     group)
 431 {
 432         queue_head_t            *old_queue;
 433
 434         old_queue = call_entry_enqueue_tail(CE(call), &group->pending_queue);
 435
 436         if (old_queue == NULL) {
 437                 call->tc_submit_count++;
 438         }
 439
 440         group->pending_count++;
 441
 442         thread_call_wake(group);
 443
 444         return (old_queue != NULL);
 445 }
 446
 447 /*
 448  *      _delayed_call_enqueue:
 449  *
 450  *      Place an entry on the delayed queue,
 451  *      after existing entries with an earlier
 452  *      (or identical) deadline.
 453  *
 454  *      Returns TRUE if the entry was already
 455  *      on a queue.
 456  *
 457  *      Called with thread_call_lock held.
 458  */
 459 static __inline__ boolean_t
 460 _delayed_call_enqueue(
 461         thread_call_t           call,
 462         thread_call_group_t     group,
 463         uint64_t                deadline)
 464 {
 465         queue_head_t            *old_queue;
 466
 467         old_queue = call_entry_enqueue_deadline(CE(call), &group->delayed_queue, deadline);
 468
 469         if (old_queue == &group->pending_queue)
 470                 group->pending_count--;
 471         else if (old_queue == NULL)
 472                 call->tc_submit_count++;
 473
 474         return (old_queue != NULL);
 475 }
 476
 477 /*
 478  *      _call_dequeue:
 479  *
 480  *      Remove an entry from a queue.
 481  *
 482  *      Returns TRUE if the entry was on a queue.
 483  *
 484  *      Called with thread_call_lock held.
 485  */
 486 static __inline__ boolean_t
 487 _call_dequeue(
 488         thread_call_t           call,
 489         thread_call_group_t     group)
 490 {
 491         queue_head_t            *old_queue;
 492
 493         old_queue = call_entry_dequeue(CE(call));
 494
 495         if (old_queue != NULL) {
 496                 call->tc_finish_count++;
 497                 if (old_queue == &group->pending_queue)
 498                         group->pending_count--;
 499         }
 500
 501         return (old_queue != NULL);
 502 }
 503
 504 /*
 505  *      _set_delayed_call_timer:
 506  *
 507  *      Reset the timer so that it
 508  *      next expires when the entry is due.
 509  *
 510  *      Called with thread_call_lock held.
 511  */
 512 static __inline__ void
 513 _set_delayed_call_timer(
 514     thread_call_t               call,
 515         thread_call_group_t     group)
 516 {
 517         uint64_t leeway;
 518
 519         assert((call->tc_soft_deadline != 0) && ((call->tc_soft_deadline <= call->tc_call.deadline)));
 520
 521         leeway = call->tc_call.deadline - call->tc_soft_deadline;
 522         timer_call_enter_with_leeway(&group->delayed_timer, NULL,
 523             call->tc_soft_deadline, leeway,
 524             TIMER_CALL_SYS_CRITICAL|TIMER_CALL_LEEWAY,
 525             ((call->tc_soft_deadline & 0x1) == 0x1));
 526 }
 527
 528 /*
 529  *      _remove_from_pending_queue:
 530  *
 531  *      Remove the first (or all) matching
 532  *      entries from the pending queue.
 533  *
 534  *      Returns TRUE if any matching entries
 535  *      were found.
 536  *
 537  *      Called with thread_call_lock held.
 538  */
 539 static boolean_t
 540 _remove_from_pending_queue(
 541     thread_call_func_t          func,
 542     thread_call_param_t         param0,
 543     boolean_t                           remove_all)
 544 {
 545         boolean_t                               call_removed = FALSE;
 546         thread_call_t                   call;
 547         thread_call_group_t             group = &thread_call_groups[THREAD_CALL_PRIORITY_HIGH];
 548
 549         call = TC(queue_first(&group->pending_queue));
 550
 551         while (!queue_end(&group->pending_queue, qe(call))) {
 552                 if (call->tc_call.func == func &&
 553                                 call->tc_call.param0 == param0) {
 554                         thread_call_t   next = TC(queue_next(qe(call)));
 555
 556                         _call_dequeue(call, group);
 557
 558                         _internal_call_release(call);
 559
 560                         call_removed = TRUE;
 561                         if (!remove_all)
 562                                 break;
 563
 564                         call = next;
 565                 }
 566                 else
 567                         call = TC(queue_next(qe(call)));
 568         }
 569
 570         return (call_removed);
 571 }
 572
 573 /*
 574  *      _remove_from_delayed_queue:
 575  *
 576  *      Remove the first (or all) matching
 577  *      entries from the delayed queue.
 578  *
 579  *      Returns TRUE if any matching entries
 580  *      were found.
 581  *
 582  *      Called with thread_call_lock held.
 583  */
 584 static boolean_t
 585 _remove_from_delayed_queue(
 586     thread_call_func_t          func,
 587     thread_call_param_t         param0,
 588     boolean_t                           remove_all)
 589 {
 590         boolean_t                       call_removed = FALSE;
 591         thread_call_t                   call;
 592         thread_call_group_t             group = &thread_call_groups[THREAD_CALL_PRIORITY_HIGH];
 593
 594         call = TC(queue_first(&group->delayed_queue));
 595
 596         while (!queue_end(&group->delayed_queue, qe(call))) {
 597                 if (call->tc_call.func == func  &&
 598                                 call->tc_call.param0 == param0) {
 599                         thread_call_t   next = TC(queue_next(qe(call)));
 600
 601                         _call_dequeue(call, group);
 602
 603                         _internal_call_release(call);
 604
 605                         call_removed = TRUE;
 606                         if (!remove_all)
 607                                 break;
 608
 609                         call = next;
 610                 }
 611                 else
 612                         call = TC(queue_next(qe(call)));
 613         }
 614
 615         return (call_removed);
 616 }
 617
 618 /*
 619  *      thread_call_func_delayed:
 620  *
 621  *      Enqueue a function callout to
 622  *      occur at the stated time.
 623  */
 624 void
 625 thread_call_func_delayed(
 626                 thread_call_func_t              func,
 627                 thread_call_param_t             param,
 628                 uint64_t                        deadline)
 629 {
 630         (void)thread_call_enter_delayed_internal(NULL, func, param, 0, deadline, 0, 0);
 631 }
 632
 633 /*
 634  * thread_call_func_delayed_with_leeway:
 635  *
 636  * Same as thread_call_func_delayed(), but with
 637  * leeway/flags threaded through.
 638  */
 639
 640 void
 641 thread_call_func_delayed_with_leeway(
 642         thread_call_func_t              func,
 643         thread_call_param_t             param,
 644         uint64_t                deadline,
 645         uint64_t                leeway,
 646         uint32_t                flags)
 647 {
 648         (void)thread_call_enter_delayed_internal(NULL, func, param, 0, deadline, leeway, flags);
 649 }
 650
 651 /*
 652  *      thread_call_func_cancel:
 653  *
 654  *      Dequeue a function callout.
 655  *
 656  *      Removes one (or all) { function, argument }
 657  *      instance(s) from either (or both)
 658  *      the pending and the delayed queue,
 659  *      in that order.
 660  *
 661  *      Returns TRUE if any calls were cancelled.
 662  */
 663 boolean_t
 664 thread_call_func_cancel(
 665                 thread_call_func_t              func,
 666                 thread_call_param_t             param,
 667                 boolean_t                       cancel_all)
 668 {
 669         boolean_t       result;
 670         spl_t           s;
 671
 672         s = splsched();
 673         thread_call_lock_spin();
 674
 675         if (cancel_all)
 676                 result = _remove_from_pending_queue(func, param, cancel_all) |
 677                         _remove_from_delayed_queue(func, param, cancel_all);
 678         else
 679                 result = _remove_from_pending_queue(func, param, cancel_all) ||
 680                         _remove_from_delayed_queue(func, param, cancel_all);
 681
 682         thread_call_unlock();
 683         splx(s);
 684
 685         return (result);
 686 }
 687
 688 /*
 689  * Allocate a thread call with a given priority.  Importances
 690  * other than THREAD_CALL_PRIORITY_HIGH will be run in threads
 691  * with eager preemption enabled (i.e. may be aggressively preempted
 692  * by higher-priority threads which are not in the normal "urgent" bands).
 693  */
 694 thread_call_t
 695 thread_call_allocate_with_priority(
 696                 thread_call_func_t              func,
 697                 thread_call_param_t             param0,
 698                 thread_call_priority_t          pri)
 699 {
 700         thread_call_t call;
 701
 702         if (pri > THREAD_CALL_PRIORITY_LOW) {
 703                 panic("Invalid pri: %d\n", pri);
 704         }
 705
 706         call = thread_call_allocate(func, param0);
 707         call->tc_pri = pri;
 708
 709         return call;
 710 }
 711
 712 /*
 713  *      thread_call_allocate:
 714  *
 715  *      Allocate a callout entry.
 716  */
 717 thread_call_t
 718 thread_call_allocate(
 719                 thread_call_func_t              func,
 720                 thread_call_param_t             param0)
 721 {
 722         thread_call_t   call = zalloc(thread_call_zone);
 723
 724         thread_call_setup(call, func, param0);
 725         call->tc_refs = 1;
 726         call->tc_flags = THREAD_CALL_ALLOC;
 727
 728         return (call);
 729 }
 730
 731 /*
 732  *      thread_call_free:
 733  *
 734  *      Release a callout.  If the callout is currently
 735  *      executing, it will be freed when all invocations
 736  *      finish.
 737  */
 738 boolean_t
 739 thread_call_free(
 740                 thread_call_t           call)
 741 {
 742         spl_t   s;
 743         int32_t refs;
 744
 745         s = splsched();
 746         thread_call_lock_spin();
 747
 748         if (call->tc_call.queue != NULL) {
 749                 thread_call_unlock();
 750                 splx(s);
 751
 752                 return (FALSE);
 753         }
 754
 755         refs = --call->tc_refs;
 756         if (refs < 0) {
 757                 panic("Refcount negative: %d\n", refs);
 758         }
 759
 760         thread_call_unlock();
 761         splx(s);
 762
 763         if (refs == 0) {
 764                 zfree(thread_call_zone, call);
 765         }
 766
 767         return (TRUE);
 768 }
 769
 770 /*
 771  *      thread_call_enter:
 772  *
 773  *      Enqueue a callout entry to occur "soon".
 774  *
 775  *      Returns TRUE if the call was
 776  *      already on a queue.
 777  */
 778 boolean_t
 779 thread_call_enter(
 780                 thread_call_t           call)
 781 {
 782         boolean_t               result = TRUE;
 783         thread_call_group_t     group;
 784         spl_t                   s;
 785
 786         group = thread_call_get_group(call);
 787
 788         s = splsched();
 789         thread_call_lock_spin();
 790
 791         if (call->tc_call.queue != &group->pending_queue) {
 792                 result = _pending_call_enqueue(call, group);
 793         }
 794
 795         call->tc_call.param1 = 0;
 796
 797         thread_call_unlock();
 798         splx(s);
 799
 800         return (result);
 801 }
 802
 803 boolean_t
 804 thread_call_enter1(
 805                 thread_call_t                   call,
 806                 thread_call_param_t             param1)
 807 {
 808         boolean_t               result = TRUE;
 809         thread_call_group_t     group;
 810         spl_t                   s;
 811
 812         group = thread_call_get_group(call);
 813
 814         s = splsched();
 815         thread_call_lock_spin();
 816
 817         if (call->tc_call.queue != &group->pending_queue) {
 818                 result = _pending_call_enqueue(call, group);
 819         }
 820
 821         call->tc_call.param1 = param1;
 822
 823         thread_call_unlock();
 824         splx(s);
 825
 826         return (result);
 827 }
 828
 829 /*
 830  *      thread_call_enter_delayed:
 831  *
 832  *      Enqueue a callout entry to occur
 833  *      at the stated time.
 834  *
 835  *      Returns TRUE if the call was
 836  *      already on a queue.
 837  */
 838 boolean_t
 839 thread_call_enter_delayed(
 840                 thread_call_t           call,
 841                 uint64_t                deadline)
 842 {
 843         assert(call);
 844         return thread_call_enter_delayed_internal(call, NULL, 0, 0, deadline, 0, 0);
 845 }
 846
 847 boolean_t
 848 thread_call_enter1_delayed(
 849                 thread_call_t                   call,
 850                 thread_call_param_t             param1,
 851                 uint64_t                        deadline)
 852 {
 853         assert(call);
 854         return thread_call_enter_delayed_internal(call, NULL, 0, param1, deadline, 0, 0);
 855 }
 856
 857 boolean_t
 858 thread_call_enter_delayed_with_leeway(
 859                 thread_call_t           call,
 860                 thread_call_param_t     param1,
 861                 uint64_t                deadline,
 862                 uint64_t                leeway,
 863                 unsigned int            flags)
 864 {
 865         assert(call);
 866         return thread_call_enter_delayed_internal(call, NULL, 0, param1, deadline, leeway, flags);
 867 }
 868
 869
 870 /*
 871  * thread_call_enter_delayed_internal:
 872  * enqueue a callout entry to occur at the stated time
 873  *
 874  * Returns True if the call was already on a queue
 875  * params:
 876  * call     - structure encapsulating state of the callout
 877  * alt_func/alt_param0 - if call is NULL, allocate temporary storage using these parameters
 878  * deadline - time deadline in nanoseconds
 879  * leeway   - timer slack represented as delta of deadline.
 880  * flags    - THREAD_CALL_DELAY_XXX : classification of caller's desires wrt timer coalescing.
 881  *            THREAD_CALL_DELAY_LEEWAY : value in leeway is used for timer coalescing.
 882  */
 883 boolean_t
 884 thread_call_enter_delayed_internal(
 885                 thread_call_t           call,
 886                 thread_call_func_t      alt_func,
 887                 thread_call_param_t     alt_param0,
 888                 thread_call_param_t     param1,
 889                 uint64_t                deadline,
 890                 uint64_t                leeway,
 891                 unsigned int            flags)
 892 {
 893         boolean_t               result = TRUE;
 894         thread_call_group_t     group;
 895         spl_t                   s;
 896         uint64_t                abstime, sdeadline, slop;
 897         uint32_t                urgency;
 898
 899         /* direct mapping between thread_call, timer_call, and timeout_urgency values */
 900         urgency = (flags & TIMEOUT_URGENCY_MASK);
 901
 902         s = splsched();
 903         thread_call_lock_spin();
 904
 905         if (call == NULL) {
 906                 /* allocate a structure out of internal storage, as a convenience for BSD callers */
 907                 call = _internal_call_allocate(alt_func, alt_param0);
 908         }
 909
 910         group = thread_call_get_group(call);
 911         abstime =  mach_absolute_time();
 912
 913         call->tc_flags |= THREAD_CALL_DELAYED;
 914
 915         call->tc_soft_deadline = sdeadline = deadline;
 916
 917         boolean_t ratelimited = FALSE;
 918         slop = timer_call_slop(deadline, abstime, urgency, current_thread(), &ratelimited);
 919
 920         if ((flags & THREAD_CALL_DELAY_LEEWAY) != 0 && leeway > slop)
 921                 slop = leeway;
 922
 923         if (UINT64_MAX - deadline <= slop)
 924                 deadline = UINT64_MAX;
 925         else
 926                 deadline += slop;
 927
 928         /* Bit 0 of the "soft" deadline indicates that
 929          * this particular callout requires rate-limiting
 930          * behaviour. Maintain the invariant deadline >= soft_deadline
 931          */
 932         deadline |= 1;
 933         if (ratelimited) {
 934                 call->tc_soft_deadline |= 0x1ULL;
 935         } else {
 936                 call->tc_soft_deadline &= ~0x1ULL;
 937         }
 938
 939         call->tc_call.param1 = param1;
 940         call->ttd = (sdeadline > abstime) ? (sdeadline - abstime) : 0;
 941
 942         result = _delayed_call_enqueue(call, group, deadline);
 943
 944         if (queue_first(&group->delayed_queue) == qe(call))
 945                 _set_delayed_call_timer(call, group);
 946
 947 #if CONFIG_DTRACE
 948         DTRACE_TMR5(thread_callout__create, thread_call_func_t, call->tc_call.func, uint64_t, (deadline - sdeadline), uint64_t, (call->ttd >> 32), (unsigned) (call->ttd & 0xFFFFFFFF), call);
 949 #endif
 950         thread_call_unlock();
 951         splx(s);
 952
 953         return (result);
 954 }
 955
 956 /*
 957  *      thread_call_cancel:
 958  *
 959  *      Dequeue a callout entry.
 960  *
 961  *      Returns TRUE if the call was
 962  *      on a queue.
 963  */
 964 boolean_t
 965 thread_call_cancel(
 966                 thread_call_t           call)
 967 {
 968         boolean_t               result, do_cancel_callout = FALSE;
 969         thread_call_group_t     group;
 970         spl_t                   s;
 971
 972         group = thread_call_get_group(call);
 973
 974         s = splsched();
 975         thread_call_lock_spin();
 976
 977         if ((call->tc_call.deadline != 0) &&
 978             (queue_first(&group->delayed_queue) == qe(call))) {
 979                 assert (call->tc_call.queue == &group->delayed_queue);
 980                 do_cancel_callout = TRUE;
 981         }
 982
 983         result = _call_dequeue(call, group);
 984
 985         if (do_cancel_callout) {
 986                 timer_call_cancel(&group->delayed_timer);
 987                 if (!queue_empty(&group->delayed_queue)) {
 988                         _set_delayed_call_timer(TC(queue_first(&group->delayed_queue)), group);
 989                 }
 990         }
 991
 992         thread_call_unlock();
 993         splx(s);
 994 #if CONFIG_DTRACE
 995         DTRACE_TMR4(thread_callout__cancel, thread_call_func_t, call->tc_call.func, 0, (call->ttd >> 32), (unsigned) (call->ttd & 0xFFFFFFFF));
 996 #endif
 997
 998         return (result);
 999 }
1000
1001 /*
1002  * Cancel a thread call.  If it cannot be cancelled (i.e.
1003  * is already in flight), waits for the most recent invocation
1004  * to finish.  Note that if clients re-submit this thread call,
1005  * it may still be pending or in flight when thread_call_cancel_wait
1006  * returns, but all requests to execute this work item prior
1007  * to the call to thread_call_cancel_wait will have finished.
1008  */
1009 boolean_t
1010 thread_call_cancel_wait(
1011                 thread_call_t           call)
1012 {
1013         boolean_t               result;
1014         thread_call_group_t     group;
1015
1016         if ((call->tc_flags & THREAD_CALL_ALLOC) == 0) {
1017                 panic("%s: Can't wait on thread call whose storage I don't own.", __FUNCTION__);
1018         }
1019
1020         group = thread_call_get_group(call);
1021
1022         (void) splsched();
1023         thread_call_lock_spin();
1024
1025         result = _call_dequeue(call, group);
1026         if (result == FALSE) {
1027                 thread_call_wait_locked(call);
1028         }
1029
1030         thread_call_unlock();
1031         (void) spllo();
1032
1033         return result;
1034 }
1035
1036
1037 /*
1038  *      thread_call_wake:
1039  *
1040  *      Wake a call thread to service
1041  *      pending call entries.  May wake
1042  *      the daemon thread in order to
1043  *      create additional call threads.
1044  *
1045  *      Called with thread_call_lock held.
1046  *
1047  *      For high-priority group, only does wakeup/creation if there are no threads
1048  *      running.
1049  */
1050 static __inline__ void
1051 thread_call_wake(
1052         thread_call_group_t             group)
1053 {
1054         /*
1055          * New behavior: use threads if you've got 'em.
1056          * Traditional behavior: wake only if no threads running.
1057          */
1058         if (group_isparallel(group) || group->active_count == 0) {
1059                 if (wait_queue_wakeup_one(&group->idle_wqueue, NO_EVENT, THREAD_AWAKENED, -1) == KERN_SUCCESS) {
1060                         group->idle_count--; group->active_count++;
1061
1062                         if (group->idle_count == 0) {
1063                                 timer_call_cancel(&group->dealloc_timer);
1064                                 group->flags &= TCG_DEALLOC_ACTIVE;
1065                         }
1066                 } else {
1067                         if (!thread_call_daemon_awake && thread_call_group_should_add_thread(group)) {
1068                                 thread_call_daemon_awake = TRUE;
1069                                 wait_queue_wakeup_one(&daemon_wqueue, NO_EVENT, THREAD_AWAKENED, -1);
1070                         }
1071                 }
1072         }
1073 }
1074
1075 /*
1076  *      sched_call_thread:
1077  *
1078  *      Call out invoked by the scheduler.  Used only for high-priority
1079  *      thread call group.
1080  */
1081 static void
1082 sched_call_thread(
1083                 int                             type,
1084                 __unused        thread_t                thread)
1085 {
1086         thread_call_group_t             group;
1087
1088         group = &thread_call_groups[THREAD_CALL_PRIORITY_HIGH]; /* XXX */
1089
1090         thread_call_lock_spin();
1091
1092         switch (type) {
1093
1094                 case SCHED_CALL_BLOCK:
1095                         --group->active_count;
1096                         if (group->pending_count > 0)
1097                                 thread_call_wake(group);
1098                         break;
1099
1100                 case SCHED_CALL_UNBLOCK:
1101                         group->active_count++;
1102                         break;
1103         }
1104
1105         thread_call_unlock();
1106 }
1107
1108 /*
1109  * Interrupts disabled, lock held; returns the same way.
1110  * Only called on thread calls whose storage we own.  Wakes up
1111  * anyone who might be waiting on this work item and frees it
1112  * if the client has so requested.
1113  */
1114 static void
1115 thread_call_finish(thread_call_t call)
1116 {
1117         boolean_t dowake = FALSE;
1118
1119         call->tc_finish_count++;
1120         call->tc_refs--;
1121
1122         if ((call->tc_flags & THREAD_CALL_WAIT) != 0) {
1123                 dowake = TRUE;
1124                 call->tc_flags &= ~THREAD_CALL_WAIT;
1125
1126                 /*
1127                  * Dropping lock here because the sched call for the
1128                  * high-pri group can take the big lock from under
1129                  * a thread lock.
1130                  */
1131                 thread_call_unlock();
1132                 thread_wakeup((event_t)call);
1133                 thread_call_lock_spin();
1134         }
1135
1136         if (call->tc_refs == 0) {
1137                 if (dowake) {
1138                         panic("Someone waiting on a thread call that is scheduled for free: %p\n", call->tc_call.func);
1139                 }
1140
1141                 enable_ints_and_unlock();
1142
1143                 zfree(thread_call_zone, call);
1144
1145                 (void)disable_ints_and_lock();
1146         }
1147
1148 }
1149
1150 /*
1151  *      thread_call_thread:
1152  */
1153 static void
1154 thread_call_thread(
1155                 thread_call_group_t             group,
1156                 wait_result_t                   wres)
1157 {
1158         thread_t        self = current_thread();
1159         boolean_t       canwait;
1160
1161         if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) == 0)
1162                 (void)thread_set_tag_internal(self, THREAD_TAG_CALLOUT);
1163
1164         /*
1165          * A wakeup with THREAD_INTERRUPTED indicates that
1166          * we should terminate.
1167          */
1168         if (wres == THREAD_INTERRUPTED) {
1169                 thread_terminate(self);
1170
1171                 /* NOTREACHED */
1172                 panic("thread_terminate() returned?");
1173         }
1174
1175         (void)disable_ints_and_lock();
1176
1177         thread_sched_call(self, group->sched_call);
1178
1179         while (group->pending_count > 0) {
1180                 thread_call_t                   call;
1181                 thread_call_func_t              func;
1182                 thread_call_param_t             param0, param1;
1183
1184                 call = TC(dequeue_head(&group->pending_queue));
1185                 group->pending_count--;
1186
1187                 func = call->tc_call.func;
1188                 param0 = call->tc_call.param0;
1189                 param1 = call->tc_call.param1;
1190
1191                 call->tc_call.queue = NULL;
1192
1193                 _internal_call_release(call);
1194
1195                 /*
1196                  * Can only do wakeups for thread calls whose storage
1197                  * we control.
1198                  */
1199                 if ((call->tc_flags & THREAD_CALL_ALLOC) != 0) {
1200                         canwait = TRUE;
1201                         call->tc_refs++;        /* Delay free until we're done */
1202                 } else
1203                         canwait = FALSE;
1204
1205                 enable_ints_and_unlock();
1206
1207                 KERNEL_DEBUG_CONSTANT(
1208                                 MACHDBG_CODE(DBG_MACH_SCHED,MACH_CALLOUT) | DBG_FUNC_NONE,
1209                                 VM_KERNEL_UNSLIDE(func), param0, param1, 0, 0);
1210
1211 #if CONFIG_DTRACE
1212                 DTRACE_TMR6(thread_callout__start, thread_call_func_t, func, int, 0, int, (call->ttd >> 32), (unsigned) (call->ttd & 0xFFFFFFFF), (call->tc_flags & THREAD_CALL_DELAYED), call);
1213 #endif
1214
1215                 (*func)(param0, param1);
1216
1217 #if CONFIG_DTRACE
1218                 DTRACE_TMR6(thread_callout__end, thread_call_func_t, func, int, 0, int, (call->ttd >> 32), (unsigned) (call->ttd & 0xFFFFFFFF), (call->tc_flags & THREAD_CALL_DELAYED), call);
1219 #endif
1220
1221                 if (get_preemption_level() != 0) {
1222                         int pl = get_preemption_level();
1223                         panic("thread_call_thread: preemption_level %d, last callout %p(%p, %p)",
1224                                         pl, (void *)VM_KERNEL_UNSLIDE(func), param0, param1);
1225                 }
1226
1227                 (void)thread_funnel_set(self->funnel_lock, FALSE);              /* XXX */
1228
1229                 (void) disable_ints_and_lock();
1230
1231                 if (canwait) {
1232                         /* Frees if so desired */
1233                         thread_call_finish(call);
1234                 }
1235         }
1236
1237         thread_sched_call(self, NULL);
1238         group->active_count--;
1239
1240         if (self->callout_woken_from_icontext && !self->callout_woke_thread) {
1241                 ledger_credit(self->t_ledger, task_ledgers.interrupt_wakeups, 1);
1242                 if (self->callout_woken_from_platform_idle)
1243                         ledger_credit(self->t_ledger, task_ledgers.platform_idle_wakeups, 1);
1244         }
1245
1246         self->callout_woken_from_icontext = FALSE;
1247         self->callout_woken_from_platform_idle = FALSE;
1248         self->callout_woke_thread = FALSE;
1249
1250         if (group_isparallel(group)) {
1251                 /*
1252                  * For new style of thread group, thread always blocks.
1253                  * If we have more than the target number of threads,
1254                  * and this is the first to block, and it isn't active
1255                  * already, set a timer for deallocating a thread if we
1256                  * continue to have a surplus.
1257                  */
1258                 group->idle_count++;
1259
1260                 if (group->idle_count == 1) {
1261                         group->idle_timestamp = mach_absolute_time();
1262                 }
1263
1264                 if (((group->flags & TCG_DEALLOC_ACTIVE) == 0) &&
1265                                 ((group->active_count + group->idle_count) > group->target_thread_count)) {
1266                         group->flags |= TCG_DEALLOC_ACTIVE;
1267                         thread_call_start_deallocate_timer(group);
1268                 }
1269
1270                 /* Wait for more work (or termination) */
1271                 wres = wait_queue_assert_wait(&group->idle_wqueue, NO_EVENT, THREAD_INTERRUPTIBLE, 0);
1272                 if (wres != THREAD_WAITING) {
1273                         panic("kcall worker unable to assert wait?");
1274                 }
1275
1276                 enable_ints_and_unlock();
1277
1278                 thread_block_parameter((thread_continue_t)thread_call_thread, group);
1279         } else {
1280                 if (group->idle_count < group->target_thread_count) {
1281                         group->idle_count++;
1282
1283                         wait_queue_assert_wait(&group->idle_wqueue, NO_EVENT, THREAD_UNINT, 0); /* Interrupted means to exit */
1284
1285                         enable_ints_and_unlock();
1286
1287                         thread_block_parameter((thread_continue_t)thread_call_thread, group);
1288                         /* NOTREACHED */
1289                 }
1290         }
1291
1292         enable_ints_and_unlock();
1293
1294         thread_terminate(self);
1295         /* NOTREACHED */
1296 }
1297
1298 /*
1299  *      thread_call_daemon: walk list of groups, allocating
1300  *      threads if appropriate (as determined by
1301  *      thread_call_group_should_add_thread()).
1302  */
1303 static void
1304 thread_call_daemon_continue(__unused void *arg)
1305 {
1306         int             i;
1307         kern_return_t   kr;
1308         thread_call_group_t group;
1309
1310         (void)disable_ints_and_lock();
1311
1312         /* Starting at zero happens to be high-priority first. */
1313         for (i = 0; i < THREAD_CALL_GROUP_COUNT; i++) {
1314                 group = &thread_call_groups[i];
1315                 while (thread_call_group_should_add_thread(group)) {
1316                         group->active_count++;
1317
1318                         enable_ints_and_unlock();
1319
1320                         kr = thread_call_thread_create(group);
1321                         if (kr != KERN_SUCCESS) {
1322                                 /*
1323                                  * On failure, just pause for a moment and give up.
1324                                  * We can try again later.
1325                                  */
1326                                 delay(10000); /* 10 ms */
1327                                 (void)disable_ints_and_lock();
1328                                 goto out;
1329                         }
1330
1331                         (void)disable_ints_and_lock();
1332                 }
1333         }
1334
1335 out:
1336         thread_call_daemon_awake = FALSE;
1337         wait_queue_assert_wait(&daemon_wqueue, NO_EVENT, THREAD_UNINT, 0);
1338
1339         enable_ints_and_unlock();
1340
1341         thread_block_parameter((thread_continue_t)thread_call_daemon_continue, NULL);
1342         /* NOTREACHED */
1343 }
1344
1345 static void
1346 thread_call_daemon(
1347                 __unused void    *arg)
1348 {
1349         thread_t        self = current_thread();
1350
1351         self->options |= TH_OPT_VMPRIV;
1352         vm_page_free_reserve(2);        /* XXX */
1353
1354         thread_call_daemon_continue(NULL);
1355         /* NOTREACHED */
1356 }
1357
1358 /*
1359  * Schedule timer to deallocate a worker thread if we have a surplus
1360  * of threads (in excess of the group's target) and at least one thread
1361  * is idle the whole time.
1362  */
1363 static void
1364 thread_call_start_deallocate_timer(
1365                 thread_call_group_t group)
1366 {
1367         uint64_t deadline;
1368         boolean_t onqueue;
1369
1370         assert(group->idle_count > 0);
1371
1372         group->flags |= TCG_DEALLOC_ACTIVE;
1373         deadline = group->idle_timestamp + thread_call_dealloc_interval_abs;
1374         onqueue = timer_call_enter(&group->dealloc_timer, deadline, 0);
1375
1376         if (onqueue) {
1377                 panic("Deallocate timer already active?");
1378         }
1379 }
1380
1381 void
1382 thread_call_delayed_timer(
1383                 timer_call_param_t              p0,
1384                 __unused timer_call_param_t     p1
1385 )
1386 {
1387         thread_call_t                   call;
1388         thread_call_group_t             group = p0;
1389         uint64_t                        timestamp;
1390
1391         thread_call_lock_spin();
1392
1393         timestamp = mach_absolute_time();
1394
1395         call = TC(queue_first(&group->delayed_queue));
1396
1397         while (!queue_end(&group->delayed_queue, qe(call))) {
1398                 if (call->tc_soft_deadline <= timestamp) {
1399                         /* Bit 0 of the "soft" deadline indicates that
1400                          * this particular callout is rate-limited
1401                          * and hence shouldn't be processed before its
1402                          * hard deadline. Rate limited timers aren't
1403                          * skipped when a forcible reevaluation is in progress.
1404                          */
1405                         if ((call->tc_soft_deadline & 0x1) &&
1406                             (CE(call)->deadline > timestamp) &&
1407                             (ml_timer_forced_evaluation() == FALSE)) {
1408                                 break;
1409                         }
1410                         _pending_call_enqueue(call, group);
1411                 } /* TODO, identify differentially coalesced timers */
1412                 else
1413                         break;
1414
1415                 call = TC(queue_first(&group->delayed_queue));
1416         }
1417
1418         if (!queue_end(&group->delayed_queue, qe(call)))
1419                 _set_delayed_call_timer(call, group);
1420
1421         thread_call_unlock();
1422 }
1423
1424 static void
1425 thread_call_delayed_timer_rescan(timer_call_param_t             p0, __unused timer_call_param_t p1)
1426 {
1427         thread_call_t                   call;
1428         thread_call_group_t             group = p0;
1429         uint64_t                                timestamp;
1430         boolean_t               istate;
1431
1432         istate = ml_set_interrupts_enabled(FALSE);
1433         thread_call_lock_spin();
1434
1435         assert(ml_timer_forced_evaluation() == TRUE);
1436         timestamp = mach_absolute_time();
1437
1438         call = TC(queue_first(&group->delayed_queue));
1439
1440         while (!queue_end(&group->delayed_queue, qe(call))) {
1441                 if (call->tc_soft_deadline <= timestamp) {
1442                         _pending_call_enqueue(call, group);
1443                         call = TC(queue_first(&group->delayed_queue));
1444                 }
1445                 else {
1446                         uint64_t skew = call->tc_call.deadline - call->tc_soft_deadline;
1447                         assert (call->tc_call.deadline >= call->tc_soft_deadline);
1448                         /* On a latency quality-of-service level change,
1449                          * re-sort potentially rate-limited callout. The platform
1450                          * layer determines which timers require this.
1451                          */
1452                         if (timer_resort_threshold(skew)) {
1453                                 _call_dequeue(call, group);
1454                                 _delayed_call_enqueue(call, group, call->tc_soft_deadline);
1455                         }
1456                         call = TC(queue_next(qe(call)));
1457                 }
1458         }
1459
1460         if (!queue_empty(&group->delayed_queue))
1461                 _set_delayed_call_timer(TC(queue_first(&group->delayed_queue)), group);
1462         thread_call_unlock();
1463         ml_set_interrupts_enabled(istate);
1464 }
1465
1466 void
1467 thread_call_delayed_timer_rescan_all(void) {
1468         thread_call_delayed_timer_rescan((timer_call_param_t)&thread_call_groups[THREAD_CALL_PRIORITY_LOW], NULL);
1469         thread_call_delayed_timer_rescan((timer_call_param_t)&thread_call_groups[THREAD_CALL_PRIORITY_USER], NULL);
1470         thread_call_delayed_timer_rescan((timer_call_param_t)&thread_call_groups[THREAD_CALL_PRIORITY_KERNEL], NULL);
1471         thread_call_delayed_timer_rescan((timer_call_param_t)&thread_call_groups[THREAD_CALL_PRIORITY_HIGH], NULL);
1472 }
1473
1474 /*
1475  * Timer callback to tell a thread to terminate if
1476  * we have an excess of threads and at least one has been
1477  * idle for a long time.
1478  */
1479 static void
1480 thread_call_dealloc_timer(
1481                 timer_call_param_t              p0,
1482                 __unused timer_call_param_t     p1)
1483 {
1484         thread_call_group_t group = (thread_call_group_t)p0;
1485         uint64_t now;
1486         kern_return_t res;
1487         boolean_t terminated = FALSE;
1488
1489         thread_call_lock_spin();
1490
1491         now = mach_absolute_time();
1492         if (group->idle_count > 0) {
1493                 if (now > group->idle_timestamp + thread_call_dealloc_interval_abs) {
1494                         terminated = TRUE;
1495                         group->idle_count--;
1496                         res = wait_queue_wakeup_one(&group->idle_wqueue, NO_EVENT, THREAD_INTERRUPTED, -1);
1497                         if (res != KERN_SUCCESS) {
1498                                 panic("Unable to wake up idle thread for termination?");
1499                         }
1500                 }
1501
1502         }
1503
1504         /*
1505          * If we still have an excess of threads, schedule another
1506          * invocation of this function.
1507          */
1508         if (group->idle_count > 0 && (group->idle_count + group->active_count > group->target_thread_count)) {
1509                 /*
1510                  * If we killed someone just now, push out the
1511                  * next deadline.
1512                  */
1513                 if (terminated) {
1514                         group->idle_timestamp = now;
1515                 }
1516
1517                 thread_call_start_deallocate_timer(group);
1518         } else {
1519                 group->flags &= ~TCG_DEALLOC_ACTIVE;
1520         }
1521
1522         thread_call_unlock();
1523 }
1524
1525 /*
1526  * Wait for all requested invocations of a thread call prior to now
1527  * to finish.  Can only be invoked on thread calls whose storage we manage.
1528  * Just waits for the finish count to catch up to the submit count we find
1529  * at the beginning of our wait.
1530  */
1531 static void
1532 thread_call_wait_locked(thread_call_t call)
1533 {
1534         uint64_t submit_count;
1535         wait_result_t res;
1536
1537         assert(call->tc_flags & THREAD_CALL_ALLOC);
1538
1539         submit_count = call->tc_submit_count;
1540
1541         while (call->tc_finish_count < submit_count) {
1542                 call->tc_flags |= THREAD_CALL_WAIT;
1543
1544                 res = assert_wait(call, THREAD_UNINT);
1545                 if (res != THREAD_WAITING) {
1546                         panic("Unable to assert wait?");
1547                 }
1548
1549                 thread_call_unlock();
1550                 (void) spllo();
1551
1552                 res = thread_block(NULL);
1553                 if (res != THREAD_AWAKENED) {
1554                         panic("Awoken with %d?", res);
1555                 }
1556
1557                 (void) splsched();
1558                 thread_call_lock_spin();
1559         }
1560 }
1561
1562 /*
1563  * Determine whether a thread call is either on a queue or
1564  * currently being executed.
1565  */
1566 boolean_t
1567 thread_call_isactive(thread_call_t call)
1568 {
1569         boolean_t active;
1570
1571         disable_ints_and_lock();
1572         active = (call->tc_submit_count > call->tc_finish_count);
1573         enable_ints_and_unlock();
1574
1575         return active;
1576 }