/*
 * Copyright (c) 1993-1995, 1999-2008 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <mach/mach_types.h>
#include <mach/thread_act.h>

#include <kern/kern_types.h>
#include <kern/zalloc.h>
#include <kern/sched_prim.h>
#include <kern/clock.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/waitq.h>
#include <kern/ledger.h>

#include <vm/vm_pageout.h>

#include <kern/thread_call.h>
#include <kern/call_entry.h>
#include <kern/timer_call.h>

#include <libkern/OSAtomic.h>
#include <kern/timer_queue.h>

#include <sys/kdebug.h>

#include <machine/machine_routines.h>

static zone_t		thread_call_zone;
static struct waitq	daemon_waitq;

struct thread_call_group {
	queue_head_t		pending_queue;
	uint32_t		pending_count;

	queue_head_t		delayed_queue;
	uint32_t		delayed_count;

	timer_call_data_t	delayed_timer;
	timer_call_data_t	dealloc_timer;

	struct waitq		idle_waitq;
	uint32_t		idle_count, active_count;

	integer_t		pri;		/* scheduling priority of worker threads (used below; restored here) */
	uint32_t		target_thread_count;
	uint64_t		idle_timestamp;

	uint32_t		flags;		/* TCG_* flags (used below; restored here) */
	sched_call_t		sched_call;
};

typedef struct thread_call_group	*thread_call_group_t;

#define TCG_PARALLEL		0x01
#define TCG_DEALLOC_ACTIVE	0x02
#define TCG_CONTINUOUS		0x04

#define THREAD_CALL_PRIO_COUNT		4
#define THREAD_CALL_ABSTIME_COUNT	4
#define THREAD_CALL_CONTTIME_COUNT	4
#define THREAD_CALL_GROUP_COUNT		(THREAD_CALL_CONTTIME_COUNT + THREAD_CALL_ABSTIME_COUNT)
#define THREAD_CALL_THREAD_MIN		4
#define INTERNAL_CALL_COUNT		768
#define THREAD_CALL_DEALLOC_INTERVAL_NS	(5 * 1000 * 1000)	/* 5 ms */
#define THREAD_CALL_ADD_RATIO		4
#define THREAD_CALL_MACH_FACTOR_CAP	3

#define IS_CONT_GROUP(group) \
	(((group)->flags & TCG_CONTINUOUS) ? TRUE : FALSE)

// groups [0..4]: thread calls in mach_absolute_time
// groups [4..8]: thread calls in mach_continuous_time
static struct thread_call_group	thread_call_groups[THREAD_CALL_GROUP_COUNT];

static struct thread_call_group	*abstime_thread_call_groups;
static struct thread_call_group	*conttime_thread_call_groups;

static boolean_t		thread_call_daemon_awake;
static thread_call_data_t	internal_call_storage[INTERNAL_CALL_COUNT];
static queue_head_t		thread_call_internal_queue;
int				thread_call_internal_queue_count = 0;
static uint64_t			thread_call_dealloc_interval_abs;

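/*
 * Illustrative sketch only (not part of the original source): the two group
 * pointers above alias halves of thread_call_groups[], so a (priority,
 * clock-domain) pair selects a group as below.  The helper name is
 * hypothetical; the real lookup in this file is thread_call_get_group().
 */
#if 0	/* example only */
static thread_call_group_t
example_group_for(thread_call_priority_t pri, boolean_t continuous)
{
	/* abstime groups occupy indices [0..THREAD_CALL_ABSTIME_COUNT),
	 * conttime groups the following THREAD_CALL_CONTTIME_COUNT entries */
	return continuous ? &conttime_thread_call_groups[pri]
			  : &abstime_thread_call_groups[pri];
}
#endif
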
static __inline__ thread_call_t	_internal_call_allocate(thread_call_func_t func, thread_call_param_t param0);
static __inline__ void		_internal_call_release(thread_call_t call);
static __inline__ boolean_t	_pending_call_enqueue(thread_call_t call, thread_call_group_t group);
static __inline__ boolean_t	_delayed_call_enqueue(thread_call_t call, thread_call_group_t group, uint64_t deadline);
static __inline__ boolean_t	_call_dequeue(thread_call_t call, thread_call_group_t group);
static __inline__ void		thread_call_wake(thread_call_group_t group);
static __inline__ void		_set_delayed_call_timer(thread_call_t call, thread_call_group_t group);
static boolean_t		_remove_from_pending_queue(thread_call_func_t func, thread_call_param_t param0, boolean_t remove_all);
static boolean_t		_remove_from_delayed_queue(thread_call_func_t func, thread_call_param_t param0, boolean_t remove_all);
static void			thread_call_daemon(void *arg);
static void			thread_call_thread(thread_call_group_t group, wait_result_t wres);
extern void			thread_call_delayed_timer(timer_call_param_t p0, timer_call_param_t p1);
static void			thread_call_dealloc_timer(timer_call_param_t p0, timer_call_param_t p1);
static void			thread_call_group_setup(thread_call_group_t group, thread_call_priority_t pri, uint32_t target_thread_count, boolean_t parallel, boolean_t continuous);
static void			sched_call_thread(int type, thread_t thread);
static void			thread_call_start_deallocate_timer(thread_call_group_t group);
static void			thread_call_wait_locked(thread_call_t call);
static boolean_t		thread_call_enter_delayed_internal(thread_call_t call,
					thread_call_func_t alt_func, thread_call_param_t alt_param0,
					thread_call_param_t param1, uint64_t deadline,
					uint64_t leeway, unsigned int flags);

#define qe(x)	((queue_entry_t)(x))
#define TC(x)	((thread_call_t)(x))

lck_grp_t		thread_call_queues_lck_grp;
lck_grp_t		thread_call_lck_grp;
lck_attr_t		thread_call_lck_attr;
lck_grp_attr_t		thread_call_lck_grp_attr;

lck_mtx_t		thread_call_lock_data;

#define thread_call_lock_spin()			\
	lck_mtx_lock_spin_always(&thread_call_lock_data)

#define thread_call_unlock()			\
	lck_mtx_unlock_always(&thread_call_lock_data)

extern boolean_t	mach_timer_coalescing_enabled;

static inline spl_t
disable_ints_and_lock(void)
{
	spl_t s;

	s = splsched();
	thread_call_lock_spin();

	return s;
}

static inline void
enable_ints_and_unlock(spl_t s)
{
	thread_call_unlock();
	splx(s);
}

static inline boolean_t
group_isparallel(thread_call_group_t group)
{
	return ((group->flags & TCG_PARALLEL) != 0);
}

static boolean_t
thread_call_group_should_add_thread(thread_call_group_t group)
{
	uint32_t thread_count;

	if (!group_isparallel(group)) {
		if (group->pending_count > 0 && group->active_count == 0) {
			return TRUE;
		}

		return FALSE;
	}

	if (group->pending_count > 0) {
		if (group->idle_count > 0) {
			panic("Pending work, but threads are idle?");
		}

		thread_count = group->active_count;

		/*
		 * Add a thread if either there are no threads,
		 * the group has fewer than its target number of
		 * threads, or the amount of work is large relative
		 * to the number of threads.  In the last case, pay attention
		 * to the total load on the system, and back off if
		 * it's high.
		 */
		if ((thread_count == 0) ||
		    (thread_count < group->target_thread_count) ||
		    ((group->pending_count > THREAD_CALL_ADD_RATIO * thread_count) &&
		     (sched_mach_factor < THREAD_CALL_MACH_FACTOR_CAP))) {
			return TRUE;
		}
	}

	return FALSE;
}

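/*
 * Worked example of the policy above: with THREAD_CALL_ADD_RATIO == 4, a
 * parallel group already at or above its target and running 2 active
 * threads only grows on load once pending_count exceeds 4 * 2 = 8, and
 * even then only while sched_mach_factor stays below
 * THREAD_CALL_MACH_FACTOR_CAP (3).  Below target_thread_count, or with no
 * threads at all, the group always grows.
 */
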
static inline integer_t
thread_call_priority_to_sched_pri(thread_call_priority_t pri)
{
	switch (pri) {
	case THREAD_CALL_PRIORITY_HIGH:
		return BASEPRI_PREEMPT;
	case THREAD_CALL_PRIORITY_KERNEL:
		return BASEPRI_KERNEL;
	case THREAD_CALL_PRIORITY_USER:
		return BASEPRI_DEFAULT;
	case THREAD_CALL_PRIORITY_LOW:
		return MAXPRI_THROTTLE;
	default:
		panic("Invalid priority.");
	}

	return 0;
}

static inline thread_call_group_t
thread_call_get_group(
		thread_call_t call)
{
	thread_call_priority_t	pri = call->tc_pri;

	assert(pri == THREAD_CALL_PRIORITY_LOW ||
	       pri == THREAD_CALL_PRIORITY_USER ||
	       pri == THREAD_CALL_PRIORITY_KERNEL ||
	       pri == THREAD_CALL_PRIORITY_HIGH);

	thread_call_group_t group;

	if (call->tc_flags & THREAD_CALL_CONTINUOUS) {
		group = &conttime_thread_call_groups[pri];
	} else {
		group = &abstime_thread_call_groups[pri];
	}

	assert(IS_CONT_GROUP(group) == ((call->tc_flags & THREAD_CALL_CONTINUOUS) ? TRUE : FALSE));

	return group;
}

static void
thread_call_group_setup(
		thread_call_group_t	group,
		thread_call_priority_t	pri,
		uint32_t		target_thread_count,
		boolean_t		parallel,
		boolean_t		continuous)
{
	queue_init(&group->pending_queue);
	queue_init(&group->delayed_queue);

	timer_call_setup(&group->delayed_timer, thread_call_delayed_timer, group);
	timer_call_setup(&group->dealloc_timer, thread_call_dealloc_timer, group);

	waitq_init(&group->idle_waitq, SYNC_POLICY_FIFO|SYNC_POLICY_DISABLE_IRQ);

	group->target_thread_count = target_thread_count;
	group->pri = thread_call_priority_to_sched_pri(pri);

	group->sched_call = sched_call_thread;
	if (parallel) {
		group->flags |= TCG_PARALLEL;
		group->sched_call = NULL;
	}

	if (continuous) {
		group->flags |= TCG_CONTINUOUS;
	}
}

/*
 * Simple wrapper for creating threads bound to
 * thread call groups.
 */
static kern_return_t
thread_call_thread_create(
		thread_call_group_t	group)
{
	thread_t	thread;
	kern_return_t	result;

	result = kernel_thread_start_priority((thread_continue_t)thread_call_thread, group, group->pri, &thread);
	if (result != KERN_SUCCESS) {
		return result;
	}

	if (group->pri < BASEPRI_PREEMPT) {
		/*
		 * New style doesn't get to run to completion in
		 * kernel if there are higher priority threads
		 * available.
		 */
		thread_set_eager_preempt(thread);
	}

	thread_deallocate(thread);
	return KERN_SUCCESS;
}

/*
 *	thread_call_initialize:
 *
 *	Initialize this module, called
 *	early during system initialization.
 */
void
thread_call_initialize(void)
{
	thread_call_t		call;
	kern_return_t		result;
	thread_t		thread;
	int			i;
	spl_t			s;

	i = sizeof (thread_call_data_t);
	thread_call_zone = zinit(i, 4096 * i, 16 * i, "thread_call");
	zone_change(thread_call_zone, Z_CALLERACCT, FALSE);
	zone_change(thread_call_zone, Z_NOENCRYPT, TRUE);

	abstime_thread_call_groups = &thread_call_groups[0];
	conttime_thread_call_groups = &thread_call_groups[THREAD_CALL_ABSTIME_COUNT];

	lck_attr_setdefault(&thread_call_lck_attr);
	lck_grp_attr_setdefault(&thread_call_lck_grp_attr);
	lck_grp_init(&thread_call_queues_lck_grp, "thread_call_queues", &thread_call_lck_grp_attr);
	lck_grp_init(&thread_call_lck_grp, "thread_call", &thread_call_lck_grp_attr);
	lck_mtx_init(&thread_call_lock_data, &thread_call_lck_grp, &thread_call_lck_attr);
	nanotime_to_absolutetime(0, THREAD_CALL_DEALLOC_INTERVAL_NS, &thread_call_dealloc_interval_abs);
	waitq_init(&daemon_waitq, SYNC_POLICY_DISABLE_IRQ | SYNC_POLICY_FIFO);

	thread_call_group_setup(&abstime_thread_call_groups[THREAD_CALL_PRIORITY_LOW], THREAD_CALL_PRIORITY_LOW, 0, TRUE, FALSE);
	thread_call_group_setup(&abstime_thread_call_groups[THREAD_CALL_PRIORITY_USER], THREAD_CALL_PRIORITY_USER, 0, TRUE, FALSE);
	thread_call_group_setup(&abstime_thread_call_groups[THREAD_CALL_PRIORITY_KERNEL], THREAD_CALL_PRIORITY_KERNEL, 1, TRUE, FALSE);
	thread_call_group_setup(&abstime_thread_call_groups[THREAD_CALL_PRIORITY_HIGH], THREAD_CALL_PRIORITY_HIGH, THREAD_CALL_THREAD_MIN, FALSE, FALSE);
	thread_call_group_setup(&conttime_thread_call_groups[THREAD_CALL_PRIORITY_LOW], THREAD_CALL_PRIORITY_LOW, 0, TRUE, TRUE);
	thread_call_group_setup(&conttime_thread_call_groups[THREAD_CALL_PRIORITY_USER], THREAD_CALL_PRIORITY_USER, 0, TRUE, TRUE);
	thread_call_group_setup(&conttime_thread_call_groups[THREAD_CALL_PRIORITY_KERNEL], THREAD_CALL_PRIORITY_KERNEL, 0, TRUE, TRUE);
	thread_call_group_setup(&conttime_thread_call_groups[THREAD_CALL_PRIORITY_HIGH], THREAD_CALL_PRIORITY_HIGH, 1, FALSE, TRUE);

	s = disable_ints_and_lock();

	queue_init(&thread_call_internal_queue);
	for (
			call = internal_call_storage;
			call < &internal_call_storage[INTERNAL_CALL_COUNT];
			call++) {

		enqueue_tail(&thread_call_internal_queue, qe(call));
		thread_call_internal_queue_count++;
	}

	thread_call_daemon_awake = TRUE;

	enable_ints_and_unlock(s);

	result = kernel_thread_start_priority((thread_continue_t)thread_call_daemon, NULL, BASEPRI_PREEMPT + 1, &thread);
	if (result != KERN_SUCCESS)
		panic("thread_call_initialize");

	thread_deallocate(thread);
}

void
thread_call_setup(
	thread_call_t			call,
	thread_call_func_t		func,
	thread_call_param_t		param0)
{
	bzero(call, sizeof(*call));
	call_entry_setup((call_entry_t)call, func, param0);
	call->tc_pri = THREAD_CALL_PRIORITY_HIGH; /* Default priority */
}

/*
 *	_internal_call_allocate:
 *
 *	Allocate an internal callout entry.
 *
 *	Called with thread_call_lock held.
 */
static __inline__ thread_call_t
_internal_call_allocate(thread_call_func_t func, thread_call_param_t param0)
{
	thread_call_t		call;

	if (queue_empty(&thread_call_internal_queue))
		panic("_internal_call_allocate");

	call = TC(dequeue_head(&thread_call_internal_queue));
	thread_call_internal_queue_count--;

	thread_call_setup(call, func, param0);
	call->tc_flags = 0; /* THREAD_CALL_ALLOC not set, do not free back to zone */

	return (call);
}

/*
 *	_internal_call_release:
 *
 *	Release an internal callout entry which
 *	is no longer pending (or delayed). This is
 *	safe to call on a non-internal entry, in which
 *	case nothing happens.
 *
 *	Called with thread_call_lock held.
 */
static __inline__ void
_internal_call_release(
	thread_call_t		call)
{
	if (call >= internal_call_storage &&
	    call < &internal_call_storage[INTERNAL_CALL_COUNT]) {
		assert((call->tc_flags & THREAD_CALL_ALLOC) == 0);
		enqueue_head(&thread_call_internal_queue, qe(call));
		thread_call_internal_queue_count++;
	}
}

/*
 *	_pending_call_enqueue:
 *
 *	Place an entry at the end of the
 *	pending queue, to be executed soon.
 *
 *	Returns TRUE if the entry was already
 *	on a queue.
 *
 *	Called with thread_call_lock held.
 */
static __inline__ boolean_t
_pending_call_enqueue(
	thread_call_t		call,
	thread_call_group_t	group)
{
	queue_head_t		*old_queue;

	old_queue = call_entry_enqueue_tail(CE(call), &group->pending_queue);

	if (old_queue == NULL) {
		call->tc_submit_count++;
	} else if (old_queue != &group->pending_queue &&
		   old_queue != &group->delayed_queue) {
		panic("tried to move a thread call (%p) between groups (old_queue: %p)", call, old_queue);
	}

	group->pending_count++;

	thread_call_wake(group);

	return (old_queue != NULL);
}

/*
 *	_delayed_call_enqueue:
 *
 *	Place an entry on the delayed queue,
 *	after existing entries with an earlier
 *	(or identical) deadline.
 *
 *	Returns TRUE if the entry was already
 *	on a queue.
 *
 *	Called with thread_call_lock held.
 */
static __inline__ boolean_t
_delayed_call_enqueue(
	thread_call_t		call,
	thread_call_group_t	group,
	uint64_t		deadline)
{
	queue_head_t		*old_queue;

	old_queue = call_entry_enqueue_deadline(CE(call), &group->delayed_queue, deadline);

	if (old_queue == &group->pending_queue) {
		group->pending_count--;
	} else if (old_queue == NULL) {
		call->tc_submit_count++;
	} else if (old_queue == &group->delayed_queue) {
		// we did nothing, and that's fine
	} else {
		panic("tried to move a thread call (%p) between groups (old_queue: %p)", call, old_queue);
	}

	return (old_queue != NULL);
}

/*
 *	_call_dequeue:
 *
 *	Remove an entry from a queue.
 *
 *	Returns TRUE if the entry was on a queue.
 *
 *	Called with thread_call_lock held.
 */
static __inline__ boolean_t
_call_dequeue(
	thread_call_t		call,
	thread_call_group_t	group)
{
	queue_head_t		*old_queue;

	old_queue = call_entry_dequeue(CE(call));

	if (old_queue != NULL) {
		call->tc_finish_count++;
		if (old_queue == &group->pending_queue)
			group->pending_count--;
	}

	return (old_queue != NULL);
}

/*
 *	_set_delayed_call_timer:
 *
 *	Reset the timer so that it
 *	next expires when the entry is due.
 *
 *	Called with thread_call_lock held.
 */
static __inline__ void
_set_delayed_call_timer(
	thread_call_t		call,
	thread_call_group_t	group)
{
	uint64_t leeway, fire_at;

	assert((call->tc_soft_deadline != 0) && ((call->tc_soft_deadline <= call->tc_call.deadline)));
	assert(IS_CONT_GROUP(group) == ((call->tc_flags & THREAD_CALL_CONTINUOUS) ? TRUE : FALSE));

	fire_at = call->tc_soft_deadline;

	if (IS_CONT_GROUP(group)) {
		fire_at = continuoustime_to_absolutetime(fire_at);
	}

	leeway = call->tc_call.deadline - call->tc_soft_deadline;
	timer_call_enter_with_leeway(&group->delayed_timer, NULL,
	    fire_at, leeway,
	    TIMER_CALL_SYS_CRITICAL|TIMER_CALL_LEEWAY,
	    ((call->tc_flags & THREAD_CALL_RATELIMITED) == THREAD_CALL_RATELIMITED));
}

/*
 *	_remove_from_pending_queue:
 *
 *	Remove the first (or all) matching
 *	entries from the pending queue.
 *
 *	Returns TRUE if any matching entries
 *	were found.
 *
 *	Called with thread_call_lock held.
 */
static boolean_t
_remove_from_pending_queue(
	thread_call_func_t	func,
	thread_call_param_t	param0,
	boolean_t		remove_all)
{
	boolean_t		call_removed = FALSE;
	thread_call_t		call;
	thread_call_group_t	group = &abstime_thread_call_groups[THREAD_CALL_PRIORITY_HIGH];

	call = TC(queue_first(&group->pending_queue));

	while (!queue_end(&group->pending_queue, qe(call))) {
		if (call->tc_call.func == func &&
		    call->tc_call.param0 == param0) {
			thread_call_t	next = TC(queue_next(qe(call)));

			_call_dequeue(call, group);

			_internal_call_release(call);

			call_removed = TRUE;
			if (!remove_all)
				break;

			call = next;
		}
		else
			call = TC(queue_next(qe(call)));
	}

	return (call_removed);
}

/*
 *	_remove_from_delayed_queue:
 *
 *	Remove the first (or all) matching
 *	entries from the delayed queue.
 *
 *	Returns TRUE if any matching entries
 *	were found.
 *
 *	Called with thread_call_lock held.
 */
static boolean_t
_remove_from_delayed_queue(
	thread_call_func_t	func,
	thread_call_param_t	param0,
	boolean_t		remove_all)
{
	boolean_t		call_removed = FALSE;
	thread_call_t		call;
	thread_call_group_t	group = &abstime_thread_call_groups[THREAD_CALL_PRIORITY_HIGH];

	call = TC(queue_first(&group->delayed_queue));

	while (!queue_end(&group->delayed_queue, qe(call))) {
		if (call->tc_call.func == func &&
		    call->tc_call.param0 == param0) {
			thread_call_t	next = TC(queue_next(qe(call)));

			_call_dequeue(call, group);

			_internal_call_release(call);

			call_removed = TRUE;
			if (!remove_all)
				break;

			call = next;
		}
		else
			call = TC(queue_next(qe(call)));
	}

	return (call_removed);
}

/*
 *	thread_call_func_delayed:
 *
 *	Enqueue a function callout to
 *	occur at the stated time.
 */
void
thread_call_func_delayed(
		thread_call_func_t	func,
		thread_call_param_t	param,
		uint64_t		deadline)
{
	(void)thread_call_enter_delayed_internal(NULL, func, param, 0, deadline, 0, 0);
}

/*
 * thread_call_func_delayed_with_leeway:
 *
 * Same as thread_call_func_delayed(), but with
 * leeway/flags threaded through.
 */
void
thread_call_func_delayed_with_leeway(
		thread_call_func_t	func,
		thread_call_param_t	param,
		uint64_t		deadline,
		uint64_t		leeway,
		uint32_t		flags)
{
	(void)thread_call_enter_delayed_internal(NULL, func, param, 0, deadline, leeway, flags);
}

/*
 *	thread_call_func_cancel:
 *
 *	Dequeue a function callout.
 *
 *	Removes one (or all) { function, argument }
 *	instance(s) from either (or both)
 *	the pending and the delayed queue,
 *	in that order.
 *
 *	Returns TRUE if any calls were cancelled.
 */
boolean_t
thread_call_func_cancel(
		thread_call_func_t	func,
		thread_call_param_t	param,
		boolean_t		cancel_all)
{
	boolean_t	result;
	spl_t		s;

	assert(func != NULL);

	s = splsched();
	thread_call_lock_spin();

	if (cancel_all)
		result = _remove_from_pending_queue(func, param, cancel_all) |
			 _remove_from_delayed_queue(func, param, cancel_all);
	else
		result = _remove_from_pending_queue(func, param, cancel_all) ||
			 _remove_from_delayed_queue(func, param, cancel_all);

	thread_call_unlock();
	splx(s);

	return (result);
}

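/*
 * Usage sketch for the func/param interface above (example only; the
 * callback and cookie are hypothetical, and the deadline is simply an
 * absolute-time point roughly one second out).
 */
#if 0	/* example only */
static void
example_callout(thread_call_param_t param0, thread_call_param_t param1)
{
	/* param0 is the value supplied at enqueue time; param1 is 0 here */
	(void)param0; (void)param1;
}

static void
example_func_delayed_usage(void *cookie)
{
	uint64_t interval, deadline;

	nanoseconds_to_absolutetime(NSEC_PER_SEC, &interval);
	deadline = mach_absolute_time() + interval;

	/* queue the callout ... */
	thread_call_func_delayed(example_callout, cookie, deadline);

	/* ... and later remove any pending instance with this func/param pair */
	(void)thread_call_func_cancel(example_callout, cookie, FALSE);
}
#endif
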
/*
 * Allocate a thread call with a given priority.  Importances
 * other than THREAD_CALL_PRIORITY_HIGH will be run in threads
 * with eager preemption enabled (i.e. may be aggressively preempted
 * by higher-priority threads which are not in the normal "urgent" bands).
 */
thread_call_t
thread_call_allocate_with_priority(
		thread_call_func_t		func,
		thread_call_param_t		param0,
		thread_call_priority_t		pri)
{
	thread_call_t call;

	if (pri > THREAD_CALL_PRIORITY_LOW) {
		panic("Invalid pri: %d\n", pri);
	}

	call = thread_call_allocate(func, param0);
	call->tc_pri = pri;

	return call;
}

/*
 *	thread_call_allocate:
 *
 *	Allocate a callout entry.
 */
thread_call_t
thread_call_allocate(
		thread_call_func_t	func,
		thread_call_param_t	param0)
{
	thread_call_t	call = zalloc(thread_call_zone);

	thread_call_setup(call, func, param0);
	call->tc_refs = 1;
	call->tc_flags = THREAD_CALL_ALLOC;

	return (call);
}

/*
 *	thread_call_free:
 *
 *	Release a callout.  If the callout is currently
 *	executing, it will be freed when all invocations
 *	finish.
 */
boolean_t
thread_call_free(
		thread_call_t		call)
{
	int32_t		refs;
	spl_t		s;

	s = splsched();
	thread_call_lock_spin();

	if (call->tc_call.queue != NULL) {
		thread_call_unlock();
		splx(s);

		return (FALSE);
	}

	refs = --call->tc_refs;
	if (refs < 0) {
		panic("Refcount negative: %d\n", refs);
	}

	thread_call_unlock();
	splx(s);

	if (refs == 0) {
		zfree(thread_call_zone, call);
	}

	return (TRUE);
}

/*
 *	thread_call_enter:
 *
 *	Enqueue a callout entry to occur "soon".
 *
 *	Returns TRUE if the call was
 *	already on a queue.
 */
boolean_t
thread_call_enter(
		thread_call_t		call)
{
	return thread_call_enter1(call, 0);
}

boolean_t
thread_call_enter1(
		thread_call_t		call,
		thread_call_param_t	param1)
{
	boolean_t		result = TRUE;
	thread_call_group_t	group;
	spl_t			s;

	assert(call->tc_call.func != NULL);

	group = thread_call_get_group(call);

	s = splsched();
	thread_call_lock_spin();

	if (call->tc_call.queue != &group->pending_queue) {
		result = _pending_call_enqueue(call, group);
	}

	call->tc_call.param1 = param1;

	thread_call_unlock();
	splx(s);

	return (result);
}

/*
 *	thread_call_enter_delayed:
 *
 *	Enqueue a callout entry to occur
 *	at the stated time.
 *
 *	Returns TRUE if the call was
 *	already on a queue.
 */
boolean_t
thread_call_enter_delayed(
		thread_call_t		call,
		uint64_t		deadline)
{
	assert(call != NULL);
	return thread_call_enter_delayed_internal(call, NULL, 0, 0, deadline, 0, 0);
}

boolean_t
thread_call_enter1_delayed(
		thread_call_t		call,
		thread_call_param_t	param1,
		uint64_t		deadline)
{
	assert(call != NULL);
	return thread_call_enter_delayed_internal(call, NULL, 0, param1, deadline, 0, 0);
}

boolean_t
thread_call_enter_delayed_with_leeway(
		thread_call_t		call,
		thread_call_param_t	param1,
		uint64_t		deadline,
		uint64_t		leeway,
		unsigned int		flags)
{
	assert(call != NULL);
	return thread_call_enter_delayed_internal(call, NULL, 0, param1, deadline, leeway, flags);
}

/*
 * thread_call_enter_delayed_internal:
 * enqueue a callout entry to occur at the stated time
 *
 * Returns True if the call was already on a queue
 * params:
 * call     - structure encapsulating state of the callout
 * alt_func/alt_param0 - if call is NULL, allocate temporary storage using these parameters
 * deadline - time deadline in nanoseconds
 * leeway   - timer slack represented as delta of deadline.
 * flags    - THREAD_CALL_DELAY_XXX : classification of caller's desires wrt timer coalescing.
 *            THREAD_CALL_DELAY_LEEWAY : value in leeway is used for timer coalescing.
 *            THREAD_CALL_CONTINUOUS: thread call will be called according to mach_continuous_time rather
 *                                    than mach_absolute_time
 */
boolean_t
thread_call_enter_delayed_internal(
		thread_call_t		call,
		thread_call_func_t	alt_func,
		thread_call_param_t	alt_param0,
		thread_call_param_t	param1,
		uint64_t		deadline,
		uint64_t		leeway,
		unsigned int		flags)
{
	boolean_t		result = TRUE;
	thread_call_group_t	group;
	spl_t			s;
	uint64_t		abstime, conttime, sdeadline, slop;
	uint32_t		urgency;

	const boolean_t is_cont_time = (flags & THREAD_CALL_CONTINUOUS) ? TRUE : FALSE;

	/* direct mapping between thread_call, timer_call, and timeout_urgency values */
	urgency = (flags & TIMEOUT_URGENCY_MASK);

	s = splsched();
	thread_call_lock_spin();

	if (call == NULL) {
		/* allocate a structure out of internal storage, as a convenience for BSD callers */
		call = _internal_call_allocate(alt_func, alt_param0);
	}

	if (is_cont_time) {
		call->tc_flags |= THREAD_CALL_CONTINUOUS;
	}

	assert(call->tc_call.func != NULL);
	group = thread_call_get_group(call);
	abstime = mach_absolute_time();
	conttime = absolutetime_to_continuoustime(abstime);

	call->tc_flags |= THREAD_CALL_DELAYED;

	call->tc_soft_deadline = sdeadline = deadline;

	boolean_t ratelimited = FALSE;
	slop = timer_call_slop(deadline, is_cont_time ? conttime : abstime, urgency, current_thread(), &ratelimited);

	if ((flags & THREAD_CALL_DELAY_LEEWAY) != 0 && leeway > slop)
		slop = leeway;

	if (UINT64_MAX - deadline <= slop)
		deadline = UINT64_MAX;
	else
		deadline += slop;

	if (ratelimited) {
		call->tc_flags |= TIMER_CALL_RATELIMITED;
	} else {
		call->tc_flags &= ~TIMER_CALL_RATELIMITED;
	}

	call->tc_call.param1 = param1;

	if (is_cont_time) {
		call->ttd = (sdeadline > conttime) ? (sdeadline - conttime) : 0;
	}
	else {
		call->ttd = (sdeadline > abstime) ? (sdeadline - abstime) : 0;
	}

	result = _delayed_call_enqueue(call, group, deadline);

	if (queue_first(&group->delayed_queue) == qe(call)) {
		_set_delayed_call_timer(call, group);
	}

#if CONFIG_DTRACE
	DTRACE_TMR5(thread_callout__create, thread_call_func_t, call->tc_call.func, uint64_t, (deadline - sdeadline), uint64_t, (call->ttd >> 32), (unsigned) (call->ttd & 0xFFFFFFFF), call);
#endif

	thread_call_unlock();
	splx(s);

	return (result);
}

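/*
 * Usage sketch for the leeway/flags path documented above (example only;
 * "example_call" is a hypothetical, already-allocated thread call).  The
 * caller supplies an absolute deadline plus a coalescing hint: here 10 ms
 * of leeway, honored because THREAD_CALL_DELAY_LEEWAY is set.  Passing
 * THREAD_CALL_CONTINUOUS instead would have the deadline interpreted
 * against mach_continuous_time(), per the comment block above.
 */
#if 0	/* example only */
static void
example_enter_with_leeway(thread_call_t example_call, thread_call_param_t param1)
{
	uint64_t interval, leeway, deadline;

	nanoseconds_to_absolutetime(100 * NSEC_PER_MSEC, &interval);
	nanoseconds_to_absolutetime(10 * NSEC_PER_MSEC, &leeway);
	deadline = mach_absolute_time() + interval;

	(void)thread_call_enter_delayed_with_leeway(example_call, param1,
	    deadline, leeway, THREAD_CALL_DELAY_LEEWAY);
}
#endif
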
/*
 *	thread_call_cancel:
 *
 *	Dequeue a callout entry.
 *
 *	Returns TRUE if the call was
 *	on a queue.
 */
boolean_t
thread_call_cancel(
		thread_call_t		call)
{
	boolean_t		result, do_cancel_callout = FALSE;
	thread_call_group_t	group;
	spl_t			s;

	group = thread_call_get_group(call);

	s = splsched();
	thread_call_lock_spin();

	if ((call->tc_call.deadline != 0) &&
	    (queue_first(&group->delayed_queue) == qe(call))) {
		assert (call->tc_call.queue == &group->delayed_queue);
		do_cancel_callout = TRUE;
	}

	result = _call_dequeue(call, group);

	if (do_cancel_callout) {
		timer_call_cancel(&group->delayed_timer);
		if (!queue_empty(&group->delayed_queue)) {
			_set_delayed_call_timer(TC(queue_first(&group->delayed_queue)), group);
		}
	}

	thread_call_unlock();
	splx(s);

#if CONFIG_DTRACE
	DTRACE_TMR4(thread_callout__cancel, thread_call_func_t, call->tc_call.func, 0, (call->ttd >> 32), (unsigned) (call->ttd & 0xFFFFFFFF));
#endif

	return (result);
}

/*
 * Cancel a thread call.  If it cannot be cancelled (i.e.
 * is already in flight), waits for the most recent invocation
 * to finish.  Note that if clients re-submit this thread call,
 * it may still be pending or in flight when thread_call_cancel_wait
 * returns, but all requests to execute this work item prior
 * to the call to thread_call_cancel_wait will have finished.
 */
boolean_t
thread_call_cancel_wait(
		thread_call_t		call)
{
	boolean_t		result;
	thread_call_group_t	group;

	if ((call->tc_flags & THREAD_CALL_ALLOC) == 0) {
		panic("%s: Can't wait on thread call whose storage I don't own.", __FUNCTION__);
	}

	group = thread_call_get_group(call);

	(void) splsched();
	thread_call_lock_spin();

	result = _call_dequeue(call, group);
	if (result == FALSE) {
		thread_call_wait_locked(call);
	}

	thread_call_unlock();
	(void) spllo();

	return result;
}

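/*
 * Lifecycle sketch tying the calls above together (example only; the
 * callback is hypothetical).  Only dynamically allocated calls
 * (THREAD_CALL_ALLOC) may use thread_call_cancel_wait()/thread_call_free().
 */
#if 0	/* example only */
static void
example_lifecycle(thread_call_func_t example_callback, void *owner)
{
	thread_call_t call;
	uint64_t interval, deadline;

	call = thread_call_allocate(example_callback, owner);

	nanoseconds_to_absolutetime(NSEC_PER_SEC, &interval);
	deadline = mach_absolute_time() + interval;
	(void)thread_call_enter_delayed(call, deadline);

	/* ... later, during teardown: cancel, wait out any in-flight
	 * invocation, then drop our reference so the zone element
	 * can be freed. */
	(void)thread_call_cancel_wait(call);
	(void)thread_call_free(call);
}
#endif
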
/*
 *	thread_call_wake:
 *
 *	Wake a call thread to service
 *	pending call entries.  May wake
 *	the daemon thread in order to
 *	create additional call threads.
 *
 *	Called with thread_call_lock held.
 *
 *	For high-priority group, only does wakeup/creation if there are no threads
 *	running.
 */
static __inline__ void
thread_call_wake(
	thread_call_group_t		group)
{
	/*
	 * New behavior: use threads if you've got 'em.
	 * Traditional behavior: wake only if no threads running.
	 */
	if (group_isparallel(group) || group->active_count == 0) {
		if (waitq_wakeup64_one(&group->idle_waitq, NO_EVENT64,
				THREAD_AWAKENED, WAITQ_ALL_PRIORITIES) == KERN_SUCCESS) {
			group->idle_count--; group->active_count++;

			if (group->idle_count == 0) {
				timer_call_cancel(&group->dealloc_timer);
				group->flags &= ~TCG_DEALLOC_ACTIVE;
			}
		} else {
			if (!thread_call_daemon_awake && thread_call_group_should_add_thread(group)) {
				thread_call_daemon_awake = TRUE;
				waitq_wakeup64_one(&daemon_waitq, NO_EVENT64,
						THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
			}
		}
	}
}

/*
 *	sched_call_thread:
 *
 *	Call out invoked by the scheduler.  Used only for high-priority
 *	thread call group.
 */
static void
sched_call_thread(
		int			type,
		__unused thread_t	thread)
{
	thread_call_group_t	group;

	group = &thread_call_groups[THREAD_CALL_PRIORITY_HIGH]; /* XXX */

	thread_call_lock_spin();

	switch (type) {

	case SCHED_CALL_BLOCK:
		--group->active_count;
		if (group->pending_count > 0)
			thread_call_wake(group);
		break;

	case SCHED_CALL_UNBLOCK:
		group->active_count++;
		break;
	}

	thread_call_unlock();
}

/*
 * Interrupts disabled, lock held; returns the same way.
 * Only called on thread calls whose storage we own.  Wakes up
 * anyone who might be waiting on this work item and frees it
 * if the client has so requested.
 */
static boolean_t
thread_call_finish(thread_call_t call, spl_t *s)
{
	boolean_t dowake = FALSE;

	call->tc_finish_count++;
	call->tc_refs--;

	if ((call->tc_flags & THREAD_CALL_WAIT) != 0) {
		dowake = TRUE;
		call->tc_flags &= ~THREAD_CALL_WAIT;

		/*
		 * Dropping lock here because the sched call for the
		 * high-pri group can take the big lock from under
		 * a thread lock.
		 */
		thread_call_unlock();
		thread_wakeup((event_t)call);
		thread_call_lock_spin();
	}

	if (call->tc_refs == 0) {
		if (dowake) {
			panic("Someone waiting on a thread call that is scheduled for free: %p\n", call->tc_call.func);
		}

		enable_ints_and_unlock(*s);

		zfree(thread_call_zone, call);

		*s = disable_ints_and_lock();

		return (FALSE);
	}

	return (TRUE);
}

/*
 *	thread_call_thread:
 */
static void
thread_call_thread(
		thread_call_group_t	group,
		wait_result_t		wres)
{
	thread_t	self = current_thread();
	boolean_t	canwait;
	spl_t		s;

	if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) == 0)
		(void)thread_set_tag_internal(self, THREAD_TAG_CALLOUT);

	/*
	 * A wakeup with THREAD_INTERRUPTED indicates that
	 * we should terminate.
	 */
	if (wres == THREAD_INTERRUPTED) {
		thread_terminate(self);

		/* NOTREACHED */
		panic("thread_terminate() returned?");
	}

	s = disable_ints_and_lock();

	thread_sched_call(self, group->sched_call);

	while (group->pending_count > 0) {
		thread_call_t		call;
		thread_call_func_t	func;
		thread_call_param_t	param0, param1;

		call = TC(dequeue_head(&group->pending_queue));
		assert(call != NULL);
		group->pending_count--;

		func = call->tc_call.func;
		param0 = call->tc_call.param0;
		param1 = call->tc_call.param1;

		call->tc_call.queue = NULL;

		_internal_call_release(call);

		/*
		 * Can only do wakeups for thread calls whose storage
		 * we control.
		 */
		if ((call->tc_flags & THREAD_CALL_ALLOC) != 0) {
			canwait = TRUE;
			call->tc_refs++;	/* Delay free until we're done */
		} else
			canwait = FALSE;

		enable_ints_and_unlock(s);

#if DEVELOPMENT || DEBUG
		KERNEL_DEBUG_CONSTANT(
				MACHDBG_CODE(DBG_MACH_SCHED,MACH_CALLOUT) | DBG_FUNC_NONE,
				VM_KERNEL_UNSLIDE(func), VM_KERNEL_UNSLIDE_OR_PERM(param0), VM_KERNEL_UNSLIDE_OR_PERM(param1), 0, 0);
#endif /* DEVELOPMENT || DEBUG */

#if CONFIG_DTRACE
		DTRACE_TMR6(thread_callout__start, thread_call_func_t, func, int, 0, int, (call->ttd >> 32), (unsigned) (call->ttd & 0xFFFFFFFF), (call->tc_flags & THREAD_CALL_DELAYED), call);
#endif

		(*func)(param0, param1);

#if CONFIG_DTRACE
		DTRACE_TMR6(thread_callout__end, thread_call_func_t, func, int, 0, int, (call->ttd >> 32), (unsigned) (call->ttd & 0xFFFFFFFF), (call->tc_flags & THREAD_CALL_DELAYED), call);
#endif

		if (get_preemption_level() != 0) {
			int pl = get_preemption_level();
			panic("thread_call_thread: preemption_level %d, last callout %p(%p, %p)",
					pl, (void *)VM_KERNEL_UNSLIDE(func), param0, param1);
		}

		s = disable_ints_and_lock();

		if (canwait) {
			/* Frees if so desired */
			thread_call_finish(call, &s);
		}
	}

	thread_sched_call(self, NULL);
	group->active_count--;

	if (self->callout_woken_from_icontext && !self->callout_woke_thread) {
		ledger_credit(self->t_ledger, task_ledgers.interrupt_wakeups, 1);
		if (self->callout_woken_from_platform_idle)
			ledger_credit(self->t_ledger, task_ledgers.platform_idle_wakeups, 1);
	}

	self->callout_woken_from_icontext = FALSE;
	self->callout_woken_from_platform_idle = FALSE;
	self->callout_woke_thread = FALSE;

	if (group_isparallel(group)) {
		/*
		 * For new style of thread group, thread always blocks.
		 * If we have more than the target number of threads,
		 * and this is the first to block, and it isn't active
		 * already, set a timer for deallocating a thread if we
		 * continue to have a surplus.
		 */
		group->idle_count++;

		if (group->idle_count == 1) {
			group->idle_timestamp = mach_absolute_time();
		}

		if (((group->flags & TCG_DEALLOC_ACTIVE) == 0) &&
				((group->active_count + group->idle_count) > group->target_thread_count)) {
			group->flags |= TCG_DEALLOC_ACTIVE;
			thread_call_start_deallocate_timer(group);
		}

		/* Wait for more work (or termination) */
		wres = waitq_assert_wait64(&group->idle_waitq, NO_EVENT64, THREAD_INTERRUPTIBLE, 0);
		if (wres != THREAD_WAITING) {
			panic("kcall worker unable to assert wait?");
		}

		enable_ints_and_unlock(s);

		thread_block_parameter((thread_continue_t)thread_call_thread, group);
		/* NOTREACHED */
	} else {
		if (group->idle_count < group->target_thread_count) {
			group->idle_count++;

			waitq_assert_wait64(&group->idle_waitq, NO_EVENT64, THREAD_UNINT, 0); /* Interrupted means to exit */

			enable_ints_and_unlock(s);

			thread_block_parameter((thread_continue_t)thread_call_thread, group);
			/* NOTREACHED */
		}
	}

	enable_ints_and_unlock(s);

	thread_terminate(self);
	/* NOTREACHED */
}

/*
 *	thread_call_daemon: walk list of groups, allocating
 *	threads if appropriate (as determined by
 *	thread_call_group_should_add_thread()).
 */
static void
thread_call_daemon_continue(__unused void *arg)
{
	int			i;
	kern_return_t		kr;
	thread_call_group_t	group;
	spl_t			s;

	s = disable_ints_and_lock();

	/* Starting at zero happens to be high-priority first. */
	for (i = 0; i < THREAD_CALL_GROUP_COUNT; i++) {
		group = &thread_call_groups[i];
		while (thread_call_group_should_add_thread(group)) {
			group->active_count++;

			enable_ints_and_unlock(s);

			kr = thread_call_thread_create(group);
			if (kr != KERN_SUCCESS) {
				/*
				 * On failure, just pause for a moment and give up.
				 * We can try again later.
				 */
				delay(10000); /* 10 ms */
				s = disable_ints_and_lock();
				goto out;
			}

			s = disable_ints_and_lock();
		}
	}

out:
	thread_call_daemon_awake = FALSE;
	waitq_assert_wait64(&daemon_waitq, NO_EVENT64, THREAD_UNINT, 0);

	enable_ints_and_unlock(s);

	thread_block_parameter((thread_continue_t)thread_call_daemon_continue, NULL);
	/* NOTREACHED */
}

static void
thread_call_daemon(
		__unused void	*arg)
{
	thread_t	self = current_thread();

	self->options |= TH_OPT_VMPRIV;
	vm_page_free_reserve(2);	/* XXX */

	thread_call_daemon_continue(NULL);
	/* NOTREACHED */
}

/*
 * Schedule timer to deallocate a worker thread if we have a surplus
 * of threads (in excess of the group's target) and at least one thread
 * is idle the whole time.
 */
static void
thread_call_start_deallocate_timer(
		thread_call_group_t	group)
{
	uint64_t	deadline;
	boolean_t	onqueue;

	assert(group->idle_count > 0);

	group->flags |= TCG_DEALLOC_ACTIVE;
	deadline = group->idle_timestamp + thread_call_dealloc_interval_abs;
	onqueue = timer_call_enter(&group->dealloc_timer, deadline, 0);

	if (onqueue) {
		panic("Deallocate timer already active?");
	}
}

void
thread_call_delayed_timer(
		timer_call_param_t		p0,
		__unused timer_call_param_t	p1)
{
	thread_call_t		call;
	thread_call_group_t	group = p0;
	uint64_t		timestamp;

	thread_call_lock_spin();

	const boolean_t is_cont_time = IS_CONT_GROUP(group) ? TRUE : FALSE;

	if (is_cont_time) {
		timestamp = mach_continuous_time();
	}
	else {
		timestamp = mach_absolute_time();
	}

	call = TC(queue_first(&group->delayed_queue));

	while (!queue_end(&group->delayed_queue, qe(call))) {
		assert((!is_cont_time) || (call->tc_flags & THREAD_CALL_CONTINUOUS));

		if (call->tc_soft_deadline <= timestamp) {
			if ((call->tc_flags & THREAD_CALL_RATELIMITED) &&
			    (CE(call)->deadline > timestamp) &&
			    (ml_timer_forced_evaluation() == FALSE)) {
				break;
			}
			_pending_call_enqueue(call, group);
		} /* TODO, identify differentially coalesced timers */
		else
			break;

		call = TC(queue_first(&group->delayed_queue));
	}

	if (!queue_end(&group->delayed_queue, qe(call))) {
		_set_delayed_call_timer(call, group);
	}

	thread_call_unlock();
}

static void
thread_call_delayed_timer_rescan(thread_call_group_t group)
{
	thread_call_t	call;
	uint64_t	timestamp;
	boolean_t	istate;

	istate = ml_set_interrupts_enabled(FALSE);
	thread_call_lock_spin();

	assert(ml_timer_forced_evaluation() == TRUE);

	if (IS_CONT_GROUP(group)) {
		timestamp = mach_continuous_time();
	} else {
		timestamp = mach_absolute_time();
	}

	call = TC(queue_first(&group->delayed_queue));

	while (!queue_end(&group->delayed_queue, qe(call))) {
		if (call->tc_soft_deadline <= timestamp) {
			_pending_call_enqueue(call, group);
			call = TC(queue_first(&group->delayed_queue));
		}
		else {
			uint64_t skew = call->tc_call.deadline - call->tc_soft_deadline;
			assert (call->tc_call.deadline >= call->tc_soft_deadline);
			/*
			 * On a latency quality-of-service level change,
			 * re-sort potentially rate-limited callout. The platform
			 * layer determines which timers require this.
			 */
			if (timer_resort_threshold(skew)) {
				_call_dequeue(call, group);
				_delayed_call_enqueue(call, group, call->tc_soft_deadline);
			}
			call = TC(queue_next(qe(call)));
		}
	}

	if (!queue_empty(&group->delayed_queue))
		_set_delayed_call_timer(TC(queue_first(&group->delayed_queue)), group);
	thread_call_unlock();
	ml_set_interrupts_enabled(istate);
}

void
thread_call_delayed_timer_rescan_all(void) {
	int i;

	for (i = 0; i < THREAD_CALL_GROUP_COUNT; i++) {
		thread_call_delayed_timer_rescan(&thread_call_groups[i]);
	}
}

/*
 * Timer callback to tell a thread to terminate if
 * we have an excess of threads and at least one has been
 * idle for a long time.
 */
static void
thread_call_dealloc_timer(
		timer_call_param_t		p0,
		__unused timer_call_param_t	p1)
{
	thread_call_group_t group = (thread_call_group_t)p0;
	uint64_t now;
	kern_return_t res;
	boolean_t terminated = FALSE;

	thread_call_lock_spin();

	now = mach_absolute_time();
	if (group->idle_count > 0) {
		if (now > group->idle_timestamp + thread_call_dealloc_interval_abs) {
			terminated = TRUE;
			group->idle_count--;
			res = waitq_wakeup64_one(&group->idle_waitq, NO_EVENT64,
					THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES);
			if (res != KERN_SUCCESS) {
				panic("Unable to wake up idle thread for termination?");
			}
		}
	}

	/*
	 * If we still have an excess of threads, schedule another
	 * invocation of this function.
	 */
	if (group->idle_count > 0 && (group->idle_count + group->active_count > group->target_thread_count)) {
		/*
		 * If we killed someone just now, push out the
		 * next deadline.
		 */
		if (terminated) {
			group->idle_timestamp = now;
		}

		thread_call_start_deallocate_timer(group);
	} else {
		group->flags &= ~TCG_DEALLOC_ACTIVE;
	}

	thread_call_unlock();
}

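/*
 * Worked example of the shrink path above: with
 * THREAD_CALL_DEALLOC_INTERVAL_NS == 5 ms, a parallel group whose
 * active_count + idle_count exceeds target_thread_count terminates at most
 * one idle worker (via THREAD_INTERRUPTED) per 5 ms window in which some
 * thread stayed idle the whole time, re-arming the dealloc timer until the
 * surplus is gone.
 */
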
/*
 * Wait for all requested invocations of a thread call prior to now
 * to finish.  Can only be invoked on thread calls whose storage we manage.
 * Just waits for the finish count to catch up to the submit count we find
 * at the beginning of our wait.
 */
static void
thread_call_wait_locked(thread_call_t call)
{
	uint64_t submit_count;
	wait_result_t res;

	assert(call->tc_flags & THREAD_CALL_ALLOC);

	submit_count = call->tc_submit_count;

	while (call->tc_finish_count < submit_count) {
		call->tc_flags |= THREAD_CALL_WAIT;

		res = assert_wait(call, THREAD_UNINT);
		if (res != THREAD_WAITING) {
			panic("Unable to assert wait?");
		}

		thread_call_unlock();
		(void) spllo();

		res = thread_block(NULL);
		if (res != THREAD_AWAKENED) {
			panic("Awoken with %d?", res);
		}

		(void) splsched();
		thread_call_lock_spin();
	}
}

/*
 * Determine whether a thread call is either on a queue or
 * currently being executed.
 */
boolean_t
thread_call_isactive(thread_call_t call)
{
	boolean_t	active;
	spl_t		s;

	s = disable_ints_and_lock();
	active = (call->tc_submit_count > call->tc_finish_count);
	enable_ints_and_unlock(s);

	return active;
}

/*
 * adjust_cont_time_thread_calls
 * on wake, reenqueue delayed call timer for continuous time thread call groups
 */
void
adjust_cont_time_thread_calls(void)
{
	thread_call_group_t	group;
	spl_t			s;
	int			i;

	s = disable_ints_and_lock();

	for (i = 0; i < THREAD_CALL_CONTTIME_COUNT; i++) {
		// only the continuous thread call groups
		group = &conttime_thread_call_groups[i];
		assert(IS_CONT_GROUP(group));

		if (!queue_empty(&group->delayed_queue)) {
			_set_delayed_call_timer(TC(queue_first(&group->delayed_queue)), group);
		}
	}

	enable_ints_and_unlock(s);
}