osfmk/kern/thread_call.c (apple/xnu, xnu-2050.22.13)
1 /*
2 * Copyright (c) 1993-1995, 1999-2008 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <mach/mach_types.h>
30 #include <mach/thread_act.h>
31
32 #include <kern/kern_types.h>
33 #include <kern/zalloc.h>
34 #include <kern/sched_prim.h>
35 #include <kern/clock.h>
36 #include <kern/task.h>
37 #include <kern/thread.h>
38 #include <kern/wait_queue.h>
39
40 #include <vm/vm_pageout.h>
41
42 #include <kern/thread_call.h>
43 #include <kern/call_entry.h>
44 #include <kern/timer_call.h>
45
46 #include <libkern/OSAtomic.h>
47
48 #include <sys/kdebug.h>
49
50
51 static zone_t thread_call_zone;
52 static struct wait_queue daemon_wqueue;
53
54 struct thread_call_group {
55 queue_head_t pending_queue;
56 uint32_t pending_count;
57
58 queue_head_t delayed_queue;
59 uint32_t delayed_count;
60
61 timer_call_data_t delayed_timer;
62 timer_call_data_t dealloc_timer;
63
64 struct wait_queue idle_wqueue;
65 uint32_t idle_count, active_count;
66
67 integer_t pri;
68 uint32_t target_thread_count;
69 uint64_t idle_timestamp;
70
71 uint32_t flags;
72 sched_call_t sched_call;
73 };
74
75 typedef struct thread_call_group *thread_call_group_t;
76
77 #define TCG_PARALLEL 0x01
78 #define TCG_DEALLOC_ACTIVE 0x02
79
80 #define THREAD_CALL_GROUP_COUNT 4
81 #define THREAD_CALL_THREAD_MIN 4
82 #define INTERNAL_CALL_COUNT 768
83 #define THREAD_CALL_DEALLOC_INTERVAL_NS (5 * 1000 * 1000) /* 5 ms */
84 #define THREAD_CALL_ADD_RATIO 4
85 #define THREAD_CALL_MACH_FACTOR_CAP 3
86
87 static struct thread_call_group thread_call_groups[THREAD_CALL_GROUP_COUNT];
88 static boolean_t thread_call_daemon_awake;
89 static thread_call_data_t internal_call_storage[INTERNAL_CALL_COUNT];
90 static queue_head_t thread_call_internal_queue;
91 static uint64_t thread_call_dealloc_interval_abs;
92
93 static __inline__ thread_call_t _internal_call_allocate(void);
94 static __inline__ void _internal_call_release(thread_call_t call);
95 static __inline__ boolean_t _pending_call_enqueue(thread_call_t call, thread_call_group_t group);
96 static __inline__ boolean_t _delayed_call_enqueue(thread_call_t call, thread_call_group_t group, uint64_t deadline);
97 static __inline__ boolean_t _call_dequeue(thread_call_t call, thread_call_group_t group);
98 static __inline__ void thread_call_wake(thread_call_group_t group);
99 static __inline__ void _set_delayed_call_timer(thread_call_t call, thread_call_group_t group);
100 static boolean_t _remove_from_pending_queue(thread_call_func_t func, thread_call_param_t param0, boolean_t remove_all);
101 static boolean_t _remove_from_delayed_queue(thread_call_func_t func, thread_call_param_t param0, boolean_t remove_all);
102 static void thread_call_daemon(void *arg);
103 static void thread_call_thread(thread_call_group_t group, wait_result_t wres);
104 extern void thread_call_delayed_timer(timer_call_param_t p0, timer_call_param_t p1);
105 static void thread_call_dealloc_timer(timer_call_param_t p0, timer_call_param_t p1);
106 static void thread_call_group_setup(thread_call_group_t group, thread_call_priority_t pri, uint32_t target_thread_count, boolean_t parallel);
107 static void sched_call_thread(int type, thread_t thread);
108 static void thread_call_start_deallocate_timer(thread_call_group_t group);
109 static void thread_call_wait_locked(thread_call_t call);
110
111 #define qe(x) ((queue_entry_t)(x))
112 #define TC(x) ((thread_call_t)(x))
113
114
115 lck_grp_t thread_call_queues_lck_grp;
116 lck_grp_t thread_call_lck_grp;
117 lck_attr_t thread_call_lck_attr;
118 lck_grp_attr_t thread_call_lck_grp_attr;
119
120 #if defined(__i386__) || defined(__x86_64__)
121 lck_mtx_t thread_call_lock_data;
122 #else
123 lck_spin_t thread_call_lock_data;
124 #endif
125
126
127 #define thread_call_lock_spin() \
128 lck_mtx_lock_spin_always(&thread_call_lock_data)
129
130 #define thread_call_unlock() \
131 lck_mtx_unlock_always(&thread_call_lock_data)
132
133
134 static inline spl_t
135 disable_ints_and_lock(void)
136 {
137 spl_t s;
138
139 s = splsched();
140 thread_call_lock_spin();
141
142 return s;
143 }
144
145 static inline void
146 enable_ints_and_unlock(void)
147 {
148 thread_call_unlock();
149 (void)spllo();
150 }
151
152
153 static inline boolean_t
154 group_isparallel(thread_call_group_t group)
155 {
156 return ((group->flags & TCG_PARALLEL) != 0);
157 }
158
159 static boolean_t
160 thread_call_group_should_add_thread(thread_call_group_t group)
161 {
162 uint32_t thread_count;
163
164 if (!group_isparallel(group)) {
165 if (group->pending_count > 0 && group->active_count == 0) {
166 return TRUE;
167 }
168
169 return FALSE;
170 }
171
172 if (group->pending_count > 0) {
173 if (group->idle_count > 0) {
174 panic("Pending work, but threads are idle?");
175 }
176
177 thread_count = group->active_count;
178
179 /*
180 * Add a thread if either there are no threads,
181 * the group has fewer than its target number of
182 * threads, or the amount of work is large relative
183 * to the number of threads. In the last case, pay attention
184 * to the total load on the system, and back off if
185 * it's high.
186 */
187 if ((thread_count == 0) ||
188 (thread_count < group->target_thread_count) ||
189 ((group->pending_count > THREAD_CALL_ADD_RATIO * thread_count) &&
190 (sched_mach_factor < THREAD_CALL_MACH_FACTOR_CAP))) {
191 return TRUE;
192 }
193 }
194
195 return FALSE;
196 }
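/*
 * Illustrative example of the policy above (not normative): with
 * THREAD_CALL_ADD_RATIO == 4, a parallel group already running at its
 * target of, say, 3 threads only asks for a 4th once pending_count
 * exceeds 12, and even then only while sched_mach_factor is below
 * THREAD_CALL_MACH_FACTOR_CAP (3), i.e. while overall system load is
 * modest.
 */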
197
198 static inline integer_t
199 thread_call_priority_to_sched_pri(thread_call_priority_t pri)
200 {
201 switch (pri) {
202 case THREAD_CALL_PRIORITY_HIGH:
203 return BASEPRI_PREEMPT;
204 case THREAD_CALL_PRIORITY_KERNEL:
205 return BASEPRI_KERNEL;
206 case THREAD_CALL_PRIORITY_USER:
207 return BASEPRI_DEFAULT;
208 case THREAD_CALL_PRIORITY_LOW:
209 return DEPRESSPRI;
210 default:
211 panic("Invalid priority.");
212 }
213
214 return 0;
215 }
216
217 /* Lock held */
218 static inline thread_call_group_t
219 thread_call_get_group(
220 thread_call_t call)
221 {
222 thread_call_priority_t pri = call->tc_pri;
223
224 assert(pri == THREAD_CALL_PRIORITY_LOW ||
225 pri == THREAD_CALL_PRIORITY_USER ||
226 pri == THREAD_CALL_PRIORITY_KERNEL ||
227 pri == THREAD_CALL_PRIORITY_HIGH);
228
229 return &thread_call_groups[pri];
230 }
231
232 static void
233 thread_call_group_setup(
234 thread_call_group_t group,
235 thread_call_priority_t pri,
236 uint32_t target_thread_count,
237 boolean_t parallel)
238 {
239 queue_init(&group->pending_queue);
240 queue_init(&group->delayed_queue);
241
242 timer_call_setup(&group->delayed_timer, thread_call_delayed_timer, group);
243 timer_call_setup(&group->dealloc_timer, thread_call_dealloc_timer, group);
244
245 wait_queue_init(&group->idle_wqueue, SYNC_POLICY_FIFO);
246
247 group->target_thread_count = target_thread_count;
248 group->pri = thread_call_priority_to_sched_pri(pri);
249
250 group->sched_call = sched_call_thread;
251 if (parallel) {
252 group->flags |= TCG_PARALLEL;
253 group->sched_call = NULL;
254 }
255 }
256
257 /*
258 * Simple wrapper for creating threads bound to
259 * thread call groups.
260 */
261 static kern_return_t
262 thread_call_thread_create(
263 thread_call_group_t group)
264 {
265 thread_t thread;
266 kern_return_t result;
267
268 result = kernel_thread_start_priority((thread_continue_t)thread_call_thread, group, group->pri, &thread);
269 if (result != KERN_SUCCESS) {
270 return result;
271 }
272
273 if (group->pri < BASEPRI_PREEMPT) {
274 /*
275 * New-style worker threads (those below BASEPRI_PREEMPT)
276 * don't get to run to completion in the kernel when
277 * higher-priority threads are available.
278 */
279 thread_set_eager_preempt(thread);
280 }
281
282 thread_deallocate(thread);
283 return KERN_SUCCESS;
284 }
285
286 /*
287 * thread_call_initialize:
288 *
289 * Initialize this module, called
290 * early during system initialization.
291 */
292 void
293 thread_call_initialize(void)
294 {
295 thread_call_t call;
296 kern_return_t result;
297 thread_t thread;
298 int i;
299
300 i = sizeof (thread_call_data_t);
301 thread_call_zone = zinit(i, 4096 * i, 16 * i, "thread_call");
302 zone_change(thread_call_zone, Z_CALLERACCT, FALSE);
303 zone_change(thread_call_zone, Z_NOENCRYPT, TRUE);
304
305 lck_attr_setdefault(&thread_call_lck_attr);
306 lck_grp_attr_setdefault(&thread_call_lck_grp_attr);
307 lck_grp_init(&thread_call_queues_lck_grp, "thread_call_queues", &thread_call_lck_grp_attr);
308 lck_grp_init(&thread_call_lck_grp, "thread_call", &thread_call_lck_grp_attr);
309
310 #if defined(__i386__) || defined(__x86_64__)
311 lck_mtx_init(&thread_call_lock_data, &thread_call_lck_grp, &thread_call_lck_attr);
312 #else
313 lck_spin_init(&thread_call_lock_data, &thread_call_lck_grp, &thread_call_lck_attr);
314 #endif
315
316 nanotime_to_absolutetime(0, THREAD_CALL_DEALLOC_INTERVAL_NS, &thread_call_dealloc_interval_abs);
317 wait_queue_init(&daemon_wqueue, SYNC_POLICY_FIFO);
318
319 thread_call_group_setup(&thread_call_groups[THREAD_CALL_PRIORITY_LOW], THREAD_CALL_PRIORITY_LOW, 0, TRUE);
320 thread_call_group_setup(&thread_call_groups[THREAD_CALL_PRIORITY_USER], THREAD_CALL_PRIORITY_USER, 0, TRUE);
321 thread_call_group_setup(&thread_call_groups[THREAD_CALL_PRIORITY_KERNEL], THREAD_CALL_PRIORITY_KERNEL, 1, TRUE);
322 thread_call_group_setup(&thread_call_groups[THREAD_CALL_PRIORITY_HIGH], THREAD_CALL_PRIORITY_HIGH, THREAD_CALL_THREAD_MIN, FALSE);
323
324 disable_ints_and_lock();
325
326 queue_init(&thread_call_internal_queue);
327 for (
328 call = internal_call_storage;
329 call < &internal_call_storage[INTERNAL_CALL_COUNT];
330 call++) {
331
332 enqueue_tail(&thread_call_internal_queue, qe(call));
333 }
334
335 thread_call_daemon_awake = TRUE;
336
337 enable_ints_and_unlock();
338
339 result = kernel_thread_start_priority((thread_continue_t)thread_call_daemon, NULL, BASEPRI_PREEMPT + 1, &thread);
340 if (result != KERN_SUCCESS)
341 panic("thread_call_initialize");
342
343 thread_deallocate(thread);
344 }
345
346 void
347 thread_call_setup(
348 thread_call_t call,
349 thread_call_func_t func,
350 thread_call_param_t param0)
351 {
352 bzero(call, sizeof(*call));
353 call_entry_setup((call_entry_t)call, func, param0);
354 call->tc_pri = THREAD_CALL_PRIORITY_HIGH; /* Default priority */
355 }
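/*
 * Hedged usage sketch (identifiers below are hypothetical): callers
 * that own the storage embed a thread_call_data_t and initialize it
 * with thread_call_setup() before first use.  Such calls default to
 * THREAD_CALL_PRIORITY_HIGH and, lacking THREAD_CALL_ALLOC, must not
 * be passed to thread_call_cancel_wait() or thread_call_free().
 *
 *	static thread_call_data_t example_call;
 *
 *	static void
 *	example_func(thread_call_param_t p0, thread_call_param_t p1)
 *	{
 *	}
 *
 * During initialization:
 *
 *	thread_call_setup(&example_call, example_func, example_context);
 *	(void) thread_call_enter(&example_call);
 */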
356
357 /*
358 * _internal_call_allocate:
359 *
360 * Allocate an internal callout entry.
361 *
362 * Called with thread_call_lock held.
363 */
364 static __inline__ thread_call_t
365 _internal_call_allocate(void)
366 {
367 thread_call_t call;
368
369 if (queue_empty(&thread_call_internal_queue))
370 panic("_internal_call_allocate");
371
372 call = TC(dequeue_head(&thread_call_internal_queue));
373
374 return (call);
375 }
376
377 /*
378 * _internal_call_release:
379 *
380 * Release an internal callout entry which
381 * is no longer pending (or delayed).
382 *
383 * Called with thread_call_lock held.
384 */
385 static __inline__ void
386 _internal_call_release(
387 thread_call_t call)
388 {
389 if ( call >= internal_call_storage &&
390 call < &internal_call_storage[INTERNAL_CALL_COUNT] )
391 enqueue_head(&thread_call_internal_queue, qe(call));
392 }
393
394 /*
395 * _pending_call_enqueue:
396 *
397 * Place an entry at the end of the
398 * pending queue, to be executed soon.
399 *
400 * Returns TRUE if the entry was already
401 * on a queue.
402 *
403 * Called with thread_call_lock held.
404 */
405 static __inline__ boolean_t
406 _pending_call_enqueue(
407 thread_call_t call,
408 thread_call_group_t group)
409 {
410 queue_head_t *old_queue;
411
412 old_queue = call_entry_enqueue_tail(CE(call), &group->pending_queue);
413
414 if (old_queue == NULL) {
415 call->tc_submit_count++;
416 }
417
418 group->pending_count++;
419
420 thread_call_wake(group);
421
422 return (old_queue != NULL);
423 }
424
425 /*
426 * _delayed_call_enqueue:
427 *
428 * Place an entry on the delayed queue,
429 * after existing entries with an earlier
430 * (or identical) deadline.
431 *
432 * Returns TRUE if the entry was already
433 * on a queue.
434 *
435 * Called with thread_call_lock held.
436 */
437 static __inline__ boolean_t
438 _delayed_call_enqueue(
439 thread_call_t call,
440 thread_call_group_t group,
441 uint64_t deadline)
442 {
443 queue_head_t *old_queue;
444
445 old_queue = call_entry_enqueue_deadline(CE(call), &group->delayed_queue, deadline);
446
447 if (old_queue == &group->pending_queue)
448 group->pending_count--;
449 else if (old_queue == NULL)
450 call->tc_submit_count++;
451
452 return (old_queue != NULL);
453 }
454
455 /*
456 * _call_dequeue:
457 *
458 * Remove an entry from a queue.
459 *
460 * Returns TRUE if the entry was on a queue.
461 *
462 * Called with thread_call_lock held.
463 */
464 static __inline__ boolean_t
465 _call_dequeue(
466 thread_call_t call,
467 thread_call_group_t group)
468 {
469 queue_head_t *old_queue;
470
471 old_queue = call_entry_dequeue(CE(call));
472
473 if (old_queue != NULL) {
474 call->tc_finish_count++;
475 if (old_queue == &group->pending_queue)
476 group->pending_count--;
477 }
478
479 return (old_queue != NULL);
480 }
481
482 /*
483 * _set_delayed_call_timer:
484 *
485 * Reset the timer so that it
486 * next expires when the entry is due.
487 *
488 * Called with thread_call_lock held.
489 */
490 static __inline__ void
491 _set_delayed_call_timer(
492 thread_call_t call,
493 thread_call_group_t group)
494 {
495 timer_call_enter(&group->delayed_timer, call->tc_call.deadline, 0);
496 }
497
498 /*
499 * _remove_from_pending_queue:
500 *
501 * Remove the first (or all) matching
502 * entries from the pending queue.
503 *
504 * Returns TRUE if any matching entries
505 * were found.
506 *
507 * Called with thread_call_lock held.
508 */
509 static boolean_t
510 _remove_from_pending_queue(
511 thread_call_func_t func,
512 thread_call_param_t param0,
513 boolean_t remove_all)
514 {
515 boolean_t call_removed = FALSE;
516 thread_call_t call;
517 thread_call_group_t group = &thread_call_groups[THREAD_CALL_PRIORITY_HIGH];
518
519 call = TC(queue_first(&group->pending_queue));
520
521 while (!queue_end(&group->pending_queue, qe(call))) {
522 if (call->tc_call.func == func &&
523 call->tc_call.param0 == param0) {
524 thread_call_t next = TC(queue_next(qe(call)));
525
526 _call_dequeue(call, group);
527
528 _internal_call_release(call);
529
530 call_removed = TRUE;
531 if (!remove_all)
532 break;
533
534 call = next;
535 }
536 else
537 call = TC(queue_next(qe(call)));
538 }
539
540 return (call_removed);
541 }
542
543 /*
544 * _remove_from_delayed_queue:
545 *
546 * Remove the first (or all) matching
547 * entries from the delayed queue.
548 *
549 * Returns TRUE if any matching entries
550 * were found.
551 *
552 * Called with thread_call_lock held.
553 */
554 static boolean_t
555 _remove_from_delayed_queue(
556 thread_call_func_t func,
557 thread_call_param_t param0,
558 boolean_t remove_all)
559 {
560 boolean_t call_removed = FALSE;
561 thread_call_t call;
562 thread_call_group_t group = &thread_call_groups[THREAD_CALL_PRIORITY_HIGH];
563
564 call = TC(queue_first(&group->delayed_queue));
565
566 while (!queue_end(&group->delayed_queue, qe(call))) {
567 if (call->tc_call.func == func &&
568 call->tc_call.param0 == param0) {
569 thread_call_t next = TC(queue_next(qe(call)));
570
571 _call_dequeue(call, group);
572
573 _internal_call_release(call);
574
575 call_removed = TRUE;
576 if (!remove_all)
577 break;
578
579 call = next;
580 }
581 else
582 call = TC(queue_next(qe(call)));
583 }
584
585 return (call_removed);
586 }
587
588 #ifndef __LP64__
589
590 /*
591 * thread_call_func:
592 *
593 * Enqueue a function callout.
594 *
595 * Guarantees { function, argument }
596 * uniqueness if unique_call is TRUE.
597 */
598 void
599 thread_call_func(
600 thread_call_func_t func,
601 thread_call_param_t param,
602 boolean_t unique_call)
603 {
604 thread_call_t call;
605 thread_call_group_t group = &thread_call_groups[THREAD_CALL_PRIORITY_HIGH];
606 spl_t s;
607
608 s = splsched();
609 thread_call_lock_spin();
610
611 call = TC(queue_first(&group->pending_queue));
612
613 while (unique_call && !queue_end(&group->pending_queue, qe(call))) {
614 if (call->tc_call.func == func && call->tc_call.param0 == param) {
615 break;
616 }
617
618 call = TC(queue_next(qe(call)));
619 }
620
621 if (!unique_call || queue_end(&group->pending_queue, qe(call))) {
622 call = _internal_call_allocate();
623 call->tc_call.func = func;
624 call->tc_call.param0 = param;
625 call->tc_call.param1 = NULL;
626
627 _pending_call_enqueue(call, group);
628 }
629
630 thread_call_unlock();
631 splx(s);
632 }
633
634 #endif /* __LP64__ */
635
636 /*
637 * thread_call_func_delayed:
638 *
639 * Enqueue a function callout to
640 * occur at the stated time.
641 */
642 void
643 thread_call_func_delayed(
644 thread_call_func_t func,
645 thread_call_param_t param,
646 uint64_t deadline)
647 {
648 thread_call_t call;
649 thread_call_group_t group = &thread_call_groups[THREAD_CALL_PRIORITY_HIGH];
650 spl_t s;
651
652 s = splsched();
653 thread_call_lock_spin();
654
655 call = _internal_call_allocate();
656 call->tc_call.func = func;
657 call->tc_call.param0 = param;
658 call->tc_call.param1 = 0;
659
660 _delayed_call_enqueue(call, group, deadline);
661
662 if (queue_first(&group->delayed_queue) == qe(call))
663 _set_delayed_call_timer(call, group);
664
665 thread_call_unlock();
666 splx(s);
667 }
668
669 /*
670 * thread_call_func_cancel:
671 *
672 * Dequeue a function callout.
673 *
674 * Removes one (or all) { function, argument }
675 * instance(s) from either (or both)
676 * the pending and the delayed queue,
677 * in that order.
678 *
679 * Returns TRUE if any calls were cancelled.
680 */
681 boolean_t
682 thread_call_func_cancel(
683 thread_call_func_t func,
684 thread_call_param_t param,
685 boolean_t cancel_all)
686 {
687 boolean_t result;
688 spl_t s;
689
690 s = splsched();
691 thread_call_lock_spin();
692
693 if (cancel_all)
694 result = _remove_from_pending_queue(func, param, cancel_all) |
695 _remove_from_delayed_queue(func, param, cancel_all);
696 else
697 result = _remove_from_pending_queue(func, param, cancel_all) ||
698 _remove_from_delayed_queue(func, param, cancel_all);
699
700 thread_call_unlock();
701 splx(s);
702
703 return (result);
704 }
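/*
 * Hedged usage sketch of the legacy { function, argument } interface
 * (identifiers are hypothetical; clock_interval_to_deadline() and
 * NSEC_PER_MSEC are assumed to come from kern/clock.h and
 * mach/clock_types.h).  These calls use internal entries, so there is
 * nothing for the caller to allocate or free:
 *
 *	uint64_t deadline;
 *
 *	clock_interval_to_deadline(100, NSEC_PER_MSEC, &deadline);
 *	thread_call_func_delayed(example_timeout, example_obj, deadline);
 *
 * Before example_obj is destroyed:
 *
 *	(void) thread_call_func_cancel(example_timeout, example_obj, FALSE);
 */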
705
706 /*
707 * Allocate a thread call with a given priority.  Calls at
708 * importances other than THREAD_CALL_PRIORITY_HIGH run in threads
709 * with eager preemption enabled (i.e. they may be aggressively preempted
710 * by higher-priority threads which are not in the normal "urgent" bands).
711 */
712 thread_call_t
713 thread_call_allocate_with_priority(
714 thread_call_func_t func,
715 thread_call_param_t param0,
716 thread_call_priority_t pri)
717 {
718 thread_call_t call;
719
720 if (pri > THREAD_CALL_PRIORITY_LOW) {
721 panic("Invalid pri: %d\n", pri);
722 }
723
724 call = thread_call_allocate(func, param0);
725 call->tc_pri = pri;
726
727 return call;
728 }
729
730 /*
731 * thread_call_allocate:
732 *
733 * Allocate a callout entry.
734 */
735 thread_call_t
736 thread_call_allocate(
737 thread_call_func_t func,
738 thread_call_param_t param0)
739 {
740 thread_call_t call = zalloc(thread_call_zone);
741
742 thread_call_setup(call, func, param0);
743 call->tc_refs = 1;
744 call->tc_flags = THREAD_CALL_ALLOC;
745
746 return (call);
747 }
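/*
 * Hedged usage sketch (hypothetical identifiers): a client typically
 * allocates a call once and re-submits it as events arrive.  Entering
 * a call that is already pending leaves it queued exactly once, and
 * thread_call_enter*() return TRUE in that case:
 *
 *	example_obj->eo_call = thread_call_allocate(example_work, example_obj);
 *
 * For each event:
 *
 *	(void) thread_call_enter1(example_obj->eo_call, event_data);
 */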
748
749 /*
750 * thread_call_free:
751 *
752 * Release a callout. If the callout is currently
753 * executing, it will be freed when all invocations
754 * finish.
755 */
756 boolean_t
757 thread_call_free(
758 thread_call_t call)
759 {
760 spl_t s;
761 int32_t refs;
762
763 s = splsched();
764 thread_call_lock_spin();
765
766 if (call->tc_call.queue != NULL) {
767 thread_call_unlock();
768 splx(s);
769
770 return (FALSE);
771 }
772
773 refs = --call->tc_refs;
774 if (refs < 0) {
775 panic("Refcount negative: %d\n", refs);
776 }
777
778 thread_call_unlock();
779 splx(s);
780
781 if (refs == 0) {
782 zfree(thread_call_zone, call);
783 }
784
785 return (TRUE);
786 }
787
788 /*
789 * thread_call_enter:
790 *
791 * Enqueue a callout entry to occur "soon".
792 *
793 * Returns TRUE if the call was
794 * already on a queue.
795 */
796 boolean_t
797 thread_call_enter(
798 thread_call_t call)
799 {
800 boolean_t result = TRUE;
801 thread_call_group_t group;
802 spl_t s;
803
804 group = thread_call_get_group(call);
805
806 s = splsched();
807 thread_call_lock_spin();
808
809 if (call->tc_call.queue != &group->pending_queue) {
810 result = _pending_call_enqueue(call, group);
811 }
812
813 call->tc_call.param1 = 0;
814
815 thread_call_unlock();
816 splx(s);
817
818 return (result);
819 }
820
821 boolean_t
822 thread_call_enter1(
823 thread_call_t call,
824 thread_call_param_t param1)
825 {
826 boolean_t result = TRUE;
827 thread_call_group_t group;
828 spl_t s;
829
830 group = thread_call_get_group(call);
831
832 s = splsched();
833 thread_call_lock_spin();
834
835 if (call->tc_call.queue != &group->pending_queue) {
836 result = _pending_call_enqueue(call, group);
837 }
838
839 call->tc_call.param1 = param1;
840
841 thread_call_unlock();
842 splx(s);
843
844 return (result);
845 }
846
847 /*
848 * thread_call_enter_delayed:
849 *
850 * Enqueue a callout entry to occur
851 * at the stated time.
852 *
853 * Returns TRUE if the call was
854 * already on a queue.
855 */
856 boolean_t
857 thread_call_enter_delayed(
858 thread_call_t call,
859 uint64_t deadline)
860 {
861 boolean_t result = TRUE;
862 thread_call_group_t group;
863 spl_t s;
864
865 group = thread_call_get_group(call);
866
867 s = splsched();
868 thread_call_lock_spin();
869
870 result = _delayed_call_enqueue(call, group, deadline);
871
872 if (queue_first(&group->delayed_queue) == qe(call))
873 _set_delayed_call_timer(call, group);
874
875 call->tc_call.param1 = 0;
876
877 thread_call_unlock();
878 splx(s);
879
880 return (result);
881 }
882
883 boolean_t
884 thread_call_enter1_delayed(
885 thread_call_t call,
886 thread_call_param_t param1,
887 uint64_t deadline)
888 {
889 boolean_t result = TRUE;
890 thread_call_group_t group;
891 spl_t s;
892
893 group = thread_call_get_group(call);
894
895 s = splsched();
896 thread_call_lock_spin();
897
898 result = _delayed_call_enqueue(call, group, deadline);
899
900 if (queue_first(&group->delayed_queue) == qe(call))
901 _set_delayed_call_timer(call, group);
902
903 call->tc_call.param1 = param1;
904
905 thread_call_unlock();
906 splx(s);
907
908 return (result);
909 }
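/*
 * Hedged sketch of scheduling a delayed invocation (hypothetical
 * identifiers; mach_absolute_time() and nanoseconds_to_absolutetime()
 * are assumed from mach/mach_time.h and kern/clock.h):
 *
 *	uint64_t interval, deadline;
 *	boolean_t already_queued;
 *
 *	nanoseconds_to_absolutetime(250 * NSEC_PER_MSEC, &interval);
 *	deadline = mach_absolute_time() + interval;
 *	already_queued = thread_call_enter1_delayed(example_call,
 *	    example_data, deadline);
 *
 * If the call was already on a queue, this returns TRUE and the entry
 * is simply re-queued with the new deadline and parameter.
 */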
910
911 /*
912 * thread_call_cancel:
913 *
914 * Dequeue a callout entry.
915 *
916 * Returns TRUE if the call was
917 * on a queue.
918 */
919 boolean_t
920 thread_call_cancel(
921 thread_call_t call)
922 {
923 boolean_t result;
924 thread_call_group_t group;
925 spl_t s;
926
927 group = thread_call_get_group(call);
928
929 s = splsched();
930 thread_call_lock_spin();
931
932 result = _call_dequeue(call, group);
933
934 thread_call_unlock();
935 splx(s);
936
937 return (result);
938 }
939
940 /*
941 * Cancel a thread call. If it cannot be cancelled (i.e.
942 * is already in flight), waits for the most recent invocation
943 * to finish. Note that if clients re-submit this thread call,
944 * it may still be pending or in flight when thread_call_cancel_wait
945 * returns, but all requests to execute this work item prior
946 * to the call to thread_call_cancel_wait will have finished.
947 */
948 boolean_t
949 thread_call_cancel_wait(
950 thread_call_t call)
951 {
952 boolean_t result;
953 thread_call_group_t group;
954
955 if ((call->tc_flags & THREAD_CALL_ALLOC) == 0) {
956 panic("%s: Can't wait on thread call whose storage I don't own.", __FUNCTION__);
957 }
958
959 group = thread_call_get_group(call);
960
961 (void) splsched();
962 thread_call_lock_spin();
963
964 result = _call_dequeue(call, group);
965 if (result == FALSE) {
966 thread_call_wait_locked(call);
967 }
968
969 thread_call_unlock();
970 (void) spllo();
971
972 return result;
973 }
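/*
 * Hedged teardown sketch (hypothetical identifiers, and assuming the
 * call is not re-submitted concurrently): before freeing the object a
 * callout references, cancel the call, wait out any in-flight
 * invocation, then drop the callout's reference.  Both steps require
 * a call created by thread_call_allocate*():
 *
 *	(void) thread_call_cancel_wait(example_obj->eo_call);
 *	(void) thread_call_free(example_obj->eo_call);
 *	example_obj->eo_call = NULL;
 */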
974
975
976 #ifndef __LP64__
977
978 /*
979 * thread_call_is_delayed:
980 *
981 * Returns TRUE if the call is
982 * currently on a delayed queue.
983 *
984 * Optionally returns the expiration time.
985 */
986 boolean_t
987 thread_call_is_delayed(
988 thread_call_t call,
989 uint64_t *deadline)
990 {
991 boolean_t result = FALSE;
992 thread_call_group_t group;
993 spl_t s;
994
995 group = thread_call_get_group(call);
996
997 s = splsched();
998 thread_call_lock_spin();
999
1000 if (call->tc_call.queue == &group->delayed_queue) {
1001 if (deadline != NULL)
1002 *deadline = call->tc_call.deadline;
1003 result = TRUE;
1004 }
1005
1006 thread_call_unlock();
1007 splx(s);
1008
1009 return (result);
1010 }
1011
1012 #endif /* __LP64__ */
1013
1014 /*
1015 * thread_call_wake:
1016 *
1017 * Wake a call thread to service
1018 * pending call entries. May wake
1019 * the daemon thread in order to
1020 * create additional call threads.
1021 *
1022 * Called with thread_call_lock held.
1023 *
1024 * For high-priority group, only does wakeup/creation if there are no threads
1025 * running.
1026 */
1027 static __inline__ void
1028 thread_call_wake(
1029 thread_call_group_t group)
1030 {
1031 /*
1032 * New behavior: use threads if you've got 'em.
1033 * Traditional behavior: wake only if no threads running.
1034 */
1035 if (group_isparallel(group) || group->active_count == 0) {
1036 if (wait_queue_wakeup_one(&group->idle_wqueue, NO_EVENT, THREAD_AWAKENED, -1) == KERN_SUCCESS) {
1037 group->idle_count--; group->active_count++;
1038
1039 if (group->idle_count == 0) {
1040 timer_call_cancel(&group->dealloc_timer);
1041 group->flags &= ~TCG_DEALLOC_ACTIVE;
1042 }
1043 } else {
1044 if (!thread_call_daemon_awake && thread_call_group_should_add_thread(group)) {
1045 thread_call_daemon_awake = TRUE;
1046 wait_queue_wakeup_one(&daemon_wqueue, NO_EVENT, THREAD_AWAKENED, -1);
1047 }
1048 }
1049 }
1050 }
1051
1052 /*
1053 * sched_call_thread:
1054 *
1055 * Call out invoked by the scheduler. Used only for high-priority
1056 * thread call group.
1057 */
1058 static void
1059 sched_call_thread(
1060 int type,
1061 __unused thread_t thread)
1062 {
1063 thread_call_group_t group;
1064
1065 group = &thread_call_groups[THREAD_CALL_PRIORITY_HIGH]; /* XXX */
1066
1067 thread_call_lock_spin();
1068
1069 switch (type) {
1070
1071 case SCHED_CALL_BLOCK:
1072 --group->active_count;
1073 if (group->pending_count > 0)
1074 thread_call_wake(group);
1075 break;
1076
1077 case SCHED_CALL_UNBLOCK:
1078 group->active_count++;
1079 break;
1080 }
1081
1082 thread_call_unlock();
1083 }
1084
1085 /*
1086 * Interrupts disabled, lock held; returns the same way.
1087 * Only called on thread calls whose storage we own. Wakes up
1088 * anyone who might be waiting on this work item and frees it
1089 * if the client has so requested.
1090 */
1091 static void
1092 thread_call_finish(thread_call_t call)
1093 {
1094 boolean_t dowake = FALSE;
1095
1096 call->tc_finish_count++;
1097 call->tc_refs--;
1098
1099 if ((call->tc_flags & THREAD_CALL_WAIT) != 0) {
1100 dowake = TRUE;
1101 call->tc_flags &= ~THREAD_CALL_WAIT;
1102
1103 /*
1104 * Dropping lock here because the sched call for the
1105 * high-pri group can take the big lock from under
1106 * a thread lock.
1107 */
1108 thread_call_unlock();
1109 thread_wakeup((event_t)call);
1110 thread_call_lock_spin();
1111 }
1112
1113 if (call->tc_refs == 0) {
1114 if (dowake) {
1115 panic("Someone waiting on a thread call that is scheduled for free: %p\n", call->tc_call.func);
1116 }
1117
1118 enable_ints_and_unlock();
1119
1120 zfree(thread_call_zone, call);
1121
1122 (void)disable_ints_and_lock();
1123 }
1124
1125 }
1126
1127 /*
1128 * thread_call_thread:
1129 */
1130 static void
1131 thread_call_thread(
1132 thread_call_group_t group,
1133 wait_result_t wres)
1134 {
1135 thread_t self = current_thread();
1136 boolean_t canwait;
1137
1138 /*
1139 * A wakeup with THREAD_INTERRUPTED indicates that
1140 * we should terminate.
1141 */
1142 if (wres == THREAD_INTERRUPTED) {
1143 thread_terminate(self);
1144
1145 /* NOTREACHED */
1146 panic("thread_terminate() returned?");
1147 }
1148
1149 (void)disable_ints_and_lock();
1150
1151 thread_sched_call(self, group->sched_call);
1152
1153 while (group->pending_count > 0) {
1154 thread_call_t call;
1155 thread_call_func_t func;
1156 thread_call_param_t param0, param1;
1157
1158 call = TC(dequeue_head(&group->pending_queue));
1159 group->pending_count--;
1160
1161 func = call->tc_call.func;
1162 param0 = call->tc_call.param0;
1163 param1 = call->tc_call.param1;
1164
1165 call->tc_call.queue = NULL;
1166
1167 _internal_call_release(call);
1168
1169 /*
1170 * Can only do wakeups for thread calls whose storage
1171 * we control.
1172 */
1173 if ((call->tc_flags & THREAD_CALL_ALLOC) != 0) {
1174 canwait = TRUE;
1175 call->tc_refs++; /* Delay free until we're done */
1176 } else
1177 canwait = FALSE;
1178
1179 enable_ints_and_unlock();
1180
1181 KERNEL_DEBUG_CONSTANT(
1182 MACHDBG_CODE(DBG_MACH_SCHED,MACH_CALLOUT) | DBG_FUNC_NONE,
1183 VM_KERNEL_UNSLIDE(func), param0, param1, 0, 0);
1184
1185 (*func)(param0, param1);
1186
1187 if (get_preemption_level() != 0) {
1188 int pl = get_preemption_level();
1189 panic("thread_call_thread: preemption_level %d, last callout %p(%p, %p)",
1190 pl, (void *)VM_KERNEL_UNSLIDE(func), param0, param1);
1191 }
1192
1193 (void)thread_funnel_set(self->funnel_lock, FALSE); /* XXX */
1194
1195 (void) disable_ints_and_lock();
1196
1197 if (canwait) {
1198 /* Frees if so desired */
1199 thread_call_finish(call);
1200 }
1201 }
1202
1203 thread_sched_call(self, NULL);
1204 group->active_count--;
1205
1206 if (group_isparallel(group)) {
1207 /*
1207 * For the new style of thread group, the thread always blocks.
1208 * If we have more threads than the target, this is the first
1209 * thread to go idle, and the deallocation timer isn't already
1210 * active, set a timer to deallocate a thread if the surplus
1211 * persists.
1213 */
1214 group->idle_count++;
1215
1216 if (group->idle_count == 1) {
1217 group->idle_timestamp = mach_absolute_time();
1218 }
1219
1220 if (((group->flags & TCG_DEALLOC_ACTIVE) == 0) &&
1221 ((group->active_count + group->idle_count) > group->target_thread_count)) {
1222 group->flags |= TCG_DEALLOC_ACTIVE;
1223 thread_call_start_deallocate_timer(group);
1224 }
1225
1226 /* Wait for more work (or termination) */
1227 wres = wait_queue_assert_wait(&group->idle_wqueue, NO_EVENT, THREAD_INTERRUPTIBLE, 0);
1228 if (wres != THREAD_WAITING) {
1229 panic("kcall worker unable to assert wait?");
1230 }
1231
1232 enable_ints_and_unlock();
1233
1234 thread_block_parameter((thread_continue_t)thread_call_thread, group);
1235 } else {
1236 if (group->idle_count < group->target_thread_count) {
1237 group->idle_count++;
1238
1239 wait_queue_assert_wait(&group->idle_wqueue, NO_EVENT, THREAD_UNINT, 0); /* Interrupted means to exit */
1240
1241 enable_ints_and_unlock();
1242
1243 thread_block_parameter((thread_continue_t)thread_call_thread, group);
1244 /* NOTREACHED */
1245 }
1246 }
1247
1248 enable_ints_and_unlock();
1249
1250 thread_terminate(self);
1251 /* NOTREACHED */
1252 }
1253
1254 /*
1255 * thread_call_daemon: walk list of groups, allocating
1256 * threads if appropriate (as determined by
1257 * thread_call_group_should_add_thread()).
1258 */
1259 static void
1260 thread_call_daemon_continue(__unused void *arg)
1261 {
1262 int i;
1263 kern_return_t kr;
1264 thread_call_group_t group;
1265
1266 (void)disable_ints_and_lock();
1267
1268 /* Starting at zero happens to be high-priority first. */
1269 for (i = 0; i < THREAD_CALL_GROUP_COUNT; i++) {
1270 group = &thread_call_groups[i];
1271 while (thread_call_group_should_add_thread(group)) {
1272 group->active_count++;
1273
1274 enable_ints_and_unlock();
1275
1276 kr = thread_call_thread_create(group);
1277 if (kr != KERN_SUCCESS) {
1278 /*
1279 * On failure, just pause for a moment and give up.
1280 * We can try again later.
1281 */
1282 delay(10000); /* 10 ms */
1283 (void)disable_ints_and_lock();
1284 goto out;
1285 }
1286
1287 (void)disable_ints_and_lock();
1288 }
1289 }
1290
1291 out:
1292 thread_call_daemon_awake = FALSE;
1293 wait_queue_assert_wait(&daemon_wqueue, NO_EVENT, THREAD_UNINT, 0);
1294
1295 enable_ints_and_unlock();
1296
1297 thread_block_parameter((thread_continue_t)thread_call_daemon_continue, NULL);
1298 /* NOTREACHED */
1299 }
1300
1301 static void
1302 thread_call_daemon(
1303 __unused void *arg)
1304 {
1305 thread_t self = current_thread();
1306
1307 self->options |= TH_OPT_VMPRIV;
1308 vm_page_free_reserve(2); /* XXX */
1309
1310 thread_call_daemon_continue(NULL);
1311 /* NOTREACHED */
1312 }
1313
1314 /*
1315 * Schedule timer to deallocate a worker thread if we have a surplus
1316 * of threads (in excess of the group's target) and at least one thread
1317 * is idle the whole time.
1318 */
1319 static void
1320 thread_call_start_deallocate_timer(
1321 thread_call_group_t group)
1322 {
1323 uint64_t deadline;
1324 boolean_t onqueue;
1325
1326 assert(group->idle_count > 0);
1327
1328 group->flags |= TCG_DEALLOC_ACTIVE;
1329 deadline = group->idle_timestamp + thread_call_dealloc_interval_abs;
1330 onqueue = timer_call_enter(&group->dealloc_timer, deadline, 0);
1331
1332 if (onqueue) {
1333 panic("Deallocate timer already active?");
1334 }
1335 }
1336
1337 void
1338 thread_call_delayed_timer(
1339 timer_call_param_t p0,
1340 __unused timer_call_param_t p1
1341 )
1342 {
1343 thread_call_t call;
1344 thread_call_group_t group = p0;
1345 uint64_t timestamp;
1346
1347 thread_call_lock_spin();
1348
1349 timestamp = mach_absolute_time();
1350
1351 call = TC(queue_first(&group->delayed_queue));
1352
1353 while (!queue_end(&group->delayed_queue, qe(call))) {
1354 if (call->tc_call.deadline <= timestamp) {
1355 _pending_call_enqueue(call, group);
1356 }
1357 else
1358 break;
1359
1360 call = TC(queue_first(&group->delayed_queue));
1361 }
1362
1363 if (!queue_end(&group->delayed_queue, qe(call)))
1364 _set_delayed_call_timer(call, group);
1365
1366 thread_call_unlock();
1367 }
1368
1369 /*
1370 * Timer callback to tell a thread to terminate if
1371 * we have an excess of threads and at least one has been
1372 * idle for a long time.
1373 */
1374 static void
1375 thread_call_dealloc_timer(
1376 timer_call_param_t p0,
1377 __unused timer_call_param_t p1)
1378 {
1379 thread_call_group_t group = (thread_call_group_t)p0;
1380 uint64_t now;
1381 kern_return_t res;
1382 boolean_t terminated = FALSE;
1383
1384 thread_call_lock_spin();
1385
1386 now = mach_absolute_time();
1387 if (group->idle_count > 0) {
1388 if (now > group->idle_timestamp + thread_call_dealloc_interval_abs) {
1389 terminated = TRUE;
1390 group->idle_count--;
1391 res = wait_queue_wakeup_one(&group->idle_wqueue, NO_EVENT, THREAD_INTERRUPTED, -1);
1392 if (res != KERN_SUCCESS) {
1393 panic("Unable to wake up idle thread for termination?");
1394 }
1395 }
1396
1397 }
1398
1399 /*
1400 * If we still have an excess of threads, schedule another
1401 * invocation of this function.
1402 */
1403 if (group->idle_count > 0 && (group->idle_count + group->active_count > group->target_thread_count)) {
1404 /*
1405 * If we killed someone just now, push out the
1406 * next deadline.
1407 */
1408 if (terminated) {
1409 group->idle_timestamp = now;
1410 }
1411
1412 thread_call_start_deallocate_timer(group);
1413 } else {
1414 group->flags &= ~TCG_DEALLOC_ACTIVE;
1415 }
1416
1417 thread_call_unlock();
1418 }
1419
1420 /*
1421 * Wait for all requested invocations of a thread call prior to now
1422 * to finish. Can only be invoked on thread calls whose storage we manage.
1423 * Just waits for the finish count to catch up to the submit count we find
1424 * at the beginning of our wait.
1425 */
1426 static void
1427 thread_call_wait_locked(thread_call_t call)
1428 {
1429 uint64_t submit_count;
1430 wait_result_t res;
1431
1432 assert(call->tc_flags & THREAD_CALL_ALLOC);
1433
1434 submit_count = call->tc_submit_count;
1435
1436 while (call->tc_finish_count < submit_count) {
1437 call->tc_flags |= THREAD_CALL_WAIT;
1438
1439 res = assert_wait(call, THREAD_UNINT);
1440 if (res != THREAD_WAITING) {
1441 panic("Unable to assert wait?");
1442 }
1443
1444 thread_call_unlock();
1445 (void) spllo();
1446
1447 res = thread_block(NULL);
1448 if (res != THREAD_AWAKENED) {
1449 panic("Awoken with %d?", res);
1450 }
1451
1452 (void) splsched();
1453 thread_call_lock_spin();
1454 }
1455 }
1456
1457 /*
1458 * Determine whether a thread call is either on a queue or
1459 * currently being executed.
1460 */
1461 boolean_t
1462 thread_call_isactive(thread_call_t call)
1463 {
1464 boolean_t active;
1465
1466 disable_ints_and_lock();
1467 active = (call->tc_submit_count > call->tc_finish_count);
1468 enable_ints_and_unlock();
1469
1470 return active;
1471 }
1472