2 * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
28 /* Copyright (c) 1995-2018 Apple, Inc. All Rights Reserved */
30 #include <sys/cdefs.h>
32 // <rdar://problem/26158937> panic() should be marked noreturn
33 extern void panic(const char *string
, ...) __printflike(1,2) __dead2
;
35 #include <kern/assert.h>
37 #include <kern/clock.h>
38 #include <kern/cpu_data.h>
39 #include <kern/kern_types.h>
40 #include <kern/policy_internal.h>
41 #include <kern/processor.h>
42 #include <kern/sched_prim.h> /* for thread_exception_return */
43 #include <kern/task.h>
44 #include <kern/thread.h>
45 #include <kern/zalloc.h>
46 #include <mach/kern_return.h>
47 #include <mach/mach_param.h>
48 #include <mach/mach_port.h>
49 #include <mach/mach_types.h>
50 #include <mach/mach_vm.h>
51 #include <mach/sync_policy.h>
52 #include <mach/task.h>
53 #include <mach/thread_act.h> /* for thread_resume */
54 #include <mach/thread_policy.h>
55 #include <mach/thread_status.h>
56 #include <mach/vm_prot.h>
57 #include <mach/vm_statistics.h>
58 #include <machine/atomic.h>
59 #include <machine/machine_routines.h>
60 #include <vm/vm_map.h>
61 #include <vm/vm_protos.h>
63 #include <sys/eventvar.h>
64 #include <sys/kdebug.h>
65 #include <sys/kernel.h>
67 #include <sys/param.h>
68 #include <sys/proc_info.h> /* for fill_procworkqueue */
69 #include <sys/proc_internal.h>
70 #include <sys/pthread_shims.h>
71 #include <sys/resourcevar.h>
72 #include <sys/signalvar.h>
73 #include <sys/sysctl.h>
74 #include <sys/sysproto.h>
75 #include <sys/systm.h>
76 #include <sys/ulock.h> /* for ulock_owner_value_to_port_name */
78 #include <pthread/bsdthread_private.h>
79 #include <pthread/workqueue_syscalls.h>
80 #include <pthread/workqueue_internal.h>
81 #include <pthread/workqueue_trace.h>
85 extern thread_t
port_name_to_thread(mach_port_name_t port_name
); /* osfmk/kern/ipc_tt.h */
87 static void workq_unpark_continue(void *uth
, wait_result_t wr
) __dead2
;
88 static void workq_schedule_creator(proc_t p
, struct workqueue
*wq
, int flags
);
90 static bool workq_threadreq_admissible(struct workqueue
*wq
, struct uthread
*uth
,
91 workq_threadreq_t req
);
93 static uint32_t workq_constrained_allowance(struct workqueue
*wq
,
94 thread_qos_t at_qos
, struct uthread
*uth
, bool may_start_timer
);
96 static bool workq_thread_is_busy(uint64_t cur_ts
,
97 _Atomic
uint64_t *lastblocked_tsp
);
99 static int workq_sysctl_handle_usecs SYSCTL_HANDLER_ARGS
;
103 struct workq_usec_var
{
108 #define WORKQ_SYSCTL_USECS(var, init) \
109 static struct workq_usec_var var = { .usecs = init }; \
110 SYSCTL_OID(_kern, OID_AUTO, var##_usecs, \
111 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &var, 0, \
112 workq_sysctl_handle_usecs, "I", "")
114 static lck_grp_t
*workq_lck_grp
;
115 static lck_attr_t
*workq_lck_attr
;
116 static lck_grp_attr_t
*workq_lck_grp_attr
;
117 os_refgrp_decl(static, workq_refgrp
, "workq", NULL
);
119 static zone_t workq_zone_workqueue
;
120 static zone_t workq_zone_threadreq
;
122 WORKQ_SYSCTL_USECS(wq_stalled_window
, WQ_STALLED_WINDOW_USECS
);
123 WORKQ_SYSCTL_USECS(wq_reduce_pool_window
, WQ_REDUCE_POOL_WINDOW_USECS
);
124 WORKQ_SYSCTL_USECS(wq_max_timer_interval
, WQ_MAX_TIMER_INTERVAL_USECS
);
125 static uint32_t wq_max_threads
= WORKQUEUE_MAXTHREADS
;
126 static uint32_t wq_max_constrained_threads
= WORKQUEUE_MAXTHREADS
/ 8;
127 static uint32_t wq_init_constrained_limit
= 1;
128 static uint16_t wq_death_max_load
;
129 static uint32_t wq_max_parallelism
[WORKQ_NUM_QOS_BUCKETS
];
134 workq_sysctl_handle_usecs SYSCTL_HANDLER_ARGS
137 struct workq_usec_var
*v
= arg1
;
138 int error
= sysctl_handle_int(oidp
, &v
->usecs
, 0, req
);
139 if (error
|| !req
->newptr
)
141 clock_interval_to_absolutetime_interval(v
->usecs
, NSEC_PER_USEC
,
146 SYSCTL_INT(_kern
, OID_AUTO
, wq_max_threads
, CTLFLAG_RW
| CTLFLAG_LOCKED
,
147 &wq_max_threads
, 0, "");
149 SYSCTL_INT(_kern
, OID_AUTO
, wq_max_constrained_threads
, CTLFLAG_RW
| CTLFLAG_LOCKED
,
150 &wq_max_constrained_threads
, 0, "");
154 #define WQPTR_IS_INITING_VALUE ((struct workqueue *)~(uintptr_t)0)
156 static struct workqueue
*
157 proc_get_wqptr_fast(struct proc
*p
)
159 return os_atomic_load(&p
->p_wqptr
, relaxed
);
162 static struct workqueue
*
163 proc_get_wqptr(struct proc
*p
)
165 struct workqueue
*wq
= proc_get_wqptr_fast(p
);
166 return wq
== WQPTR_IS_INITING_VALUE
? NULL
: wq
;
170 proc_set_wqptr(struct proc
*p
, struct workqueue
*wq
)
172 wq
= os_atomic_xchg(&p
->p_wqptr
, wq
, release
);
173 if (wq
== WQPTR_IS_INITING_VALUE
) {
175 thread_wakeup(&p
->p_wqptr
);
181 proc_init_wqptr_or_wait(struct proc
*p
)
183 struct workqueue
*wq
;
189 p
->p_wqptr
= WQPTR_IS_INITING_VALUE
;
194 if (wq
== WQPTR_IS_INITING_VALUE
) {
195 assert_wait(&p
->p_wqptr
, THREAD_UNINT
);
197 thread_block(THREAD_CONTINUE_NULL
);
204 static inline event_t
205 workq_parked_wait_event(struct uthread
*uth
)
207 return (event_t
)&uth
->uu_workq_stackaddr
;
211 workq_thread_wakeup(struct uthread
*uth
)
213 if ((uth
->uu_workq_flags
& UT_WORKQ_IDLE_CLEANUP
) == 0) {
214 thread_wakeup_thread(workq_parked_wait_event(uth
), uth
->uu_thread
);
218 #pragma mark wq_thactive
220 #if defined(__LP64__)
222 // 127 - 115 : 13 bits of zeroes
223 // 114 - 112 : best QoS among all pending constrained requests
224 // 111 - 0 : MGR, AUI, UI, IN, DF, UT, BG+MT buckets every 16 bits
225 #define WQ_THACTIVE_BUCKET_WIDTH 16
226 #define WQ_THACTIVE_QOS_SHIFT (7 * WQ_THACTIVE_BUCKET_WIDTH)
229 // 63 - 61 : best QoS among all pending constrained requests
230 // 60 : Manager bucket (0 or 1)
231 // 59 - 0 : AUI, UI, IN, DF, UT, BG+MT buckets every 10 bits
232 #define WQ_THACTIVE_BUCKET_WIDTH 10
233 #define WQ_THACTIVE_QOS_SHIFT (6 * WQ_THACTIVE_BUCKET_WIDTH + 1)
235 #define WQ_THACTIVE_BUCKET_MASK ((1U << WQ_THACTIVE_BUCKET_WIDTH) - 1)
236 #define WQ_THACTIVE_BUCKET_HALF (1U << (WQ_THACTIVE_BUCKET_WIDTH - 1))
238 static_assert(sizeof(wq_thactive_t
) * CHAR_BIT
- WQ_THACTIVE_QOS_SHIFT
>= 3,
239 "Make sure we have space to encode a QoS");
241 static inline wq_thactive_t
242 _wq_thactive(struct workqueue
*wq
)
244 return os_atomic_load(&wq
->wq_thactive
, relaxed
);
248 _wq_bucket(thread_qos_t qos
)
250 // Map both BG and MT to the same bucket by over-shifting down and
251 // clamping MT and BG together.
253 case THREAD_QOS_MAINTENANCE
:
260 #define WQ_THACTIVE_BEST_CONSTRAINED_REQ_QOS(tha) \
261 ((tha) >> WQ_THACTIVE_QOS_SHIFT)
263 static inline thread_qos_t
264 _wq_thactive_best_constrained_req_qos(struct workqueue
*wq
)
266 // Avoid expensive atomic operations: the three bits we're loading are in
267 // a single byte, and always updated under the workqueue lock
268 wq_thactive_t v
= *(wq_thactive_t
*)&wq
->wq_thactive
;
269 return WQ_THACTIVE_BEST_CONSTRAINED_REQ_QOS(v
);
273 _wq_thactive_refresh_best_constrained_req_qos(struct workqueue
*wq
)
275 thread_qos_t old_qos
, new_qos
;
276 workq_threadreq_t req
;
278 req
= priority_queue_max(&wq
->wq_constrained_queue
,
279 struct workq_threadreq_s
, tr_entry
);
280 new_qos
= req
? req
->tr_qos
: THREAD_QOS_UNSPECIFIED
;
281 old_qos
= _wq_thactive_best_constrained_req_qos(wq
);
282 if (old_qos
!= new_qos
) {
283 long delta
= (long)new_qos
- (long)old_qos
;
284 wq_thactive_t v
= (wq_thactive_t
)delta
<< WQ_THACTIVE_QOS_SHIFT
;
286 * We can do an atomic add relative to the initial load because updates
287 * to this qos are always serialized under the workqueue lock.
289 v
= os_atomic_add(&wq
->wq_thactive
, v
, relaxed
);
291 WQ_TRACE_WQ(TRACE_wq_thactive_update
, wq
, (uint64_t)v
,
292 (uint64_t)(v
>> 64), 0, 0);
294 WQ_TRACE_WQ(TRACE_wq_thactive_update
, wq
, v
, 0, 0, 0);
299 static inline wq_thactive_t
300 _wq_thactive_offset_for_qos(thread_qos_t qos
)
302 return (wq_thactive_t
)1 << (_wq_bucket(qos
) * WQ_THACTIVE_BUCKET_WIDTH
);
305 static inline wq_thactive_t
306 _wq_thactive_inc(struct workqueue
*wq
, thread_qos_t qos
)
308 wq_thactive_t v
= _wq_thactive_offset_for_qos(qos
);
309 return os_atomic_add_orig(&wq
->wq_thactive
, v
, relaxed
);
312 static inline wq_thactive_t
313 _wq_thactive_dec(struct workqueue
*wq
, thread_qos_t qos
)
315 wq_thactive_t v
= _wq_thactive_offset_for_qos(qos
);
316 return os_atomic_sub_orig(&wq
->wq_thactive
, v
, relaxed
);
320 _wq_thactive_move(struct workqueue
*wq
,
321 thread_qos_t old_qos
, thread_qos_t new_qos
)
323 wq_thactive_t v
= _wq_thactive_offset_for_qos(new_qos
) -
324 _wq_thactive_offset_for_qos(old_qos
);
325 os_atomic_add_orig(&wq
->wq_thactive
, v
, relaxed
);
326 wq
->wq_thscheduled_count
[_wq_bucket(old_qos
)]--;
327 wq
->wq_thscheduled_count
[_wq_bucket(new_qos
)]++;
330 static inline uint32_t
331 _wq_thactive_aggregate_downto_qos(struct workqueue
*wq
, wq_thactive_t v
,
332 thread_qos_t qos
, uint32_t *busycount
, uint32_t *max_busycount
)
334 uint32_t count
= 0, active
;
337 assert(WORKQ_THREAD_QOS_MIN
<= qos
&& qos
<= WORKQ_THREAD_QOS_MAX
);
340 curtime
= mach_absolute_time();
344 *max_busycount
= THREAD_QOS_LAST
- qos
;
347 int i
= _wq_bucket(qos
);
348 v
>>= i
* WQ_THACTIVE_BUCKET_WIDTH
;
349 for (; i
< WORKQ_NUM_QOS_BUCKETS
; i
++, v
>>= WQ_THACTIVE_BUCKET_WIDTH
) {
350 active
= v
& WQ_THACTIVE_BUCKET_MASK
;
353 if (busycount
&& wq
->wq_thscheduled_count
[i
] > active
) {
354 if (workq_thread_is_busy(curtime
, &wq
->wq_lastblocked_ts
[i
])) {
356 * We only consider the last blocked thread for a given bucket
357 * as busy because we don't want to take the list lock in each
358 * sched callback. However this is an approximation that could
359 * contribute to thread creation storms.
369 #pragma mark wq_flags
371 static inline uint32_t
372 _wq_flags(struct workqueue
*wq
)
374 return os_atomic_load(&wq
->wq_flags
, relaxed
);
378 _wq_exiting(struct workqueue
*wq
)
380 return _wq_flags(wq
) & WQ_EXITING
;
384 workq_is_exiting(struct proc
*p
)
386 struct workqueue
*wq
= proc_get_wqptr(p
);
387 return !wq
|| _wq_exiting(wq
);
391 workq_turnstile(struct proc
*p
)
393 struct workqueue
*wq
= proc_get_wqptr(p
);
394 return wq
? wq
->wq_turnstile
: TURNSTILE_NULL
;
397 #pragma mark workqueue lock
400 workq_lock_spin_is_acquired_kdp(struct workqueue
*wq
)
402 return kdp_lck_spin_is_acquired(&wq
->wq_lock
);
406 workq_lock_spin(struct workqueue
*wq
)
408 lck_spin_lock(&wq
->wq_lock
);
412 workq_lock_held(__assert_only
struct workqueue
*wq
)
414 LCK_SPIN_ASSERT(&wq
->wq_lock
, LCK_ASSERT_OWNED
);
418 workq_lock_try(struct workqueue
*wq
)
420 return lck_spin_try_lock(&wq
->wq_lock
);
424 workq_unlock(struct workqueue
*wq
)
426 lck_spin_unlock(&wq
->wq_lock
);
429 #pragma mark idle thread lists
431 #define WORKQ_POLICY_INIT(qos) \
432 (struct uu_workq_policy){ .qos_req = qos, .qos_bucket = qos }
434 static inline thread_qos_t
435 workq_pri_bucket(struct uu_workq_policy req
)
437 return MAX(MAX(req
.qos_req
, req
.qos_max
), req
.qos_override
);
440 static inline thread_qos_t
441 workq_pri_override(struct uu_workq_policy req
)
443 return MAX(workq_pri_bucket(req
), req
.qos_bucket
);
447 workq_thread_needs_params_change(workq_threadreq_t req
, struct uthread
*uth
)
449 workq_threadreq_param_t cur_trp
, req_trp
= { };
451 cur_trp
.trp_value
= uth
->uu_save
.uus_workq_park_data
.workloop_params
;
452 if (req
->tr_flags
& TR_FLAG_WL_PARAMS
) {
453 req_trp
= kqueue_threadreq_workloop_param(req
);
457 * CPU percent flags are handled separately to policy changes, so ignore
458 * them for all of these checks.
460 uint16_t cur_flags
= (cur_trp
.trp_flags
& ~TRP_CPUPERCENT
);
461 uint16_t req_flags
= (req_trp
.trp_flags
& ~TRP_CPUPERCENT
);
463 if (!req_flags
&& !cur_flags
) {
467 if (req_flags
!= cur_flags
) {
471 if ((req_flags
& TRP_PRIORITY
) && req_trp
.trp_pri
!= cur_trp
.trp_pri
) {
475 if ((req_flags
& TRP_POLICY
) && cur_trp
.trp_pol
!= cur_trp
.trp_pol
) {
483 workq_thread_needs_priority_change(workq_threadreq_t req
, struct uthread
*uth
)
485 if (workq_thread_needs_params_change(req
, uth
)) {
489 return req
->tr_qos
!= workq_pri_override(uth
->uu_workq_pri
);
493 workq_thread_update_bucket(proc_t p
, struct workqueue
*wq
, struct uthread
*uth
,
494 struct uu_workq_policy old_pri
, struct uu_workq_policy new_pri
,
497 thread_qos_t old_bucket
= old_pri
.qos_bucket
;
498 thread_qos_t new_bucket
= workq_pri_bucket(new_pri
);
500 if (old_bucket
!= new_bucket
) {
501 _wq_thactive_move(wq
, old_bucket
, new_bucket
);
504 new_pri
.qos_bucket
= new_bucket
;
505 uth
->uu_workq_pri
= new_pri
;
507 if (workq_pri_override(old_pri
) != new_bucket
) {
508 thread_set_workq_override(uth
->uu_thread
, new_bucket
);
511 if (wq
->wq_reqcount
&& (old_bucket
> new_bucket
|| force_run
)) {
512 int flags
= WORKQ_THREADREQ_CAN_CREATE_THREADS
;
513 if (old_bucket
> new_bucket
) {
515 * When lowering our bucket, we may unblock a thread request,
516 * but we can't drop our priority before we have evaluated
517 * whether this is the case, and if we ever drop the workqueue lock
518 * that would cause a priority inversion.
520 * We hence have to disallow thread creation in that case.
524 workq_schedule_creator(p
, wq
, flags
);
529 * Sets/resets the cpu percent limits on the current thread. We can't set
530 * these limits from outside of the current thread, so this function needs
531 * to be called when we're executing on the intended
534 workq_thread_reset_cpupercent(workq_threadreq_t req
, struct uthread
*uth
)
536 assert(uth
== current_uthread());
537 workq_threadreq_param_t trp
= { };
539 if (req
&& (req
->tr_flags
& TR_FLAG_WL_PARAMS
)) {
540 trp
= kqueue_threadreq_workloop_param(req
);
543 if (uth
->uu_workq_flags
& UT_WORKQ_CPUPERCENT
) {
545 * Going through disable when we have an existing CPU percent limit
546 * set will force the ledger to refill the token bucket of the current
547 * thread. Removing any penalty applied by previous thread use.
549 thread_set_cpulimit(THREAD_CPULIMIT_DISABLE
, 0, 0);
550 uth
->uu_workq_flags
&= ~UT_WORKQ_CPUPERCENT
;
553 if (trp
.trp_flags
& TRP_CPUPERCENT
) {
554 thread_set_cpulimit(THREAD_CPULIMIT_BLOCK
, trp
.trp_cpupercent
,
555 (uint64_t)trp
.trp_refillms
* NSEC_PER_SEC
);
556 uth
->uu_workq_flags
|= UT_WORKQ_CPUPERCENT
;
561 workq_thread_reset_pri(struct workqueue
*wq
, struct uthread
*uth
,
562 workq_threadreq_t req
)
564 thread_t th
= uth
->uu_thread
;
565 thread_qos_t qos
= req
? req
->tr_qos
: WORKQ_THREAD_QOS_CLEANUP
;
566 workq_threadreq_param_t trp
= { };
568 int policy
= POLICY_TIMESHARE
;
570 if (req
&& (req
->tr_flags
& TR_FLAG_WL_PARAMS
)) {
571 trp
= kqueue_threadreq_workloop_param(req
);
574 uth
->uu_workq_pri
= WORKQ_POLICY_INIT(qos
);
575 uth
->uu_workq_flags
&= ~UT_WORKQ_OUTSIDE_QOS
;
576 uth
->uu_save
.uus_workq_park_data
.workloop_params
= trp
.trp_value
;
578 // qos sent out to userspace (may differ from uu_workq_pri on param threads)
579 uth
->uu_save
.uus_workq_park_data
.qos
= qos
;
581 if (qos
== WORKQ_THREAD_QOS_MANAGER
) {
582 uint32_t mgr_pri
= wq
->wq_event_manager_priority
;
583 assert(trp
.trp_value
== 0); // manager qos and thread policy don't mix
585 if (mgr_pri
& _PTHREAD_PRIORITY_SCHED_PRI_FLAG
) {
586 mgr_pri
&= _PTHREAD_PRIORITY_SCHED_PRI_MASK
;
587 thread_set_workq_pri(th
, THREAD_QOS_UNSPECIFIED
, mgr_pri
,
592 qos
= _pthread_priority_thread_qos(mgr_pri
);
594 if (trp
.trp_flags
& TRP_PRIORITY
) {
595 qos
= THREAD_QOS_UNSPECIFIED
;
596 priority
= trp
.trp_pri
;
597 uth
->uu_workq_flags
|= UT_WORKQ_OUTSIDE_QOS
;
600 if (trp
.trp_flags
& TRP_POLICY
) {
601 policy
= trp
.trp_pol
;
605 thread_set_workq_pri(th
, qos
, priority
, policy
);
609 * Called by kevent with the NOTE_WL_THREAD_REQUEST knote lock held,
610 * every time a servicer is being told about a new max QoS.
613 workq_thread_set_max_qos(struct proc
*p
, struct kqrequest
*kqr
)
615 struct uu_workq_policy old_pri
, new_pri
;
616 struct uthread
*uth
= get_bsdthread_info(kqr
->kqr_thread
);
617 struct workqueue
*wq
= proc_get_wqptr_fast(p
);
618 thread_qos_t qos
= kqr
->kqr_qos_index
;
620 if (uth
->uu_workq_pri
.qos_max
== qos
)
624 old_pri
= new_pri
= uth
->uu_workq_pri
;
625 new_pri
.qos_max
= qos
;
626 workq_thread_update_bucket(p
, wq
, uth
, old_pri
, new_pri
, false);
630 #pragma mark idle threads accounting and handling
632 static inline struct uthread
*
633 workq_oldest_killable_idle_thread(struct workqueue
*wq
)
635 struct uthread
*uth
= TAILQ_LAST(&wq
->wq_thidlelist
, workq_uthread_head
);
637 if (uth
&& !uth
->uu_save
.uus_workq_park_data
.has_stack
) {
638 uth
= TAILQ_PREV(uth
, workq_uthread_head
, uu_workq_entry
);
640 assert(uth
->uu_save
.uus_workq_park_data
.has_stack
);
646 static inline uint64_t
647 workq_kill_delay_for_idle_thread(struct workqueue
*wq
)
649 uint64_t delay
= wq_reduce_pool_window
.abstime
;
650 uint16_t idle
= wq
->wq_thidlecount
;
653 * If we have less than wq_death_max_load threads, have a 5s timer.
655 * For the next wq_max_constrained_threads ones, decay linearly from
658 if (idle
<= wq_death_max_load
) {
662 if (wq_max_constrained_threads
> idle
- wq_death_max_load
) {
663 delay
*= (wq_max_constrained_threads
- (idle
- wq_death_max_load
));
665 return delay
/ wq_max_constrained_threads
;
669 workq_should_kill_idle_thread(struct workqueue
*wq
, struct uthread
*uth
,
672 uint64_t delay
= workq_kill_delay_for_idle_thread(wq
);
673 return now
- uth
->uu_save
.uus_workq_park_data
.idle_stamp
> delay
;
677 workq_death_call_schedule(struct workqueue
*wq
, uint64_t deadline
)
679 uint32_t wq_flags
= os_atomic_load(&wq
->wq_flags
, relaxed
);
681 if (wq_flags
& (WQ_EXITING
| WQ_DEATH_CALL_SCHEDULED
)) {
684 os_atomic_or(&wq
->wq_flags
, WQ_DEATH_CALL_SCHEDULED
, relaxed
);
686 WQ_TRACE_WQ(TRACE_wq_death_call
| DBG_FUNC_NONE
, wq
, 1, 0, 0, 0);
689 * <rdar://problem/13139182> Due to how long term timers work, the leeway
690 * can't be too short, so use 500ms which is long enough that we will not
691 * wake up the CPU for killing threads, but short enough that it doesn't
692 * fall into long-term timer list shenanigans.
694 thread_call_enter_delayed_with_leeway(wq
->wq_death_call
, NULL
, deadline
,
695 wq_reduce_pool_window
.abstime
/ 10,
696 THREAD_CALL_DELAY_LEEWAY
| THREAD_CALL_DELAY_USER_BACKGROUND
);
700 * `decrement` is set to the number of threads that are no longer dying:
701 * - because they have been resuscitated just in time (workq_pop_idle_thread)
702 * - or have been killed (workq_thread_terminate).
705 workq_death_policy_evaluate(struct workqueue
*wq
, uint16_t decrement
)
709 assert(wq
->wq_thdying_count
>= decrement
);
710 if ((wq
->wq_thdying_count
-= decrement
) > 0)
713 if (wq
->wq_thidlecount
<= 1)
716 if ((uth
= workq_oldest_killable_idle_thread(wq
)) == NULL
)
719 uint64_t now
= mach_absolute_time();
720 uint64_t delay
= workq_kill_delay_for_idle_thread(wq
);
722 if (now
- uth
->uu_save
.uus_workq_park_data
.idle_stamp
> delay
) {
723 WQ_TRACE_WQ(TRACE_wq_thread_terminate
| DBG_FUNC_START
,
724 wq
, wq
->wq_thidlecount
, 0, 0, 0);
725 wq
->wq_thdying_count
++;
726 uth
->uu_workq_flags
|= UT_WORKQ_DYING
;
727 workq_thread_wakeup(uth
);
731 workq_death_call_schedule(wq
,
732 uth
->uu_save
.uus_workq_park_data
.idle_stamp
+ delay
);
736 workq_thread_terminate(struct proc
*p
, struct uthread
*uth
)
738 struct workqueue
*wq
= proc_get_wqptr_fast(p
);
741 TAILQ_REMOVE(&wq
->wq_thrunlist
, uth
, uu_workq_entry
);
742 if (uth
->uu_workq_flags
& UT_WORKQ_DYING
) {
743 WQ_TRACE_WQ(TRACE_wq_thread_terminate
| DBG_FUNC_END
,
744 wq
, wq
->wq_thidlecount
, 0, 0, 0);
745 workq_death_policy_evaluate(wq
, 1);
747 if (wq
->wq_nthreads
-- == wq_max_threads
) {
749 * We got under the thread limit again, which may have prevented
750 * thread creation from happening, redrive if there are pending requests
752 if (wq
->wq_reqcount
) {
753 workq_schedule_creator(p
, wq
, WORKQ_THREADREQ_CAN_CREATE_THREADS
);
758 thread_deallocate(uth
->uu_thread
);
762 workq_kill_old_threads_call(void *param0
, void *param1 __unused
)
764 struct workqueue
*wq
= param0
;
767 WQ_TRACE_WQ(TRACE_wq_death_call
| DBG_FUNC_START
, wq
, 0, 0, 0, 0);
768 os_atomic_and(&wq
->wq_flags
, ~WQ_DEATH_CALL_SCHEDULED
, relaxed
);
769 workq_death_policy_evaluate(wq
, 0);
770 WQ_TRACE_WQ(TRACE_wq_death_call
| DBG_FUNC_END
, wq
, 0, 0, 0, 0);
774 static struct uthread
*
775 workq_pop_idle_thread(struct workqueue
*wq
)
779 if ((uth
= TAILQ_FIRST(&wq
->wq_thidlelist
))) {
780 TAILQ_REMOVE(&wq
->wq_thidlelist
, uth
, uu_workq_entry
);
782 uth
= TAILQ_FIRST(&wq
->wq_thnewlist
);
783 TAILQ_REMOVE(&wq
->wq_thnewlist
, uth
, uu_workq_entry
);
785 TAILQ_INSERT_TAIL(&wq
->wq_thrunlist
, uth
, uu_workq_entry
);
787 assert((uth
->uu_workq_flags
& UT_WORKQ_RUNNING
) == 0);
788 uth
->uu_workq_flags
|= UT_WORKQ_RUNNING
| UT_WORKQ_OVERCOMMIT
;
789 wq
->wq_threads_scheduled
++;
790 wq
->wq_thidlecount
--;
792 if (__improbable(uth
->uu_workq_flags
& UT_WORKQ_DYING
)) {
793 uth
->uu_workq_flags
^= UT_WORKQ_DYING
;
794 workq_death_policy_evaluate(wq
, 1);
800 * Called by thread_create_workq_waiting() during thread initialization, before
801 * assert_wait, before the thread has been started.
804 workq_thread_init_and_wq_lock(task_t task
, thread_t th
)
806 struct uthread
*uth
= get_bsdthread_info(th
);
808 uth
->uu_workq_flags
= UT_WORKQ_NEW
;
809 uth
->uu_workq_pri
= WORKQ_POLICY_INIT(THREAD_QOS_LEGACY
);
810 uth
->uu_workq_thport
= MACH_PORT_NULL
;
811 uth
->uu_workq_stackaddr
= 0;
813 thread_set_tag(th
, THREAD_TAG_PTHREAD
| THREAD_TAG_WORKQUEUE
);
814 thread_reset_workq_qos(th
, THREAD_QOS_LEGACY
);
816 workq_lock_spin(proc_get_wqptr_fast(get_bsdtask_info(task
)));
817 return workq_parked_wait_event(uth
);
821 * Try to add a new workqueue thread.
823 * - called with workq lock held
824 * - dropped and retaken around thread creation
825 * - return with workq lock held
828 workq_add_new_idle_thread(proc_t p
, struct workqueue
*wq
)
830 mach_vm_offset_t th_stackaddr
;
838 vm_map_t vmap
= get_task_map(p
->task
);
840 kret
= pthread_functions
->workq_create_threadstack(p
, vmap
, &th_stackaddr
);
841 if (kret
!= KERN_SUCCESS
) {
842 WQ_TRACE_WQ(TRACE_wq_thread_create_failed
| DBG_FUNC_NONE
, wq
,
847 kret
= thread_create_workq_waiting(p
->task
, workq_unpark_continue
, &th
);
848 if (kret
!= KERN_SUCCESS
) {
849 WQ_TRACE_WQ(TRACE_wq_thread_create_failed
| DBG_FUNC_NONE
, wq
,
851 pthread_functions
->workq_destroy_threadstack(p
, vmap
, th_stackaddr
);
855 // thread_create_workq_waiting() will return with the wq lock held
856 // on success, because it calls workq_thread_init_and_wq_lock() above
858 struct uthread
*uth
= get_bsdthread_info(th
);
861 wq
->wq_thidlecount
++;
862 uth
->uu_workq_stackaddr
= th_stackaddr
;
863 TAILQ_INSERT_TAIL(&wq
->wq_thnewlist
, uth
, uu_workq_entry
);
865 WQ_TRACE_WQ(TRACE_wq_thread_create
| DBG_FUNC_NONE
, wq
, 0, 0, 0, 0);
871 * Do not redrive here if we went under wq_max_threads again,
872 * it is the responsibility of the callers of this function
873 * to do so when it fails.
879 #define WORKQ_UNPARK_FOR_DEATH_WAS_IDLE 0x1
881 __attribute__((noreturn
, noinline
))
883 workq_unpark_for_death_and_unlock(proc_t p
, struct workqueue
*wq
,
884 struct uthread
*uth
, uint32_t death_flags
)
886 thread_qos_t qos
= workq_pri_override(uth
->uu_workq_pri
);
887 bool first_use
= uth
->uu_workq_flags
& UT_WORKQ_NEW
;
889 if (qos
> WORKQ_THREAD_QOS_CLEANUP
) {
890 workq_thread_reset_pri(wq
, uth
, NULL
);
891 qos
= WORKQ_THREAD_QOS_CLEANUP
;
894 workq_thread_reset_cpupercent(NULL
, uth
);
896 if (death_flags
& WORKQ_UNPARK_FOR_DEATH_WAS_IDLE
) {
897 wq
->wq_thidlecount
--;
899 TAILQ_REMOVE(&wq
->wq_thnewlist
, uth
, uu_workq_entry
);
901 TAILQ_REMOVE(&wq
->wq_thidlelist
, uth
, uu_workq_entry
);
904 TAILQ_INSERT_TAIL(&wq
->wq_thrunlist
, uth
, uu_workq_entry
);
908 uint32_t flags
= WQ_FLAG_THREAD_NEWSPI
| qos
| WQ_FLAG_THREAD_PRIO_QOS
;
909 uint32_t setup_flags
= WQ_SETUP_EXIT_THREAD
;
910 thread_t th
= uth
->uu_thread
;
911 vm_map_t vmap
= get_task_map(p
->task
);
913 if (!first_use
) flags
|= WQ_FLAG_THREAD_REUSE
;
915 pthread_functions
->workq_setup_thread(p
, th
, vmap
, uth
->uu_workq_stackaddr
,
916 uth
->uu_workq_thport
, 0, setup_flags
, flags
);
917 __builtin_unreachable();
921 workq_is_current_thread_updating_turnstile(struct workqueue
*wq
)
923 return wq
->wq_turnstile_updater
== current_thread();
926 __attribute__((always_inline
))
928 workq_perform_turnstile_operation_locked(struct workqueue
*wq
,
929 void (^operation
)(void))
932 wq
->wq_turnstile_updater
= current_thread();
934 wq
->wq_turnstile_updater
= THREAD_NULL
;
938 workq_turnstile_update_inheritor(struct workqueue
*wq
,
939 turnstile_inheritor_t inheritor
,
940 turnstile_update_flags_t flags
)
942 workq_perform_turnstile_operation_locked(wq
, ^{
943 turnstile_update_inheritor(wq
->wq_turnstile
, inheritor
,
944 flags
| TURNSTILE_IMMEDIATE_UPDATE
);
945 turnstile_update_inheritor_complete(wq
->wq_turnstile
,
946 TURNSTILE_INTERLOCK_HELD
);
951 workq_push_idle_thread(proc_t p
, struct workqueue
*wq
, struct uthread
*uth
)
953 uint64_t now
= mach_absolute_time();
955 uth
->uu_workq_flags
&= ~UT_WORKQ_RUNNING
;
956 if ((uth
->uu_workq_flags
& UT_WORKQ_OVERCOMMIT
) == 0) {
957 wq
->wq_constrained_threads_scheduled
--;
959 TAILQ_REMOVE(&wq
->wq_thrunlist
, uth
, uu_workq_entry
);
960 wq
->wq_threads_scheduled
--;
962 if (wq
->wq_creator
== uth
) {
963 WQ_TRACE_WQ(TRACE_wq_creator_select
, wq
, 3, 0,
964 uth
->uu_save
.uus_workq_park_data
.yields
, 0);
965 wq
->wq_creator
= NULL
;
966 if (wq
->wq_reqcount
) {
967 workq_turnstile_update_inheritor(wq
, wq
, TURNSTILE_INHERITOR_WORKQ
);
969 workq_turnstile_update_inheritor(wq
, TURNSTILE_INHERITOR_NULL
, 0);
971 if (uth
->uu_workq_flags
& UT_WORKQ_NEW
) {
972 TAILQ_INSERT_TAIL(&wq
->wq_thnewlist
, uth
, uu_workq_entry
);
973 wq
->wq_thidlecount
++;
977 _wq_thactive_dec(wq
, uth
->uu_workq_pri
.qos_bucket
);
978 wq
->wq_thscheduled_count
[_wq_bucket(uth
->uu_workq_pri
.qos_bucket
)]--;
979 assert(!(uth
->uu_workq_flags
& UT_WORKQ_NEW
));
980 uth
->uu_workq_flags
|= UT_WORKQ_IDLE_CLEANUP
;
983 uth
->uu_save
.uus_workq_park_data
.idle_stamp
= now
;
985 struct uthread
*oldest
= workq_oldest_killable_idle_thread(wq
);
986 uint16_t cur_idle
= wq
->wq_thidlecount
;
988 if (cur_idle
>= wq_max_constrained_threads
||
989 (wq
->wq_thdying_count
== 0 && oldest
&&
990 workq_should_kill_idle_thread(wq
, oldest
, now
))) {
992 * Immediately kill threads if we have too may of them.
994 * And swap "place" with the oldest one we'd have woken up.
995 * This is a relatively desperate situation where we really
996 * need to kill threads quickly and it's best to kill
997 * the one that's currently on core than context switching.
1000 oldest
->uu_save
.uus_workq_park_data
.idle_stamp
= now
;
1001 TAILQ_REMOVE(&wq
->wq_thidlelist
, oldest
, uu_workq_entry
);
1002 TAILQ_INSERT_HEAD(&wq
->wq_thidlelist
, oldest
, uu_workq_entry
);
1005 WQ_TRACE_WQ(TRACE_wq_thread_terminate
| DBG_FUNC_START
,
1006 wq
, cur_idle
, 0, 0, 0);
1007 wq
->wq_thdying_count
++;
1008 uth
->uu_workq_flags
|= UT_WORKQ_DYING
;
1009 uth
->uu_workq_flags
&= ~UT_WORKQ_IDLE_CLEANUP
;
1010 workq_unpark_for_death_and_unlock(p
, wq
, uth
, 0);
1011 __builtin_unreachable();
1014 struct uthread
*tail
= TAILQ_LAST(&wq
->wq_thidlelist
, workq_uthread_head
);
1017 wq
->wq_thidlecount
= cur_idle
;
1019 if (cur_idle
>= wq_death_max_load
&& tail
&&
1020 tail
->uu_save
.uus_workq_park_data
.has_stack
) {
1021 uth
->uu_save
.uus_workq_park_data
.has_stack
= false;
1022 TAILQ_INSERT_TAIL(&wq
->wq_thidlelist
, uth
, uu_workq_entry
);
1024 uth
->uu_save
.uus_workq_park_data
.has_stack
= true;
1025 TAILQ_INSERT_HEAD(&wq
->wq_thidlelist
, uth
, uu_workq_entry
);
1029 uint64_t delay
= workq_kill_delay_for_idle_thread(wq
);
1030 workq_death_call_schedule(wq
, now
+ delay
);
1034 #pragma mark thread requests
1037 workq_priority_for_req(workq_threadreq_t req
)
1039 thread_qos_t qos
= req
->tr_qos
;
1041 if (req
->tr_flags
& TR_FLAG_WL_OUTSIDE_QOS
) {
1042 workq_threadreq_param_t trp
= kqueue_threadreq_workloop_param(req
);
1043 assert(trp
.trp_flags
& TRP_PRIORITY
);
1046 return thread_workq_pri_for_qos(qos
);
1049 static inline struct priority_queue
*
1050 workq_priority_queue_for_req(struct workqueue
*wq
, workq_threadreq_t req
)
1052 if (req
->tr_flags
& TR_FLAG_WL_OUTSIDE_QOS
) {
1053 return &wq
->wq_special_queue
;
1054 } else if (req
->tr_flags
& TR_FLAG_OVERCOMMIT
) {
1055 return &wq
->wq_overcommit_queue
;
1057 return &wq
->wq_constrained_queue
;
1062 * returns true if the the enqueued request is the highest priority item
1063 * in its priority queue.
1066 workq_threadreq_enqueue(struct workqueue
*wq
, workq_threadreq_t req
)
1068 assert(req
->tr_state
== TR_STATE_NEW
);
1070 req
->tr_state
= TR_STATE_QUEUED
;
1071 wq
->wq_reqcount
+= req
->tr_count
;
1073 if (req
->tr_qos
== WORKQ_THREAD_QOS_MANAGER
) {
1074 assert(wq
->wq_event_manager_threadreq
== NULL
);
1075 assert(req
->tr_flags
& TR_FLAG_KEVENT
);
1076 assert(req
->tr_count
== 1);
1077 wq
->wq_event_manager_threadreq
= req
;
1080 if (priority_queue_insert(workq_priority_queue_for_req(wq
, req
),
1081 &req
->tr_entry
, workq_priority_for_req(req
),
1082 PRIORITY_QUEUE_SCHED_PRI_MAX_HEAP_COMPARE
)) {
1083 if ((req
->tr_flags
& TR_FLAG_OVERCOMMIT
) == 0) {
1084 _wq_thactive_refresh_best_constrained_req_qos(wq
);
1092 * returns true if the the dequeued request was the highest priority item
1093 * in its priority queue.
1096 workq_threadreq_dequeue(struct workqueue
*wq
, workq_threadreq_t req
)
1100 if (--req
->tr_count
== 0) {
1101 if (req
->tr_qos
== WORKQ_THREAD_QOS_MANAGER
) {
1102 assert(wq
->wq_event_manager_threadreq
== req
);
1103 assert(req
->tr_count
== 0);
1104 wq
->wq_event_manager_threadreq
= NULL
;
1107 if (priority_queue_remove(workq_priority_queue_for_req(wq
, req
),
1108 &req
->tr_entry
, PRIORITY_QUEUE_SCHED_PRI_MAX_HEAP_COMPARE
)) {
1109 if ((req
->tr_flags
& TR_FLAG_OVERCOMMIT
) == 0) {
1110 _wq_thactive_refresh_best_constrained_req_qos(wq
);
1119 workq_threadreq_destroy(proc_t p
, workq_threadreq_t req
)
1121 req
->tr_state
= TR_STATE_IDLE
;
1122 if (req
->tr_flags
& (TR_FLAG_WORKLOOP
| TR_FLAG_KEVENT
)) {
1123 kqueue_threadreq_cancel(p
, req
);
1125 zfree(workq_zone_threadreq
, req
);
1130 * Mark a thread request as complete. At this point, it is treated as owned by
1131 * the submitting subsystem and you should assume it could be freed.
1133 * Called with the workqueue lock held.
1136 workq_threadreq_bind_and_unlock(proc_t p
, struct workqueue
*wq
,
1137 workq_threadreq_t req
, struct uthread
*uth
)
1139 uint8_t tr_flags
= req
->tr_flags
;
1140 bool needs_commit
= false;
1141 int creator_flags
= 0;
1145 if (req
->tr_state
== TR_STATE_QUEUED
) {
1146 workq_threadreq_dequeue(wq
, req
);
1147 creator_flags
= WORKQ_THREADREQ_CAN_CREATE_THREADS
;
1150 if (wq
->wq_creator
== uth
) {
1151 WQ_TRACE_WQ(TRACE_wq_creator_select
, wq
, 4, 0,
1152 uth
->uu_save
.uus_workq_park_data
.yields
, 0);
1153 creator_flags
= WORKQ_THREADREQ_CAN_CREATE_THREADS
|
1154 WORKQ_THREADREQ_CREATOR_TRANSFER
;
1155 wq
->wq_creator
= NULL
;
1156 _wq_thactive_inc(wq
, req
->tr_qos
);
1157 wq
->wq_thscheduled_count
[_wq_bucket(req
->tr_qos
)]++;
1158 } else if (uth
->uu_workq_pri
.qos_bucket
!= req
->tr_qos
) {
1159 _wq_thactive_move(wq
, uth
->uu_workq_pri
.qos_bucket
, req
->tr_qos
);
1161 workq_thread_reset_pri(wq
, uth
, req
);
1163 if (tr_flags
& TR_FLAG_OVERCOMMIT
) {
1164 if ((uth
->uu_workq_flags
& UT_WORKQ_OVERCOMMIT
) == 0) {
1165 uth
->uu_workq_flags
|= UT_WORKQ_OVERCOMMIT
;
1166 wq
->wq_constrained_threads_scheduled
--;
1169 if ((uth
->uu_workq_flags
& UT_WORKQ_OVERCOMMIT
) != 0) {
1170 uth
->uu_workq_flags
&= ~UT_WORKQ_OVERCOMMIT
;
1171 wq
->wq_constrained_threads_scheduled
++;
1175 if (tr_flags
& (TR_FLAG_KEVENT
| TR_FLAG_WORKLOOP
)) {
1176 if (req
->tr_state
== TR_STATE_NEW
) {
1178 * We're called from workq_kern_threadreq_initiate()
1179 * due to an unbind, with the kq req held.
1181 assert(!creator_flags
);
1182 req
->tr_state
= TR_STATE_IDLE
;
1183 kqueue_threadreq_bind(p
, req
, uth
->uu_thread
, 0);
1185 assert(req
->tr_count
== 0);
1186 workq_perform_turnstile_operation_locked(wq
, ^{
1187 kqueue_threadreq_bind_prepost(p
, req
, uth
->uu_thread
);
1189 needs_commit
= true;
1192 } else if (req
->tr_count
> 0) {
1196 if (creator_flags
) {
1197 /* This can drop the workqueue lock, and take it again */
1198 workq_schedule_creator(p
, wq
, creator_flags
);
1204 zfree(workq_zone_threadreq
, req
);
1207 kqueue_threadreq_bind_commit(p
, uth
->uu_thread
);
1213 uint32_t upcall_flags
= WQ_FLAG_THREAD_NEWSPI
;
1214 if (uth
->uu_workq_pri
.qos_bucket
== WORKQ_THREAD_QOS_MANAGER
) {
1215 upcall_flags
|= WQ_FLAG_THREAD_EVENT_MANAGER
;
1216 } else if (tr_flags
& TR_FLAG_OVERCOMMIT
) {
1217 upcall_flags
|= WQ_FLAG_THREAD_OVERCOMMIT
;
1219 if (tr_flags
& TR_FLAG_KEVENT
) {
1220 upcall_flags
|= WQ_FLAG_THREAD_KEVENT
;
1222 if (tr_flags
& TR_FLAG_WORKLOOP
) {
1223 upcall_flags
|= WQ_FLAG_THREAD_WORKLOOP
| WQ_FLAG_THREAD_KEVENT
;
1225 uth
->uu_save
.uus_workq_park_data
.upcall_flags
= upcall_flags
;
1228 #pragma mark workqueue thread creation thread calls
1231 workq_thread_call_prepost(struct workqueue
*wq
, uint32_t sched
, uint32_t pend
,
1234 uint32_t old_flags
, new_flags
;
1236 os_atomic_rmw_loop(&wq
->wq_flags
, old_flags
, new_flags
, acquire
, {
1237 if (__improbable(old_flags
& (WQ_EXITING
| sched
| pend
| fail_mask
))) {
1238 os_atomic_rmw_loop_give_up(return false);
1240 if (__improbable(old_flags
& WQ_PROC_SUSPENDED
)) {
1241 new_flags
= old_flags
| pend
;
1243 new_flags
= old_flags
| sched
;
1247 return (old_flags
& WQ_PROC_SUSPENDED
) == 0;
1250 #define WORKQ_SCHEDULE_DELAYED_THREAD_CREATION_RESTART 0x1
1253 workq_schedule_delayed_thread_creation(struct workqueue
*wq
, int flags
)
1255 assert(!preemption_enabled());
1257 if (!workq_thread_call_prepost(wq
, WQ_DELAYED_CALL_SCHEDULED
,
1258 WQ_DELAYED_CALL_PENDED
, WQ_IMMEDIATE_CALL_PENDED
|
1259 WQ_IMMEDIATE_CALL_SCHEDULED
)) {
1263 uint64_t now
= mach_absolute_time();
1265 if (flags
& WORKQ_SCHEDULE_DELAYED_THREAD_CREATION_RESTART
) {
1266 /* do not change the window */
1267 } else if (now
- wq
->wq_thread_call_last_run
<= wq
->wq_timer_interval
) {
1268 wq
->wq_timer_interval
*= 2;
1269 if (wq
->wq_timer_interval
> wq_max_timer_interval
.abstime
) {
1270 wq
->wq_timer_interval
= wq_max_timer_interval
.abstime
;
1272 } else if (now
- wq
->wq_thread_call_last_run
> 2 * wq
->wq_timer_interval
) {
1273 wq
->wq_timer_interval
/= 2;
1274 if (wq
->wq_timer_interval
< wq_stalled_window
.abstime
) {
1275 wq
->wq_timer_interval
= wq_stalled_window
.abstime
;
1279 WQ_TRACE_WQ(TRACE_wq_start_add_timer
, wq
, wq
->wq_reqcount
,
1280 _wq_flags(wq
), wq
->wq_timer_interval
, 0);
1282 thread_call_t call
= wq
->wq_delayed_call
;
1283 uintptr_t arg
= WQ_DELAYED_CALL_SCHEDULED
;
1284 uint64_t deadline
= now
+ wq
->wq_timer_interval
;
1285 if (thread_call_enter1_delayed(call
, (void *)arg
, deadline
)) {
1286 panic("delayed_call was already enqueued");
1292 workq_schedule_immediate_thread_creation(struct workqueue
*wq
)
1294 assert(!preemption_enabled());
1296 if (workq_thread_call_prepost(wq
, WQ_IMMEDIATE_CALL_SCHEDULED
,
1297 WQ_IMMEDIATE_CALL_PENDED
, 0)) {
1298 WQ_TRACE_WQ(TRACE_wq_start_add_timer
, wq
, wq
->wq_reqcount
,
1299 _wq_flags(wq
), 0, 0);
1301 uintptr_t arg
= WQ_IMMEDIATE_CALL_SCHEDULED
;
1302 if (thread_call_enter1(wq
->wq_immediate_call
, (void *)arg
)) {
1303 panic("immediate_call was already enqueued");
1309 workq_proc_suspended(struct proc
*p
)
1311 struct workqueue
*wq
= proc_get_wqptr(p
);
1313 if (wq
) os_atomic_or(&wq
->wq_flags
, WQ_PROC_SUSPENDED
, relaxed
);
1317 workq_proc_resumed(struct proc
*p
)
1319 struct workqueue
*wq
= proc_get_wqptr(p
);
1324 wq_flags
= os_atomic_and_orig(&wq
->wq_flags
, ~(WQ_PROC_SUSPENDED
|
1325 WQ_DELAYED_CALL_PENDED
| WQ_IMMEDIATE_CALL_PENDED
), relaxed
);
1326 if ((wq_flags
& WQ_EXITING
) == 0) {
1327 disable_preemption();
1328 if (wq_flags
& WQ_IMMEDIATE_CALL_PENDED
) {
1329 workq_schedule_immediate_thread_creation(wq
);
1330 } else if (wq_flags
& WQ_DELAYED_CALL_PENDED
) {
1331 workq_schedule_delayed_thread_creation(wq
,
1332 WORKQ_SCHEDULE_DELAYED_THREAD_CREATION_RESTART
);
1334 enable_preemption();
1339 * returns whether lastblocked_tsp is within wq_stalled_window usecs of now
1342 workq_thread_is_busy(uint64_t now
, _Atomic
uint64_t *lastblocked_tsp
)
1344 uint64_t lastblocked_ts
= os_atomic_load(lastblocked_tsp
, relaxed
);
1345 if (now
<= lastblocked_ts
) {
1347 * Because the update of the timestamp when a thread blocks
1348 * isn't serialized against us looking at it (i.e. we don't hold
1349 * the workq lock), it's possible to have a timestamp that matches
1350 * the current time or that even looks to be in the future relative
1351 * to when we grabbed the current time...
1353 * Just treat this as a busy thread since it must have just blocked.
1357 return (now
- lastblocked_ts
) < wq_stalled_window
.abstime
;
1361 workq_add_new_threads_call(void *_p
, void *flags
)
1364 struct workqueue
*wq
= proc_get_wqptr(p
);
1365 uint32_t my_flag
= (uint32_t)(uintptr_t)flags
;
1368 * workq_exit() will set the workqueue to NULL before
1369 * it cancels thread calls.
1373 assert((my_flag
== WQ_DELAYED_CALL_SCHEDULED
) ||
1374 (my_flag
== WQ_IMMEDIATE_CALL_SCHEDULED
));
1376 WQ_TRACE_WQ(TRACE_wq_add_timer
| DBG_FUNC_START
, wq
, _wq_flags(wq
),
1377 wq
->wq_nthreads
, wq
->wq_thidlecount
, 0);
1379 workq_lock_spin(wq
);
1381 wq
->wq_thread_call_last_run
= mach_absolute_time();
1382 os_atomic_and(&wq
->wq_flags
, ~my_flag
, release
);
1384 /* This can drop the workqueue lock, and take it again */
1385 workq_schedule_creator(p
, wq
, WORKQ_THREADREQ_CAN_CREATE_THREADS
);
1389 WQ_TRACE_WQ(TRACE_wq_add_timer
| DBG_FUNC_END
, wq
, 0,
1390 wq
->wq_nthreads
, wq
->wq_thidlecount
, 0);
1393 #pragma mark thread state tracking
1396 workq_sched_callback(int type
, thread_t thread
)
1398 struct uthread
*uth
= get_bsdthread_info(thread
);
1399 proc_t proc
= get_bsdtask_info(get_threadtask(thread
));
1400 struct workqueue
*wq
= proc_get_wqptr(proc
);
1401 thread_qos_t req_qos
, qos
= uth
->uu_workq_pri
.qos_bucket
;
1402 wq_thactive_t old_thactive
;
1403 bool start_timer
= false;
1405 if (qos
== WORKQ_THREAD_QOS_MANAGER
) {
1410 case SCHED_CALL_BLOCK
:
1411 old_thactive
= _wq_thactive_dec(wq
, qos
);
1412 req_qos
= WQ_THACTIVE_BEST_CONSTRAINED_REQ_QOS(old_thactive
);
1415 * Remember the timestamp of the last thread that blocked in this
1416 * bucket, it used used by admission checks to ignore one thread
1417 * being inactive if this timestamp is recent enough.
1419 * If we collide with another thread trying to update the
1420 * last_blocked (really unlikely since another thread would have to
1421 * get scheduled and then block after we start down this path), it's
1422 * not a problem. Either timestamp is adequate, so no need to retry
1424 os_atomic_store(&wq
->wq_lastblocked_ts
[_wq_bucket(qos
)],
1425 thread_last_run_time(thread
), relaxed
);
1427 if (req_qos
== THREAD_QOS_UNSPECIFIED
) {
1429 * No pending request at the moment we could unblock, move on.
1431 } else if (qos
< req_qos
) {
1433 * The blocking thread is at a lower QoS than the highest currently
1434 * pending constrained request, nothing has to be redriven
1437 uint32_t max_busycount
, old_req_count
;
1438 old_req_count
= _wq_thactive_aggregate_downto_qos(wq
, old_thactive
,
1439 req_qos
, NULL
, &max_busycount
);
1441 * If it is possible that may_start_constrained_thread had refused
1442 * admission due to being over the max concurrency, we may need to
1443 * spin up a new thread.
1445 * We take into account the maximum number of busy threads
1446 * that can affect may_start_constrained_thread as looking at the
1447 * actual number may_start_constrained_thread will see is racy.
1449 * IOW at NCPU = 4, for IN (req_qos = 1), if the old req count is
1450 * between NCPU (4) and NCPU - 2 (2) we need to redrive.
1452 uint32_t conc
= wq_max_parallelism
[_wq_bucket(qos
)];
1453 if (old_req_count
<= conc
&& conc
<= old_req_count
+ max_busycount
) {
1454 start_timer
= workq_schedule_delayed_thread_creation(wq
, 0);
1457 if (__improbable(kdebug_enable
)) {
1458 __unused
uint32_t old
= _wq_thactive_aggregate_downto_qos(wq
,
1459 old_thactive
, qos
, NULL
, NULL
);
1460 WQ_TRACE_WQ(TRACE_wq_thread_block
| DBG_FUNC_START
, wq
,
1461 old
- 1, qos
| (req_qos
<< 8),
1462 wq
->wq_reqcount
<< 1 | start_timer
, 0);
1466 case SCHED_CALL_UNBLOCK
:
1468 * we cannot take the workqueue_lock here...
1469 * an UNBLOCK can occur from a timer event which
1470 * is run from an interrupt context... if the workqueue_lock
1471 * is already held by this processor, we'll deadlock...
1472 * the thread lock for the thread being UNBLOCKED
1475 old_thactive
= _wq_thactive_inc(wq
, qos
);
1476 if (__improbable(kdebug_enable
)) {
1477 __unused
uint32_t old
= _wq_thactive_aggregate_downto_qos(wq
,
1478 old_thactive
, qos
, NULL
, NULL
);
1479 req_qos
= WQ_THACTIVE_BEST_CONSTRAINED_REQ_QOS(old_thactive
);
1480 WQ_TRACE_WQ(TRACE_wq_thread_block
| DBG_FUNC_END
, wq
,
1481 old
+ 1, qos
| (req_qos
<< 8),
1482 wq
->wq_threads_scheduled
, 0);
1488 #pragma mark workq lifecycle
1491 workq_reference(struct workqueue
*wq
)
1493 os_ref_retain(&wq
->wq_refcnt
);
1497 workq_destroy(struct workqueue
*wq
)
1499 struct turnstile
*ts
;
1501 turnstile_complete((uintptr_t)wq
, &wq
->wq_turnstile
, &ts
);
1503 turnstile_cleanup();
1504 turnstile_deallocate(ts
);
1506 lck_spin_destroy(&wq
->wq_lock
, workq_lck_grp
);
1507 zfree(workq_zone_workqueue
, wq
);
1511 workq_deallocate(struct workqueue
*wq
)
1513 if (os_ref_release_relaxed(&wq
->wq_refcnt
) == 0) {
1519 workq_deallocate_safe(struct workqueue
*wq
)
1521 if (__improbable(os_ref_release_relaxed(&wq
->wq_refcnt
) == 0)) {
1522 workq_deallocate_enqueue(wq
);
1527 * Setup per-process state for the workqueue.
1530 workq_open(struct proc
*p
, __unused
struct workq_open_args
*uap
,
1531 __unused
int32_t *retval
)
1533 struct workqueue
*wq
;
1536 if ((p
->p_lflag
& P_LREGISTER
) == 0) {
1540 if (wq_init_constrained_limit
) {
1541 uint32_t limit
, num_cpus
= ml_get_max_cpus();
1544 * set up the limit for the constrained pool
1545 * this is a virtual pool in that we don't
1546 * maintain it on a separate idle and run list
1548 limit
= num_cpus
* WORKQUEUE_CONSTRAINED_FACTOR
;
1550 if (limit
> wq_max_constrained_threads
)
1551 wq_max_constrained_threads
= limit
;
1553 if (wq_max_threads
> WQ_THACTIVE_BUCKET_HALF
) {
1554 wq_max_threads
= WQ_THACTIVE_BUCKET_HALF
;
1556 if (wq_max_threads
> CONFIG_THREAD_MAX
- 20) {
1557 wq_max_threads
= CONFIG_THREAD_MAX
- 20;
1560 wq_death_max_load
= (uint16_t)fls(num_cpus
) + 1;
1562 for (thread_qos_t qos
= WORKQ_THREAD_QOS_MIN
; qos
<= WORKQ_THREAD_QOS_MAX
; qos
++) {
1563 wq_max_parallelism
[_wq_bucket(qos
)] =
1564 qos_max_parallelism(qos
, QOS_PARALLELISM_COUNT_LOGICAL
);
1567 wq_init_constrained_limit
= 0;
1570 if (proc_get_wqptr(p
) == NULL
) {
1571 if (proc_init_wqptr_or_wait(p
) == FALSE
) {
1572 assert(proc_get_wqptr(p
) != NULL
);
1576 wq
= (struct workqueue
*)zalloc(workq_zone_workqueue
);
1577 bzero(wq
, sizeof(struct workqueue
));
1579 os_ref_init_count(&wq
->wq_refcnt
, &workq_refgrp
, 1);
1581 // Start the event manager at the priority hinted at by the policy engine
1582 thread_qos_t mgr_priority_hint
= task_get_default_manager_qos(current_task());
1583 pthread_priority_t pp
= _pthread_priority_make_from_thread_qos(mgr_priority_hint
, 0, 0);
1584 wq
->wq_event_manager_priority
= (uint32_t)pp
;
1585 wq
->wq_timer_interval
= wq_stalled_window
.abstime
;
1587 turnstile_prepare((uintptr_t)wq
, &wq
->wq_turnstile
, turnstile_alloc(),
1590 TAILQ_INIT(&wq
->wq_thrunlist
);
1591 TAILQ_INIT(&wq
->wq_thnewlist
);
1592 TAILQ_INIT(&wq
->wq_thidlelist
);
1593 priority_queue_init(&wq
->wq_overcommit_queue
,
1594 PRIORITY_QUEUE_BUILTIN_MAX_HEAP
);
1595 priority_queue_init(&wq
->wq_constrained_queue
,
1596 PRIORITY_QUEUE_BUILTIN_MAX_HEAP
);
1597 priority_queue_init(&wq
->wq_special_queue
,
1598 PRIORITY_QUEUE_BUILTIN_MAX_HEAP
);
1600 wq
->wq_delayed_call
= thread_call_allocate_with_options(
1601 workq_add_new_threads_call
, p
, THREAD_CALL_PRIORITY_KERNEL
,
1602 THREAD_CALL_OPTIONS_ONCE
);
1603 wq
->wq_immediate_call
= thread_call_allocate_with_options(
1604 workq_add_new_threads_call
, p
, THREAD_CALL_PRIORITY_KERNEL
,
1605 THREAD_CALL_OPTIONS_ONCE
);
1606 wq
->wq_death_call
= thread_call_allocate_with_options(
1607 workq_kill_old_threads_call
, wq
,
1608 THREAD_CALL_PRIORITY_USER
, THREAD_CALL_OPTIONS_ONCE
);
1610 lck_spin_init(&wq
->wq_lock
, workq_lck_grp
, workq_lck_attr
);
1612 WQ_TRACE_WQ(TRACE_wq_create
| DBG_FUNC_NONE
, wq
,
1613 VM_KERNEL_ADDRHIDE(wq
), 0, 0, 0);
1614 proc_set_wqptr(p
, wq
);
1622 * Routine: workq_mark_exiting
1624 * Function: Mark the work queue such that new threads will not be added to the
1625 * work queue after we return.
1627 * Conditions: Called against the current process.
1630 workq_mark_exiting(struct proc
*p
)
1632 struct workqueue
*wq
= proc_get_wqptr(p
);
1634 workq_threadreq_t mgr_req
;
1638 WQ_TRACE_WQ(TRACE_wq_pthread_exit
|DBG_FUNC_START
, wq
, 0, 0, 0, 0);
1640 workq_lock_spin(wq
);
1642 wq_flags
= os_atomic_or_orig(&wq
->wq_flags
, WQ_EXITING
, relaxed
);
1643 if (__improbable(wq_flags
& WQ_EXITING
)) {
1644 panic("workq_mark_exiting called twice");
1648 * Opportunistically try to cancel thread calls that are likely in flight.
1649 * workq_exit() will do the proper cleanup.
1651 if (wq_flags
& WQ_IMMEDIATE_CALL_SCHEDULED
) {
1652 thread_call_cancel(wq
->wq_immediate_call
);
1654 if (wq_flags
& WQ_DELAYED_CALL_SCHEDULED
) {
1655 thread_call_cancel(wq
->wq_delayed_call
);
1657 if (wq_flags
& WQ_DEATH_CALL_SCHEDULED
) {
1658 thread_call_cancel(wq
->wq_death_call
);
1661 mgr_req
= wq
->wq_event_manager_threadreq
;
1662 wq
->wq_event_manager_threadreq
= NULL
;
1663 wq
->wq_reqcount
= 0; /* workq_schedule_creator must not look at queues */
1664 workq_turnstile_update_inheritor(wq
, NULL
, 0);
1669 kqueue_threadreq_cancel(p
, mgr_req
);
1672 * No one touches the priority queues once WQ_EXITING is set.
1673 * It is hence safe to do the tear down without holding any lock.
1675 priority_queue_destroy(&wq
->wq_overcommit_queue
,
1676 struct workq_threadreq_s
, tr_entry
, ^(void *e
){
1677 workq_threadreq_destroy(p
, e
);
1679 priority_queue_destroy(&wq
->wq_constrained_queue
,
1680 struct workq_threadreq_s
, tr_entry
, ^(void *e
){
1681 workq_threadreq_destroy(p
, e
);
1683 priority_queue_destroy(&wq
->wq_special_queue
,
1684 struct workq_threadreq_s
, tr_entry
, ^(void *e
){
1685 workq_threadreq_destroy(p
, e
);
1688 WQ_TRACE(TRACE_wq_pthread_exit
|DBG_FUNC_END
, 0, 0, 0, 0, 0);
1692 * Routine: workq_exit
1694 * Function: clean up the work queue structure(s) now that there are no threads
1695 * left running inside the work queue (except possibly current_thread).
1697 * Conditions: Called by the last thread in the process.
1698 * Called against current process.
1701 workq_exit(struct proc
*p
)
1703 struct workqueue
*wq
;
1704 struct uthread
*uth
, *tmp
;
1706 wq
= os_atomic_xchg(&p
->p_wqptr
, NULL
, relaxed
);
1708 thread_t th
= current_thread();
1710 WQ_TRACE_WQ(TRACE_wq_workqueue_exit
|DBG_FUNC_START
, wq
, 0, 0, 0, 0);
1712 if (thread_get_tag(th
) & THREAD_TAG_WORKQUEUE
) {
1714 * <rdar://problem/40111515> Make sure we will no longer call the
1715 * sched call, if we ever block this thread, which the cancel_wait
1718 thread_sched_call(th
, NULL
);
1722 * Thread calls are always scheduled by the proc itself or under the
1723 * workqueue spinlock if WQ_EXITING is not yet set.
1725 * Either way, when this runs, the proc has no threads left beside
1726 * the one running this very code, so we know no thread call can be
1727 * dispatched anymore.
1729 thread_call_cancel_wait(wq
->wq_delayed_call
);
1730 thread_call_cancel_wait(wq
->wq_immediate_call
);
1731 thread_call_cancel_wait(wq
->wq_death_call
);
1732 thread_call_free(wq
->wq_delayed_call
);
1733 thread_call_free(wq
->wq_immediate_call
);
1734 thread_call_free(wq
->wq_death_call
);
1737 * Clean up workqueue data structures for threads that exited and
1738 * didn't get a chance to clean up after themselves.
1740 * idle/new threads should have been interrupted and died on their own
1742 TAILQ_FOREACH_SAFE(uth
, &wq
->wq_thrunlist
, uu_workq_entry
, tmp
) {
1743 thread_sched_call(uth
->uu_thread
, NULL
);
1744 thread_deallocate(uth
->uu_thread
);
1746 assert(TAILQ_EMPTY(&wq
->wq_thnewlist
));
1747 assert(TAILQ_EMPTY(&wq
->wq_thidlelist
));
1749 WQ_TRACE_WQ(TRACE_wq_destroy
| DBG_FUNC_END
, wq
,
1750 VM_KERNEL_ADDRHIDE(wq
), 0, 0, 0);
1752 workq_deallocate(wq
);
1754 WQ_TRACE(TRACE_wq_workqueue_exit
|DBG_FUNC_END
, 0, 0, 0, 0, 0);
1759 #pragma mark bsd thread control
1762 _pthread_priority_to_policy(pthread_priority_t priority
,
1763 thread_qos_policy_data_t
*data
)
1765 data
->qos_tier
= _pthread_priority_thread_qos(priority
);
1766 data
->tier_importance
= _pthread_priority_relpri(priority
);
1767 if (data
->qos_tier
== THREAD_QOS_UNSPECIFIED
|| data
->tier_importance
> 0 ||
1768 data
->tier_importance
< THREAD_QOS_MIN_TIER_IMPORTANCE
) {
1775 bsdthread_set_self(proc_t p
, thread_t th
, pthread_priority_t priority
,
1776 mach_port_name_t voucher
, enum workq_set_self_flags flags
)
1778 struct uthread
*uth
= get_bsdthread_info(th
);
1779 struct workqueue
*wq
= proc_get_wqptr(p
);
1782 int unbind_rv
= 0, qos_rv
= 0, voucher_rv
= 0, fixedpri_rv
= 0;
1783 bool is_wq_thread
= (thread_get_tag(th
) & THREAD_TAG_WORKQUEUE
);
1785 if (flags
& WORKQ_SET_SELF_WQ_KEVENT_UNBIND
) {
1786 if (!is_wq_thread
) {
1791 if (uth
->uu_workq_pri
.qos_bucket
== WORKQ_THREAD_QOS_MANAGER
) {
1796 struct kqrequest
*kqr
= uth
->uu_kqr_bound
;
1798 unbind_rv
= EALREADY
;
1802 if (kqr
->kqr_state
& KQR_WORKLOOP
) {
1807 kqueue_threadreq_unbind(p
, uth
->uu_kqr_bound
);
1811 if (flags
& WORKQ_SET_SELF_QOS_FLAG
) {
1812 thread_qos_policy_data_t new_policy
;
1814 if (!_pthread_priority_to_policy(priority
, &new_policy
)) {
1819 if (!is_wq_thread
) {
1821 * Threads opted out of QoS can't change QoS
1823 if (!thread_has_qos_policy(th
)) {
1827 } else if (uth
->uu_workq_pri
.qos_bucket
== WORKQ_THREAD_QOS_MANAGER
) {
1829 * Workqueue manager threads can't change QoS
1835 * For workqueue threads, possibly adjust buckets and redrive thread
1838 bool old_overcommit
= uth
->uu_workq_flags
& UT_WORKQ_OVERCOMMIT
;
1839 bool new_overcommit
= priority
& _PTHREAD_PRIORITY_OVERCOMMIT_FLAG
;
1840 struct uu_workq_policy old_pri
, new_pri
;
1841 bool force_run
= false;
1843 workq_lock_spin(wq
);
1845 if (old_overcommit
!= new_overcommit
) {
1846 uth
->uu_workq_flags
^= UT_WORKQ_OVERCOMMIT
;
1847 if (old_overcommit
) {
1848 wq
->wq_constrained_threads_scheduled
++;
1849 } else if (wq
->wq_constrained_threads_scheduled
-- ==
1850 wq_max_constrained_threads
) {
1855 old_pri
= new_pri
= uth
->uu_workq_pri
;
1856 new_pri
.qos_req
= new_policy
.qos_tier
;
1857 workq_thread_update_bucket(p
, wq
, uth
, old_pri
, new_pri
, force_run
);
1861 kr
= thread_policy_set_internal(th
, THREAD_QOS_POLICY
,
1862 (thread_policy_t
)&new_policy
, THREAD_QOS_POLICY_COUNT
);
1863 if (kr
!= KERN_SUCCESS
) {
1869 if (flags
& WORKQ_SET_SELF_VOUCHER_FLAG
) {
1870 kr
= thread_set_voucher_name(voucher
);
1871 if (kr
!= KERN_SUCCESS
) {
1872 voucher_rv
= ENOENT
;
1878 if (qos_rv
) goto done
;
1879 if (flags
& WORKQ_SET_SELF_FIXEDPRIORITY_FLAG
) {
1880 thread_extended_policy_data_t extpol
= {.timeshare
= 0};
1883 /* Not allowed on workqueue threads */
1884 fixedpri_rv
= ENOTSUP
;
1888 kr
= thread_policy_set_internal(th
, THREAD_EXTENDED_POLICY
,
1889 (thread_policy_t
)&extpol
, THREAD_EXTENDED_POLICY_COUNT
);
1890 if (kr
!= KERN_SUCCESS
) {
1891 fixedpri_rv
= EINVAL
;
1894 } else if (flags
& WORKQ_SET_SELF_TIMESHARE_FLAG
) {
1895 thread_extended_policy_data_t extpol
= {.timeshare
= 1};
1898 /* Not allowed on workqueue threads */
1899 fixedpri_rv
= ENOTSUP
;
1903 kr
= thread_policy_set_internal(th
, THREAD_EXTENDED_POLICY
,
1904 (thread_policy_t
)&extpol
, THREAD_EXTENDED_POLICY_COUNT
);
1905 if (kr
!= KERN_SUCCESS
) {
1906 fixedpri_rv
= EINVAL
;
1912 if (qos_rv
&& voucher_rv
) {
1913 /* Both failed, give that a unique error. */
1937 bsdthread_add_explicit_override(proc_t p
, mach_port_name_t kport
,
1938 pthread_priority_t pp
, user_addr_t resource
)
1940 thread_qos_t qos
= _pthread_priority_thread_qos(pp
);
1941 if (qos
== THREAD_QOS_UNSPECIFIED
) {
1945 thread_t th
= port_name_to_thread(kport
);
1946 if (th
== THREAD_NULL
) {
1950 int rv
= proc_thread_qos_add_override(p
->task
, th
, 0, qos
, TRUE
,
1951 resource
, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_EXPLICIT_OVERRIDE
);
1953 thread_deallocate(th
);
1958 bsdthread_remove_explicit_override(proc_t p
, mach_port_name_t kport
,
1959 user_addr_t resource
)
1961 thread_t th
= port_name_to_thread(kport
);
1962 if (th
== THREAD_NULL
) {
1966 int rv
= proc_thread_qos_remove_override(p
->task
, th
, 0, resource
,
1967 THREAD_QOS_OVERRIDE_TYPE_PTHREAD_EXPLICIT_OVERRIDE
);
1969 thread_deallocate(th
);
1974 workq_thread_add_dispatch_override(proc_t p
, mach_port_name_t kport
,
1975 pthread_priority_t pp
, user_addr_t ulock_addr
)
1977 struct uu_workq_policy old_pri
, new_pri
;
1978 struct workqueue
*wq
= proc_get_wqptr(p
);
1980 thread_qos_t qos_override
= _pthread_priority_thread_qos(pp
);
1981 if (qos_override
== THREAD_QOS_UNSPECIFIED
) {
1985 thread_t thread
= port_name_to_thread(kport
);
1986 if (thread
== THREAD_NULL
) {
1990 struct uthread
*uth
= get_bsdthread_info(thread
);
1991 if ((thread_get_tag(thread
) & THREAD_TAG_WORKQUEUE
) == 0) {
1992 thread_deallocate(thread
);
1996 WQ_TRACE_WQ(TRACE_wq_override_dispatch
| DBG_FUNC_NONE
,
1997 wq
, thread_tid(thread
), 1, pp
, 0);
1999 thread_mtx_lock(thread
);
2005 * Workaround lack of explicit support for 'no-fault copyin'
2006 * <rdar://problem/24999882>, as disabling preemption prevents paging in
2008 disable_preemption();
2009 rc
= copyin_word(ulock_addr
, &val
, sizeof(kport
));
2010 enable_preemption();
2011 if (rc
== 0 && ulock_owner_value_to_port_name((uint32_t)val
) != kport
) {
2016 workq_lock_spin(wq
);
2018 old_pri
= uth
->uu_workq_pri
;
2019 if (old_pri
.qos_override
>= qos_override
) {
2021 } else if (thread
== current_thread()) {
2023 new_pri
.qos_override
= qos_override
;
2024 workq_thread_update_bucket(p
, wq
, uth
, old_pri
, new_pri
, false);
2026 uth
->uu_workq_pri
.qos_override
= qos_override
;
2027 if (qos_override
> workq_pri_override(old_pri
)) {
2028 thread_set_workq_override(thread
, qos_override
);
2035 thread_mtx_unlock(thread
);
2036 thread_deallocate(thread
);
2041 workq_thread_reset_dispatch_override(proc_t p
, thread_t thread
)
2043 struct uu_workq_policy old_pri
, new_pri
;
2044 struct workqueue
*wq
= proc_get_wqptr(p
);
2045 struct uthread
*uth
= get_bsdthread_info(thread
);
2047 if ((thread_get_tag(thread
) & THREAD_TAG_WORKQUEUE
) == 0) {
2051 WQ_TRACE_WQ(TRACE_wq_override_reset
| DBG_FUNC_NONE
, wq
, 0, 0, 0, 0);
2053 workq_lock_spin(wq
);
2054 old_pri
= new_pri
= uth
->uu_workq_pri
;
2055 new_pri
.qos_override
= THREAD_QOS_UNSPECIFIED
;
2056 workq_thread_update_bucket(p
, wq
, uth
, old_pri
, new_pri
, false);
2062 bsdthread_get_max_parallelism(thread_qos_t qos
, unsigned long flags
,
2065 static_assert(QOS_PARALLELISM_COUNT_LOGICAL
==
2066 _PTHREAD_QOS_PARALLELISM_COUNT_LOGICAL
, "logical");
2067 static_assert(QOS_PARALLELISM_REALTIME
==
2068 _PTHREAD_QOS_PARALLELISM_REALTIME
, "realtime");
2070 if (flags
& ~(QOS_PARALLELISM_REALTIME
| QOS_PARALLELISM_COUNT_LOGICAL
)) {
2074 if (flags
& QOS_PARALLELISM_REALTIME
) {
2078 } else if (qos
== THREAD_QOS_UNSPECIFIED
|| qos
>= THREAD_QOS_LAST
) {
2082 *retval
= qos_max_parallelism(qos
, flags
);
2086 #define ENSURE_UNUSED(arg) \
2087 ({ if ((arg) != 0) { return EINVAL; } })
int
bsdthread_ctl(struct proc *p, struct bsdthread_ctl_args *uap, int *retval)
{
	switch (uap->cmd) {
	case BSDTHREAD_CTL_QOS_OVERRIDE_START:
		return bsdthread_add_explicit_override(p, (mach_port_name_t)uap->arg1,
				(pthread_priority_t)uap->arg2, uap->arg3);
	case BSDTHREAD_CTL_QOS_OVERRIDE_END:
		ENSURE_UNUSED(uap->arg3);
		return bsdthread_remove_explicit_override(p, (mach_port_name_t)uap->arg1,
				(user_addr_t)uap->arg2);

	case BSDTHREAD_CTL_QOS_OVERRIDE_DISPATCH:
		return workq_thread_add_dispatch_override(p, (mach_port_name_t)uap->arg1,
				(pthread_priority_t)uap->arg2, uap->arg3);
	case BSDTHREAD_CTL_QOS_OVERRIDE_RESET:
		return workq_thread_reset_dispatch_override(p, current_thread());

	case BSDTHREAD_CTL_SET_SELF:
		return bsdthread_set_self(p, current_thread(),
				(pthread_priority_t)uap->arg1, (mach_port_name_t)uap->arg2,
				(enum workq_set_self_flags)uap->arg3);

	case BSDTHREAD_CTL_QOS_MAX_PARALLELISM:
		ENSURE_UNUSED(uap->arg3);
		return bsdthread_get_max_parallelism((thread_qos_t)uap->arg1,
				(unsigned long)uap->arg2, retval);

	case BSDTHREAD_CTL_SET_QOS:
	case BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_ADD:
	case BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_RESET:
		/* no longer supported */
		return ENOTSUP;

	default:
		return EINVAL;
	}
}
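/*
 * Illustrative sketch of how userspace reaches the
 * BSDTHREAD_CTL_QOS_MAX_PARALLELISM case above.  The __bsdthread_ctl wrapper
 * name and prototype are assumptions for illustration only (libpthread
 * provides its own SPI around this syscall); kept under #if 0 since it is
 * user code, not kernel code.
 */
#if 0
extern int __bsdthread_ctl(uintptr_t cmd, uintptr_t arg1, uintptr_t arg2,
		uintptr_t arg3);

/* Returns the logical-CPU parallelism the kernel advertises for `qos`. */
static int
max_logical_parallelism_for_qos(unsigned long qos)
{
	/* arg3 must be 0: the kernel rejects anything else via ENSURE_UNUSED() */
	return __bsdthread_ctl(BSDTHREAD_CTL_QOS_MAX_PARALLELISM, qos,
			_PTHREAD_QOS_PARALLELISM_COUNT_LOGICAL, 0);
}
#endif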
#pragma mark workqueue thread manipulation
static void __dead2
workq_select_threadreq_or_park_and_unlock(proc_t p, struct workqueue *wq,
		struct uthread *uth);

static void workq_setup_and_run(proc_t p, struct uthread *uth, int flags) __dead2;
#if KDEBUG_LEVEL >= KDEBUG_LEVEL_STANDARD
static inline uint64_t
workq_trace_req_id(workq_threadreq_t req)
{
	struct kqworkloop *kqwl;
	if (req->tr_flags & TR_FLAG_WORKLOOP) {
		kqwl = __container_of(req, struct kqworkloop, kqwl_request.kqr_req);
		return kqwl->kqwl_dynamicid;
	}

	return VM_KERNEL_ADDRHIDE(req);
}
#endif
/*
 * Entry point for libdispatch to ask for threads
 */
static int
workq_reqthreads(struct proc *p, uint32_t reqcount, pthread_priority_t pp)
{
	thread_qos_t qos = _pthread_priority_thread_qos(pp);
	struct workqueue *wq = proc_get_wqptr(p);
	uint32_t unpaced, upcall_flags = WQ_FLAG_THREAD_NEWSPI;

	if (wq == NULL || reqcount <= 0 || reqcount > UINT16_MAX ||
			qos == THREAD_QOS_UNSPECIFIED) {
		return EINVAL;
	}

	WQ_TRACE_WQ(TRACE_wq_wqops_reqthreads | DBG_FUNC_NONE,
			wq, reqcount, pp, 0, 0);

	workq_threadreq_t req = zalloc(workq_zone_threadreq);
	priority_queue_entry_init(&req->tr_entry);
	req->tr_state = TR_STATE_NEW;
	req->tr_flags = 0;
	req->tr_qos = qos;

	if (pp & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG) {
		req->tr_flags |= TR_FLAG_OVERCOMMIT;
		upcall_flags |= WQ_FLAG_THREAD_OVERCOMMIT;
	}

	WQ_TRACE_WQ(TRACE_wq_thread_request_initiate | DBG_FUNC_NONE,
			wq, workq_trace_req_id(req), req->tr_qos, reqcount, 0);

	workq_lock_spin(wq);
	do {
		if (_wq_exiting(wq)) {
			goto exiting;
		}

		/*
		 * When userspace is asking for parallelism, wake up to (reqcount - 1)
		 * threads without pacing, to inform the scheduler of that workload.
		 *
		 * The last requests, or the ones that failed the admission checks are
		 * enqueued and go through the regular creator codepath.
		 *
		 * If there aren't enough threads, add one, but re-evaluate everything
		 * as conditions may now have changed.
		 */
		if (reqcount > 1 && (req->tr_flags & TR_FLAG_OVERCOMMIT) == 0) {
			unpaced = workq_constrained_allowance(wq, qos, NULL, false);
			if (unpaced >= reqcount - 1) {
				unpaced = reqcount - 1;
			}
		} else {
			unpaced = reqcount - 1;
		}

		/*
		 * This path does not currently handle custom workloop parameters
		 * when creating threads for parallelism.
		 */
		assert(!(req->tr_flags & TR_FLAG_WL_PARAMS));

		/*
		 * This is a trimmed down version of workq_threadreq_bind_and_unlock()
		 */
		while (unpaced > 0 && wq->wq_thidlecount) {
			struct uthread *uth = workq_pop_idle_thread(wq);

			_wq_thactive_inc(wq, qos);
			wq->wq_thscheduled_count[_wq_bucket(qos)]++;
			workq_thread_reset_pri(wq, uth, req);
			wq->wq_fulfilled++;

			uth->uu_workq_flags |= UT_WORKQ_EARLY_BOUND;
			if ((req->tr_flags & TR_FLAG_OVERCOMMIT) == 0) {
				uth->uu_workq_flags &= ~UT_WORKQ_OVERCOMMIT;
				wq->wq_constrained_threads_scheduled++;
			}
			uth->uu_save.uus_workq_park_data.upcall_flags = upcall_flags;
			uth->uu_save.uus_workq_park_data.thread_request = req;
			workq_thread_wakeup(uth);
			unpaced--;
			reqcount--;
		}
	} while (unpaced && wq->wq_nthreads < wq_max_threads &&
			workq_add_new_idle_thread(p, wq));

	if (_wq_exiting(wq)) {
		goto exiting;
	}

	req->tr_count = reqcount;
	if (workq_threadreq_enqueue(wq, req)) {
		/* This can drop the workqueue lock, and take it again */
		workq_schedule_creator(p, wq, WORKQ_THREADREQ_CAN_CREATE_THREADS);
	}
	workq_unlock(wq);
	return 0;

exiting:
	workq_unlock(wq);
	zfree(workq_zone_threadreq, req);
	return ECANCELED;
}
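/*
 * Illustrative userspace-side sketch of the path above: libdispatch reaches
 * workq_reqthreads() through the WQOPS_QUEUE_REQTHREADS command of
 * workq_kernreturn() (see below).  The __workq_kernreturn stub prototype used
 * here is an assumption for illustration; the real callers live in
 * libpthread/libdispatch.  Kept under #if 0 since it is user code.
 */
#if 0
extern int __workq_kernreturn(int options, void *item, int affinity, int prio);

/* Ask the kernel to bring up `n` workers at the given pthread priority. */
static int
request_workers(int n, int pthread_priority)
{
	/* item is unused for this op; affinity carries the count, prio the priority */
	return __workq_kernreturn(WQOPS_QUEUE_REQTHREADS, NULL, n, pthread_priority);
}
#endif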
bool
workq_kern_threadreq_initiate(struct proc *p, struct kqrequest *kqr,
		struct turnstile *workloop_ts, thread_qos_t qos, int flags)
{
	struct workqueue *wq = proc_get_wqptr_fast(p);
	workq_threadreq_t req = &kqr->kqr_req;
	struct uthread *uth = NULL;
	uint8_t tr_flags = 0;

	if (kqr->kqr_state & KQR_WORKLOOP) {
		tr_flags = TR_FLAG_WORKLOOP;

		workq_threadreq_param_t trp = kqueue_threadreq_workloop_param(req);
		if (trp.trp_flags & TRP_PRIORITY) {
			tr_flags |= TR_FLAG_WL_OUTSIDE_QOS;
			qos = thread_workq_qos_for_pri(trp.trp_pri);
			if (qos == THREAD_QOS_UNSPECIFIED) {
				qos = WORKQ_THREAD_QOS_ABOVEUI;
			}
		}
		if (trp.trp_flags) {
			tr_flags |= TR_FLAG_WL_PARAMS;
		}
	} else {
		tr_flags = TR_FLAG_KEVENT;
	}
	if (qos != WORKQ_THREAD_QOS_MANAGER &&
			(kqr->kqr_state & KQR_THOVERCOMMIT)) {
		tr_flags |= TR_FLAG_OVERCOMMIT;
	}

	assert(req->tr_state == TR_STATE_IDLE);
	priority_queue_entry_init(&req->tr_entry);
	req->tr_count = 1;
	req->tr_state = TR_STATE_NEW;
	req->tr_flags = tr_flags;
	req->tr_qos = qos;

	WQ_TRACE_WQ(TRACE_wq_thread_request_initiate | DBG_FUNC_NONE, wq,
			workq_trace_req_id(req), qos, 1, 0);

	if (flags & WORKQ_THREADREQ_ATTEMPT_REBIND) {
		/*
		 * we're called back synchronously from the context of
		 * kqueue_threadreq_unbind from within workq_thread_return()
		 * we can try to match up this thread with this request!
		 */
		uth = current_uthread();
		assert(uth->uu_kqr_bound == NULL);
	}

	workq_lock_spin(wq);
	if (_wq_exiting(wq)) {
		workq_unlock(wq);
		return false;
	}

	if (uth && workq_threadreq_admissible(wq, uth, req)) {
		assert(uth != wq->wq_creator);
		workq_threadreq_bind_and_unlock(p, wq, req, uth);
	} else {
		if (workloop_ts) {
			workq_perform_turnstile_operation_locked(wq, ^{
				turnstile_update_inheritor(workloop_ts, wq->wq_turnstile,
						TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_TURNSTILE);
				turnstile_update_inheritor_complete(workloop_ts,
						TURNSTILE_INTERLOCK_HELD);
			});
		}
		if (workq_threadreq_enqueue(wq, req)) {
			workq_schedule_creator(p, wq, flags);
		}
		workq_unlock(wq);
	}

	return true;
}
void
workq_kern_threadreq_modify(struct proc *p, struct kqrequest *kqr,
		thread_qos_t qos, int flags)
{
	struct workqueue *wq = proc_get_wqptr_fast(p);
	workq_threadreq_t req = &kqr->kqr_req;
	bool change_overcommit = false;

	if (req->tr_flags & TR_FLAG_WL_OUTSIDE_QOS) {
		/* Requests outside-of-QoS shouldn't accept modify operations */
		return;
	}

	workq_lock_spin(wq);

	assert(req->tr_qos != WORKQ_THREAD_QOS_MANAGER);
	assert(req->tr_flags & (TR_FLAG_KEVENT | TR_FLAG_WORKLOOP));

	if (req->tr_state == TR_STATE_BINDING) {
		kqueue_threadreq_bind(p, req, req->tr_binding_thread, 0);
		workq_unlock(wq);
		return;
	}

	change_overcommit = (bool)(kqr->kqr_state & KQR_THOVERCOMMIT) !=
			(bool)(req->tr_flags & TR_FLAG_OVERCOMMIT);

	if (_wq_exiting(wq) || (req->tr_qos == qos && !change_overcommit)) {
		workq_unlock(wq);
		return;
	}

	assert(req->tr_count == 1);
	if (req->tr_state != TR_STATE_QUEUED) {
		panic("Invalid thread request (%p) state %d", req, req->tr_state);
	}

	WQ_TRACE_WQ(TRACE_wq_thread_request_modify | DBG_FUNC_NONE, wq,
			workq_trace_req_id(req), qos, 0, 0);

	struct priority_queue *pq = workq_priority_queue_for_req(wq, req);
	workq_threadreq_t req_max;

	/*
	 * Stage 1: Dequeue the request from its priority queue.
	 *
	 * If we dequeue the root item of the constrained priority queue,
	 * maintain the best constrained request qos invariant.
	 */
	if (priority_queue_remove(pq, &req->tr_entry,
			PRIORITY_QUEUE_SCHED_PRI_MAX_HEAP_COMPARE)) {
		if ((req->tr_flags & TR_FLAG_OVERCOMMIT) == 0) {
			_wq_thactive_refresh_best_constrained_req_qos(wq);
		}
	}

	/*
	 * Stage 2: Apply changes to the thread request.
	 *
	 * If the item will not become the root of the priority queue it belongs to,
	 * then we need to wait in line, just enqueue and return quickly.
	 */
	if (__improbable(change_overcommit)) {
		req->tr_flags ^= TR_FLAG_OVERCOMMIT;
		pq = workq_priority_queue_for_req(wq, req);
	}
	req->tr_qos = qos;

	req_max = priority_queue_max(pq, struct workq_threadreq_s, tr_entry);
	if (req_max && req_max->tr_qos >= qos) {
		priority_queue_insert(pq, &req->tr_entry, workq_priority_for_req(req),
				PRIORITY_QUEUE_SCHED_PRI_MAX_HEAP_COMPARE);
		workq_unlock(wq);
		return;
	}

	/*
	 * Stage 3: Reevaluate whether we should run the thread request.
	 *
	 * Pretend the thread request is new again:
	 * - adjust wq_reqcount to not count it anymore.
	 * - make its state TR_STATE_NEW (so that workq_threadreq_bind_and_unlock
	 *   properly attempts a synchronous bind)
	 */
	wq->wq_reqcount--;
	req->tr_state = TR_STATE_NEW;
	if (workq_threadreq_enqueue(wq, req)) {
		workq_schedule_creator(p, wq, flags);
	}
	workq_unlock(wq);
}
void
workq_kern_threadreq_lock(struct proc *p)
{
	workq_lock_spin(proc_get_wqptr_fast(p));
}

void
workq_kern_threadreq_unlock(struct proc *p)
{
	workq_unlock(proc_get_wqptr_fast(p));
}
void
workq_kern_threadreq_update_inheritor(struct proc *p, struct kqrequest *kqr,
		thread_t owner, struct turnstile *wl_ts,
		turnstile_update_flags_t flags)
{
	struct workqueue *wq = proc_get_wqptr_fast(p);
	workq_threadreq_t req = &kqr->kqr_req;
	turnstile_inheritor_t inheritor;

	assert(req->tr_qos != WORKQ_THREAD_QOS_MANAGER);
	assert(req->tr_flags & TR_FLAG_WORKLOOP);
	workq_lock_held(wq);

	if (req->tr_state == TR_STATE_BINDING) {
		kqueue_threadreq_bind(p, req, req->tr_binding_thread,
				KQUEUE_THREADERQ_BIND_NO_INHERITOR_UPDATE);
		return;
	}

	if (_wq_exiting(wq)) {
		inheritor = TURNSTILE_INHERITOR_NULL;
	} else {
		if (req->tr_state != TR_STATE_QUEUED) {
			panic("Invalid thread request (%p) state %d", req, req->tr_state);
		}

		if (owner) {
			inheritor = owner;
			flags |= TURNSTILE_INHERITOR_THREAD;
		} else {
			inheritor = wq->wq_turnstile;
			flags |= TURNSTILE_INHERITOR_TURNSTILE;
		}
	}

	workq_perform_turnstile_operation_locked(wq, ^{
		turnstile_update_inheritor(wl_ts, inheritor, flags);
	});
}
void
workq_kern_threadreq_redrive(struct proc *p, int flags)
{
	struct workqueue *wq = proc_get_wqptr_fast(p);

	workq_lock_spin(wq);
	workq_schedule_creator(p, wq, flags);
	workq_unlock(wq);
}
void
workq_schedule_creator_turnstile_redrive(struct workqueue *wq, bool locked)
{
	if (!locked) workq_lock_spin(wq);
	workq_schedule_creator(NULL, wq, WORKQ_THREADREQ_CREATOR_SYNC_UPDATE);
	if (!locked) workq_unlock(wq);
}
static int
workq_thread_return(struct proc *p, struct workq_kernreturn_args *uap,
		struct workqueue *wq)
{
	thread_t th = current_thread();
	struct uthread *uth = get_bsdthread_info(th);
	struct kqrequest *kqr = uth->uu_kqr_bound;
	workq_threadreq_param_t trp = { };
	int nevents = uap->affinity, error;
	user_addr_t eventlist = uap->item;

	if (((thread_get_tag(th) & THREAD_TAG_WORKQUEUE) == 0) ||
			(uth->uu_workq_flags & UT_WORKQ_DYING)) {
		return EINVAL;
	}

	if (eventlist && nevents && kqr == NULL) {
		return EINVAL;
	}

	/* reset signal mask on the workqueue thread to default state */
	if (uth->uu_sigmask != (sigset_t)(~workq_threadmask)) {
		proc_lock(p);
		uth->uu_sigmask = ~workq_threadmask;
		proc_unlock(p);
	}

	if (kqr && kqr->kqr_req.tr_flags & TR_FLAG_WL_PARAMS) {
		/*
		 * Ensure we store the threadreq param before unbinding
		 * the kqr from this thread.
		 */
		trp = kqueue_threadreq_workloop_param(&kqr->kqr_req);
	}

	if (kqr) {
		uint32_t upcall_flags = WQ_FLAG_THREAD_NEWSPI | WQ_FLAG_THREAD_REUSE;
		if (kqr->kqr_state & KQR_WORKLOOP) {
			upcall_flags |= WQ_FLAG_THREAD_WORKLOOP | WQ_FLAG_THREAD_KEVENT;
		} else {
			upcall_flags |= WQ_FLAG_THREAD_KEVENT;
		}
		if (uth->uu_workq_pri.qos_bucket == WORKQ_THREAD_QOS_MANAGER) {
			upcall_flags |= WQ_FLAG_THREAD_EVENT_MANAGER;
		} else {
			if (uth->uu_workq_flags & UT_WORKQ_OVERCOMMIT) {
				upcall_flags |= WQ_FLAG_THREAD_OVERCOMMIT;
			}
			if (uth->uu_workq_flags & UT_WORKQ_OUTSIDE_QOS) {
				upcall_flags |= WQ_FLAG_THREAD_OUTSIDEQOS;
			} else {
				upcall_flags |= uth->uu_workq_pri.qos_req |
						WQ_FLAG_THREAD_PRIO_QOS;
			}
		}

		error = pthread_functions->workq_handle_stack_events(p, th,
				get_task_map(p->task), uth->uu_workq_stackaddr,
				uth->uu_workq_thport, eventlist, nevents, upcall_flags);
		if (error) return error;

		// pthread is supposed to pass KEVENT_FLAG_PARKING here
		// which should cause the above call to either:
		// - not return
		// - return an error
		// - return 0 and have unbound properly
		assert(uth->uu_kqr_bound == NULL);
	}

	WQ_TRACE_WQ(TRACE_wq_runthread | DBG_FUNC_END, wq, uap->options, 0, 0, 0);

	thread_sched_call(th, NULL);
	thread_will_park_or_terminate(th);
#if CONFIG_WORKLOOP_DEBUG
	UU_KEVENT_HISTORY_WRITE_ENTRY(uth, { .uu_error = -1, });
#endif

	workq_lock_spin(wq);
	WQ_TRACE_WQ(TRACE_wq_thread_logical_run | DBG_FUNC_END, wq, 0, 0, 0, 0);
	uth->uu_save.uus_workq_park_data.workloop_params = trp.trp_value;
	workq_select_threadreq_or_park_and_unlock(p, wq, uth);
	__builtin_unreachable();
}
/*
 * Multiplexed call to interact with the workqueue mechanism
 */
int
workq_kernreturn(struct proc *p, struct workq_kernreturn_args *uap, int32_t *retval)
{
	int options = uap->options;
	int arg2 = uap->affinity;
	int arg3 = uap->prio;
	struct workqueue *wq = proc_get_wqptr(p);
	int error = 0;

	if ((p->p_lflag & P_LREGISTER) == 0) {
		return EINVAL;
	}

	switch (options) {
	case WQOPS_QUEUE_NEWSPISUPP: {
		/*
		 * arg2 = offset of serialno into dispatch queue
		 * arg3 = kevent support
		 */
		int offset = arg2;
		if (arg3 & 0x01) {
			// If we get here, then userspace has indicated support for kevent delivery.
		}

		p->p_dispatchqueue_serialno_offset = (uint64_t)offset;
		break;
	}
	case WQOPS_QUEUE_REQTHREADS: {
		/*
		 * arg2 = number of threads to start
		 * arg3 = priority
		 */
		error = workq_reqthreads(p, arg2, arg3);
		break;
	}
	case WQOPS_SET_EVENT_MANAGER_PRIORITY: {
		/*
		 * arg2 = priority for the manager thread
		 *
		 * if _PTHREAD_PRIORITY_SCHED_PRI_FLAG is set,
		 * the low bits of the value contain a scheduling priority
		 * instead of a QOS value
		 */
		pthread_priority_t pri = arg2;

		if (wq == NULL) {
			error = EINVAL;
			break;
		}

		/*
		 * Normalize the incoming priority so that it is ordered numerically.
		 */
		if (pri & _PTHREAD_PRIORITY_SCHED_PRI_FLAG) {
			pri &= (_PTHREAD_PRIORITY_SCHED_PRI_MASK |
					_PTHREAD_PRIORITY_SCHED_PRI_FLAG);
		} else {
			thread_qos_t qos = _pthread_priority_thread_qos(pri);
			int relpri = _pthread_priority_relpri(pri);
			if (relpri > 0 || relpri < THREAD_QOS_MIN_TIER_IMPORTANCE ||
					qos == THREAD_QOS_UNSPECIFIED) {
				error = EINVAL;
				break;
			}
			pri &= ~_PTHREAD_PRIORITY_FLAGS_MASK;
		}

		/*
		 * If userspace passes a scheduling priority, that wins over any QoS.
		 * Userspace should take care not to lower the priority this way.
		 */
		workq_lock_spin(wq);
		if (wq->wq_event_manager_priority < (uint32_t)pri) {
			wq->wq_event_manager_priority = (uint32_t)pri;
		}
		workq_unlock(wq);
		break;
	}
	case WQOPS_THREAD_KEVENT_RETURN:
	case WQOPS_THREAD_WORKLOOP_RETURN:
	case WQOPS_THREAD_RETURN: {
		error = workq_thread_return(p, uap, wq);
		break;
	}

	case WQOPS_SHOULD_NARROW: {
		/*
		 * arg2 = priority to test
		 * arg3 = unused
		 */
		thread_t th = current_thread();
		struct uthread *uth = get_bsdthread_info(th);
		if (((thread_get_tag(th) & THREAD_TAG_WORKQUEUE) == 0) ||
				(uth->uu_workq_flags & (UT_WORKQ_DYING | UT_WORKQ_OVERCOMMIT))) {
			error = EINVAL;
			break;
		}

		thread_qos_t qos = _pthread_priority_thread_qos(arg2);
		if (qos == THREAD_QOS_UNSPECIFIED) {
			error = EINVAL;
			break;
		}
		workq_lock_spin(wq);
		bool should_narrow = !workq_constrained_allowance(wq, qos, uth, false);
		workq_unlock(wq);

		*retval = should_narrow;
		break;
	}
	default:
		error = EINVAL;
		break;
	}

	return error;
}
/*
 * We have no work to do, park ourselves on the idle list.
 *
 * Consumes the workqueue lock and does not return.
 */
__attribute__((noreturn, noinline))
static void
workq_park_and_unlock(proc_t p, struct workqueue *wq, struct uthread *uth)
{
	assert(uth == current_uthread());
	assert(uth->uu_kqr_bound == NULL);
	workq_push_idle_thread(p, wq, uth); // may not return

	workq_thread_reset_cpupercent(NULL, uth);

	if (uth->uu_workq_flags & UT_WORKQ_IDLE_CLEANUP) {
		workq_unlock(wq);

		/*
		 * workq_push_idle_thread() will unset `has_stack`
		 * if it wants us to free the stack before parking.
		 */
		if (!uth->uu_save.uus_workq_park_data.has_stack) {
			pthread_functions->workq_markfree_threadstack(p, uth->uu_thread,
					get_task_map(p->task), uth->uu_workq_stackaddr);
		}

		/*
		 * When we remove the voucher from the thread, we may lose our importance
		 * causing us to get preempted, so we do this after putting the thread on
		 * the idle list.  Then, when we get our importance back we'll be able to
		 * use this thread from e.g. the kevent call out to deliver a boosting
		 * message.
		 */
		__assert_only kern_return_t kr;
		kr = thread_set_voucher_name(MACH_PORT_NULL);
		assert(kr == KERN_SUCCESS);

		workq_lock_spin(wq);
		uth->uu_workq_flags &= ~UT_WORKQ_IDLE_CLEANUP;
	}

	if (uth->uu_workq_flags & UT_WORKQ_RUNNING) {
		/*
		 * While we'd dropped the lock to unset our voucher, someone came
		 * around and made us runnable.  But because we weren't waiting on the
		 * event their thread_wakeup() was ineffectual.  To correct for that,
		 * we just run the continuation ourselves.
		 */
		WQ_TRACE_WQ(TRACE_wq_thread_logical_run | DBG_FUNC_END, wq, 0, 0, 0, 0);
		workq_select_threadreq_or_park_and_unlock(p, wq, uth);
		__builtin_unreachable();
	}

	if (uth->uu_workq_flags & UT_WORKQ_DYING) {
		workq_unpark_for_death_and_unlock(p, wq, uth,
				WORKQ_UNPARK_FOR_DEATH_WAS_IDLE);
		__builtin_unreachable();
	}

	thread_set_pending_block_hint(uth->uu_thread, kThreadWaitParkedWorkQueue);
	assert_wait(workq_parked_wait_event(uth), THREAD_INTERRUPTIBLE);
	workq_unlock(wq);
	WQ_TRACE_WQ(TRACE_wq_thread_logical_run | DBG_FUNC_END, wq, 0, 0, 0, 0);
	thread_block(workq_unpark_continue);
	__builtin_unreachable();
}
static inline bool
workq_may_start_event_mgr_thread(struct workqueue *wq, struct uthread *uth)
{
	/*
	 * There's an event manager request and either:
	 * - no event manager currently running
	 * - we are re-using the event manager
	 */
	return wq->wq_thscheduled_count[_wq_bucket(WORKQ_THREAD_QOS_MANAGER)] == 0 ||
			(uth && uth->uu_workq_pri.qos_bucket == WORKQ_THREAD_QOS_MANAGER);
}
static uint32_t
workq_constrained_allowance(struct workqueue *wq, thread_qos_t at_qos,
		struct uthread *uth, bool may_start_timer)
{
	assert(at_qos != WORKQ_THREAD_QOS_MANAGER);
	uint32_t count = 0;

	uint32_t max_count = wq->wq_constrained_threads_scheduled;
	if (uth && (uth->uu_workq_flags & UT_WORKQ_OVERCOMMIT) == 0) {
		/*
		 * don't count the current thread as scheduled
		 */
		assert(max_count > 0);
		max_count--;
	}
	if (max_count >= wq_max_constrained_threads) {
		WQ_TRACE_WQ(TRACE_wq_constrained_admission | DBG_FUNC_NONE, wq, 1,
				wq->wq_constrained_threads_scheduled,
				wq_max_constrained_threads, 0);
		/*
		 * we need 1 or more constrained threads to return to the kernel before
		 * we can dispatch additional work
		 */
		return 0;
	}
	max_count -= wq_max_constrained_threads;

	/*
	 * Compute a metric for how many threads are active.  We find the
	 * highest priority request outstanding and then add up the number of
	 * active threads in that and all higher-priority buckets.  We'll also add
	 * any "busy" threads which are not active but blocked recently enough that
	 * we can't be sure they've gone idle yet.  We'll then compare this metric
	 * to our max concurrency to decide whether to add a new thread.
	 */

	uint32_t busycount, thactive_count;

	thactive_count = _wq_thactive_aggregate_downto_qos(wq, _wq_thactive(wq),
			at_qos, &busycount, NULL);

	if (uth && uth->uu_workq_pri.qos_bucket != WORKQ_THREAD_QOS_MANAGER &&
			at_qos <= uth->uu_workq_pri.qos_bucket) {
		/*
		 * Don't count this thread as currently active, but only if it's not
		 * a manager thread, as _wq_thactive_aggregate_downto_qos ignores active
		 * managers.
		 */
		assert(thactive_count > 0);
		thactive_count--;
	}

	count = wq_max_parallelism[_wq_bucket(at_qos)];
	if (count > thactive_count + busycount) {
		count -= thactive_count + busycount;
		WQ_TRACE_WQ(TRACE_wq_constrained_admission | DBG_FUNC_NONE, wq, 2,
				thactive_count, busycount, 0);
		return MIN(count, max_count);
	} else {
		WQ_TRACE_WQ(TRACE_wq_constrained_admission | DBG_FUNC_NONE, wq, 3,
				thactive_count, busycount, 0);
	}

	if (busycount && may_start_timer) {
		/*
		 * If this is called from the add timer, we won't have another timer
		 * fire when the thread exits the "busy" state, so rearm the timer.
		 */
		workq_schedule_delayed_thread_creation(wq, 0);
	}

	return 0;
}
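/*
 * Worked example for the admission math above (illustrative numbers only):
 * with wq_max_parallelism[_wq_bucket(at_qos)] == 8, thactive_count == 5 and
 * busycount == 2, count becomes 8 - (5 + 2) == 1, so at most one additional
 * constrained thread is admitted at this QoS (still capped by max_count).
 * If thactive_count + busycount had been 8 or more, the allowance would be 0
 * and, when called from the delayed-creation timer path, the timer is rearmed
 * so the decision is revisited once the busy threads settle.
 */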
static bool
workq_threadreq_admissible(struct workqueue *wq, struct uthread *uth,
		workq_threadreq_t req)
{
	if (req->tr_qos == WORKQ_THREAD_QOS_MANAGER) {
		return workq_may_start_event_mgr_thread(wq, uth);
	}
	if ((req->tr_flags & TR_FLAG_OVERCOMMIT) == 0) {
		return workq_constrained_allowance(wq, req->tr_qos, uth, true);
	}
	return true;
}
static workq_threadreq_t
workq_threadreq_select_for_creator(struct workqueue *wq)
{
	workq_threadreq_t req_qos, req_pri, req_tmp;
	thread_qos_t qos = THREAD_QOS_UNSPECIFIED;
	uint8_t pri = 0;

	req_tmp = wq->wq_event_manager_threadreq;
	if (req_tmp && workq_may_start_event_mgr_thread(wq, NULL)) {
		return req_tmp;
	}

	/*
	 * Compute the best priority request, and ignore the turnstile for now
	 */

	req_pri = priority_queue_max(&wq->wq_special_queue,
			struct workq_threadreq_s, tr_entry);
	if (req_pri) {
		pri = priority_queue_entry_key(&wq->wq_special_queue, &req_pri->tr_entry);
	}

	/*
	 * Compute the best QoS Request, and check whether it beats the "pri" one
	 */

	req_qos = priority_queue_max(&wq->wq_overcommit_queue,
			struct workq_threadreq_s, tr_entry);
	if (req_qos) {
		qos = req_qos->tr_qos;
	}

	req_tmp = priority_queue_max(&wq->wq_constrained_queue,
			struct workq_threadreq_s, tr_entry);

	if (req_tmp && qos < req_tmp->tr_qos) {
		if (pri && pri >= thread_workq_pri_for_qos(req_tmp->tr_qos)) {
			return req_pri;
		}

		if (workq_constrained_allowance(wq, req_tmp->tr_qos, NULL, true)) {
			/*
			 * If the constrained thread request is the best one and passes
			 * the admission check, pick it.
			 */
			return req_tmp;
		}
	}

	if (pri && (!qos || pri >= thread_workq_pri_for_qos(qos))) {
		return req_pri;
	}

	if (req_qos) {
		return req_qos;
	}

	/*
	 * If we had no eligible request but we have a turnstile push,
	 * it must be a non overcommit thread request that failed
	 * the admission check.
	 *
	 * Just fake a BG thread request so that if the push stops the creator
	 * priority just drops to 4.
	 */
	if (turnstile_workq_proprietor_of_max_turnstile(wq->wq_turnstile, NULL)) {
		static struct workq_threadreq_s workq_sync_push_fake_req = {
			.tr_qos = THREAD_QOS_BACKGROUND,
		};

		return &workq_sync_push_fake_req;
	}

	return NULL;
}
static workq_threadreq_t
workq_threadreq_select(struct workqueue *wq, struct uthread *uth)
{
	workq_threadreq_t req_qos, req_pri, req_tmp;
	uintptr_t proprietor;
	thread_qos_t qos = THREAD_QOS_UNSPECIFIED;
	uint8_t pri = 0;

	if (uth == wq->wq_creator) uth = NULL;

	req_tmp = wq->wq_event_manager_threadreq;
	if (req_tmp && workq_may_start_event_mgr_thread(wq, uth)) {
		return req_tmp;
	}

	/*
	 * Compute the best priority request (special or turnstile)
	 */

	pri = turnstile_workq_proprietor_of_max_turnstile(wq->wq_turnstile,
			&proprietor);
	if (pri) {
		struct kqworkloop *kqwl = (struct kqworkloop *)proprietor;
		req_pri = &kqwl->kqwl_request.kqr_req;
		if (req_pri->tr_state != TR_STATE_QUEUED) {
			panic("Invalid thread request (%p) state %d",
					req_pri, req_pri->tr_state);
		}
	} else {
		req_pri = NULL;
	}

	req_tmp = priority_queue_max(&wq->wq_special_queue,
			struct workq_threadreq_s, tr_entry);
	if (req_tmp && pri < priority_queue_entry_key(&wq->wq_special_queue,
			&req_tmp->tr_entry)) {
		req_pri = req_tmp;
		pri = priority_queue_entry_key(&wq->wq_special_queue, &req_tmp->tr_entry);
	}

	/*
	 * Compute the best QoS Request, and check whether it beats the "pri" one
	 */

	req_qos = priority_queue_max(&wq->wq_overcommit_queue,
			struct workq_threadreq_s, tr_entry);
	if (req_qos) {
		qos = req_qos->tr_qos;
	}

	req_tmp = priority_queue_max(&wq->wq_constrained_queue,
			struct workq_threadreq_s, tr_entry);

	if (req_tmp && qos < req_tmp->tr_qos) {
		if (pri && pri >= thread_workq_pri_for_qos(req_tmp->tr_qos)) {
			return req_pri;
		}

		if (workq_constrained_allowance(wq, req_tmp->tr_qos, uth, true)) {
			/*
			 * If the constrained thread request is the best one and passes
			 * the admission check, pick it.
			 */
			return req_tmp;
		}
	}

	if (req_pri && (!qos || pri >= thread_workq_pri_for_qos(qos))) {
		return req_pri;
	}

	return req_qos;
}
/*
 * The creator is an anonymous thread that is counted as scheduled,
 * but otherwise without its scheduler callback set or tracked as active
 * that is used to make other threads.
 *
 * When more requests are added or an existing one is hurried along,
 * a creator is elected and set up, or the existing one overridden accordingly.
 *
 * While this creator is in flight, because no request has been dequeued,
 * already running threads have a chance at stealing thread requests avoiding
 * useless context switches, and the creator once scheduled may not find any
 * work to do and will then just park again.
 *
 * The creator serves the dual purpose of informing the scheduler of work that
 * hasn't been materialized as threads yet, and also as a natural pacing mechanism
 * for thread creation.
 *
 * By being anonymous (and not bound to anything) it means that thread requests
 * can be stolen from this creator by threads already on core yielding more
 * efficient scheduling and reduced context switches.
 */
static void
workq_schedule_creator(proc_t p, struct workqueue *wq, int flags)
{
	workq_threadreq_t req;
	struct uthread *uth;

	workq_lock_held(wq);
	assert(p || (flags & WORKQ_THREADREQ_CAN_CREATE_THREADS) == 0);

again:
	uth = wq->wq_creator;

	if (!wq->wq_reqcount) {
		if (uth == NULL) {
			workq_turnstile_update_inheritor(wq, TURNSTILE_INHERITOR_NULL, 0);
		}
		return;
	}

	req = workq_threadreq_select_for_creator(wq);
	if (req == NULL) {
		if (flags & WORKQ_THREADREQ_CREATOR_SYNC_UPDATE) {
			assert((flags & WORKQ_THREADREQ_CREATOR_TRANSFER) == 0);
			/*
			 * turnstile propagation code is reaching out to us,
			 * and we still don't want to do anything, do not recurse.
			 */
		} else {
			workq_turnstile_update_inheritor(wq, wq, TURNSTILE_INHERITOR_WORKQ);
		}
		return;
	}

	if (uth) {
		/*
		 * We need to maybe override the creator we already have
		 */
		if (workq_thread_needs_priority_change(req, uth)) {
			WQ_TRACE_WQ(TRACE_wq_creator_select | DBG_FUNC_NONE,
					wq, 1, thread_tid(uth->uu_thread), req->tr_qos, 0);
			workq_thread_reset_pri(wq, uth, req);
		}
	} else if (wq->wq_thidlecount) {
		/*
		 * We need to unpark a creator thread
		 */
		wq->wq_creator = uth = workq_pop_idle_thread(wq);
		if (workq_thread_needs_priority_change(req, uth)) {
			workq_thread_reset_pri(wq, uth, req);
		}
		workq_turnstile_update_inheritor(wq, uth->uu_thread,
				TURNSTILE_INHERITOR_THREAD);
		WQ_TRACE_WQ(TRACE_wq_creator_select | DBG_FUNC_NONE,
				wq, 2, thread_tid(uth->uu_thread), req->tr_qos, 0);
		uth->uu_save.uus_workq_park_data.fulfilled_snapshot = wq->wq_fulfilled;
		uth->uu_save.uus_workq_park_data.yields = 0;
		workq_thread_wakeup(uth);
	} else {
		/*
		 * We need to allocate a thread...
		 */
		if (__improbable(wq->wq_nthreads >= wq_max_threads)) {
			/* out of threads, just go away */
		} else if (flags & WORKQ_THREADREQ_SET_AST_ON_FAILURE) {
			act_set_astkevent(current_thread(), AST_KEVENT_REDRIVE_THREADREQ);
		} else if (!(flags & WORKQ_THREADREQ_CAN_CREATE_THREADS)) {
			/* This can drop the workqueue lock, and take it again */
			workq_schedule_immediate_thread_creation(wq);
		} else if (workq_add_new_idle_thread(p, wq)) {
			goto again;
		} else {
			workq_schedule_delayed_thread_creation(wq, 0);
		}

		if (flags & WORKQ_THREADREQ_CREATOR_TRANSFER) {
			/*
			 * workq_schedule_creator() failed at creating a thread,
			 * and the responsibility of redriving is now with a thread-call.
			 *
			 * We still need to tell the turnstile the previous creator is gone.
			 */
			workq_turnstile_update_inheritor(wq, NULL, 0);
		}
	}
}
/*
 * Runs a thread request on a thread
 *
 * - if thread is THREAD_NULL, will find a thread and run the request there.
 *   Otherwise, the thread must be the current thread.
 *
 * - if req is NULL, will find the highest priority request and run that.  If
 *   it is not NULL, it must be a threadreq object in state NEW.  If it can not
 *   be run immediately, it will be enqueued and moved to state QUEUED.
 *
 *   Either way, the thread request object serviced will be moved to state
 *   BINDING and attached to the uthread.
 *
 * Should be called with the workqueue lock held.  Will drop it.
 */
__attribute__((noreturn, noinline))
static void
workq_select_threadreq_or_park_and_unlock(proc_t p, struct workqueue *wq,
		struct uthread *uth)
{
	uint32_t setup_flags = 0;
	workq_threadreq_t req;

	if (uth->uu_workq_flags & UT_WORKQ_EARLY_BOUND) {
		if (uth->uu_workq_flags & UT_WORKQ_NEW) {
			setup_flags |= WQ_SETUP_FIRST_USE;
		}
		uth->uu_workq_flags &= ~(UT_WORKQ_NEW | UT_WORKQ_EARLY_BOUND);
		/*
		 * This pointer is possibly freed and only used for tracing purposes.
		 */
		req = uth->uu_save.uus_workq_park_data.thread_request;
		workq_unlock(wq);
		WQ_TRACE_WQ(TRACE_wq_thread_logical_run | DBG_FUNC_START, wq,
				VM_KERNEL_ADDRHIDE(req), 0, 0, 0);
		goto run;
	} else if (_wq_exiting(wq)) {
		WQ_TRACE_WQ(TRACE_wq_select_threadreq | DBG_FUNC_NONE, wq, 0, 0, 0, 0);
	} else if (wq->wq_reqcount == 0) {
		WQ_TRACE_WQ(TRACE_wq_select_threadreq | DBG_FUNC_NONE, wq, 1, 0, 0, 0);
	} else if ((req = workq_threadreq_select(wq, uth)) == NULL) {
		WQ_TRACE_WQ(TRACE_wq_select_threadreq | DBG_FUNC_NONE, wq, 2, 0, 0, 0);
	} else {
		WQ_TRACE_WQ(TRACE_wq_thread_logical_run | DBG_FUNC_START, wq,
				workq_trace_req_id(req), 0, 0, 0);
		if (uth->uu_workq_flags & UT_WORKQ_NEW) {
			uth->uu_workq_flags ^= UT_WORKQ_NEW;
			setup_flags |= WQ_SETUP_FIRST_USE;
		}
		workq_thread_reset_cpupercent(req, uth);
		workq_threadreq_bind_and_unlock(p, wq, req, uth);
run:
		workq_setup_and_run(p, uth, setup_flags);
		__builtin_unreachable();
	}

	workq_park_and_unlock(p, wq, uth);
	__builtin_unreachable();
}
static bool
workq_creator_should_yield(struct workqueue *wq, struct uthread *uth)
{
	thread_qos_t qos = workq_pri_override(uth->uu_workq_pri);

	if (qos >= THREAD_QOS_USER_INTERACTIVE) {
		return false;
	}

	uint32_t snapshot = uth->uu_save.uus_workq_park_data.fulfilled_snapshot;
	if (wq->wq_fulfilled == snapshot) {
		return false;
	}

	uint32_t cnt = 0, conc = wq_max_parallelism[_wq_bucket(qos)];
	if (wq->wq_fulfilled - snapshot > conc) {
		/* we fulfilled more than NCPU requests since being dispatched */
		WQ_TRACE_WQ(TRACE_wq_creator_yield, wq, 1,
				wq->wq_fulfilled, snapshot, 0);
		return true;
	}

	for (int i = _wq_bucket(qos); i < WORKQ_NUM_QOS_BUCKETS; i++) {
		cnt += wq->wq_thscheduled_count[i];
	}
	if (conc <= cnt) {
		/* We fulfilled requests and have more than NCPU scheduled threads */
		WQ_TRACE_WQ(TRACE_wq_creator_yield, wq, 2,
				wq->wq_fulfilled, snapshot, 0);
		return true;
	}

	return false;
}
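/*
 * Illustrative numbers for the yield heuristic above: with
 * wq_max_parallelism[_wq_bucket(qos)] == 8, the creator yields either when it
 * has fulfilled more than 8 requests since it was dispatched, or when the
 * buckets summed by the loop above already hold 8 or more scheduled threads.
 * In both cases the pool is keeping up with demand, so bouncing the creator
 * out to userspace would only add context switches.
 */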
/*
 * parked thread wakes up
 */
__attribute__((noreturn, noinline))
static void
workq_unpark_continue(void *parameter __unused, wait_result_t wr __unused)
{
	struct uthread *uth = current_uthread();
	proc_t p = current_proc();
	struct workqueue *wq = proc_get_wqptr_fast(p);

	workq_lock_spin(wq);

	if (wq->wq_creator == uth && workq_creator_should_yield(wq, uth)) {
		/*
		 * If the number of threads we have out are able to keep up with the
		 * demand, then we should avoid sending this creator thread to
		 * userspace.
		 */
		uth->uu_save.uus_workq_park_data.fulfilled_snapshot = wq->wq_fulfilled;
		uth->uu_save.uus_workq_park_data.yields++;
		workq_unlock(wq);
		thread_yield_with_continuation(workq_unpark_continue, NULL);
		__builtin_unreachable();
	}

	if (__probable(uth->uu_workq_flags & UT_WORKQ_RUNNING)) {
		workq_select_threadreq_or_park_and_unlock(p, wq, uth);
		__builtin_unreachable();
	}

	if (__probable(wr == THREAD_AWAKENED)) {
		/*
		 * We were set running, but for the purposes of dying.
		 */
		assert(uth->uu_workq_flags & UT_WORKQ_DYING);
		assert((uth->uu_workq_flags & UT_WORKQ_NEW) == 0);
	} else {
		/*
		 * workaround for <rdar://problem/38647347>,
		 * in case we do hit userspace, make sure calling
		 * workq_thread_terminate() does the right thing here,
		 * and if we never call it, that workq_exit() will too because it sees
		 * this thread on the runlist.
		 */
		assert(wr == THREAD_INTERRUPTED);
		wq->wq_thdying_count++;
		uth->uu_workq_flags |= UT_WORKQ_DYING;
	}

	workq_unpark_for_death_and_unlock(p, wq, uth,
			WORKQ_UNPARK_FOR_DEATH_WAS_IDLE);
	__builtin_unreachable();
}
__attribute__((noreturn, noinline))
static void
workq_setup_and_run(proc_t p, struct uthread *uth, int setup_flags)
{
	thread_t th = uth->uu_thread;
	vm_map_t vmap = get_task_map(p->task);

	if (setup_flags & WQ_SETUP_CLEAR_VOUCHER) {
		/*
		 * For preemption reasons, we want to reset the voucher as late as
		 * possible, so we do it in two places:
		 *   - Just before parking (i.e. in workq_park_and_unlock())
		 *   - Prior to doing the setup for the next workitem (i.e. here)
		 *
		 * Those two places are sufficient to ensure we always reset it before
		 * it goes back out to user space, but be careful to not break that
		 * guarantee.
		 */
		__assert_only kern_return_t kr;
		kr = thread_set_voucher_name(MACH_PORT_NULL);
		assert(kr == KERN_SUCCESS);
	}

	uint32_t upcall_flags = uth->uu_save.uus_workq_park_data.upcall_flags;
	if (!(setup_flags & WQ_SETUP_FIRST_USE)) {
		upcall_flags |= WQ_FLAG_THREAD_REUSE;
	}

	if (uth->uu_workq_flags & UT_WORKQ_OUTSIDE_QOS) {
		/*
		 * For threads that have an outside-of-QoS thread priority, indicate
		 * to userspace that setting QoS should only affect the TSD and not
		 * change QOS in the kernel.
		 */
		upcall_flags |= WQ_FLAG_THREAD_OUTSIDEQOS;
	} else {
		/*
		 * Put the QoS class value into the lower bits of the reuse_thread
		 * register, this is where the thread priority used to be stored
		 * anyway.
		 */
		upcall_flags |= uth->uu_save.uus_workq_park_data.qos |
				WQ_FLAG_THREAD_PRIO_QOS;
	}

	if (uth->uu_workq_thport == MACH_PORT_NULL) {
		/* convert_thread_to_port() consumes a reference */
		thread_reference(th);
		ipc_port_t port = convert_thread_to_port(th);
		uth->uu_workq_thport = ipc_port_copyout_send(port, get_task_ipcspace(p->task));
	}

	/*
	 * Call out to pthread, this sets up the thread, pulls in kevent structs
	 * onto the stack, sets up the thread state and then returns to userspace.
	 */
	WQ_TRACE_WQ(TRACE_wq_runthread | DBG_FUNC_START,
			proc_get_wqptr_fast(p), 0, 0, 0, 0);
	thread_sched_call(th, workq_sched_callback);
	pthread_functions->workq_setup_thread(p, th, vmap, uth->uu_workq_stackaddr,
			uth->uu_workq_thport, 0, setup_flags, upcall_flags);

	__builtin_unreachable();
}
int
fill_procworkqueue(proc_t p, struct proc_workqueueinfo * pwqinfo)
{
	struct workqueue *wq = proc_get_wqptr(p);
	int error = 0;
	int activecount;

	if (wq == NULL) {
		return EINVAL;
	}

	/*
	 * This is sometimes called from interrupt context by the kperf sampler.
	 * In that case, it's not safe to spin trying to take the lock since we
	 * might already hold it.  So, we just try-lock it and error out if it's
	 * already held.  Since this is just a debugging aid, and all our callers
	 * are able to handle an error, that's fine.
	 */
	bool locked = workq_lock_try(wq);
	if (!locked) {
		return EBUSY;
	}

	wq_thactive_t act = _wq_thactive(wq);
	activecount = _wq_thactive_aggregate_downto_qos(wq, act,
			WORKQ_THREAD_QOS_MIN, NULL, NULL);
	if (act & _wq_thactive_offset_for_qos(WORKQ_THREAD_QOS_MANAGER)) {
		activecount++;
	}
	pwqinfo->pwq_nthreads = wq->wq_nthreads;
	pwqinfo->pwq_runthreads = activecount;
	pwqinfo->pwq_blockedthreads = wq->wq_threads_scheduled - activecount;
	pwqinfo->pwq_state = 0;

	if (wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) {
		pwqinfo->pwq_state |= WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
	}

	if (wq->wq_nthreads >= wq_max_threads) {
		pwqinfo->pwq_state |= WQ_EXCEEDED_TOTAL_THREAD_LIMIT;
	}

	workq_unlock(wq);
	return error;
}
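/*
 * Illustrative userspace-side sketch of how this data is consumed: the
 * PROC_PIDWORKQUEUEINFO flavor of proc_pidinfo() from libproc returns a
 * struct proc_workqueueinfo populated by fill_procworkqueue() above.  The
 * exact call sequence is an assumption for illustration; kept under #if 0
 * since it is user code.
 */
#if 0
#include <libproc.h>
#include <stdio.h>
#include <sys/proc_info.h>

static void
print_workqueue_info(pid_t pid)
{
	struct proc_workqueueinfo wqinfo;
	int ret = proc_pidinfo(pid, PROC_PIDWORKQUEUEINFO, 0,
			&wqinfo, sizeof(wqinfo));
	if (ret == sizeof(wqinfo)) {
		printf("threads=%u running=%u blocked=%u state=0x%x\n",
				wqinfo.pwq_nthreads, wqinfo.pwq_runthreads,
				wqinfo.pwq_blockedthreads, wqinfo.pwq_state);
	}
}
#endif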
boolean_t
workqueue_get_pwq_exceeded(void *v, boolean_t *exceeded_total,
		boolean_t *exceeded_constrained)
{
	proc_t p = v;
	struct proc_workqueueinfo pwqinfo;
	int err;

	assert(p != NULL);
	assert(exceeded_total != NULL);
	assert(exceeded_constrained != NULL);

	err = fill_procworkqueue(p, &pwqinfo);
	if (err) {
		return FALSE;
	}
	if (!(pwqinfo.pwq_state & WQ_FLAGS_AVAILABLE)) {
		return FALSE;
	}

	*exceeded_total = (pwqinfo.pwq_state & WQ_EXCEEDED_TOTAL_THREAD_LIMIT);
	*exceeded_constrained = (pwqinfo.pwq_state & WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT);

	return TRUE;
}
uint32_t
workqueue_get_pwq_state_kdp(void * v)
{
	static_assert((WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT << 17) ==
			kTaskWqExceededConstrainedThreadLimit);
	static_assert((WQ_EXCEEDED_TOTAL_THREAD_LIMIT << 17) ==
			kTaskWqExceededTotalThreadLimit);
	static_assert((WQ_FLAGS_AVAILABLE << 17) == kTaskWqFlagsAvailable);
	static_assert((WQ_FLAGS_AVAILABLE | WQ_EXCEEDED_TOTAL_THREAD_LIMIT |
			WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT) == 0x7);

	if (v == NULL) {
		return 0;
	}

	proc_t p = v;
	struct workqueue *wq = proc_get_wqptr(p);

	if (wq == NULL || workq_lock_spin_is_acquired_kdp(wq)) {
		return 0;
	}

	uint32_t pwq_state = WQ_FLAGS_AVAILABLE;

	if (wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) {
		pwq_state |= WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
	}

	if (wq->wq_nthreads >= wq_max_threads) {
		pwq_state |= WQ_EXCEEDED_TOTAL_THREAD_LIMIT;
	}

	return pwq_state;
}
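/*
 * Note on the static_asserts at the top of workqueue_get_pwq_state_kdp():
 * the three WQ_* bits OR to 0x7 (they occupy the low three bits), and shifting
 * them left by 17 must land exactly on the corresponding kTaskWq* flags that
 * stackshot/kdp report, so the kernel can translate the state with a single
 * shift instead of testing each bit individually.
 */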
void
workq_init(void)
{
	workq_lck_grp_attr = lck_grp_attr_alloc_init();
	workq_lck_attr = lck_attr_alloc_init();
	workq_lck_grp = lck_grp_alloc_init("workq", workq_lck_grp_attr);

	workq_zone_workqueue = zinit(sizeof(struct workqueue),
			1024 * sizeof(struct workqueue), 8192, "workq.wq");
	workq_zone_threadreq = zinit(sizeof(struct workq_threadreq_s),
			1024 * sizeof(struct workq_threadreq_s), 8192, "workq.threadreq");

	clock_interval_to_absolutetime_interval(wq_stalled_window.usecs,
			NSEC_PER_USEC, &wq_stalled_window.abstime);
	clock_interval_to_absolutetime_interval(wq_reduce_pool_window.usecs,
			NSEC_PER_USEC, &wq_reduce_pool_window.abstime);
	clock_interval_to_absolutetime_interval(wq_max_timer_interval.usecs,
			NSEC_PER_USEC, &wq_max_timer_interval.abstime);