osfmk/kern/thread_policy.c

   1 /*
   2  * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28
  29 #include <mach/mach_types.h>
  30 #include <mach/thread_act_server.h>
  31
  32 #include <kern/kern_types.h>
  33 #include <kern/processor.h>
  34 #include <kern/thread.h>
  35 #include <kern/affinity.h>
  36 #include <mach/task_policy.h>
  37 #include <kern/sfi.h>
  38 #include <kern/policy_internal.h>
  39 #include <sys/errno.h>
  40 #include <sys/ulock.h>
  41
  42 #include <mach/machine/sdt.h>
  43
  44 #ifdef MACH_BSD
  45 extern int      proc_selfpid(void);
  46 extern char *   proc_name_address(void *p);
  47 extern void     rethrottle_thread(void * uthread);
  48 #endif /* MACH_BSD */
  49
  50 #define QOS_EXTRACT(q)        ((q) & 0xff)
  51
  52 uint32_t qos_override_mode;
  53 #define QOS_OVERRIDE_MODE_OVERHANG_PEAK 0
  54 #define QOS_OVERRIDE_MODE_IGNORE_OVERRIDE 1
  55 #define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE 2
  56 #define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE 3
  57
  58 extern zone_t thread_qos_override_zone;
  59
  60 static void
  61 proc_thread_qos_remove_override_internal(thread_t thread, user_addr_t resource, int resource_type, boolean_t reset);
  62
  63 /*
  64  * THREAD_QOS_UNSPECIFIED is assigned the highest tier available, so it does not provide a limit
  65  * to threads that don't have a QoS class set.
  66  */
  67 const qos_policy_params_t thread_qos_policy_params = {
  68         /*
  69          * This table defines the starting base priority of the thread,
  70          * which will be modified by the thread importance and the task max priority
  71          * before being applied.
  72          */
  73         .qos_pri[THREAD_QOS_UNSPECIFIED]                = 0, /* not consulted */
  74         .qos_pri[THREAD_QOS_USER_INTERACTIVE]           = BASEPRI_BACKGROUND, /* i.e. 46 */
  75         .qos_pri[THREAD_QOS_USER_INITIATED]             = BASEPRI_USER_INITIATED,
  76         .qos_pri[THREAD_QOS_LEGACY]                     = BASEPRI_DEFAULT,
  77         .qos_pri[THREAD_QOS_UTILITY]                    = BASEPRI_UTILITY,
  78         .qos_pri[THREAD_QOS_BACKGROUND]                 = MAXPRI_THROTTLE,
  79         .qos_pri[THREAD_QOS_MAINTENANCE]                = MAXPRI_THROTTLE,
  80
  81         /*
  82          * This table defines the highest IO priority that a thread marked with this
  83          * QoS class can have.
  84          */
  85         .qos_iotier[THREAD_QOS_UNSPECIFIED]             = THROTTLE_LEVEL_TIER0,
  86         .qos_iotier[THREAD_QOS_USER_INTERACTIVE]        = THROTTLE_LEVEL_TIER0,
  87         .qos_iotier[THREAD_QOS_USER_INITIATED]          = THROTTLE_LEVEL_TIER0,
  88         .qos_iotier[THREAD_QOS_LEGACY]                  = THROTTLE_LEVEL_TIER0,
  89         .qos_iotier[THREAD_QOS_UTILITY]                 = THROTTLE_LEVEL_TIER1,
  90         .qos_iotier[THREAD_QOS_BACKGROUND]              = THROTTLE_LEVEL_TIER2, /* possibly overridden by bg_iotier */
  91         .qos_iotier[THREAD_QOS_MAINTENANCE]             = THROTTLE_LEVEL_TIER3,
  92
  93         /*
  94          * This table defines the highest QoS level that
  95          * a thread marked with this QoS class can have.
  96          */
  97
  98         .qos_through_qos[THREAD_QOS_UNSPECIFIED]        = QOS_EXTRACT(THROUGHPUT_QOS_TIER_UNSPECIFIED),
  99         .qos_through_qos[THREAD_QOS_USER_INTERACTIVE]   = QOS_EXTRACT(THROUGHPUT_QOS_TIER_0),
 100         .qos_through_qos[THREAD_QOS_USER_INITIATED]     = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
 101         .qos_through_qos[THREAD_QOS_LEGACY]             = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
 102         .qos_through_qos[THREAD_QOS_UTILITY]            = QOS_EXTRACT(THROUGHPUT_QOS_TIER_2),
 103         .qos_through_qos[THREAD_QOS_BACKGROUND]         = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
 104         .qos_through_qos[THREAD_QOS_MAINTENANCE]        = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
 105
 106         .qos_latency_qos[THREAD_QOS_UNSPECIFIED]        = QOS_EXTRACT(LATENCY_QOS_TIER_UNSPECIFIED),
 107         .qos_latency_qos[THREAD_QOS_USER_INTERACTIVE]   = QOS_EXTRACT(LATENCY_QOS_TIER_0),
 108         .qos_latency_qos[THREAD_QOS_USER_INITIATED]     = QOS_EXTRACT(LATENCY_QOS_TIER_1),
 109         .qos_latency_qos[THREAD_QOS_LEGACY]             = QOS_EXTRACT(LATENCY_QOS_TIER_1),
 110         .qos_latency_qos[THREAD_QOS_UTILITY]            = QOS_EXTRACT(LATENCY_QOS_TIER_3),
 111         .qos_latency_qos[THREAD_QOS_BACKGROUND]         = QOS_EXTRACT(LATENCY_QOS_TIER_3),
 112         .qos_latency_qos[THREAD_QOS_MAINTENANCE]        = QOS_EXTRACT(LATENCY_QOS_TIER_3),
 113 };
 114
 115 static void
 116 thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode);
 117
 118 static int
 119 thread_qos_scaled_relative_priority(int qos, int qos_relprio);
 120
 121 static void
 122 proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info);
 123
 124 static void
 125 proc_set_thread_policy_locked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
 126
 127 static void
 128 proc_set_thread_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
 129
 130 static void
 131 thread_set_requested_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
 132
 133 static int
 134 thread_get_requested_policy_spinlocked(thread_t thread, int category, int flavor, int* value2);
 135
 136 static int
 137 proc_get_thread_policy_locked(thread_t thread, int category, int flavor, int* value2);
 138
 139 static void
 140 thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token);
 141
 142 static void
 143 thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token);
 144
 145 void
 146 thread_policy_init(void)
 147 {
 148         if (PE_parse_boot_argn("qos_override_mode", &qos_override_mode, sizeof(qos_override_mode))) {
 149                 printf("QOS override mode: 0x%08x\n", qos_override_mode);
 150         } else {
 151                 qos_override_mode = QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE;
 152         }
 153 }
 154
 155 boolean_t
 156 thread_has_qos_policy(thread_t thread)
 157 {
 158         return (proc_get_thread_policy(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS) != THREAD_QOS_UNSPECIFIED) ? TRUE : FALSE;
 159 }
 160
 161
 162 static void
 163 thread_remove_qos_policy_locked(thread_t thread,
 164     task_pend_token_t pend_token)
 165 {
 166         __unused int prev_qos = thread->requested_policy.thrp_qos;
 167
 168         DTRACE_PROC2(qos__remove, thread_t, thread, int, prev_qos);
 169
 170         proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
 171             THREAD_QOS_UNSPECIFIED, 0, pend_token);
 172 }
 173
 174 kern_return_t
 175 thread_remove_qos_policy(thread_t thread)
 176 {
 177         struct task_pend_token pend_token = {};
 178
 179         thread_mtx_lock(thread);
 180         if (!thread->active) {
 181                 thread_mtx_unlock(thread);
 182                 return KERN_TERMINATED;
 183         }
 184
 185         thread_remove_qos_policy_locked(thread, &pend_token);
 186
 187         thread_mtx_unlock(thread);
 188
 189         thread_policy_update_complete_unlocked(thread, &pend_token);
 190
 191         return KERN_SUCCESS;
 192 }
 193
 194
 195 boolean_t
 196 thread_is_static_param(thread_t thread)
 197 {
 198         if (thread->static_param) {
 199                 DTRACE_PROC1(qos__legacy__denied, thread_t, thread);
 200                 return TRUE;
 201         }
 202         return FALSE;
 203 }
 204
 205 /*
 206  * Relative priorities can range between 0REL and -15REL. These
 207  * map to QoS-specific ranges, to create non-overlapping priority
 208  * ranges.
 209  */
 210 static int
 211 thread_qos_scaled_relative_priority(int qos, int qos_relprio)
 212 {
 213         int next_lower_qos;
 214
 215         /* Fast path, since no validation or scaling is needed */
 216         if (qos_relprio == 0) {
 217                 return 0;
 218         }
 219
 220         switch (qos) {
 221         case THREAD_QOS_USER_INTERACTIVE:
 222                 next_lower_qos = THREAD_QOS_USER_INITIATED;
 223                 break;
 224         case THREAD_QOS_USER_INITIATED:
 225                 next_lower_qos = THREAD_QOS_LEGACY;
 226                 break;
 227         case THREAD_QOS_LEGACY:
 228                 next_lower_qos = THREAD_QOS_UTILITY;
 229                 break;
 230         case THREAD_QOS_UTILITY:
 231                 next_lower_qos = THREAD_QOS_BACKGROUND;
 232                 break;
 233         case THREAD_QOS_MAINTENANCE:
 234         case THREAD_QOS_BACKGROUND:
 235                 next_lower_qos = 0;
 236                 break;
 237         default:
 238                 panic("Unrecognized QoS %d", qos);
 239                 return 0;
 240         }
 241
 242         int prio_range_max = thread_qos_policy_params.qos_pri[qos];
 243         int prio_range_min = next_lower_qos ? thread_qos_policy_params.qos_pri[next_lower_qos] : 0;
 244
 245         /*
 246          * We now have the valid range that the scaled relative priority can map to. Note
 247          * that the lower bound is exclusive, but the upper bound is inclusive. If the
 248          * range is (21,31], 0REL should map to 31 and -15REL should map to 22. We use the
 249          * fact that the max relative priority is -15 and use ">>4" to divide by 16 and discard
 250          * remainder.
 251          */
 252         int scaled_relprio = -(((prio_range_max - prio_range_min) * (-qos_relprio)) >> 4);
 253
 254         return scaled_relprio;
 255 }
 256
 257 /*
 258  * flag set by -qos-policy-allow boot-arg to allow
 259  * testing thread qos policy from userspace
 260  */
 261 static TUNABLE(bool, allow_qos_policy_set, "-qos-policy-allow", false);
 262
 263 kern_return_t
 264 thread_policy_set(
 265         thread_t                                thread,
 266         thread_policy_flavor_t  flavor,
 267         thread_policy_t                 policy_info,
 268         mach_msg_type_number_t  count)
 269 {
 270         thread_qos_policy_data_t req_qos;
 271         kern_return_t kr;
 272
 273         req_qos.qos_tier = THREAD_QOS_UNSPECIFIED;
 274
 275         if (thread == THREAD_NULL) {
 276                 return KERN_INVALID_ARGUMENT;
 277         }
 278
 279         if (!allow_qos_policy_set) {
 280                 if (thread_is_static_param(thread)) {
 281                         return KERN_POLICY_STATIC;
 282                 }
 283
 284                 if (flavor == THREAD_QOS_POLICY) {
 285                         return KERN_INVALID_ARGUMENT;
 286                 }
 287         }
 288
 289         /* Threads without static_param set reset their QoS when other policies are applied. */
 290         if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
 291                 /* Store the existing tier, if we fail this call it is used to reset back. */
 292                 req_qos.qos_tier = thread->requested_policy.thrp_qos;
 293                 req_qos.tier_importance = thread->requested_policy.thrp_qos_relprio;
 294
 295                 kr = thread_remove_qos_policy(thread);
 296                 if (kr != KERN_SUCCESS) {
 297                         return kr;
 298                 }
 299         }
 300
 301         kr = thread_policy_set_internal(thread, flavor, policy_info, count);
 302
 303         /* Return KERN_QOS_REMOVED instead of KERN_SUCCESS if we succeeded. */
 304         if (req_qos.qos_tier != THREAD_QOS_UNSPECIFIED) {
 305                 if (kr != KERN_SUCCESS) {
 306                         /* Reset back to our original tier as the set failed. */
 307                         (void)thread_policy_set_internal(thread, THREAD_QOS_POLICY, (thread_policy_t)&req_qos, THREAD_QOS_POLICY_COUNT);
 308                 }
 309         }
 310
 311         return kr;
 312 }
 313
 314 kern_return_t
 315 thread_policy_set_internal(
 316         thread_t                     thread,
 317         thread_policy_flavor_t       flavor,
 318         thread_policy_t              policy_info,
 319         mach_msg_type_number_t       count)
 320 {
 321         kern_return_t result = KERN_SUCCESS;
 322         struct task_pend_token pend_token = {};
 323
 324         thread_mtx_lock(thread);
 325         if (!thread->active) {
 326                 thread_mtx_unlock(thread);
 327
 328                 return KERN_TERMINATED;
 329         }
 330
 331         switch (flavor) {
 332         case THREAD_EXTENDED_POLICY:
 333         {
 334                 boolean_t timeshare = TRUE;
 335
 336                 if (count >= THREAD_EXTENDED_POLICY_COUNT) {
 337                         thread_extended_policy_t info;
 338
 339                         info = (thread_extended_policy_t)policy_info;
 340                         timeshare = info->timeshare;
 341                 }
 342
 343                 sched_mode_t mode = (timeshare == TRUE) ? TH_MODE_TIMESHARE : TH_MODE_FIXED;
 344
 345                 spl_t s = splsched();
 346                 thread_lock(thread);
 347
 348                 thread_set_user_sched_mode_and_recompute_pri(thread, mode);
 349
 350                 thread_unlock(thread);
 351                 splx(s);
 352
 353                 pend_token.tpt_update_thread_sfi = 1;
 354
 355                 break;
 356         }
 357
 358         case THREAD_TIME_CONSTRAINT_POLICY:
 359         {
 360                 thread_time_constraint_policy_t info;
 361
 362                 if (count < THREAD_TIME_CONSTRAINT_POLICY_COUNT) {
 363                         result = KERN_INVALID_ARGUMENT;
 364                         break;
 365                 }
 366
 367                 info = (thread_time_constraint_policy_t)policy_info;
 368
 369
 370                 if (info->constraint < info->computation ||
 371                     info->computation > max_rt_quantum ||
 372                     info->computation < min_rt_quantum) {
 373                         result = KERN_INVALID_ARGUMENT;
 374                         break;
 375                 }
 376
 377                 spl_t s = splsched();
 378                 thread_lock(thread);
 379
 380                 thread->realtime.period         = info->period;
 381                 thread->realtime.computation    = info->computation;
 382                 thread->realtime.constraint     = info->constraint;
 383                 thread->realtime.preemptible    = info->preemptible;
 384
 385                 thread_set_user_sched_mode_and_recompute_pri(thread, TH_MODE_REALTIME);
 386
 387                 thread_unlock(thread);
 388                 splx(s);
 389
 390                 pend_token.tpt_update_thread_sfi = 1;
 391
 392                 break;
 393         }
 394
 395         case THREAD_PRECEDENCE_POLICY:
 396         {
 397                 thread_precedence_policy_t info;
 398
 399                 if (count < THREAD_PRECEDENCE_POLICY_COUNT) {
 400                         result = KERN_INVALID_ARGUMENT;
 401                         break;
 402                 }
 403                 info = (thread_precedence_policy_t)policy_info;
 404
 405                 spl_t s = splsched();
 406                 thread_lock(thread);
 407
 408                 thread->importance = info->importance;
 409
 410                 thread_recompute_priority(thread);
 411
 412                 thread_unlock(thread);
 413                 splx(s);
 414
 415                 break;
 416         }
 417
 418         case THREAD_AFFINITY_POLICY:
 419         {
 420                 thread_affinity_policy_t info;
 421
 422                 if (!thread_affinity_is_supported()) {
 423                         result = KERN_NOT_SUPPORTED;
 424                         break;
 425                 }
 426                 if (count < THREAD_AFFINITY_POLICY_COUNT) {
 427                         result = KERN_INVALID_ARGUMENT;
 428                         break;
 429                 }
 430
 431                 info = (thread_affinity_policy_t) policy_info;
 432                 /*
 433                  * Unlock the thread mutex here and
 434                  * return directly after calling thread_affinity_set().
 435                  * This is necessary for correct lock ordering because
 436                  * thread_affinity_set() takes the task lock.
 437                  */
 438                 thread_mtx_unlock(thread);
 439                 return thread_affinity_set(thread, info->affinity_tag);
 440         }
 441
 442 #if !defined(XNU_TARGET_OS_OSX)
 443         case THREAD_BACKGROUND_POLICY:
 444         {
 445                 thread_background_policy_t info;
 446
 447                 if (count < THREAD_BACKGROUND_POLICY_COUNT) {
 448                         result = KERN_INVALID_ARGUMENT;
 449                         break;
 450                 }
 451
 452                 if (thread->task != current_task()) {
 453                         result = KERN_PROTECTION_FAILURE;
 454                         break;
 455                 }
 456
 457                 info = (thread_background_policy_t) policy_info;
 458
 459                 int enable;
 460
 461                 if (info->priority == THREAD_BACKGROUND_POLICY_DARWIN_BG) {
 462                         enable = TASK_POLICY_ENABLE;
 463                 } else {
 464                         enable = TASK_POLICY_DISABLE;
 465                 }
 466
 467                 int category = (current_thread() == thread) ? TASK_POLICY_INTERNAL : TASK_POLICY_EXTERNAL;
 468
 469                 proc_set_thread_policy_locked(thread, category, TASK_POLICY_DARWIN_BG, enable, 0, &pend_token);
 470
 471                 break;
 472         }
 473 #endif /* !defined(XNU_TARGET_OS_OSX) */
 474
 475         case THREAD_THROUGHPUT_QOS_POLICY:
 476         {
 477                 thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
 478                 thread_throughput_qos_t tqos;
 479
 480                 if (count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
 481                         result = KERN_INVALID_ARGUMENT;
 482                         break;
 483                 }
 484
 485                 if ((result = qos_throughput_policy_validate(info->thread_throughput_qos_tier)) != KERN_SUCCESS) {
 486                         break;
 487                 }
 488
 489                 tqos = qos_extract(info->thread_throughput_qos_tier);
 490
 491                 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
 492                     TASK_POLICY_THROUGH_QOS, tqos, 0, &pend_token);
 493
 494                 break;
 495         }
 496
 497         case THREAD_LATENCY_QOS_POLICY:
 498         {
 499                 thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
 500                 thread_latency_qos_t lqos;
 501
 502                 if (count < THREAD_LATENCY_QOS_POLICY_COUNT) {
 503                         result = KERN_INVALID_ARGUMENT;
 504                         break;
 505                 }
 506
 507                 if ((result = qos_latency_policy_validate(info->thread_latency_qos_tier)) != KERN_SUCCESS) {
 508                         break;
 509                 }
 510
 511                 lqos = qos_extract(info->thread_latency_qos_tier);
 512
 513                 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
 514                     TASK_POLICY_LATENCY_QOS, lqos, 0, &pend_token);
 515
 516                 break;
 517         }
 518
 519         case THREAD_QOS_POLICY:
 520         {
 521                 thread_qos_policy_t info = (thread_qos_policy_t)policy_info;
 522
 523                 if (count < THREAD_QOS_POLICY_COUNT) {
 524                         result = KERN_INVALID_ARGUMENT;
 525                         break;
 526                 }
 527
 528                 if (info->qos_tier < 0 || info->qos_tier >= THREAD_QOS_LAST) {
 529                         result = KERN_INVALID_ARGUMENT;
 530                         break;
 531                 }
 532
 533                 if (info->tier_importance > 0 || info->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
 534                         result = KERN_INVALID_ARGUMENT;
 535                         break;
 536                 }
 537
 538                 if (info->qos_tier == THREAD_QOS_UNSPECIFIED && info->tier_importance != 0) {
 539                         result = KERN_INVALID_ARGUMENT;
 540                         break;
 541                 }
 542
 543                 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
 544                     info->qos_tier, -info->tier_importance, &pend_token);
 545
 546                 break;
 547         }
 548
 549         default:
 550                 result = KERN_INVALID_ARGUMENT;
 551                 break;
 552         }
 553
 554         thread_mtx_unlock(thread);
 555
 556         thread_policy_update_complete_unlocked(thread, &pend_token);
 557
 558         return result;
 559 }
 560
 561 /*
 562  * Note that there is no implemented difference between POLICY_RR and POLICY_FIFO.
 563  * Both result in FIXED mode scheduling.
 564  */
 565 static sched_mode_t
 566 convert_policy_to_sched_mode(integer_t policy)
 567 {
 568         switch (policy) {
 569         case POLICY_TIMESHARE:
 570                 return TH_MODE_TIMESHARE;
 571         case POLICY_RR:
 572         case POLICY_FIFO:
 573                 return TH_MODE_FIXED;
 574         default:
 575                 panic("unexpected sched policy: %d", policy);
 576                 return TH_MODE_NONE;
 577         }
 578 }
 579
 580 /*
 581  * Called either with the thread mutex locked
 582  * or from the pthread kext in a 'safe place'.
 583  */
 584 static kern_return_t
 585 thread_set_mode_and_absolute_pri_internal(thread_t              thread,
 586     sched_mode_t          mode,
 587     integer_t             priority,
 588     task_pend_token_t     pend_token)
 589 {
 590         kern_return_t kr = KERN_SUCCESS;
 591
 592         spl_t s = splsched();
 593         thread_lock(thread);
 594
 595         /* This path isn't allowed to change a thread out of realtime. */
 596         if ((thread->sched_mode == TH_MODE_REALTIME) ||
 597             (thread->saved_mode == TH_MODE_REALTIME)) {
 598                 kr = KERN_FAILURE;
 599                 goto unlock;
 600         }
 601
 602         if (thread->policy_reset) {
 603                 kr = KERN_SUCCESS;
 604                 goto unlock;
 605         }
 606
 607         sched_mode_t old_mode = thread->sched_mode;
 608
 609         /*
 610          * Reverse engineer and apply the correct importance value
 611          * from the requested absolute priority value.
 612          *
 613          * TODO: Store the absolute priority value instead
 614          */
 615
 616         if (priority >= thread->max_priority) {
 617                 priority = thread->max_priority - thread->task_priority;
 618         } else if (priority >= MINPRI_KERNEL) {
 619                 priority -=  MINPRI_KERNEL;
 620         } else if (priority >= MINPRI_RESERVED) {
 621                 priority -=  MINPRI_RESERVED;
 622         } else {
 623                 priority -= BASEPRI_DEFAULT;
 624         }
 625
 626         priority += thread->task_priority;
 627
 628         if (priority > thread->max_priority) {
 629                 priority = thread->max_priority;
 630         } else if (priority < MINPRI) {
 631                 priority = MINPRI;
 632         }
 633
 634         thread->importance = priority - thread->task_priority;
 635
 636         thread_set_user_sched_mode_and_recompute_pri(thread, mode);
 637
 638         if (mode != old_mode) {
 639                 pend_token->tpt_update_thread_sfi = 1;
 640         }
 641
 642 unlock:
 643         thread_unlock(thread);
 644         splx(s);
 645
 646         return kr;
 647 }
 648
 649 void
 650 thread_freeze_base_pri(thread_t thread)
 651 {
 652         assert(thread == current_thread());
 653
 654         spl_t s = splsched();
 655         thread_lock(thread);
 656
 657         assert((thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN) == 0);
 658         thread->sched_flags |= TH_SFLAG_BASE_PRI_FROZEN;
 659
 660         thread_unlock(thread);
 661         splx(s);
 662 }
 663
 664 bool
 665 thread_unfreeze_base_pri(thread_t thread)
 666 {
 667         assert(thread == current_thread());
 668         integer_t base_pri;
 669         ast_t ast = 0;
 670
 671         spl_t s = splsched();
 672         thread_lock(thread);
 673
 674         assert(thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN);
 675         thread->sched_flags &= ~TH_SFLAG_BASE_PRI_FROZEN;
 676
 677         base_pri = thread->req_base_pri;
 678         if (base_pri != thread->base_pri) {
 679                 /*
 680                  * This function returns "true" if the base pri change
 681                  * is the most likely cause for the preemption.
 682                  */
 683                 sched_set_thread_base_priority(thread, base_pri);
 684                 ast = ast_peek(AST_PREEMPT);
 685         }
 686
 687         thread_unlock(thread);
 688         splx(s);
 689
 690         return ast != 0;
 691 }
 692
 693 uint8_t
 694 thread_workq_pri_for_qos(thread_qos_t qos)
 695 {
 696         assert(qos < THREAD_QOS_LAST);
 697         return (uint8_t)thread_qos_policy_params.qos_pri[qos];
 698 }
 699
 700 thread_qos_t
 701 thread_workq_qos_for_pri(int priority)
 702 {
 703         thread_qos_t qos;
 704         if (priority > thread_qos_policy_params.qos_pri[THREAD_QOS_USER_INTERACTIVE]) {
 705                 // indicate that workq should map >UI threads to workq's
 706                 // internal notation for above-UI work.
 707                 return THREAD_QOS_UNSPECIFIED;
 708         }
 709         for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
 710                 // map a given priority up to the next nearest qos band.
 711                 if (thread_qos_policy_params.qos_pri[qos - 1] < priority) {
 712                         return qos;
 713                 }
 714         }
 715         return THREAD_QOS_MAINTENANCE;
 716 }
 717
 718 /*
 719  * private interface for pthread workqueues
 720  *
 721  * Set scheduling policy & absolute priority for thread
 722  * May be called with spinlocks held
 723  * Thread mutex lock is not held
 724  */
 725 void
 726 thread_reset_workq_qos(thread_t thread, uint32_t qos)
 727 {
 728         struct task_pend_token pend_token = {};
 729
 730         assert(qos < THREAD_QOS_LAST);
 731
 732         spl_t s = splsched();
 733         thread_lock(thread);
 734
 735         proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
 736             TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
 737         proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
 738             TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED, 0,
 739             &pend_token);
 740
 741         assert(pend_token.tpt_update_sockets == 0);
 742
 743         thread_unlock(thread);
 744         splx(s);
 745
 746         thread_policy_update_complete_unlocked(thread, &pend_token);
 747 }
 748
 749 /*
 750  * private interface for pthread workqueues
 751  *
 752  * Set scheduling policy & absolute priority for thread
 753  * May be called with spinlocks held
 754  * Thread mutex lock is held
 755  */
 756 void
 757 thread_set_workq_override(thread_t thread, uint32_t qos)
 758 {
 759         struct task_pend_token pend_token = {};
 760
 761         assert(qos < THREAD_QOS_LAST);
 762
 763         spl_t s = splsched();
 764         thread_lock(thread);
 765
 766         proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
 767             TASK_POLICY_QOS_WORKQ_OVERRIDE, qos, 0, &pend_token);
 768
 769         assert(pend_token.tpt_update_sockets == 0);
 770
 771         thread_unlock(thread);
 772         splx(s);
 773
 774         thread_policy_update_complete_unlocked(thread, &pend_token);
 775 }
 776
 777 /*
 778  * private interface for pthread workqueues
 779  *
 780  * Set scheduling policy & absolute priority for thread
 781  * May be called with spinlocks held
 782  * Thread mutex lock is not held
 783  */
 784 void
 785 thread_set_workq_pri(thread_t  thread,
 786     thread_qos_t qos,
 787     integer_t priority,
 788     integer_t policy)
 789 {
 790         struct task_pend_token pend_token = {};
 791         sched_mode_t mode = convert_policy_to_sched_mode(policy);
 792
 793         assert(qos < THREAD_QOS_LAST);
 794         assert(thread->static_param);
 795
 796         if (!thread->static_param || !thread->active) {
 797                 return;
 798         }
 799
 800         spl_t s = splsched();
 801         thread_lock(thread);
 802
 803         proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
 804             TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
 805         proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
 806             TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED,
 807             0, &pend_token);
 808
 809         thread_unlock(thread);
 810         splx(s);
 811
 812         /* Concern: this doesn't hold the mutex... */
 813
 814         __assert_only kern_return_t kr;
 815         kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority,
 816             &pend_token);
 817         assert(kr == KERN_SUCCESS);
 818
 819         if (pend_token.tpt_update_thread_sfi) {
 820                 sfi_reevaluate(thread);
 821         }
 822 }
 823
 824 /*
 825  * thread_set_mode_and_absolute_pri:
 826  *
 827  * Set scheduling policy & absolute priority for thread, for deprecated
 828  * thread_set_policy and thread_policy interfaces.
 829  *
 830  * Called with nothing locked.
 831  */
 832 kern_return_t
 833 thread_set_mode_and_absolute_pri(thread_t   thread,
 834     integer_t  policy,
 835     integer_t  priority)
 836 {
 837         kern_return_t kr = KERN_SUCCESS;
 838         struct task_pend_token pend_token = {};
 839
 840         sched_mode_t mode = convert_policy_to_sched_mode(policy);
 841
 842         thread_mtx_lock(thread);
 843
 844         if (!thread->active) {
 845                 kr = KERN_TERMINATED;
 846                 goto unlock;
 847         }
 848
 849         if (thread_is_static_param(thread)) {
 850                 kr = KERN_POLICY_STATIC;
 851                 goto unlock;
 852         }
 853
 854         /* Setting legacy policies on threads kills the current QoS */
 855         if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
 856                 thread_remove_qos_policy_locked(thread, &pend_token);
 857         }
 858
 859         kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority, &pend_token);
 860
 861 unlock:
 862         thread_mtx_unlock(thread);
 863
 864         thread_policy_update_complete_unlocked(thread, &pend_token);
 865
 866         return kr;
 867 }
 868
 869 /*
 870  * Set the thread's requested mode and recompute priority
 871  * Called with thread mutex and thread locked
 872  *
 873  * TODO: Mitigate potential problems caused by moving thread to end of runq
 874  * whenever its priority is recomputed
 875  *      Only remove when it actually changes? Attempt to re-insert at appropriate location?
 876  */
 877 static void
 878 thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode)
 879 {
 880         if (thread->policy_reset) {
 881                 return;
 882         }
 883
 884         boolean_t removed = thread_run_queue_remove(thread);
 885
 886         /*
 887          * TODO: Instead of having saved mode, have 'user mode' and 'true mode'.
 888          * That way there's zero confusion over which the user wants
 889          * and which the kernel wants.
 890          */
 891         if (thread->sched_flags & TH_SFLAG_DEMOTED_MASK) {
 892                 thread->saved_mode = mode;
 893         } else {
 894                 sched_set_thread_mode(thread, mode);
 895         }
 896
 897         thread_recompute_priority(thread);
 898
 899         if (removed) {
 900                 thread_run_queue_reinsert(thread, SCHED_TAILQ);
 901         }
 902 }
 903
 904 /* called at splsched with thread lock locked */
 905 static void
 906 thread_update_qos_cpu_time_locked(thread_t thread)
 907 {
 908         task_t task = thread->task;
 909         uint64_t timer_sum, timer_delta;
 910
 911         /*
 912          * This is only as accurate as the distance between
 913          * last context switch (embedded) or last user/kernel boundary transition (desktop)
 914          * because user_timer and system_timer are only updated then.
 915          *
 916          * TODO: Consider running a timer_update operation here to update it first.
 917          *       Maybe doable with interrupts disabled from current thread.
 918          *       If the thread is on a different core, may not be easy to get right.
 919          *
 920          * TODO: There should be a function for this in timer.c
 921          */
 922
 923         timer_sum = timer_grab(&thread->user_timer);
 924         timer_sum += timer_grab(&thread->system_timer);
 925         timer_delta = timer_sum - thread->vtimer_qos_save;
 926
 927         thread->vtimer_qos_save = timer_sum;
 928
 929         uint64_t* task_counter = NULL;
 930
 931         /* Update the task-level effective and requested qos stats atomically, because we don't have the task lock. */
 932         switch (thread->effective_policy.thep_qos) {
 933         case THREAD_QOS_UNSPECIFIED:        task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_default; break;
 934         case THREAD_QOS_MAINTENANCE:        task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_maintenance; break;
 935         case THREAD_QOS_BACKGROUND:         task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_background; break;
 936         case THREAD_QOS_UTILITY:            task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_utility; break;
 937         case THREAD_QOS_LEGACY:             task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_legacy; break;
 938         case THREAD_QOS_USER_INITIATED:     task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_initiated; break;
 939         case THREAD_QOS_USER_INTERACTIVE:   task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_interactive; break;
 940         default:
 941                 panic("unknown effective QoS: %d", thread->effective_policy.thep_qos);
 942         }
 943
 944         OSAddAtomic64(timer_delta, task_counter);
 945
 946         /* Update the task-level qos stats atomically, because we don't have the task lock. */
 947         switch (thread->requested_policy.thrp_qos) {
 948         case THREAD_QOS_UNSPECIFIED:        task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_default; break;
 949         case THREAD_QOS_MAINTENANCE:        task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_maintenance; break;
 950         case THREAD_QOS_BACKGROUND:         task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_background; break;
 951         case THREAD_QOS_UTILITY:            task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_utility; break;
 952         case THREAD_QOS_LEGACY:             task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_legacy; break;
 953         case THREAD_QOS_USER_INITIATED:     task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_initiated; break;
 954         case THREAD_QOS_USER_INTERACTIVE:   task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_interactive; break;
 955         default:
 956                 panic("unknown requested QoS: %d", thread->requested_policy.thrp_qos);
 957         }
 958
 959         OSAddAtomic64(timer_delta, task_counter);
 960 }
 961
 962 /*
 963  * called with no thread locks held
 964  * may hold task lock
 965  */
 966 void
 967 thread_update_qos_cpu_time(thread_t thread)
 968 {
 969         thread_mtx_lock(thread);
 970
 971         spl_t s = splsched();
 972         thread_lock(thread);
 973
 974         thread_update_qos_cpu_time_locked(thread);
 975
 976         thread_unlock(thread);
 977         splx(s);
 978
 979         thread_mtx_unlock(thread);
 980 }
 981
 982 /*
 983  * Calculate base priority from thread attributes, and set it on the thread
 984  *
 985  * Called with thread_lock and thread mutex held.
 986  */
 987 extern thread_t vm_pageout_scan_thread;
 988 extern boolean_t vps_dynamic_priority_enabled;
 989
 990 void
 991 thread_recompute_priority(
 992         thread_t                thread)
 993 {
 994         integer_t               priority;
 995
 996         if (thread->policy_reset) {
 997                 return;
 998         }
 999
1000         if (thread->sched_mode == TH_MODE_REALTIME) {
1001                 sched_set_thread_base_priority(thread, BASEPRI_RTQUEUES);
1002                 return;
1003         } else if (thread->effective_policy.thep_qos != THREAD_QOS_UNSPECIFIED) {
1004                 int qos = thread->effective_policy.thep_qos;
1005                 int qos_ui_is_urgent = thread->effective_policy.thep_qos_ui_is_urgent;
1006                 int qos_relprio = -(thread->effective_policy.thep_qos_relprio); /* stored in task policy inverted */
1007                 int qos_scaled_relprio;
1008
1009                 assert(qos >= 0 && qos < THREAD_QOS_LAST);
1010                 assert(qos_relprio <= 0 && qos_relprio >= THREAD_QOS_MIN_TIER_IMPORTANCE);
1011
1012                 priority = thread_qos_policy_params.qos_pri[qos];
1013                 qos_scaled_relprio = thread_qos_scaled_relative_priority(qos, qos_relprio);
1014
1015                 if (qos == THREAD_QOS_USER_INTERACTIVE && qos_ui_is_urgent == 1) {
1016                         /* Bump priority 46 to 47 when in a frontmost app */
1017                         qos_scaled_relprio += 1;
1018                 }
1019
1020                 /* TODO: factor in renice priority here? */
1021
1022                 priority += qos_scaled_relprio;
1023         } else {
1024                 if (thread->importance > MAXPRI) {
1025                         priority = MAXPRI;
1026                 } else if (thread->importance < -MAXPRI) {
1027                         priority = -MAXPRI;
1028                 } else {
1029                         priority = thread->importance;
1030                 }
1031
1032                 priority += thread->task_priority;
1033         }
1034
1035         priority = MAX(priority, thread->user_promotion_basepri);
1036
1037         /*
1038          * Clamp priority back into the allowed range for this task.
1039          *  The initial priority value could be out of this range due to:
1040          *      Task clamped to BG or Utility (max-pri is 4, or 20)
1041          *      Task is user task (max-pri is 63)
1042          *      Task is kernel task (max-pri is 95)
1043          * Note that thread->importance is user-settable to any integer
1044          * via THREAD_PRECEDENCE_POLICY.
1045          */
1046         if (priority > thread->max_priority) {
1047                 if (thread->effective_policy.thep_promote_above_task) {
1048                         priority = MAX(thread->max_priority, thread->user_promotion_basepri);
1049                 } else {
1050                         priority = thread->max_priority;
1051                 }
1052         } else if (priority < MINPRI) {
1053                 priority = MINPRI;
1054         }
1055
1056         if (thread->saved_mode == TH_MODE_REALTIME &&
1057             thread->sched_flags & TH_SFLAG_FAILSAFE) {
1058                 priority = DEPRESSPRI;
1059         }
1060
1061         if (thread->effective_policy.thep_terminated == TRUE) {
1062                 /*
1063                  * We temporarily want to override the expected priority to
1064                  * ensure that the thread exits in a timely manner.
1065                  * Note that this is allowed to exceed thread->max_priority
1066                  * so that the thread is no longer clamped to background
1067                  * during the final exit phase.
1068                  */
1069                 if (priority < thread->task_priority) {
1070                         priority = thread->task_priority;
1071                 }
1072                 if (priority < BASEPRI_DEFAULT) {
1073                         priority = BASEPRI_DEFAULT;
1074                 }
1075         }
1076
1077 #if !defined(XNU_TARGET_OS_OSX)
1078         /* No one can have a base priority less than MAXPRI_THROTTLE */
1079         if (priority < MAXPRI_THROTTLE) {
1080                 priority = MAXPRI_THROTTLE;
1081         }
1082 #endif /* !defined(XNU_TARGET_OS_OSX) */
1083
1084         sched_set_thread_base_priority(thread, priority);
1085 }
1086
1087 /* Called with the task lock held, but not the thread mutex or spinlock */
1088 void
1089 thread_policy_update_tasklocked(
1090         thread_t           thread,
1091         integer_t          priority,
1092         integer_t          max_priority,
1093         task_pend_token_t  pend_token)
1094 {
1095         thread_mtx_lock(thread);
1096
1097         if (!thread->active || thread->policy_reset) {
1098                 thread_mtx_unlock(thread);
1099                 return;
1100         }
1101
1102         spl_t s = splsched();
1103         thread_lock(thread);
1104
1105         __unused
1106         integer_t old_max_priority = thread->max_priority;
1107
1108         assert(priority >= INT16_MIN && priority <= INT16_MAX);
1109         thread->task_priority = (int16_t)priority;
1110
1111         assert(max_priority >= INT16_MIN && max_priority <= INT16_MAX);
1112         thread->max_priority = (int16_t)max_priority;
1113
1114         /*
1115          * When backgrounding a thread, realtime and fixed priority threads
1116          * should be demoted to timeshare background threads.
1117          *
1118          * TODO: Do this inside the thread policy update routine in order to avoid double
1119          * remove/reinsert for a runnable thread
1120          */
1121         if ((max_priority <= MAXPRI_THROTTLE) && (old_max_priority > MAXPRI_THROTTLE)) {
1122                 sched_thread_mode_demote(thread, TH_SFLAG_THROTTLED);
1123         } else if ((max_priority > MAXPRI_THROTTLE) && (old_max_priority <= MAXPRI_THROTTLE)) {
1124                 sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
1125         }
1126
1127         thread_policy_update_spinlocked(thread, true, pend_token);
1128
1129         thread_unlock(thread);
1130         splx(s);
1131
1132         thread_mtx_unlock(thread);
1133 }
1134
1135 /*
1136  * Reset thread to default state in preparation for termination
1137  * Called with thread mutex locked
1138  *
1139  * Always called on current thread, so we don't need a run queue remove
1140  */
1141 void
1142 thread_policy_reset(
1143         thread_t                thread)
1144 {
1145         spl_t           s;
1146
1147         assert(thread == current_thread());
1148
1149         s = splsched();
1150         thread_lock(thread);
1151
1152         if (thread->sched_flags & TH_SFLAG_FAILSAFE) {
1153                 sched_thread_mode_undemote(thread, TH_SFLAG_FAILSAFE);
1154         }
1155
1156         if (thread->sched_flags & TH_SFLAG_THROTTLED) {
1157                 sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
1158         }
1159
1160         /* At this point, the various demotions should be inactive */
1161         assert(!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK));
1162         assert(!(thread->sched_flags & TH_SFLAG_THROTTLED));
1163         assert(!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK));
1164
1165         /* Reset thread back to task-default basepri and mode  */
1166         sched_mode_t newmode = SCHED(initial_thread_sched_mode)(thread->task);
1167
1168         sched_set_thread_mode(thread, newmode);
1169
1170         thread->importance = 0;
1171
1172         /* Prevent further changes to thread base priority or mode */
1173         thread->policy_reset = 1;
1174
1175         sched_set_thread_base_priority(thread, thread->task_priority);
1176
1177         thread_unlock(thread);
1178         splx(s);
1179 }
1180
/*
 * thread_policy_get:
 *
 * Report one flavor of scheduling policy information for a thread.
 *
 *   thread       thread to query
 *   flavor       selects which policy is reported and therefore which
 *                structure policy_info is treated as
 *   policy_info  caller-supplied buffer that receives the result
 *   count        capacity of policy_info; it is only compared against the
 *                flavor's *_COUNT constant and is never written here
 *   get_default  in:  when TRUE, default values are reported instead of
 *                     the thread's own settings
 *                out: set to TRUE by the extended and time-constraint
 *                     flavors when the thread's current mode does not
 *                     match the flavor being asked for
 *
 * Returns:
 *   KERN_SUCCESS             information (possibly defaults) was reported
 *   KERN_INVALID_ARGUMENT    thread is THREAD_NULL, the flavor is unknown,
 *                            or count is too small for the flavor
 *   KERN_TERMINATED          the thread is no longer active
 *   KERN_NOT_SUPPORTED       THREAD_AFFINITY_POLICY without affinity support
 *   KERN_PROTECTION_FAILURE  THREAD_POLICY_STATE when the calling task's
 *                            sec_token.val[0] is non-zero
 *
 * The thread mutex is held for the whole query. Cases that read scheduler
 * fields directly additionally raise to splsched and take the thread lock
 * around those reads.
 */
kern_return_t
thread_policy_get(
        thread_t                                thread,
        thread_policy_flavor_t  flavor,
        thread_policy_t                 policy_info,
        mach_msg_type_number_t  *count,
        boolean_t                               *get_default)
{
        kern_return_t                   result = KERN_SUCCESS;

        if (thread == THREAD_NULL) {
                return KERN_INVALID_ARGUMENT;
        }

        /* Held until the common exit below the switch (or the early return here) */
        thread_mtx_lock(thread);
        if (!thread->active) {
                thread_mtx_unlock(thread);

                return KERN_TERMINATED;
        }

        switch (flavor) {
        case THREAD_EXTENDED_POLICY:
        {
                /* TRUE is the value reported for defaults and for realtime threads */
                boolean_t               timeshare = TRUE;

                if (!(*get_default)) {
                        spl_t s = splsched();
                        thread_lock(thread);

                        if ((thread->sched_mode != TH_MODE_REALTIME) &&
                            (thread->saved_mode != TH_MODE_REALTIME)) {
                                /* While a demotion flag is set, saved_mode is consulted instead of sched_mode */
                                if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK)) {
                                        timeshare = (thread->sched_mode == TH_MODE_TIMESHARE) != 0;
                                } else {
                                        timeshare = (thread->saved_mode == TH_MODE_TIMESHARE) != 0;
                                }
                        } else {
                                /* Realtime thread: tell the caller that defaults are being reported */
                                *get_default = TRUE;
                        }

                        thread_unlock(thread);
                        splx(s);
                }

                /*
                 * Unlike the other flavors, a too-small count is not an error
                 * here: the value is simply not written and result stays
                 * KERN_SUCCESS.
                 */
                if (*count >= THREAD_EXTENDED_POLICY_COUNT) {
                        thread_extended_policy_t        info;

                        info = (thread_extended_policy_t)policy_info;
                        info->timeshare = timeshare;
                }

                break;
        }

        case THREAD_TIME_CONSTRAINT_POLICY:
        {
                thread_time_constraint_policy_t         info;

                if (*count < THREAD_TIME_CONSTRAINT_POLICY_COUNT) {
                        result = KERN_INVALID_ARGUMENT;
                        break;
                }

                info = (thread_time_constraint_policy_t)policy_info;

                if (!(*get_default)) {
                        spl_t s = splsched();
                        thread_lock(thread);

                        /* Realtime parameters are reported if the thread is realtime now or per saved_mode */
                        if ((thread->sched_mode == TH_MODE_REALTIME) ||
                            (thread->saved_mode == TH_MODE_REALTIME)) {
                                info->period = thread->realtime.period;
                                info->computation = thread->realtime.computation;
                                info->constraint = thread->realtime.constraint;
                                info->preemptible = thread->realtime.preemptible;
                        } else {
                                /* Not realtime: fall through to the defaults below */
                                *get_default = TRUE;
                        }

                        thread_unlock(thread);
                        splx(s);
                }

                /* Covers both a caller-requested default and the non-realtime fallback above */
                if (*get_default) {
                        info->period = 0;
                        info->computation = default_timeshare_computation;
                        info->constraint = default_timeshare_constraint;
                        info->preemptible = TRUE;
                }


                break;
        }

        case THREAD_PRECEDENCE_POLICY:
        {
                thread_precedence_policy_t              info;

                if (*count < THREAD_PRECEDENCE_POLICY_COUNT) {
                        result = KERN_INVALID_ARGUMENT;
                        break;
                }

                info = (thread_precedence_policy_t)policy_info;

                if (!(*get_default)) {
                        spl_t s = splsched();
                        thread_lock(thread);

                        info->importance = thread->importance;

                        thread_unlock(thread);
                        splx(s);
                } else {
                        info->importance = 0;
                }

                break;
        }

        case THREAD_AFFINITY_POLICY:
        {
                thread_affinity_policy_t                info;

                /* Support is checked before the count, so KERN_NOT_SUPPORTED takes precedence */
                if (!thread_affinity_is_supported()) {
                        result = KERN_NOT_SUPPORTED;
                        break;
                }
                if (*count < THREAD_AFFINITY_POLICY_COUNT) {
                        result = KERN_INVALID_ARGUMENT;
                        break;
                }

                info = (thread_affinity_policy_t)policy_info;

                /* Only the thread mutex is held here; the thread lock is not taken */
                if (!(*get_default)) {
                        info->affinity_tag = thread_affinity_get(thread);
                } else {
                        info->affinity_tag = THREAD_AFFINITY_TAG_NULL;
                }

                break;
        }

        case THREAD_POLICY_STATE:
        {
                thread_policy_state_t           info;

                if (*count < THREAD_POLICY_STATE_COUNT) {
                        result = KERN_INVALID_ARGUMENT;
                        break;
                }

                /* Only root can get this info */
                if (current_task()->sec_token.val[0] != 0) {
                        result = KERN_PROTECTION_FAILURE;
                        break;
                }

                info = (thread_policy_state_t)(void*)policy_info;

                if (!(*get_default)) {
                        info->flags = 0;

                        spl_t s = splsched();
                        thread_lock(thread);

                        info->flags |= (thread->static_param ? THREAD_POLICY_STATE_FLAG_STATIC_PARAM : 0);

                        /* The policy structures are exported by reading their first 64 bits as a raw value */
                        info->thps_requested_policy = *(uint64_t*)(void*)(&thread->requested_policy);
                        info->thps_effective_policy = *(uint64_t*)(void*)(&thread->effective_policy);

                        /* thps_user_promotions is always reported as zero */
                        info->thps_user_promotions          = 0;
                        info->thps_user_promotion_basepri   = thread->user_promotion_basepri;
                        info->thps_ipc_overrides            = thread->kevent_overrides;

                        proc_get_thread_policy_bitfield(thread, info);

                        thread_unlock(thread);
                        splx(s);
                } else {
                        /* Only these three fields are written on the default path; flags and thps_* are left as passed in */
                        info->requested = 0;
                        info->effective = 0;
                        info->pending = 0;
                }

                break;
        }

        case THREAD_LATENCY_QOS_POLICY:
        {
                /* info is only dereferenced after the count check below */
                thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
                thread_latency_qos_t plqos;

                if (*count < THREAD_LATENCY_QOS_POLICY_COUNT) {
                        result = KERN_INVALID_ARGUMENT;
                        break;
                }

                if (*get_default) {
                        plqos = 0;
                } else {
                        /* NOTE(review): the _locked suffix presumably refers to the thread mutex held here -- confirm */
                        plqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_LATENCY_QOS, NULL);
                }

                info->thread_latency_qos_tier = qos_latency_policy_package(plqos);
        }
        break;

        case THREAD_THROUGHPUT_QOS_POLICY:
        {
                /* info is only dereferenced after the count check below */
                thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
                thread_throughput_qos_t ptqos;

                if (*count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
                        result = KERN_INVALID_ARGUMENT;
                        break;
                }

                if (*get_default) {
                        ptqos = 0;
                } else {
                        ptqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_THROUGH_QOS, NULL);
                }

                info->thread_throughput_qos_tier = qos_throughput_policy_package(ptqos);
        }
        break;

        case THREAD_QOS_POLICY:
        {
                thread_qos_policy_t info = (thread_qos_policy_t)policy_info;

                if (*count < THREAD_QOS_POLICY_COUNT) {
                        result = KERN_INVALID_ARGUMENT;
                        break;
                }

                if (!(*get_default)) {
                        int relprio_value = 0;
                        info->qos_tier = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
                            TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);

                        /* The value obtained above is negated before being reported as tier_importance */
                        info->tier_importance = -relprio_value;
                } else {
                        info->qos_tier = THREAD_QOS_UNSPECIFIED;
                        info->tier_importance = 0;
                }

                break;
        }

        default:
                /* Unknown flavor */
                result = KERN_INVALID_ARGUMENT;
                break;
        }

        /* Common exit for every case that reached the switch */
        thread_mtx_unlock(thread);

        return result;
}
1443
1444 void
1445 thread_policy_create(thread_t thread)
1446 {
1447         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1448             (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_START,
1449             thread_tid(thread), theffective_0(thread),
1450             theffective_1(thread), thread->base_pri, 0);
1451
1452         /* We pass a pend token but ignore it */
1453         struct task_pend_token pend_token = {};
1454
1455         thread_policy_update_internal_spinlocked(thread, true, &pend_token);
1456
1457         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1458             (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_END,
1459             thread_tid(thread), theffective_0(thread),
1460             theffective_1(thread), thread->base_pri, 0);
1461 }
1462
1463 static void
1464 thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token)
1465 {
1466         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1467             (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD) | DBG_FUNC_START),
1468             thread_tid(thread), theffective_0(thread),
1469             theffective_1(thread), thread->base_pri, 0);
1470
1471         thread_policy_update_internal_spinlocked(thread, recompute_priority, pend_token);
1472
1473         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1474             (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD)) | DBG_FUNC_END,
1475             thread_tid(thread), theffective_0(thread),
1476             theffective_1(thread), thread->base_pri, 0);
1477 }
1478
1479
1480
1481 /*
1482  * One thread state update function TO RULE THEM ALL
1483  *
1484  * This function updates the thread effective policy fields
1485  * and pushes the results to the relevant subsystems.
1486  *
 * Returns nothing (the function is void); any pended actions that need to
 * be run are presumably flagged in pend_token for the caller -- TODO confirm.
1488  *
1489  * Called with thread spinlock locked, task may be locked, thread mutex may be locked
1490  */
1491 static void
1492 thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority,
1493     task_pend_token_t pend_token)
1494 {
1495         /*
1496          * Step 1:
1497          *  Gather requested policy and effective task state
1498          */
1499
1500         struct thread_requested_policy requested = thread->requested_policy;
1501         struct task_effective_policy task_effective = thread->task->effective_policy;
1502
1503         /*
1504          * Step 2:
1505          *  Calculate new effective policies from requested policy, task and thread state
1506          *  Rules:
1507          *      Don't change requested, it won't take effect
1508          */
1509
1510         struct thread_effective_policy next = {};
1511
1512         next.thep_qos_ui_is_urgent = task_effective.tep_qos_ui_is_urgent;
1513
1514         uint32_t next_qos = requested.thrp_qos;
1515
1516         if (requested.thrp_qos != THREAD_QOS_UNSPECIFIED) {
1517                 next_qos = MAX(requested.thrp_qos_override, next_qos);
1518                 next_qos = MAX(requested.thrp_qos_promote, next_qos);
1519                 next_qos = MAX(requested.thrp_qos_kevent_override, next_qos);
1520                 next_qos = MAX(requested.thrp_qos_wlsvc_override, next_qos);
1521                 next_qos = MAX(requested.thrp_qos_workq_override, next_qos);
1522         }
1523
1524         if (task_effective.tep_darwinbg && task_effective.tep_adaptive_bg &&
1525             requested.thrp_qos_promote > THREAD_QOS_BACKGROUND) {
1526                 /*
1527                  * This thread is turnstile-boosted higher than the adaptive clamp
1528                  * by a synchronous waiter. Allow that to override the adaptive
1529                  * clamp temporarily for this thread only.
1530                  */
1531                 next.thep_promote_above_task = true;
1532                 next_qos = requested.thrp_qos_promote;
1533         }
1534
1535         next.thep_qos = next_qos;
1536
1537         /* A task clamp will result in an effective QoS even when requested is UNSPECIFIED */
1538         if (task_effective.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
1539                 if (next.thep_qos != THREAD_QOS_UNSPECIFIED) {
1540                         next.thep_qos = MIN(task_effective.tep_qos_clamp, next.thep_qos);
1541                 } else {
1542                         next.thep_qos = task_effective.tep_qos_clamp;
1543                 }
1544         }
1545
1546         /*
1547          * Extract outbound-promotion QoS before applying task ceiling or BG clamp
1548          * This allows QoS promotions to work properly even after the process is unclamped.
1549          */
1550         next.thep_qos_promote = next.thep_qos;
1551
1552         /* The ceiling only applies to threads that are in the QoS world */
1553         /* TODO: is it appropriate for this to limit a turnstile-boosted thread's QoS? */
1554         if (task_effective.tep_qos_ceiling != THREAD_QOS_UNSPECIFIED &&
1555             next.thep_qos != THREAD_QOS_UNSPECIFIED) {
1556                 next.thep_qos = MIN(task_effective.tep_qos_ceiling, next.thep_qos);
1557         }
1558
1559         /*
1560          * The QoS relative priority is only applicable when the original programmer's
1561          * intended (requested) QoS is in effect. When the QoS is clamped (e.g.
1562          * USER_INITIATED-13REL clamped to UTILITY), the relative priority is not honored,
1563          * since otherwise it would be lower than unclamped threads. Similarly, in the
1564          * presence of boosting, the programmer doesn't know what other actors
1565          * are boosting the thread.
1566          */
1567         if ((requested.thrp_qos != THREAD_QOS_UNSPECIFIED) &&
1568             (requested.thrp_qos == next.thep_qos) &&
1569             (requested.thrp_qos_override == THREAD_QOS_UNSPECIFIED)) {
1570                 next.thep_qos_relprio = requested.thrp_qos_relprio;
1571         } else {
1572                 next.thep_qos_relprio = 0;
1573         }
1574
1575         /* Calculate DARWIN_BG */
1576         bool wants_darwinbg        = false;
1577         bool wants_all_sockets_bg  = false; /* Do I want my existing sockets to be bg */
1578
1579         if (task_effective.tep_darwinbg && !next.thep_promote_above_task) {
1580                 wants_darwinbg = true;
1581         }
1582
1583         /*
1584          * If DARWIN_BG has been requested at either level, it's engaged.
1585          * darwinbg threads always create bg sockets,
1586          * but only some types of darwinbg change the sockets
1587          * after they're created
1588          */
1589         if (requested.thrp_int_darwinbg || requested.thrp_ext_darwinbg) {
1590                 wants_all_sockets_bg = wants_darwinbg = true;
1591         }
1592
1593         if (requested.thrp_pidbind_bg) {
1594                 wants_all_sockets_bg = wants_darwinbg = true;
1595         }
1596
1597         if (next.thep_qos == THREAD_QOS_BACKGROUND ||
1598             next.thep_qos == THREAD_QOS_MAINTENANCE) {
1599                 wants_darwinbg = true;
1600         }
1601
1602         /* Calculate side effects of DARWIN_BG */
1603
1604         if (wants_darwinbg) {
1605                 next.thep_darwinbg = 1;
1606         }
1607
1608         if (next.thep_darwinbg || task_effective.tep_new_sockets_bg) {
1609                 next.thep_new_sockets_bg = 1;
1610         }
1611
1612         /* Don't use task_effective.tep_all_sockets_bg here */
1613         if (wants_all_sockets_bg) {
1614                 next.thep_all_sockets_bg = 1;
1615         }
1616
1617         /* darwinbg implies background QOS (or lower) */
1618         if (next.thep_darwinbg &&
1619             (next.thep_qos > THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_UNSPECIFIED)) {
1620                 next.thep_qos = THREAD_QOS_BACKGROUND;
1621                 next.thep_qos_relprio = 0;
1622         }
1623
1624         /* Calculate IO policy */
1625
1626         int iopol = THROTTLE_LEVEL_TIER0;
1627
1628         /* Factor in the task's IO policy */
1629         if (next.thep_darwinbg) {
1630                 iopol = MAX(iopol, task_effective.tep_bg_iotier);
1631         }
1632
1633         if (!next.thep_promote_above_task) {
1634                 iopol = MAX(iopol, task_effective.tep_io_tier);
1635         }
1636
1637         /* Look up the associated IO tier value for the QoS class */
1638         iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.thep_qos]);
1639
1640         iopol = MAX(iopol, requested.thrp_int_iotier);
1641         iopol = MAX(iopol, requested.thrp_ext_iotier);
1642
1643         next.thep_io_tier = iopol;
1644
1645         /*
1646          * If a QoS override is causing IO to go into a lower tier, we also set
1647          * the passive bit so that a thread doesn't end up stuck in its own throttle
1648          * window when the override goes away.
1649          */
1650
1651         int next_qos_iotier = thread_qos_policy_params.qos_iotier[next.thep_qos];
1652         int req_qos_iotier = thread_qos_policy_params.qos_iotier[requested.thrp_qos];
1653         bool qos_io_override_active = (next_qos_iotier < req_qos_iotier);
1654
1655         /* Calculate Passive IO policy */
1656         if (requested.thrp_ext_iopassive ||
1657             requested.thrp_int_iopassive ||
1658             qos_io_override_active ||
1659             task_effective.tep_io_passive) {
1660                 next.thep_io_passive = 1;
1661         }
1662
1663         /* Calculate timer QOS */
1664         uint32_t latency_qos = requested.thrp_latency_qos;
1665
1666         if (!next.thep_promote_above_task) {
1667                 latency_qos = MAX(latency_qos, task_effective.tep_latency_qos);
1668         }
1669
1670         latency_qos = MAX(latency_qos, thread_qos_policy_params.qos_latency_qos[next.thep_qos]);
1671
1672         next.thep_latency_qos = latency_qos;
1673
1674         /* Calculate throughput QOS */
1675         uint32_t through_qos = requested.thrp_through_qos;
1676
1677         if (!next.thep_promote_above_task) {
1678                 through_qos = MAX(through_qos, task_effective.tep_through_qos);
1679         }
1680
1681         through_qos = MAX(through_qos, thread_qos_policy_params.qos_through_qos[next.thep_qos]);
1682
1683         next.thep_through_qos = through_qos;
1684
1685         if (task_effective.tep_terminated || requested.thrp_terminated) {
1686                 /* Shoot down the throttles that slow down exit or response to SIGTERM */
1687                 next.thep_terminated    = 1;
1688                 next.thep_darwinbg      = 0;
1689                 next.thep_io_tier       = THROTTLE_LEVEL_TIER0;
1690                 next.thep_qos           = THREAD_QOS_UNSPECIFIED;
1691                 next.thep_latency_qos   = LATENCY_QOS_TIER_UNSPECIFIED;
1692                 next.thep_through_qos   = THROUGHPUT_QOS_TIER_UNSPECIFIED;
1693         }
1694
1695         /*
1696          * Step 3:
1697          *  Swap out old policy for new policy
1698          */
1699
1700         struct thread_effective_policy prev = thread->effective_policy;
1701
1702         thread_update_qos_cpu_time_locked(thread);
1703
1704         /* This is the point where the new values become visible to other threads */
1705         thread->effective_policy = next;
1706
1707         /*
1708          * Step 4:
1709          *  Pend updates that can't be done while holding the thread lock
1710          */
1711
1712         if (prev.thep_all_sockets_bg != next.thep_all_sockets_bg) {
1713                 pend_token->tpt_update_sockets = 1;
1714         }
1715
1716         /* TODO: Doesn't this only need to be done if the throttle went up? */
1717         if (prev.thep_io_tier != next.thep_io_tier) {
1718                 pend_token->tpt_update_throttle = 1;
1719         }
1720
1721         /*
1722          * Check for the attributes that sfi_thread_classify() consults,
1723          *  and trigger SFI re-evaluation.
1724          */
1725         if (prev.thep_qos != next.thep_qos ||
1726             prev.thep_darwinbg != next.thep_darwinbg) {
1727                 pend_token->tpt_update_thread_sfi = 1;
1728         }
1729
1730         integer_t old_base_pri = thread->base_pri;
1731
1732         /*
1733          * Step 5:
1734          *  Update other subsystems as necessary if something has changed
1735          */
1736
1737         /* Check for the attributes that thread_recompute_priority() consults */
1738         if (prev.thep_qos != next.thep_qos ||
1739             prev.thep_qos_relprio != next.thep_qos_relprio ||
1740             prev.thep_qos_ui_is_urgent != next.thep_qos_ui_is_urgent ||
1741             prev.thep_promote_above_task != next.thep_promote_above_task ||
1742             prev.thep_terminated != next.thep_terminated ||
1743             pend_token->tpt_force_recompute_pri == 1 ||
1744             recompute_priority) {
1745                 thread_recompute_priority(thread);
1746         }
1747
1748         /*
1749          * Check if the thread is waiting on a turnstile and needs priority propagation.
1750          */
1751         if (pend_token->tpt_update_turnstile &&
1752             ((old_base_pri == thread->base_pri) ||
1753             !thread_get_waiting_turnstile(thread))) {
1754                 /*
1755                  * Reset update turnstile pend token since either
1756                  * the thread priority did not change or thread is
1757                  * not blocked on a turnstile.
1758                  */
1759                 pend_token->tpt_update_turnstile = 0;
1760         }
1761 }
1762
1763
1764 /*
1765  * Initiate a thread policy state transition on a thread with its TID
1766  * Useful if you cannot guarantee the thread won't get terminated
1767  * Precondition: No locks are held
1768  * Will take task lock - using the non-tid variant is faster
1769  * if you already have a thread ref.
1770  */
1771 void
1772 proc_set_thread_policy_with_tid(task_t     task,
1773     uint64_t   tid,
1774     int        category,
1775     int        flavor,
1776     int        value)
1777 {
1778         /* takes task lock, returns ref'ed thread or NULL */
1779         thread_t thread = task_findtid(task, tid);
1780
1781         if (thread == THREAD_NULL) {
1782                 return;
1783         }
1784
1785         proc_set_thread_policy(thread, category, flavor, value);
1786
1787         thread_deallocate(thread);
1788 }
1789
1790 /*
1791  * Initiate a thread policy transition on a thread
1792  * This path supports networking transitions (i.e. darwinbg transitions)
1793  * Precondition: No locks are held
1794  */
1795 void
1796 proc_set_thread_policy(thread_t   thread,
1797     int        category,
1798     int        flavor,
1799     int        value)
1800 {
1801         struct task_pend_token pend_token = {};
1802
1803         thread_mtx_lock(thread);
1804
1805         proc_set_thread_policy_locked(thread, category, flavor, value, 0, &pend_token);
1806
1807         thread_mtx_unlock(thread);
1808
1809         thread_policy_update_complete_unlocked(thread, &pend_token);
1810 }
1811
1812 /*
1813  * Do the things that can't be done while holding a thread mutex.
1814  * These are set up to call back into thread policy to get the latest value,
1815  * so they don't have to be synchronized with the update.
1816  * The only required semantic is 'call this sometime after updating effective policy'
1817  *
1818  * Precondition: Thread mutex is not held
1819  *
1820  * This may be called with the task lock held, but in that case it won't be
1821  * called with tpt_update_sockets set.
1822  */
1823 void
1824 thread_policy_update_complete_unlocked(thread_t thread, task_pend_token_t pend_token)
1825 {
1826 #ifdef MACH_BSD
1827         if (pend_token->tpt_update_sockets) {
1828                 proc_apply_task_networkbg(thread->task->bsd_info, thread);
1829         }
1830 #endif /* MACH_BSD */
1831
1832         if (pend_token->tpt_update_throttle) {
1833                 rethrottle_thread(thread->uthread);
1834         }
1835
1836         if (pend_token->tpt_update_thread_sfi) {
1837                 sfi_reevaluate(thread);
1838         }
1839
1840         if (pend_token->tpt_update_turnstile) {
1841                 turnstile_update_thread_priority_chain(thread);
1842         }
1843 }
1844
1845 /*
1846  * Set and update thread policy
1847  * Thread mutex might be held
1848  */
1849 static void
1850 proc_set_thread_policy_locked(thread_t          thread,
1851     int               category,
1852     int               flavor,
1853     int               value,
1854     int               value2,
1855     task_pend_token_t pend_token)
1856 {
1857         spl_t s = splsched();
1858         thread_lock(thread);
1859
1860         proc_set_thread_policy_spinlocked(thread, category, flavor, value, value2, pend_token);
1861
1862         thread_unlock(thread);
1863         splx(s);
1864 }
1865
1866 /*
1867  * Set and update thread policy
1868  * Thread spinlock is held
1869  */
1870 static void
1871 proc_set_thread_policy_spinlocked(thread_t          thread,
1872     int               category,
1873     int               flavor,
1874     int               value,
1875     int               value2,
1876     task_pend_token_t pend_token)
1877 {
1878         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1879             (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_START,
1880             thread_tid(thread), threquested_0(thread),
1881             threquested_1(thread), value, 0);
1882
1883         thread_set_requested_policy_spinlocked(thread, category, flavor, value, value2, pend_token);
1884
1885         thread_policy_update_spinlocked(thread, false, pend_token);
1886
1887         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1888             (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_END,
1889             thread_tid(thread), threquested_0(thread),
1890             threquested_1(thread), tpending(pend_token), 0);
1891 }
1892
1893 /*
1894  * Set the requested state for a specific flavor to a specific value.
1895  */
1896 static void
1897 thread_set_requested_policy_spinlocked(thread_t     thread,
1898     int               category,
1899     int               flavor,
1900     int               value,
1901     int               value2,
1902     task_pend_token_t pend_token)
1903 {
1904         int tier, passive;
1905
1906         struct thread_requested_policy requested = thread->requested_policy;
1907
1908         switch (flavor) {
1909         /* Category: EXTERNAL and INTERNAL, thread and task */
1910
1911         case TASK_POLICY_DARWIN_BG:
1912                 if (category == TASK_POLICY_EXTERNAL) {
1913                         requested.thrp_ext_darwinbg = value;
1914                 } else {
1915                         requested.thrp_int_darwinbg = value;
1916                 }
1917                 break;
1918
1919         case TASK_POLICY_IOPOL:
1920                 proc_iopol_to_tier(value, &tier, &passive);
1921                 if (category == TASK_POLICY_EXTERNAL) {
1922                         requested.thrp_ext_iotier  = tier;
1923                         requested.thrp_ext_iopassive = passive;
1924                 } else {
1925                         requested.thrp_int_iotier  = tier;
1926                         requested.thrp_int_iopassive = passive;
1927                 }
1928                 break;
1929
1930         case TASK_POLICY_IO:
1931                 if (category == TASK_POLICY_EXTERNAL) {
1932                         requested.thrp_ext_iotier = value;
1933                 } else {
1934                         requested.thrp_int_iotier = value;
1935                 }
1936                 break;
1937
1938         case TASK_POLICY_PASSIVE_IO:
1939                 if (category == TASK_POLICY_EXTERNAL) {
1940                         requested.thrp_ext_iopassive = value;
1941                 } else {
1942                         requested.thrp_int_iopassive = value;
1943                 }
1944                 break;
1945
1946         /* Category: ATTRIBUTE, thread only */
1947
1948         case TASK_POLICY_PIDBIND_BG:
1949                 assert(category == TASK_POLICY_ATTRIBUTE);
1950                 requested.thrp_pidbind_bg = value;
1951                 break;
1952
1953         case TASK_POLICY_LATENCY_QOS:
1954                 assert(category == TASK_POLICY_ATTRIBUTE);
1955                 requested.thrp_latency_qos = value;
1956                 break;
1957
1958         case TASK_POLICY_THROUGH_QOS:
1959                 assert(category == TASK_POLICY_ATTRIBUTE);
1960                 requested.thrp_through_qos = value;
1961                 break;
1962
1963         case TASK_POLICY_QOS_OVERRIDE:
1964                 assert(category == TASK_POLICY_ATTRIBUTE);
1965                 requested.thrp_qos_override = value;
1966                 pend_token->tpt_update_turnstile = 1;
1967                 break;
1968
1969         case TASK_POLICY_QOS_AND_RELPRIO:
1970                 assert(category == TASK_POLICY_ATTRIBUTE);
1971                 requested.thrp_qos = value;
1972                 requested.thrp_qos_relprio = value2;
1973                 pend_token->tpt_update_turnstile = 1;
1974                 DTRACE_BOOST3(qos_set, uint64_t, thread->thread_id, int, requested.thrp_qos, int, requested.thrp_qos_relprio);
1975                 break;
1976
1977         case TASK_POLICY_QOS_WORKQ_OVERRIDE:
1978                 assert(category == TASK_POLICY_ATTRIBUTE);
1979                 requested.thrp_qos_workq_override = value;
1980                 pend_token->tpt_update_turnstile = 1;
1981                 break;
1982
1983         case TASK_POLICY_QOS_PROMOTE:
1984                 assert(category == TASK_POLICY_ATTRIBUTE);
1985                 requested.thrp_qos_promote = value;
1986                 break;
1987
1988         case TASK_POLICY_QOS_KEVENT_OVERRIDE:
1989                 assert(category == TASK_POLICY_ATTRIBUTE);
1990                 requested.thrp_qos_kevent_override = value;
1991                 pend_token->tpt_update_turnstile = 1;
1992                 break;
1993
1994         case TASK_POLICY_QOS_SERVICER_OVERRIDE:
1995                 assert(category == TASK_POLICY_ATTRIBUTE);
1996                 requested.thrp_qos_wlsvc_override = value;
1997                 pend_token->tpt_update_turnstile = 1;
1998                 break;
1999
2000         case TASK_POLICY_TERMINATED:
2001                 assert(category == TASK_POLICY_ATTRIBUTE);
2002                 requested.thrp_terminated = value;
2003                 break;
2004
2005         default:
2006                 panic("unknown task policy: %d %d %d", category, flavor, value);
2007                 break;
2008         }
2009
2010         thread->requested_policy = requested;
2011 }
2012
2013 /*
2014  * Gets what you set. Effective values may be different.
2015  * Precondition: No locks are held
2016  */
2017 int
2018 proc_get_thread_policy(thread_t   thread,
2019     int        category,
2020     int        flavor)
2021 {
2022         int value = 0;
2023         thread_mtx_lock(thread);
2024         value = proc_get_thread_policy_locked(thread, category, flavor, NULL);
2025         thread_mtx_unlock(thread);
2026         return value;
2027 }
2028
2029 static int
2030 proc_get_thread_policy_locked(thread_t   thread,
2031     int        category,
2032     int        flavor,
2033     int*       value2)
2034 {
2035         int value = 0;
2036
2037         spl_t s = splsched();
2038         thread_lock(thread);
2039
2040         value = thread_get_requested_policy_spinlocked(thread, category, flavor, value2);
2041
2042         thread_unlock(thread);
2043         splx(s);
2044
2045         return value;
2046 }
2047
2048 /*
2049  * Gets what you set. Effective values may be different.
2050  */
2051 static int
2052 thread_get_requested_policy_spinlocked(thread_t thread,
2053     int      category,
2054     int      flavor,
2055     int*     value2)
2056 {
2057         int value = 0;
2058
2059         struct thread_requested_policy requested = thread->requested_policy;
2060
2061         switch (flavor) {
2062         case TASK_POLICY_DARWIN_BG:
2063                 if (category == TASK_POLICY_EXTERNAL) {
2064                         value = requested.thrp_ext_darwinbg;
2065                 } else {
2066                         value = requested.thrp_int_darwinbg;
2067                 }
2068                 break;
2069         case TASK_POLICY_IOPOL:
2070                 if (category == TASK_POLICY_EXTERNAL) {
2071                         value = proc_tier_to_iopol(requested.thrp_ext_iotier,
2072                             requested.thrp_ext_iopassive);
2073                 } else {
2074                         value = proc_tier_to_iopol(requested.thrp_int_iotier,
2075                             requested.thrp_int_iopassive);
2076                 }
2077                 break;
2078         case TASK_POLICY_IO:
2079                 if (category == TASK_POLICY_EXTERNAL) {
2080                         value = requested.thrp_ext_iotier;
2081                 } else {
2082                         value = requested.thrp_int_iotier;
2083                 }
2084                 break;
2085         case TASK_POLICY_PASSIVE_IO:
2086                 if (category == TASK_POLICY_EXTERNAL) {
2087                         value = requested.thrp_ext_iopassive;
2088                 } else {
2089                         value = requested.thrp_int_iopassive;
2090                 }
2091                 break;
2092         case TASK_POLICY_QOS:
2093                 assert(category == TASK_POLICY_ATTRIBUTE);
2094                 value = requested.thrp_qos;
2095                 break;
2096         case TASK_POLICY_QOS_OVERRIDE:
2097                 assert(category == TASK_POLICY_ATTRIBUTE);
2098                 value = requested.thrp_qos_override;
2099                 break;
2100         case TASK_POLICY_LATENCY_QOS:
2101                 assert(category == TASK_POLICY_ATTRIBUTE);
2102                 value = requested.thrp_latency_qos;
2103                 break;
2104         case TASK_POLICY_THROUGH_QOS:
2105                 assert(category == TASK_POLICY_ATTRIBUTE);
2106                 value = requested.thrp_through_qos;
2107                 break;
2108         case TASK_POLICY_QOS_WORKQ_OVERRIDE:
2109                 assert(category == TASK_POLICY_ATTRIBUTE);
2110                 value = requested.thrp_qos_workq_override;
2111                 break;
2112         case TASK_POLICY_QOS_AND_RELPRIO:
2113                 assert(category == TASK_POLICY_ATTRIBUTE);
2114                 assert(value2 != NULL);
2115                 value = requested.thrp_qos;
2116                 *value2 = requested.thrp_qos_relprio;
2117                 break;
2118         case TASK_POLICY_QOS_PROMOTE:
2119                 assert(category == TASK_POLICY_ATTRIBUTE);
2120                 value = requested.thrp_qos_promote;
2121                 break;
2122         case TASK_POLICY_QOS_KEVENT_OVERRIDE:
2123                 assert(category == TASK_POLICY_ATTRIBUTE);
2124                 value = requested.thrp_qos_kevent_override;
2125                 break;
2126         case TASK_POLICY_QOS_SERVICER_OVERRIDE:
2127                 assert(category == TASK_POLICY_ATTRIBUTE);
2128                 value = requested.thrp_qos_wlsvc_override;
2129                 break;
2130         case TASK_POLICY_TERMINATED:
2131                 assert(category == TASK_POLICY_ATTRIBUTE);
2132                 value = requested.thrp_terminated;
2133                 break;
2134
2135         default:
2136                 panic("unknown policy_flavor %d", flavor);
2137                 break;
2138         }
2139
2140         return value;
2141 }
2142
2143 /*
2144  * Gets what is actually in effect, for subsystems which pull policy instead of receive updates.
2145  *
2146  * NOTE: This accessor does not take the task or thread lock.
2147  * Notifications of state updates need to be externally synchronized with state queries.
2148  * This routine *MUST* remain interrupt safe, as it is potentially invoked
2149  * within the context of a timer interrupt.
2150  *
2151  * TODO: I think we can get away with architecting this such that we don't need to look at the task ever.
2152  *      Is that a good idea? Maybe it's best to avoid evaluate-all-the-threads updates.
2153  *      I don't think that cost is worth not having the right answer.
2154  */
2155 int
2156 proc_get_effective_thread_policy(thread_t thread,
2157     int      flavor)
2158 {
2159         int value = 0;
2160
2161         switch (flavor) {
2162         case TASK_POLICY_DARWIN_BG:
2163                 /*
2164                  * This call is used within the timer layer, as well as
2165                  * prioritizing requests to the graphics system.
2166                  * It also informs SFI and originator-bg-state.
2167                  * Returns 1 for background mode, 0 for normal mode
2168                  */
2169
2170                 value = thread->effective_policy.thep_darwinbg ? 1 : 0;
2171                 break;
2172         case TASK_POLICY_IO:
2173                 /*
2174                  * The I/O system calls here to find out what throttling tier to apply to an operation.
2175                  * Returns THROTTLE_LEVEL_* values
2176                  */
2177                 value = thread->effective_policy.thep_io_tier;
2178                 if (thread->iotier_override != THROTTLE_LEVEL_NONE) {
2179                         value = MIN(value, thread->iotier_override);
2180                 }
2181                 break;
2182         case TASK_POLICY_PASSIVE_IO:
2183                 /*
2184                  * The I/O system calls here to find out whether an operation should be passive.
2185                  * (i.e. not cause operations with lower throttle tiers to be throttled)
2186                  * Returns 1 for passive mode, 0 for normal mode
2187                  *
2188                  * If an override is causing IO to go into a lower tier, we also set
2189                  * the passive bit so that a thread doesn't end up stuck in its own throttle
2190                  * window when the override goes away.
2191                  */
2192                 value = thread->effective_policy.thep_io_passive ? 1 : 0;
2193                 if (thread->iotier_override != THROTTLE_LEVEL_NONE &&
2194                     thread->iotier_override < thread->effective_policy.thep_io_tier) {
2195                         value = 1;
2196                 }
2197                 break;
2198         case TASK_POLICY_ALL_SOCKETS_BG:
2199                 /*
2200                  * do_background_socket() calls this to determine whether
2201                  * it should change the thread's sockets
2202                  * Returns 1 for background mode, 0 for normal mode
2203                  * This consults both thread and task so un-DBGing a thread while the task is BG
2204                  * doesn't get you out of the network throttle.
2205                  */
2206                 value = (thread->effective_policy.thep_all_sockets_bg ||
2207                     thread->task->effective_policy.tep_all_sockets_bg) ? 1 : 0;
2208                 break;
2209         case TASK_POLICY_NEW_SOCKETS_BG:
2210                 /*
2211                  * socreate() calls this to determine if it should mark a new socket as background
2212                  * Returns 1 for background mode, 0 for normal mode
2213                  */
2214                 value = thread->effective_policy.thep_new_sockets_bg ? 1 : 0;
2215                 break;
2216         case TASK_POLICY_LATENCY_QOS:
2217                 /*
2218                  * timer arming calls into here to find out the timer coalescing level
2219                  * Returns a latency QoS tier (0-6)
2220                  */
2221                 value = thread->effective_policy.thep_latency_qos;
2222                 break;
2223         case TASK_POLICY_THROUGH_QOS:
2224                 /*
2225                  * This value is passed into the urgency callout from the scheduler
2226                  * to the performance management subsystem.
2227                  *
2228                  * Returns a throughput QoS tier (0-6)
2229                  */
2230                 value = thread->effective_policy.thep_through_qos;
2231                 break;
2232         case TASK_POLICY_QOS:
2233                 /*
2234                  * This is communicated to the performance management layer and SFI.
2235                  *
2236                  * Returns a QoS policy tier
2237                  */
2238                 value = thread->effective_policy.thep_qos;
2239                 break;
2240         default:
2241                 panic("unknown thread policy flavor %d", flavor);
2242                 break;
2243         }
2244
2245         return value;
2246 }
2247
2248
2249 /*
2250  * (integer_t) casts limit the number of bits we can fit here
2251  * this interface is deprecated and replaced by the _EXT struct ?
2252  */
2253 static void
2254 proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info)
2255 {
2256         uint64_t bits = 0;
2257         struct thread_requested_policy requested = thread->requested_policy;
2258
2259         bits |= (requested.thrp_int_darwinbg    ? POLICY_REQ_INT_DARWIN_BG  : 0);
2260         bits |= (requested.thrp_ext_darwinbg    ? POLICY_REQ_EXT_DARWIN_BG  : 0);
2261         bits |= (requested.thrp_int_iotier      ? (((uint64_t)requested.thrp_int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0);
2262         bits |= (requested.thrp_ext_iotier      ? (((uint64_t)requested.thrp_ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0);
2263         bits |= (requested.thrp_int_iopassive   ? POLICY_REQ_INT_PASSIVE_IO : 0);
2264         bits |= (requested.thrp_ext_iopassive   ? POLICY_REQ_EXT_PASSIVE_IO : 0);
2265
2266         bits |= (requested.thrp_qos             ? (((uint64_t)requested.thrp_qos) << POLICY_REQ_TH_QOS_SHIFT) : 0);
2267         bits |= (requested.thrp_qos_override    ? (((uint64_t)requested.thrp_qos_override) << POLICY_REQ_TH_QOS_OVER_SHIFT)   : 0);
2268
2269         bits |= (requested.thrp_pidbind_bg      ? POLICY_REQ_PIDBIND_BG     : 0);
2270
2271         bits |= (requested.thrp_latency_qos     ? (((uint64_t)requested.thrp_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0);
2272         bits |= (requested.thrp_through_qos     ? (((uint64_t)requested.thrp_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0);
2273
2274         info->requested = (integer_t) bits;
2275         bits = 0;
2276
2277         struct thread_effective_policy effective = thread->effective_policy;
2278
2279         bits |= (effective.thep_darwinbg        ? POLICY_EFF_DARWIN_BG      : 0);
2280
2281         bits |= (effective.thep_io_tier         ? (((uint64_t)effective.thep_io_tier) << POLICY_EFF_IO_TIER_SHIFT) : 0);
2282         bits |= (effective.thep_io_passive      ? POLICY_EFF_IO_PASSIVE     : 0);
2283         bits |= (effective.thep_all_sockets_bg  ? POLICY_EFF_ALL_SOCKETS_BG : 0);
2284         bits |= (effective.thep_new_sockets_bg  ? POLICY_EFF_NEW_SOCKETS_BG : 0);
2285
2286         bits |= (effective.thep_qos             ? (((uint64_t)effective.thep_qos) << POLICY_EFF_TH_QOS_SHIFT) : 0);
2287
2288         bits |= (effective.thep_latency_qos     ? (((uint64_t)effective.thep_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0);
2289         bits |= (effective.thep_through_qos     ? (((uint64_t)effective.thep_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0);
2290
2291         info->effective = (integer_t)bits;
2292         bits = 0;
2293
2294         info->pending = 0;
2295 }
2296
2297 /*
2298  * Sneakily trace either the task and thread requested
2299  * or just the thread requested, depending on if we have enough room.
2300  * We do have room on LP64. On LP32, we have to split it between two uintptr_t's.
2301  *
2302  *                                LP32            LP64
2303  * threquested_0(thread)          thread[0]       task[0]
2304  * threquested_1(thread)          thread[1]       thread[0]
2305  *
2306  */
2307
2308 uintptr_t
2309 threquested_0(thread_t thread)
2310 {
2311         static_assert(sizeof(struct thread_requested_policy) == sizeof(uint64_t), "size invariant violated");
2312
2313         uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
2314
2315         return raw[0];
2316 }
2317
2318 uintptr_t
2319 threquested_1(thread_t thread)
2320 {
2321 #if defined __LP64__
2322         return *(uintptr_t*)&thread->task->requested_policy;
2323 #else
2324         uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
2325         return raw[1];
2326 #endif
2327 }
2328
2329 uintptr_t
2330 theffective_0(thread_t thread)
2331 {
2332         static_assert(sizeof(struct thread_effective_policy) == sizeof(uint64_t), "size invariant violated");
2333
2334         uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
2335         return raw[0];
2336 }
2337
2338 uintptr_t
2339 theffective_1(thread_t thread)
2340 {
2341 #if defined __LP64__
2342         return *(uintptr_t*)&thread->task->effective_policy;
2343 #else
2344         uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
2345         return raw[1];
2346 #endif
2347 }
2348
2349
2350 /*
2351  * Set an override on the thread which is consulted with a
2352  * higher priority than the task/thread policy. This should
2353  * only be set for temporary grants until the thread
2354  * returns to the userspace boundary
2355  *
2356  * We use atomic operations to swap in the override, with
2357  * the assumption that the thread itself can
2358  * read the override and clear it on return to userspace.
2359  *
2360  * No locking is performed, since it is acceptable to see
2361  * a stale override for one loop through throttle_lowpri_io().
2362  * However a thread reference must be held on the thread.
2363  */
2364
2365 void
2366 set_thread_iotier_override(thread_t thread, int policy)
2367 {
2368         int current_override;
2369
2370         /* Let most aggressive I/O policy win until user boundary */
2371         do {
2372                 current_override = thread->iotier_override;
2373
2374                 if (current_override != THROTTLE_LEVEL_NONE) {
2375                         policy = MIN(current_override, policy);
2376                 }
2377
2378                 if (current_override == policy) {
2379                         /* no effective change */
2380                         return;
2381                 }
2382         } while (!OSCompareAndSwap(current_override, policy, &thread->iotier_override));
2383
2384         /*
2385          * Since the thread may be currently throttled,
2386          * re-evaluate tiers and potentially break out
2387          * of an msleep
2388          */
2389         rethrottle_thread(thread->uthread);
2390 }
2391
/*
 * Userspace synchronization routines (like pthread mutexes, pthread reader-writer locks,
 * semaphores, dispatch_sync) may result in priority inversions where a higher priority
 * thread (i.e. higher scheduler priority, I/O tier, QoS tier) is waiting on a resource
 * owned by a lower priority thread. In these cases, we attempt to propagate the priority
 * token, as long as the subsystem informs us of the relationships between the threads.
 * The userspace synchronization subsystem should maintain the information of
 * owner->resource and resource->waiters itself.
 */

2401
2402 /*
2403  * This helper canonicalizes the resource/resource_type given the current qos_override_mode
2404  * in effect. Note that wildcards (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD) may need
2405  * to be handled specially in the future, but for now it's fine to slam
2406  * *resource to USER_ADDR_NULL even if it was previously a wildcard.
2407  */
2408 static void
2409 canonicalize_resource_and_type(user_addr_t *resource, int *resource_type)
2410 {
2411         if (qos_override_mode == QOS_OVERRIDE_MODE_OVERHANG_PEAK || qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2412                 /* Map all input resource/type to a single one */
2413                 *resource = USER_ADDR_NULL;
2414                 *resource_type = THREAD_QOS_OVERRIDE_TYPE_UNKNOWN;
2415         } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE) {
2416                 /* no transform */
2417         } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE) {
2418                 /* Map all mutex overrides to a single one, to avoid memory overhead */
2419                 if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX) {
2420                         *resource = USER_ADDR_NULL;
2421                 }
2422         }
2423 }
2424
2425 /* This helper routine finds an existing override if known. Locking should be done by caller */
2426 static struct thread_qos_override *
2427 find_qos_override(thread_t thread,
2428     user_addr_t resource,
2429     int resource_type)
2430 {
2431         struct thread_qos_override *override;
2432
2433         override = thread->overrides;
2434         while (override) {
2435                 if (override->override_resource == resource &&
2436                     override->override_resource_type == resource_type) {
2437                         return override;
2438                 }
2439
2440                 override = override->override_next;
2441         }
2442
2443         return NULL;
2444 }
2445
2446 static void
2447 find_and_decrement_qos_override(thread_t       thread,
2448     user_addr_t    resource,
2449     int            resource_type,
2450     boolean_t      reset,
2451     struct thread_qos_override **free_override_list)
2452 {
2453         struct thread_qos_override *override, *override_prev;
2454
2455         override_prev = NULL;
2456         override = thread->overrides;
2457         while (override) {
2458                 struct thread_qos_override *override_next = override->override_next;
2459
2460                 if ((THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD == resource || override->override_resource == resource) &&
2461                     (THREAD_QOS_OVERRIDE_TYPE_WILDCARD == resource_type || override->override_resource_type == resource_type)) {
2462                         if (reset) {
2463                                 override->override_contended_resource_count = 0;
2464                         } else {
2465                                 override->override_contended_resource_count--;
2466                         }
2467
2468                         if (override->override_contended_resource_count == 0) {
2469                                 if (override_prev == NULL) {
2470                                         thread->overrides = override_next;
2471                                 } else {
2472                                         override_prev->override_next = override_next;
2473                                 }
2474
2475                                 /* Add to out-param for later zfree */
2476                                 override->override_next = *free_override_list;
2477                                 *free_override_list = override;
2478                         } else {
2479                                 override_prev = override;
2480                         }
2481
2482                         if (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD != resource) {
2483                                 return;
2484                         }
2485                 } else {
2486                         override_prev = override;
2487                 }
2488
2489                 override = override_next;
2490         }
2491 }
2492
2493 /* This helper recalculates the current requested override using the policy selected at boot */
2494 static int
2495 calculate_requested_qos_override(thread_t thread)
2496 {
2497         if (qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2498                 return THREAD_QOS_UNSPECIFIED;
2499         }
2500
2501         /* iterate over all overrides and calculate MAX */
2502         struct thread_qos_override *override;
2503         int qos_override = THREAD_QOS_UNSPECIFIED;
2504
2505         override = thread->overrides;
2506         while (override) {
2507                 qos_override = MAX(qos_override, override->override_qos);
2508                 override = override->override_next;
2509         }
2510
2511         return qos_override;
2512 }
2513
/*
 * Record a QoS override of 'override_qos' against 'thread' for the given
 * (resource, resource_type) pair, after canonicalizing the pair for the
 * current qos_override_mode, then push the recomputed requested override into
 * the thread's policy if it changed.
 *
 * When 'first_override_for_resource' is set, an existing entry has its
 * contended-resource count incremented, and a missing entry is allocated and
 * linked at the head of thread->overrides. When it is not set, only an
 * already existing entry is updated.
 *
 * Takes and releases the thread mutex; the mutex is dropped temporarily
 * around the allocation.
 *
 * Returns:
 * - 0
 *
 * NOTE(review): 'rc' is initialised to 0 and never reassigned in this body,
 * so the EINVAL return that this comment used to advertise cannot occur as
 * the code is written.
 */
static int
proc_thread_qos_add_override_internal(thread_t         thread,
    int              override_qos,
    boolean_t        first_override_for_resource,
    user_addr_t      resource,
    int              resource_type)
{
        struct task_pend_token pend_token = {};
        int rc = 0;

        thread_mtx_lock(thread);

        KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_START,
            thread_tid(thread), override_qos, first_override_for_resource ? 1 : 0, 0, 0);

        DTRACE_BOOST5(qos_add_override_pre, uint64_t, thread_tid(thread),
            uint64_t, thread->requested_policy.thrp_qos,
            uint64_t, thread->effective_policy.thep_qos,
            int, override_qos, boolean_t, first_override_for_resource);

        struct thread_qos_override *override;
        struct thread_qos_override *override_new = NULL;
        int new_qos_override, prev_qos_override;
        int new_effective_qos;

        canonicalize_resource_and_type(&resource, &resource_type);

        override = find_qos_override(thread, resource, resource_type);
        if (first_override_for_resource && !override) {
                /* We need to allocate a new object. Drop the thread lock and
                 * recheck afterwards in case someone else added the override
                 */
                thread_mtx_unlock(thread);
                override_new = zalloc(thread_qos_override_zone);
                thread_mtx_lock(thread);
                override = find_qos_override(thread, resource, resource_type);
        }
        if (first_override_for_resource && override) {
                /*
                 * An entry exists: either it was found by the first lookup, or
                 * someone else allocated it while the thread lock was dropped.
                 * In the latter case override_new stays set and is freed below.
                 */
                override->override_contended_resource_count++;
        } else if (!override && override_new) {
                /* Still no entry: initialise ours and link it at the list head */
                override = override_new;
                override_new = NULL;
                override->override_next = thread->overrides;
                /* since first_override_for_resource was TRUE */
                override->override_contended_resource_count = 1;
                override->override_resource = resource;
                override->override_resource_type = (int16_t)resource_type;
                override->override_qos = THREAD_QOS_UNSPECIFIED;
                thread->overrides = override;
        }

        if (override) {
                /* An entry only ever keeps the highest QoS requested for it */
                if (override->override_qos == THREAD_QOS_UNSPECIFIED) {
                        override->override_qos = (int16_t)override_qos;
                } else {
                        override->override_qos = MAX(override->override_qos, (int16_t)override_qos);
                }
        }

        /* Determine how to combine the various overrides into a single current
         * requested override
         */
        new_qos_override = calculate_requested_qos_override(thread);

        prev_qos_override = proc_get_thread_policy_locked(thread,
            TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);

        /* Only touch the thread policy when the combined value actually moved */
        if (new_qos_override != prev_qos_override) {
                proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
                    TASK_POLICY_QOS_OVERRIDE,
                    new_qos_override, 0, &pend_token);
        }

        /* Sampled for the DTrace probe below */
        new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);

        thread_mtx_unlock(thread);

        /* Work recorded in pend_token is completed with the mutex released */
        thread_policy_update_complete_unlocked(thread, &pend_token);

        /* Release the allocation if it ended up unused */
        if (override_new) {
                zfree(thread_qos_override_zone, override_new);
        }

        DTRACE_BOOST4(qos_add_override_post, int, prev_qos_override,
            int, new_qos_override, int, new_effective_qos, int, rc);

        KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_END,
            new_qos_override, resource, resource_type, 0, 0);

        return rc;
}
2611
2612 int
2613 proc_thread_qos_add_override(task_t           task,
2614     thread_t         thread,
2615     uint64_t         tid,
2616     int              override_qos,
2617     boolean_t        first_override_for_resource,
2618     user_addr_t      resource,
2619     int              resource_type)
2620 {
2621         boolean_t has_thread_reference = FALSE;
2622         int rc = 0;
2623
2624         if (thread == THREAD_NULL) {
2625                 thread = task_findtid(task, tid);
2626                 /* returns referenced thread */
2627
2628                 if (thread == THREAD_NULL) {
2629                         KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_NONE,
2630                             tid, 0, 0xdead, 0, 0);
2631                         return ESRCH;
2632                 }
2633                 has_thread_reference = TRUE;
2634         } else {
2635                 assert(thread->task == task);
2636         }
2637         rc = proc_thread_qos_add_override_internal(thread, override_qos,
2638             first_override_for_resource, resource, resource_type);
2639         if (has_thread_reference) {
2640                 thread_deallocate(thread);
2641         }
2642
2643         return rc;
2644 }
2645
/*
 * Remove one contribution to the override recorded against 'thread' for
 * (resource, resource_type), or, when 'reset' is set, clear the matching
 * override(s) outright, then push the recomputed requested override into the
 * thread's policy if it changed.
 *
 * The pair is canonicalized for the current qos_override_mode first.
 * Override entries that drop to a zero count are collected while the locks
 * are held and only zfree'd after both the thread spinlock and the thread
 * mutex have been released.
 */
static void
proc_thread_qos_remove_override_internal(thread_t       thread,
    user_addr_t    resource,
    int            resource_type,
    boolean_t      reset)
{
        struct task_pend_token pend_token = {};

        /* Entries unlinked under the mutex, freed at the end of the function */
        struct thread_qos_override *deferred_free_override_list = NULL;
        int new_qos_override, prev_qos_override, new_effective_qos;

        thread_mtx_lock(thread);

        canonicalize_resource_and_type(&resource, &resource_type);

        find_and_decrement_qos_override(thread, resource, resource_type, reset, &deferred_free_override_list);

        KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_START,
            thread_tid(thread), resource, reset, 0, 0);

        DTRACE_BOOST3(qos_remove_override_pre, uint64_t, thread_tid(thread),
            uint64_t, thread->requested_policy.thrp_qos,
            uint64_t, thread->effective_policy.thep_qos);

        /* Determine how to combine the various overrides into a single current requested override */
        new_qos_override = calculate_requested_qos_override(thread);

        /* The thread spinlock is taken inside the thread mutex */
        spl_t s = splsched();
        thread_lock(thread);

        /*
         * The override chain and therefore the value of the current override is locked with thread mutex,
         * so we can do a get/set without races.  However, the rest of thread policy is locked under the spinlock.
         * This means you can't change the current override from a spinlock-only setter.
         */
        prev_qos_override = thread_get_requested_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);

        /* Only touch the thread policy when the combined value actually moved */
        if (new_qos_override != prev_qos_override) {
                proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, new_qos_override, 0, &pend_token);
        }

        /* Sampled for the DTrace probe below */
        new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);

        thread_unlock(thread);
        splx(s);

        thread_mtx_unlock(thread);

        /* Work recorded in pend_token is completed with both locks released */
        thread_policy_update_complete_unlocked(thread, &pend_token);

        /* Free the unlinked entries; the next pointer is read before each zfree */
        while (deferred_free_override_list) {
                struct thread_qos_override *override_next = deferred_free_override_list->override_next;

                zfree(thread_qos_override_zone, deferred_free_override_list);
                deferred_free_override_list = override_next;
        }

        DTRACE_BOOST3(qos_remove_override_post, int, prev_qos_override,
            int, new_qos_override, int, new_effective_qos);

        KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_END,
            thread_tid(thread), 0, 0, 0, 0);
}
2709
2710 int
2711 proc_thread_qos_remove_override(task_t      task,
2712     thread_t    thread,
2713     uint64_t    tid,
2714     user_addr_t resource,
2715     int         resource_type)
2716 {
2717         boolean_t has_thread_reference = FALSE;
2718
2719         if (thread == THREAD_NULL) {
2720                 thread = task_findtid(task, tid);
2721                 /* returns referenced thread */
2722
2723                 if (thread == THREAD_NULL) {
2724                         KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_NONE,
2725                             tid, 0, 0xdead, 0, 0);
2726                         return ESRCH;
2727                 }
2728                 has_thread_reference = TRUE;
2729         } else {
2730                 assert(task == thread->task);
2731         }
2732
2733         proc_thread_qos_remove_override_internal(thread, resource, resource_type, FALSE);
2734
2735         if (has_thread_reference) {
2736                 thread_deallocate(thread);
2737         }
2738
2739         return 0;
2740 }
2741
2742 /* Deallocate before thread termination */
2743 void
2744 proc_thread_qos_deallocate(thread_t thread)
2745 {
2746         /* This thread must have no more IPC overrides. */
2747         assert(thread->kevent_overrides == 0);
2748         assert(thread->requested_policy.thrp_qos_kevent_override == THREAD_QOS_UNSPECIFIED);
2749         assert(thread->requested_policy.thrp_qos_wlsvc_override == THREAD_QOS_UNSPECIFIED);
2750
2751         /*
2752          * Clear out any lingering override objects.
2753          */
2754         struct thread_qos_override *override;
2755
2756         thread_mtx_lock(thread);
2757         override = thread->overrides;
2758         thread->overrides = NULL;
2759         thread->requested_policy.thrp_qos_override = THREAD_QOS_UNSPECIFIED;
2760         /* We don't need to re-evaluate thread policy here because the thread has already exited */
2761         thread_mtx_unlock(thread);
2762
2763         while (override) {
2764                 struct thread_qos_override *override_next = override->override_next;
2765
2766                 zfree(thread_qos_override_zone, override);
2767                 override = override_next;
2768         }
2769 }
2770
2771 /*
2772  * Set up the primordial thread's QoS
2773  */
2774 void
2775 task_set_main_thread_qos(task_t task, thread_t thread)
2776 {
2777         struct task_pend_token pend_token = {};
2778
2779         assert(thread->task == task);
2780
2781         thread_mtx_lock(thread);
2782
2783         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2784             (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_START,
2785             thread_tid(thread), threquested_0(thread), threquested_1(thread),
2786             thread->requested_policy.thrp_qos, 0);
2787
2788         thread_qos_t primordial_qos = task_compute_main_thread_qos(task);
2789
2790         proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
2791             primordial_qos, 0, &pend_token);
2792
2793         thread_mtx_unlock(thread);
2794
2795         thread_policy_update_complete_unlocked(thread, &pend_token);
2796
2797         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2798             (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_END,
2799             thread_tid(thread), threquested_0(thread), threquested_1(thread),
2800             primordial_qos, 0);
2801 }
2802
2803 /*
2804  * KPI for pthread kext
2805  *
2806  * Return a good guess at what the initial manager QoS will be
2807  * Dispatch can override this in userspace if it so chooses
2808  */
2809 thread_qos_t
2810 task_get_default_manager_qos(task_t task)
2811 {
2812         thread_qos_t primordial_qos = task_compute_main_thread_qos(task);
2813
2814         if (primordial_qos == THREAD_QOS_LEGACY) {
2815                 primordial_qos = THREAD_QOS_USER_INITIATED;
2816         }
2817
2818         return primordial_qos;
2819 }
2820
2821 /*
2822  * Check if the kernel promotion on thread has changed
2823  * and apply it.
2824  *
2825  * thread locked on entry and exit
2826  */
2827 boolean_t
2828 thread_recompute_kernel_promotion_locked(thread_t thread)
2829 {
2830         boolean_t needs_update = FALSE;
2831         uint8_t kern_promotion_schedpri = (uint8_t)thread_get_inheritor_turnstile_sched_priority(thread);
2832
2833         /*
2834          * For now just assert that kern_promotion_schedpri <= MAXPRI_PROMOTE.
2835          * TURNSTILE_KERNEL_PROMOTE adds threads on the waitq already capped to MAXPRI_PROMOTE
2836          * and propagates the priority through the chain with the same cap, because as of now it does
2837          * not differenciate on the kernel primitive.
2838          *
2839          * If this assumption will change with the adoption of a kernel primitive that does not
2840          * cap the when adding/propagating,
2841          * then here is the place to put the generic cap for all kernel primitives
2842          * (converts the assert to kern_promotion_schedpri = MIN(priority, MAXPRI_PROMOTE))
2843          */
2844         assert(kern_promotion_schedpri <= MAXPRI_PROMOTE);
2845
2846         if (kern_promotion_schedpri != thread->kern_promotion_schedpri) {
2847                 KDBG(MACHDBG_CODE(
2848                             DBG_MACH_SCHED, MACH_TURNSTILE_KERNEL_CHANGE) | DBG_FUNC_NONE,
2849                     thread_tid(thread),
2850                     kern_promotion_schedpri,
2851                     thread->kern_promotion_schedpri);
2852
2853                 needs_update = TRUE;
2854                 thread->kern_promotion_schedpri = kern_promotion_schedpri;
2855                 thread_recompute_sched_pri(thread, SETPRI_DEFAULT);
2856         }
2857
2858         return needs_update;
2859 }
2860
/*
 * Check if the user promotion on thread has changed
 * and apply it.
 *
 * Returns TRUE only when the promotion changed, the thread has a waiting
 * turnstile, and its base priority moved as a result; FALSE otherwise.
 *
 * thread locked on entry, might drop the thread lock
 * and reacquire it.
 */
boolean_t
thread_recompute_user_promotion_locked(thread_t thread)
{
        boolean_t needs_update = FALSE;
        struct task_pend_token pend_token = {};
        /*
         * Inherited base priority, capped at MAXPRI_USER.
         * NOTE(review): if MIN is a plain conditional-expression macro, the
         * thread_get_inheritor_turnstile_base_priority() call is evaluated
         * twice here -- confirm, and hoist it into a local if so.
         */
        uint8_t user_promotion_basepri = MIN((uint8_t)thread_get_inheritor_turnstile_base_priority(thread), MAXPRI_USER);
        /* Remembered so a base priority change can be detected below */
        int old_base_pri = thread->base_pri;
        thread_qos_t qos_promotion;

        /* Check if user promotion has changed */
        if (thread->user_promotion_basepri == user_promotion_basepri) {
                return needs_update;
        } else {
                KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                    (TURNSTILE_CODE(TURNSTILE_PRIORITY_OPERATIONS, (THREAD_USER_PROMOTION_CHANGE))) | DBG_FUNC_NONE,
                    thread_tid(thread),
                    user_promotion_basepri,
                    thread->user_promotion_basepri,
                    0, 0);
                KDBG(MACHDBG_CODE(
                            DBG_MACH_SCHED, MACH_TURNSTILE_USER_CHANGE) | DBG_FUNC_NONE,
                    thread_tid(thread),
                    user_promotion_basepri,
                    thread->user_promotion_basepri);
        }

        /* Update the user promotion base pri */
        thread->user_promotion_basepri = user_promotion_basepri;
        pend_token.tpt_force_recompute_pri = 1;

        /* Priorities at or below MAXPRI_THROTTLE map to no QoS promotion */
        if (user_promotion_basepri <= MAXPRI_THROTTLE) {
                qos_promotion = THREAD_QOS_UNSPECIFIED;
        } else {
                qos_promotion = thread_user_promotion_qos_for_pri(user_promotion_basepri);
        }

        proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
            TASK_POLICY_QOS_PROMOTE, qos_promotion, 0, &pend_token);

        /* Evaluated before the lock is dropped below */
        if (thread_get_waiting_turnstile(thread) &&
            thread->base_pri != old_base_pri) {
                needs_update = TRUE;
        }

        /* The pending policy work is completed with the thread lock released */
        thread_unlock(thread);

        thread_policy_update_complete_unlocked(thread, &pend_token);

        /* Retake the lock so it is held again on return */
        thread_lock(thread);

        return needs_update;
}
2920
2921 /*
2922  * Convert the thread user promotion base pri to qos for threads in qos world.
2923  * For priority above UI qos, the qos would be set to UI.
2924  */
2925 thread_qos_t
2926 thread_user_promotion_qos_for_pri(int priority)
2927 {
2928         thread_qos_t qos;
2929         for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
2930                 if (thread_qos_policy_params.qos_pri[qos] <= priority) {
2931                         return qos;
2932                 }
2933         }
2934         return THREAD_QOS_MAINTENANCE;
2935 }
2936
/*
 * Set the thread's QoS Kevent override
 * Owned by the Kevent subsystem
 *
 * May be called with spinlocks held, but not spinlocks
 * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
 *
 * One 'add' must be balanced by one 'drop'.
 * Between 'add' and 'drop', the override QoS value may be updated with an 'update'.
 * Before the thread is deallocated, there must be 0 remaining overrides.
 *
 * 'is_new_override' distinguishes an 'add' (TRUE: the thread's
 * kevent_overrides count is incremented) from an 'update' (FALSE: the count
 * is left alone).
 */
static void
thread_kevent_override(thread_t    thread,
    uint32_t    qos_override,
    boolean_t   is_new_override)
{
        struct task_pend_token pend_token = {};
        boolean_t needs_update;

        spl_t s = splsched();
        thread_lock(thread);

        /* Value in effect before this call; drives the decision below */
        uint32_t old_override = thread->requested_policy.thrp_qos_kevent_override;

        assert(qos_override > THREAD_QOS_UNSPECIFIED);
        assert(qos_override < THREAD_QOS_LAST);

        if (is_new_override) {
                /* The increment happens in the condition, outside of any assert */
                if (thread->kevent_overrides++ == 0) {
                        /* This add is the first override for this thread */
                        assert(old_override == THREAD_QOS_UNSPECIFIED);
                } else {
                        /* There are already other overrides in effect for this thread */
                        assert(old_override > THREAD_QOS_UNSPECIFIED);
                }
        } else {
                /* There must be at least one override (the previous add call) in effect */
                assert(thread->kevent_overrides > 0);
                assert(old_override > THREAD_QOS_UNSPECIFIED);
        }

        /*
         * We can't allow lowering if there are several IPC overrides because
         * the caller can't possibly know the whole truth
         */
        if (thread->kevent_overrides == 1) {
                /* Sole override: any different value, higher or lower, is applied */
                needs_update = qos_override != old_override;
        } else {
                /* Several overrides: only a raise is applied */
                needs_update = qos_override > old_override;
        }

        if (needs_update) {
                proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
                    TASK_POLICY_QOS_KEVENT_OVERRIDE,
                    qos_override, 0, &pend_token);
                assert(pend_token.tpt_update_sockets == 0);
        }

        thread_unlock(thread);
        splx(s);

        /* Work recorded in pend_token is completed with the thread lock released */
        thread_policy_update_complete_unlocked(thread, &pend_token);
}
3000
3001 void
3002 thread_add_kevent_override(thread_t thread, uint32_t qos_override)
3003 {
3004         thread_kevent_override(thread, qos_override, TRUE);
3005 }
3006
3007 void
3008 thread_update_kevent_override(thread_t thread, uint32_t qos_override)
3009 {
3010         thread_kevent_override(thread, qos_override, FALSE);
3011 }
3012
3013 void
3014 thread_drop_kevent_override(thread_t thread)
3015 {
3016         struct task_pend_token pend_token = {};
3017
3018         spl_t s = splsched();
3019         thread_lock(thread);
3020
3021         assert(thread->kevent_overrides > 0);
3022
3023         if (--thread->kevent_overrides == 0) {
3024                 /*
3025                  * There are no more overrides for this thread, so we should
3026                  * clear out the saturated override value
3027                  */
3028
3029                 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3030                     TASK_POLICY_QOS_KEVENT_OVERRIDE, THREAD_QOS_UNSPECIFIED,
3031                     0, &pend_token);
3032         }
3033
3034         thread_unlock(thread);
3035         splx(s);
3036
3037         thread_policy_update_complete_unlocked(thread, &pend_token);
3038 }
3039
3040 /*
3041  * Set the thread's QoS Workloop Servicer override
3042  * Owned by the Kevent subsystem
3043  *
3044  * May be called with spinlocks held, but not spinlocks
3045  * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
3046  *
3047  * One 'add' must be balanced by one 'drop'.
3048  * Between 'add' and 'drop', the overide QoS value may be updated with an 'update'.
3049  * Before the thread is deallocated, there must be 0 remaining overrides.
3050  */
3051 static void
3052 thread_servicer_override(thread_t    thread,
3053     uint32_t    qos_override,
3054     boolean_t   is_new_override)
3055 {
3056         struct task_pend_token pend_token = {};
3057
3058         spl_t s = splsched();
3059         thread_lock(thread);
3060
3061         if (is_new_override) {
3062                 assert(!thread->requested_policy.thrp_qos_wlsvc_override);
3063         } else {
3064                 assert(thread->requested_policy.thrp_qos_wlsvc_override);
3065         }
3066
3067         proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3068             TASK_POLICY_QOS_SERVICER_OVERRIDE,
3069             qos_override, 0, &pend_token);
3070
3071         thread_unlock(thread);
3072         splx(s);
3073
3074         assert(pend_token.tpt_update_sockets == 0);
3075         thread_policy_update_complete_unlocked(thread, &pend_token);
3076 }
3077
3078 void
3079 thread_add_servicer_override(thread_t thread, uint32_t qos_override)
3080 {
3081         assert(qos_override > THREAD_QOS_UNSPECIFIED);
3082         assert(qos_override < THREAD_QOS_LAST);
3083
3084         thread_servicer_override(thread, qos_override, TRUE);
3085 }
3086
3087 void
3088 thread_update_servicer_override(thread_t thread, uint32_t qos_override)
3089 {
3090         assert(qos_override > THREAD_QOS_UNSPECIFIED);
3091         assert(qos_override < THREAD_QOS_LAST);
3092
3093         thread_servicer_override(thread, qos_override, FALSE);
3094 }
3095
3096 void
3097 thread_drop_servicer_override(thread_t thread)
3098 {
3099         thread_servicer_override(thread, THREAD_QOS_UNSPECIFIED, FALSE);
3100 }
3101
3102
3103 /* Get current requested qos / relpri, may be called from spinlock context */
3104 thread_qos_t
3105 thread_get_requested_qos(thread_t thread, int *relpri)
3106 {
3107         int relprio_value = 0;
3108         thread_qos_t qos;
3109
3110         qos = (thread_qos_t)proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
3111             TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
3112         if (relpri) {
3113                 *relpri = -relprio_value;
3114         }
3115         return qos;
3116 }
3117
3118 /*
3119  * This function will promote the thread priority
3120  * since exec could block other threads calling
3121  * proc_find on the proc. This boost must be removed
3122  * via call to thread_clear_exec_promotion.
3123  *
3124  * This should be replaced with a generic 'priority inheriting gate' mechanism (24194397)
3125  */
3126 void
3127 thread_set_exec_promotion(thread_t thread)
3128 {
3129         spl_t s = splsched();
3130         thread_lock(thread);
3131
3132         sched_thread_promote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
3133
3134         thread_unlock(thread);
3135         splx(s);
3136 }
3137
3138 /*
3139  * This function will clear the exec thread
3140  * promotion set on the thread by thread_set_exec_promotion.
3141  */
3142 void
3143 thread_clear_exec_promotion(thread_t thread)
3144 {
3145         spl_t s = splsched();
3146         thread_lock(thread);
3147
3148         sched_thread_unpromote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
3149
3150         thread_unlock(thread);
3151         splx(s);
3152 }