/*
 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <mach/mach_types.h>
#include <mach/thread_act_server.h>

#include <kern/kern_types.h>
#include <kern/processor.h>
#include <kern/thread.h>
#include <kern/affinity.h>
#include <mach/task_policy.h>
#include <kern/sfi.h>
#include <kern/policy_internal.h>
#include <sys/errno.h>
#include <sys/ulock.h>

#include <mach/machine/sdt.h>
extern int proc_selfpid(void);
extern char * proc_name_address(void *p);
extern void rethrottle_thread(void * uthread);
#define QOS_EXTRACT(q)    ((q) & 0xff)

uint32_t qos_override_mode;
#define QOS_OVERRIDE_MODE_OVERHANG_PEAK 0
#define QOS_OVERRIDE_MODE_IGNORE_OVERRIDE 1
#define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE 2
#define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_IGNORE_DISPATCH 3
#define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE 4

extern zone_t thread_qos_override_zone;
static boolean_t
proc_thread_qos_remove_override_internal(thread_t thread, user_addr_t resource, int resource_type, boolean_t reset, boolean_t squash);
/*
 * THREAD_QOS_UNSPECIFIED is assigned the highest tier available, so it does not provide a limit
 * to threads that don't have a QoS class set.
 */
const qos_policy_params_t thread_qos_policy_params = {
    /*
     * This table defines the starting base priority of the thread,
     * which will be modified by the thread importance and the task max priority
     * before being applied.
     */
    .qos_pri[THREAD_QOS_UNSPECIFIED]                = 0, /* not consulted */
    .qos_pri[THREAD_QOS_USER_INTERACTIVE]           = BASEPRI_BACKGROUND, /* i.e. 46 */
    .qos_pri[THREAD_QOS_USER_INITIATED]             = BASEPRI_USER_INITIATED,
    .qos_pri[THREAD_QOS_LEGACY]                     = BASEPRI_DEFAULT,
    .qos_pri[THREAD_QOS_UTILITY]                    = BASEPRI_UTILITY,
    .qos_pri[THREAD_QOS_BACKGROUND]                 = MAXPRI_THROTTLE,
    .qos_pri[THREAD_QOS_MAINTENANCE]                = MAXPRI_THROTTLE,
    /*
     * This table defines the highest IO priority that a thread marked with this
     * QoS class can have.
     */
#if CONFIG_EMBEDDED
    .qos_iotier[THREAD_QOS_UNSPECIFIED]             = THROTTLE_LEVEL_TIER0,
    .qos_iotier[THREAD_QOS_USER_INTERACTIVE]        = THROTTLE_LEVEL_TIER0,
    .qos_iotier[THREAD_QOS_USER_INITIATED]          = THROTTLE_LEVEL_TIER0,
    .qos_iotier[THREAD_QOS_LEGACY]                  = THROTTLE_LEVEL_TIER0,
    .qos_iotier[THREAD_QOS_UTILITY]                 = THROTTLE_LEVEL_TIER0,
    .qos_iotier[THREAD_QOS_BACKGROUND]              = THROTTLE_LEVEL_TIER3,
    .qos_iotier[THREAD_QOS_MAINTENANCE]             = THROTTLE_LEVEL_TIER3,
#else
    .qos_iotier[THREAD_QOS_UNSPECIFIED]             = THROTTLE_LEVEL_TIER0,
    .qos_iotier[THREAD_QOS_USER_INTERACTIVE]        = THROTTLE_LEVEL_TIER0,
    .qos_iotier[THREAD_QOS_USER_INITIATED]          = THROTTLE_LEVEL_TIER0,
    .qos_iotier[THREAD_QOS_LEGACY]                  = THROTTLE_LEVEL_TIER0,
    .qos_iotier[THREAD_QOS_UTILITY]                 = THROTTLE_LEVEL_TIER1,
    .qos_iotier[THREAD_QOS_BACKGROUND]              = THROTTLE_LEVEL_TIER2, /* possibly overridden by bg_iotier */
    .qos_iotier[THREAD_QOS_MAINTENANCE]             = THROTTLE_LEVEL_TIER3,
#endif
    /*
     * This table defines the highest QoS level that
     * a thread marked with this QoS class can have.
     */
    .qos_through_qos[THREAD_QOS_UNSPECIFIED]        = QOS_EXTRACT(THROUGHPUT_QOS_TIER_UNSPECIFIED),
    .qos_through_qos[THREAD_QOS_USER_INTERACTIVE]   = QOS_EXTRACT(THROUGHPUT_QOS_TIER_0),
    .qos_through_qos[THREAD_QOS_USER_INITIATED]     = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
    .qos_through_qos[THREAD_QOS_LEGACY]             = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
    .qos_through_qos[THREAD_QOS_UTILITY]            = QOS_EXTRACT(THROUGHPUT_QOS_TIER_2),
    .qos_through_qos[THREAD_QOS_BACKGROUND]         = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
    .qos_through_qos[THREAD_QOS_MAINTENANCE]        = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
    .qos_latency_qos[THREAD_QOS_UNSPECIFIED]        = QOS_EXTRACT(LATENCY_QOS_TIER_UNSPECIFIED),
    .qos_latency_qos[THREAD_QOS_USER_INTERACTIVE]   = QOS_EXTRACT(LATENCY_QOS_TIER_0),
    .qos_latency_qos[THREAD_QOS_USER_INITIATED]     = QOS_EXTRACT(LATENCY_QOS_TIER_1),
    .qos_latency_qos[THREAD_QOS_LEGACY]             = QOS_EXTRACT(LATENCY_QOS_TIER_1),
    .qos_latency_qos[THREAD_QOS_UTILITY]            = QOS_EXTRACT(LATENCY_QOS_TIER_3),
    .qos_latency_qos[THREAD_QOS_BACKGROUND]         = QOS_EXTRACT(LATENCY_QOS_TIER_3),
    .qos_latency_qos[THREAD_QOS_MAINTENANCE]        = QOS_EXTRACT(LATENCY_QOS_TIER_3),
};
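
/*
 * Illustrative example of how one column of the tables above combines
 * (the concrete numbers are taken from comments elsewhere in this file,
 * e.g. BASEPRI_BACKGROUND noted as 46 above, and should be treated as
 * assumptions rather than authoritative values): a THREAD_QOS_UTILITY
 * thread starts from qos_pri[UTILITY] = BASEPRI_UTILITY, is limited to
 * qos_iotier[UTILITY] (TIER0 or TIER1 depending on which table above is
 * compiled in), and defaults to latency tier 3 and throughput tier 2,
 * all of which may still be tightened by task-level policy in
 * thread_policy_update_internal_spinlocked().
 */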
static void
thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode);

static int
thread_qos_scaled_relative_priority(int qos, int qos_relprio);

static void
proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info);

static void
proc_set_thread_policy_locked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);

static void
proc_set_thread_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);

static void
thread_set_requested_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2);

static int
thread_get_requested_policy_spinlocked(thread_t thread, int category, int flavor, int* value2);

static int
proc_get_thread_policy_locked(thread_t thread, int category, int flavor, int* value2);

static void
thread_policy_update_spinlocked(thread_t thread, boolean_t recompute_priority, task_pend_token_t pend_token);

static void
thread_policy_update_internal_spinlocked(thread_t thread, boolean_t recompute_priority, task_pend_token_t pend_token);
void
thread_policy_init(void) {
    if (PE_parse_boot_argn("qos_override_mode", &qos_override_mode, sizeof(qos_override_mode))) {
        printf("QOS override mode: 0x%08x\n", qos_override_mode);
    } else {
        qos_override_mode = QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE;
    }
}
boolean_t
thread_has_qos_policy(thread_t thread) {
    return (proc_get_thread_policy(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS) != THREAD_QOS_UNSPECIFIED) ? TRUE : FALSE;
}
static void
thread_remove_qos_policy_locked(thread_t thread,
                                task_pend_token_t pend_token)
{
    __unused int prev_qos = thread->requested_policy.thrp_qos;

    DTRACE_PROC2(qos__remove, thread_t, thread, int, prev_qos);

    proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
                                  THREAD_QOS_UNSPECIFIED, 0, pend_token);
}
kern_return_t
thread_remove_qos_policy(thread_t thread)
{
    struct task_pend_token pend_token = {};

    thread_mtx_lock(thread);
    if (!thread->active) {
        thread_mtx_unlock(thread);
        return KERN_TERMINATED;
    }

    thread_remove_qos_policy_locked(thread, &pend_token);

    thread_mtx_unlock(thread);

    thread_policy_update_complete_unlocked(thread, &pend_token);

    return KERN_SUCCESS;
}
boolean_t
thread_is_static_param(thread_t thread)
{
    if (thread->static_param) {
        DTRACE_PROC1(qos__legacy__denied, thread_t, thread);
        return TRUE;
    }
    return FALSE;
}
/*
 * Relative priorities can range between 0REL and -15REL. These
 * map to QoS-specific ranges, to create non-overlapping priority
 * ranges.
 */
static int
thread_qos_scaled_relative_priority(int qos, int qos_relprio)
{
    int next_lower_qos;

    /* Fast path, since no validation or scaling is needed */
    if (qos_relprio == 0) return 0;

    switch (qos) {
        case THREAD_QOS_USER_INTERACTIVE:
            next_lower_qos = THREAD_QOS_USER_INITIATED;
            break;
        case THREAD_QOS_USER_INITIATED:
            next_lower_qos = THREAD_QOS_LEGACY;
            break;
        case THREAD_QOS_LEGACY:
            next_lower_qos = THREAD_QOS_UTILITY;
            break;
        case THREAD_QOS_UTILITY:
            next_lower_qos = THREAD_QOS_BACKGROUND;
            break;
        case THREAD_QOS_MAINTENANCE:
        case THREAD_QOS_BACKGROUND:
            next_lower_qos = 0;
            break;
        default:
            panic("Unrecognized QoS %d", qos);
            return 0;
    }

    int prio_range_max = thread_qos_policy_params.qos_pri[qos];
    int prio_range_min = next_lower_qos ? thread_qos_policy_params.qos_pri[next_lower_qos] : 0;

    /*
     * We now have the valid range that the scaled relative priority can map to. Note
     * that the lower bound is exclusive, but the upper bound is inclusive. If the
     * range is (21,31], 0REL should map to 31 and -15REL should map to 22. We use the
     * fact that the max relative priority is -15 and use ">>4" to divide by 16 and discard
     * the remainder.
     */
    int scaled_relprio = -(((prio_range_max - prio_range_min) * (-qos_relprio)) >> 4);

    return scaled_relprio;
}
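
/*
 * Worked example of the scaling above (an illustration only; the 31 and 20
 * used here follow the base-priority values cited in the comments elsewhere
 * in this file for LEGACY and UTILITY, and should be treated as assumptions):
 *
 *   thread_qos_scaled_relative_priority(THREAD_QOS_LEGACY, -8)
 *     prio_range_max = 31, prio_range_min = 20
 *     scaled = -(((31 - 20) * 8) >> 4) = -(88 >> 4) = -5
 *
 * so a LEGACY thread at -8REL lands 5 bands below the unmodified LEGACY base
 * priority, still above the exclusive lower bound of the range.
 */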
/*
 * flag set by -qos-policy-allow boot-arg to allow
 * testing thread qos policy from userspace
 */
boolean_t allow_qos_policy_set = FALSE;
kern_return_t
thread_policy_set(
    thread_t                thread,
    thread_policy_flavor_t  flavor,
    thread_policy_t         policy_info,
    mach_msg_type_number_t  count)
{
    thread_qos_policy_data_t req_qos;
    kern_return_t kr;

    req_qos.qos_tier = THREAD_QOS_UNSPECIFIED;

    if (thread == THREAD_NULL)
        return (KERN_INVALID_ARGUMENT);

    if (allow_qos_policy_set == FALSE) {
        if (thread_is_static_param(thread))
            return (KERN_POLICY_STATIC);

        if (flavor == THREAD_QOS_POLICY)
            return (KERN_INVALID_ARGUMENT);
    }

    /* Threads without static_param set reset their QoS when other policies are applied. */
    if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
        /* Store the existing tier, if we fail this call it is used to reset back. */
        req_qos.qos_tier = thread->requested_policy.thrp_qos;
        req_qos.tier_importance = thread->requested_policy.thrp_qos_relprio;

        kr = thread_remove_qos_policy(thread);
        if (kr != KERN_SUCCESS) {
            return kr;
        }
    }

    kr = thread_policy_set_internal(thread, flavor, policy_info, count);

    /* Return KERN_QOS_REMOVED instead of KERN_SUCCESS if we succeeded. */
    if (req_qos.qos_tier != THREAD_QOS_UNSPECIFIED) {
        if (kr != KERN_SUCCESS) {
            /* Reset back to our original tier as the set failed. */
            (void)thread_policy_set_internal(thread, THREAD_QOS_POLICY, (thread_policy_t)&req_qos, THREAD_QOS_POLICY_COUNT);
        }
    }

    return kr;
}
kern_return_t
thread_policy_set_internal(
    thread_t                thread,
    thread_policy_flavor_t  flavor,
    thread_policy_t         policy_info,
    mach_msg_type_number_t  count)
{
    kern_return_t result = KERN_SUCCESS;
    struct task_pend_token pend_token = {};

    thread_mtx_lock(thread);
    if (!thread->active) {
        thread_mtx_unlock(thread);

        return (KERN_TERMINATED);
    }

    switch (flavor) {

    case THREAD_EXTENDED_POLICY:
    {
        boolean_t timeshare = TRUE;

        if (count >= THREAD_EXTENDED_POLICY_COUNT) {
            thread_extended_policy_t info;

            info = (thread_extended_policy_t)policy_info;
            timeshare = info->timeshare;
        }

        sched_mode_t mode = (timeshare == TRUE) ? TH_MODE_TIMESHARE : TH_MODE_FIXED;

        spl_t s = splsched();
        thread_lock(thread);

        thread_set_user_sched_mode_and_recompute_pri(thread, mode);

        thread_unlock(thread);
        splx(s);

        pend_token.tpt_update_thread_sfi = 1;

        break;
    }

    case THREAD_TIME_CONSTRAINT_POLICY:
    {
        thread_time_constraint_policy_t info;

        if (count < THREAD_TIME_CONSTRAINT_POLICY_COUNT) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        info = (thread_time_constraint_policy_t)policy_info;
        if (info->constraint < info->computation ||
            info->computation > max_rt_quantum ||
            info->computation < min_rt_quantum) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        spl_t s = splsched();
        thread_lock(thread);

        thread->realtime.period      = info->period;
        thread->realtime.computation = info->computation;
        thread->realtime.constraint  = info->constraint;
        thread->realtime.preemptible = info->preemptible;

        thread_set_user_sched_mode_and_recompute_pri(thread, TH_MODE_REALTIME);

        thread_unlock(thread);
        splx(s);

        pend_token.tpt_update_thread_sfi = 1;

        break;
    }

    case THREAD_PRECEDENCE_POLICY:
    {
        thread_precedence_policy_t info;

        if (count < THREAD_PRECEDENCE_POLICY_COUNT) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }
        info = (thread_precedence_policy_t)policy_info;

        spl_t s = splsched();
        thread_lock(thread);

        thread->importance = info->importance;

        thread_recompute_priority(thread);

        thread_unlock(thread);
        splx(s);

        break;
    }

    case THREAD_AFFINITY_POLICY:
    {
        thread_affinity_policy_t info;

        if (!thread_affinity_is_supported()) {
            result = KERN_NOT_SUPPORTED;
            break;
        }
        if (count < THREAD_AFFINITY_POLICY_COUNT) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        info = (thread_affinity_policy_t) policy_info;
        /*
         * Unlock the thread mutex here and
         * return directly after calling thread_affinity_set().
         * This is necessary for correct lock ordering because
         * thread_affinity_set() takes the task lock.
         */
        thread_mtx_unlock(thread);
        return thread_affinity_set(thread, info->affinity_tag);
    }

#if CONFIG_EMBEDDED
    case THREAD_BACKGROUND_POLICY:
    {
        thread_background_policy_t info;

        if (count < THREAD_BACKGROUND_POLICY_COUNT) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        if (thread->task != current_task()) {
            result = KERN_PROTECTION_FAILURE;
            break;
        }

        info = (thread_background_policy_t) policy_info;

        int enable;

        if (info->priority == THREAD_BACKGROUND_POLICY_DARWIN_BG)
            enable = TASK_POLICY_ENABLE;
        else
            enable = TASK_POLICY_DISABLE;

        int category = (current_thread() == thread) ? TASK_POLICY_INTERNAL : TASK_POLICY_EXTERNAL;

        proc_set_thread_policy_locked(thread, category, TASK_POLICY_DARWIN_BG, enable, 0, &pend_token);

        break;
    }
#endif /* CONFIG_EMBEDDED */

    case THREAD_THROUGHPUT_QOS_POLICY:
    {
        thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
        thread_throughput_qos_t tqos;

        if (count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        if ((result = qos_throughput_policy_validate(info->thread_throughput_qos_tier)) != KERN_SUCCESS)
            break;

        tqos = qos_extract(info->thread_throughput_qos_tier);

        proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
                                      TASK_POLICY_THROUGH_QOS, tqos, 0, &pend_token);

        break;
    }

    case THREAD_LATENCY_QOS_POLICY:
    {
        thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
        thread_latency_qos_t lqos;

        if (count < THREAD_LATENCY_QOS_POLICY_COUNT) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        if ((result = qos_latency_policy_validate(info->thread_latency_qos_tier)) != KERN_SUCCESS)
            break;

        lqos = qos_extract(info->thread_latency_qos_tier);

        proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
                                      TASK_POLICY_LATENCY_QOS, lqos, 0, &pend_token);

        break;
    }

    case THREAD_QOS_POLICY:
    {
        thread_qos_policy_t info = (thread_qos_policy_t)policy_info;

        if (count < THREAD_QOS_POLICY_COUNT) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        if (info->qos_tier < 0 || info->qos_tier >= THREAD_QOS_LAST) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        if (info->tier_importance > 0 || info->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        if (info->qos_tier == THREAD_QOS_UNSPECIFIED && info->tier_importance != 0) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
                                      info->qos_tier, -info->tier_importance, &pend_token);

        break;
    }

    default:
        result = KERN_INVALID_ARGUMENT;
        break;
    }

    thread_mtx_unlock(thread);

    thread_policy_update_complete_unlocked(thread, &pend_token);

    return (result);
}
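
/*
 * For reference, a typical userspace caller reaches the
 * THREAD_TIME_CONSTRAINT_POLICY case above through the Mach interface,
 * roughly like the following hypothetical sketch (timebase conversion and
 * error handling omitted; the field values are placeholders):
 *
 *   thread_time_constraint_policy_data_t policy;
 *   policy.period      = some_period_in_abs_time;
 *   policy.computation = some_computation_in_abs_time;
 *   policy.constraint  = some_constraint_in_abs_time;  // must be >= computation
 *   policy.preemptible = TRUE;
 *   kern_return_t kr = thread_policy_set(mach_thread_self(),
 *                                        THREAD_TIME_CONSTRAINT_POLICY,
 *                                        (thread_policy_t)&policy,
 *                                        THREAD_TIME_CONSTRAINT_POLICY_COUNT);
 *
 * The validation above rejects computation values outside
 * [min_rt_quantum, max_rt_quantum] and constraints smaller than the
 * computation.
 */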
/*
 * Note that there is no implemented difference between POLICY_RR and POLICY_FIFO.
 * Both result in FIXED mode scheduling.
 */
static sched_mode_t
convert_policy_to_sched_mode(integer_t policy) {
    switch (policy) {
        case POLICY_TIMESHARE:
            return TH_MODE_TIMESHARE;
        case POLICY_RR:
        case POLICY_FIFO:
            return TH_MODE_FIXED;
        default:
            panic("unexpected sched policy: %d", policy);
            return TH_MODE_NONE;
    }
}
/*
 * Called either with the thread mutex locked
 * or from the pthread kext in a 'safe place'.
 */
static kern_return_t
thread_set_mode_and_absolute_pri_internal(thread_t thread,
                                          sched_mode_t mode,
                                          integer_t priority,
                                          task_pend_token_t pend_token)
{
    kern_return_t kr = KERN_SUCCESS;

    spl_t s = splsched();
    thread_lock(thread);

    /* This path isn't allowed to change a thread out of realtime. */
    if ((thread->sched_mode == TH_MODE_REALTIME) ||
        (thread->saved_mode == TH_MODE_REALTIME)) {
        kr = KERN_FAILURE;
        goto unlock;
    }

    if (thread->policy_reset) {
        kr = KERN_SUCCESS;
        goto unlock;
    }

    sched_mode_t old_mode = thread->sched_mode;

    /*
     * Reverse engineer and apply the correct importance value
     * from the requested absolute priority value.
     *
     * TODO: Store the absolute priority value instead
     */

    if (priority >= thread->max_priority)
        priority = thread->max_priority - thread->task_priority;
    else if (priority >= MINPRI_KERNEL)
        priority -= MINPRI_KERNEL;
    else if (priority >= MINPRI_RESERVED)
        priority -= MINPRI_RESERVED;
    else
        priority -= BASEPRI_DEFAULT;

    priority += thread->task_priority;

    if (priority > thread->max_priority)
        priority = thread->max_priority;
    else if (priority < MINPRI)
        priority = MINPRI;

    thread->importance = priority - thread->task_priority;

    thread_set_user_sched_mode_and_recompute_pri(thread, mode);

    if (mode != old_mode)
        pend_token->tpt_update_thread_sfi = 1;

unlock:
    thread_unlock(thread);
    splx(s);

    return kr;
}
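
/*
 * Illustration of the importance reverse-engineering above (an assumption-laden
 * example, not part of the original code: it assumes BASEPRI_DEFAULT is 31, a
 * default user task with task_priority == 31, and a requested priority below
 * MINPRI_RESERVED): a request for absolute priority 37 falls into the user
 * range, so 37 - BASEPRI_DEFAULT = 6 is re-added to task_priority, yielding 37
 * again after clamping to [MINPRI, thread->max_priority], and the stored
 * thread->importance comes out as 6.
 */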
/*
 * KPI for pthread kext
 *
 * Set scheduling policy & absolute priority for thread
 * May be called from waitqueue callout context with spinlocks held
 * Thread mutex lock is not held
 */
kern_return_t
thread_set_workq_pri(thread_t thread,
                     integer_t priority,
                     integer_t policy)
{
    struct task_pend_token pend_token = {};
    sched_mode_t mode = convert_policy_to_sched_mode(policy);

    assert(thread->static_param);
    if (!thread->static_param)
        return KERN_FAILURE;

    /* Concern: this doesn't hold the mutex... */
    if (!thread->active)
        return KERN_TERMINATED;

    kern_return_t kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority, &pend_token);

    if (pend_token.tpt_update_thread_sfi)
        sfi_reevaluate(thread);

    return kr;
}
/*
 * thread_set_mode_and_absolute_pri:
 *
 * Set scheduling policy & absolute priority for thread, for deprecated
 * thread_set_policy and thread_policy interfaces.
 *
 * Called with nothing locked.
 */
kern_return_t
thread_set_mode_and_absolute_pri(thread_t thread,
                                 integer_t policy,
                                 integer_t priority)
{
    kern_return_t kr = KERN_SUCCESS;
    struct task_pend_token pend_token = {};

    sched_mode_t mode = convert_policy_to_sched_mode(policy);

    thread_mtx_lock(thread);

    if (!thread->active) {
        kr = KERN_TERMINATED;
        goto unlock;
    }

    if (thread_is_static_param(thread)) {
        kr = KERN_POLICY_STATIC;
        goto unlock;
    }

    /* Setting legacy policies on threads kills the current QoS */
    if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED)
        thread_remove_qos_policy_locked(thread, &pend_token);

    kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority, &pend_token);

unlock:
    thread_mtx_unlock(thread);

    thread_policy_update_complete_unlocked(thread, &pend_token);

    return (kr);
}
/*
 * Set the thread's requested mode and recompute priority
 * Called with thread mutex and thread locked
 *
 * TODO: Mitigate potential problems caused by moving thread to end of runq
 * whenever its priority is recomputed
 *      Only remove when it actually changes? Attempt to re-insert at appropriate location?
 */
static void
thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode)
{
    if (thread->policy_reset)
        return;

    boolean_t removed = thread_run_queue_remove(thread);

    /*
     * TODO: Instead of having saved mode, have 'user mode' and 'true mode'.
     * That way there's zero confusion over which the user wants
     * and which the kernel wants.
     */
    if (thread->sched_flags & TH_SFLAG_DEMOTED_MASK)
        thread->saved_mode = mode;
    else
        sched_set_thread_mode(thread, mode);

    thread_recompute_priority(thread);

    if (removed)
        thread_run_queue_reinsert(thread, SCHED_TAILQ);
}
/* called at splsched with thread lock locked */
static void
thread_update_qos_cpu_time_locked(thread_t thread)
{
    task_t task = thread->task;
    uint64_t timer_sum, timer_delta;

    /*
     * This is only as accurate as the distance between
     * last context switch (embedded) or last user/kernel boundary transition (desktop)
     * because user_timer and system_timer are only updated then.
     *
     * TODO: Consider running a thread_timer_event operation here to update it first.
     *       Maybe doable with interrupts disabled from current thread.
     *       If the thread is on a different core, may not be easy to get right.
     *
     * TODO: There should be a function for this in timer.c
     */

    timer_sum = timer_grab(&thread->user_timer);
    timer_sum += timer_grab(&thread->system_timer);
    timer_delta = timer_sum - thread->vtimer_qos_save;

    thread->vtimer_qos_save = timer_sum;

    uint64_t* task_counter = NULL;

    /* Update the task-level effective and requested qos stats atomically, because we don't have the task lock. */
    switch (thread->effective_policy.thep_qos) {
    case THREAD_QOS_DEFAULT:          task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_default;          break;
    case THREAD_QOS_MAINTENANCE:      task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_maintenance;      break;
    case THREAD_QOS_BACKGROUND:       task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_background;       break;
    case THREAD_QOS_UTILITY:          task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_utility;          break;
    case THREAD_QOS_LEGACY:           task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_legacy;           break;
    case THREAD_QOS_USER_INITIATED:   task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_initiated;   break;
    case THREAD_QOS_USER_INTERACTIVE: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_interactive; break;
    default:
        panic("unknown effective QoS: %d", thread->effective_policy.thep_qos);
    }

    OSAddAtomic64(timer_delta, task_counter);

    /* Update the task-level qos stats atomically, because we don't have the task lock. */
    switch (thread->requested_policy.thrp_qos) {
    case THREAD_QOS_DEFAULT:          task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_default;          break;
    case THREAD_QOS_MAINTENANCE:      task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_maintenance;      break;
    case THREAD_QOS_BACKGROUND:       task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_background;       break;
    case THREAD_QOS_UTILITY:          task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_utility;          break;
    case THREAD_QOS_LEGACY:           task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_legacy;           break;
    case THREAD_QOS_USER_INITIATED:   task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_initiated;   break;
    case THREAD_QOS_USER_INTERACTIVE: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_interactive; break;
    default:
        panic("unknown requested QoS: %d", thread->requested_policy.thrp_qos);
    }

    OSAddAtomic64(timer_delta, task_counter);
}
/*
 * called with no thread locks held
 */
void
thread_update_qos_cpu_time(thread_t thread)
{
    thread_mtx_lock(thread);

    spl_t s = splsched();
    thread_lock(thread);

    thread_update_qos_cpu_time_locked(thread);

    thread_unlock(thread);
    splx(s);

    thread_mtx_unlock(thread);
}
/*
 * Calculate base priority from thread attributes, and set it on the thread
 *
 * Called with thread_lock and thread mutex held.
 */
void
thread_recompute_priority(
    thread_t    thread)
{
    integer_t   priority;

    if (thread->policy_reset)
        return;

    if (thread->sched_mode == TH_MODE_REALTIME) {
        sched_set_thread_base_priority(thread, BASEPRI_RTQUEUES);
        return;
    } else if (thread->effective_policy.thep_qos != THREAD_QOS_UNSPECIFIED) {
        int qos = thread->effective_policy.thep_qos;
        int qos_ui_is_urgent = thread->effective_policy.thep_qos_ui_is_urgent;
        int qos_relprio = -(thread->effective_policy.thep_qos_relprio); /* stored in task policy inverted */
        int qos_scaled_relprio;

        assert(qos >= 0 && qos < THREAD_QOS_LAST);
        assert(qos_relprio <= 0 && qos_relprio >= THREAD_QOS_MIN_TIER_IMPORTANCE);

        priority = thread_qos_policy_params.qos_pri[qos];
        qos_scaled_relprio = thread_qos_scaled_relative_priority(qos, qos_relprio);

        if (qos == THREAD_QOS_USER_INTERACTIVE && qos_ui_is_urgent == 1) {
            /* Bump priority 46 to 47 when in a frontmost app */
            qos_scaled_relprio += 1;
        }

        /* TODO: factor in renice priority here? */

        priority += qos_scaled_relprio;
    } else {
        if (thread->importance > MAXPRI)
            priority = MAXPRI;
        else if (thread->importance < -MAXPRI)
            priority = -MAXPRI;
        else
            priority = thread->importance;

        priority += thread->task_priority;
    }

    priority = MAX(priority, thread->user_promotion_basepri);

    /*
     * Clamp priority back into the allowed range for this task.
     * The initial priority value could be out of this range due to:
     *      Task clamped to BG or Utility (max-pri is 4, or 20)
     *      Task is user task (max-pri is 63)
     *      Task is kernel task (max-pri is 95)
     * Note that thread->importance is user-settable to any integer
     * via THREAD_PRECEDENCE_POLICY.
     */
    if (priority > thread->max_priority)
        priority = thread->max_priority;
    else if (priority < MINPRI)
        priority = MINPRI;

    if (thread->saved_mode == TH_MODE_REALTIME &&
        thread->sched_flags & TH_SFLAG_FAILSAFE)
        priority = DEPRESSPRI;

    if (thread->effective_policy.thep_terminated == TRUE) {
        /*
         * We temporarily want to override the expected priority to
         * ensure that the thread exits in a timely manner.
         * Note that this is allowed to exceed thread->max_priority
         * so that the thread is no longer clamped to background
         * during the final exit phase.
         */
        if (priority < thread->task_priority)
            priority = thread->task_priority;
        if (priority < BASEPRI_DEFAULT)
            priority = BASEPRI_DEFAULT;
    }

#if CONFIG_EMBEDDED
    /* No one can have a base priority less than MAXPRI_THROTTLE */
    if (priority < MAXPRI_THROTTLE)
        priority = MAXPRI_THROTTLE;
#endif /* CONFIG_EMBEDDED */

    sched_set_thread_base_priority(thread, priority);
}
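
/*
 * Worked example for the QoS branch above (illustration only; the 20 and 4
 * follow the "max-pri is 4, or 20" values cited in the clamp comment for
 * Utility and BG): a THREAD_QOS_UTILITY thread with -2REL starts at
 * qos_pri[UTILITY] = 20, its relative priority scales to
 * -(((20 - 4) * 2) >> 4) = -2, giving 18, which is then raised to any
 * user_promotion_basepri and clamped into [MINPRI, thread->max_priority]
 * before sched_set_thread_base_priority() applies it.
 */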
/* Called with the task lock held, but not the thread mutex or spinlock */
void
thread_policy_update_tasklocked(
    thread_t           thread,
    integer_t          priority,
    integer_t          max_priority,
    task_pend_token_t  pend_token)
{
    thread_mtx_lock(thread);

    if (!thread->active || thread->policy_reset) {
        thread_mtx_unlock(thread);
        return;
    }

    spl_t s = splsched();
    thread_lock(thread);

    __unused integer_t old_max_priority = thread->max_priority;

    thread->task_priority = priority;
    thread->max_priority = max_priority;

#if CONFIG_EMBEDDED
    /*
     * When backgrounding a thread, iOS has the semantic that
     * realtime and fixed priority threads should be demoted
     * to timeshare background threads.
     *
     * On OSX, realtime and fixed priority threads don't lose their mode.
     *
     * TODO: Do this inside the thread policy update routine in order to avoid double
     * remove/reinsert for a runnable thread
     */
    if ((max_priority <= MAXPRI_THROTTLE) && (old_max_priority > MAXPRI_THROTTLE)) {
        sched_thread_mode_demote(thread, TH_SFLAG_THROTTLED);
    } else if ((max_priority > MAXPRI_THROTTLE) && (old_max_priority <= MAXPRI_THROTTLE)) {
        sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
    }
#endif /* CONFIG_EMBEDDED */

    thread_policy_update_spinlocked(thread, TRUE, pend_token);

    thread_unlock(thread);
    splx(s);

    thread_mtx_unlock(thread);
}
/*
 * Reset thread to default state in preparation for termination
 * Called with thread mutex locked
 *
 * Always called on current thread, so we don't need a run queue remove
 */
void
thread_policy_reset(
    thread_t    thread)
{
    spl_t s;

    assert(thread == current_thread());

    s = splsched();
    thread_lock(thread);

    if (thread->sched_flags & TH_SFLAG_FAILSAFE)
        sched_thread_mode_undemote(thread, TH_SFLAG_FAILSAFE);

    if (thread->sched_flags & TH_SFLAG_THROTTLED)
        sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);

    /* At this point, the various demotions should be inactive */
    assert(!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK));
    assert(!(thread->sched_flags & TH_SFLAG_THROTTLED));
    assert(!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK));

    /* Reset thread back to task-default basepri and mode */
    sched_mode_t newmode = SCHED(initial_thread_sched_mode)(thread->task);

    sched_set_thread_mode(thread, newmode);

    thread->importance = 0;

    /* Prevent further changes to thread base priority or mode */
    thread->policy_reset = 1;

    sched_set_thread_base_priority(thread, thread->task_priority);

    thread_unlock(thread);
    splx(s);
}
kern_return_t
thread_policy_get(
    thread_t                thread,
    thread_policy_flavor_t  flavor,
    thread_policy_t         policy_info,
    mach_msg_type_number_t  *count,
    boolean_t               *get_default)
{
    kern_return_t result = KERN_SUCCESS;

    if (thread == THREAD_NULL)
        return (KERN_INVALID_ARGUMENT);

    thread_mtx_lock(thread);
    if (!thread->active) {
        thread_mtx_unlock(thread);

        return (KERN_TERMINATED);
    }

    switch (flavor) {

    case THREAD_EXTENDED_POLICY:
    {
        boolean_t timeshare = TRUE;

        if (!(*get_default)) {
            spl_t s = splsched();
            thread_lock(thread);

            if ((thread->sched_mode != TH_MODE_REALTIME) &&
                (thread->saved_mode != TH_MODE_REALTIME)) {
                if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK))
                    timeshare = (thread->sched_mode == TH_MODE_TIMESHARE) != 0;
                else
                    timeshare = (thread->saved_mode == TH_MODE_TIMESHARE) != 0;
            } else
                *get_default = TRUE;

            thread_unlock(thread);
            splx(s);
        }

        if (*count >= THREAD_EXTENDED_POLICY_COUNT) {
            thread_extended_policy_t info;

            info = (thread_extended_policy_t)policy_info;
            info->timeshare = timeshare;
        }

        break;
    }

    case THREAD_TIME_CONSTRAINT_POLICY:
    {
        thread_time_constraint_policy_t info;

        if (*count < THREAD_TIME_CONSTRAINT_POLICY_COUNT) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        info = (thread_time_constraint_policy_t)policy_info;

        if (!(*get_default)) {
            spl_t s = splsched();
            thread_lock(thread);

            if ((thread->sched_mode == TH_MODE_REALTIME) ||
                (thread->saved_mode == TH_MODE_REALTIME)) {
                info->period = thread->realtime.period;
                info->computation = thread->realtime.computation;
                info->constraint = thread->realtime.constraint;
                info->preemptible = thread->realtime.preemptible;
            } else
                *get_default = TRUE;

            thread_unlock(thread);
            splx(s);
        }

        if (*get_default) {
            info->period = 0;
            info->computation = default_timeshare_computation;
            info->constraint = default_timeshare_constraint;
            info->preemptible = TRUE;
        }

        break;
    }

    case THREAD_PRECEDENCE_POLICY:
    {
        thread_precedence_policy_t info;

        if (*count < THREAD_PRECEDENCE_POLICY_COUNT) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        info = (thread_precedence_policy_t)policy_info;

        if (!(*get_default)) {
            spl_t s = splsched();
            thread_lock(thread);

            info->importance = thread->importance;

            thread_unlock(thread);
            splx(s);
        } else
            info->importance = 0;

        break;
    }

    case THREAD_AFFINITY_POLICY:
    {
        thread_affinity_policy_t info;

        if (!thread_affinity_is_supported()) {
            result = KERN_NOT_SUPPORTED;
            break;
        }
        if (*count < THREAD_AFFINITY_POLICY_COUNT) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        info = (thread_affinity_policy_t)policy_info;

        if (!(*get_default))
            info->affinity_tag = thread_affinity_get(thread);
        else
            info->affinity_tag = THREAD_AFFINITY_TAG_NULL;

        break;
    }

    case THREAD_POLICY_STATE:
    {
        thread_policy_state_t info;

        if (*count < THREAD_POLICY_STATE_COUNT) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        /* Only root can get this info */
        if (current_task()->sec_token.val[0] != 0) {
            result = KERN_PROTECTION_FAILURE;
            break;
        }

        info = (thread_policy_state_t)(void*)policy_info;

        if (!(*get_default)) {
            info->flags = 0;

            spl_t s = splsched();
            thread_lock(thread);

            info->flags |= (thread->static_param ? THREAD_POLICY_STATE_FLAG_STATIC_PARAM : 0);

            info->thps_requested_policy = *(uint64_t*)(void*)(&thread->requested_policy);
            info->thps_effective_policy = *(uint64_t*)(void*)(&thread->effective_policy);

            info->thps_user_promotions        = thread->user_promotions;
            info->thps_user_promotion_basepri = thread->user_promotion_basepri;
            info->thps_ipc_overrides          = thread->ipc_overrides;

            proc_get_thread_policy_bitfield(thread, info);

            thread_unlock(thread);
            splx(s);
        } else {
            info->requested = 0;
            info->effective = 0;
        }

        break;
    }

    case THREAD_LATENCY_QOS_POLICY:
    {
        thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
        thread_latency_qos_t plqos;

        if (*count < THREAD_LATENCY_QOS_POLICY_COUNT) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        if (*get_default) {
            plqos = 0;
        } else {
            plqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_LATENCY_QOS, NULL);
        }

        info->thread_latency_qos_tier = qos_latency_policy_package(plqos);

        break;
    }

    case THREAD_THROUGHPUT_QOS_POLICY:
    {
        thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
        thread_throughput_qos_t ptqos;

        if (*count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        if (*get_default) {
            ptqos = 0;
        } else {
            ptqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_THROUGH_QOS, NULL);
        }

        info->thread_throughput_qos_tier = qos_throughput_policy_package(ptqos);

        break;
    }

    case THREAD_QOS_POLICY:
    {
        thread_qos_policy_t info = (thread_qos_policy_t)policy_info;

        if (*count < THREAD_QOS_POLICY_COUNT) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        if (!(*get_default)) {
            int relprio_value = 0;
            info->qos_tier = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
                                                           TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);

            info->tier_importance = -relprio_value;
        } else {
            info->qos_tier = THREAD_QOS_UNSPECIFIED;
            info->tier_importance = 0;
        }

        break;
    }

    default:
        result = KERN_INVALID_ARGUMENT;
        break;
    }

    thread_mtx_unlock(thread);

    return (result);
}
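
/*
 * For reference, the MIG-exposed form of this getter is typically used from
 * userspace roughly as in this hypothetical sketch (the local names are
 * placeholders):
 *
 *   thread_extended_policy_data_t info;
 *   mach_msg_type_number_t count = THREAD_EXTENDED_POLICY_COUNT;
 *   boolean_t get_default = FALSE;
 *   kern_return_t kr = thread_policy_get(mach_thread_self(),
 *                                        THREAD_EXTENDED_POLICY,
 *                                        (thread_policy_t)&info,
 *                                        &count, &get_default);
 *
 * On return, get_default is forced to TRUE by the code above whenever the
 * thread is in a mode (e.g. realtime) for which the requested flavor has no
 * direct answer.
 */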
void
thread_policy_create(thread_t thread)
{
    KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                              (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_START,
                              thread_tid(thread), theffective_0(thread),
                              theffective_1(thread), thread->base_pri, 0);

    /* We pass a pend token but ignore it */
    struct task_pend_token pend_token = {};

    thread_policy_update_internal_spinlocked(thread, TRUE, &pend_token);

    KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                              (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_END,
                              thread_tid(thread), theffective_0(thread),
                              theffective_1(thread), thread->base_pri, 0);
}
static void
thread_policy_update_spinlocked(thread_t thread, boolean_t recompute_priority, task_pend_token_t pend_token)
{
    KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                              (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD) | DBG_FUNC_START),
                              thread_tid(thread), theffective_0(thread),
                              theffective_1(thread), thread->base_pri, 0);

    thread_policy_update_internal_spinlocked(thread, recompute_priority, pend_token);

    KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                              (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD)) | DBG_FUNC_END,
                              thread_tid(thread), theffective_0(thread),
                              theffective_1(thread), thread->base_pri, 0);
}
/*
 * One thread state update function TO RULE THEM ALL
 *
 * This function updates the thread effective policy fields
 * and pushes the results to the relevant subsystems.
 *
 * Returns TRUE if a pended action needs to be run.
 *
 * Called with thread spinlock locked, task may be locked, thread mutex may be locked
 */
static void
thread_policy_update_internal_spinlocked(thread_t thread, boolean_t recompute_priority,
                                         task_pend_token_t pend_token)
{
    /*
     * Gather requested policy and effective task state
     */

    struct thread_requested_policy requested = thread->requested_policy;
    struct task_effective_policy task_effective = thread->task->effective_policy;

    /*
     * Calculate new effective policies from requested policy, task and thread state
     *
     * Don't change requested, it won't take effect
     */

    struct thread_effective_policy next = {};

    next.thep_qos_ui_is_urgent = task_effective.tep_qos_ui_is_urgent;

    uint32_t next_qos = requested.thrp_qos;

    if (requested.thrp_qos != THREAD_QOS_UNSPECIFIED) {
        if (requested.thrp_qos_override != THREAD_QOS_UNSPECIFIED)
            next_qos = MAX(requested.thrp_qos_override, next_qos);

        if (requested.thrp_qos_promote != THREAD_QOS_UNSPECIFIED)
            next_qos = MAX(requested.thrp_qos_promote, next_qos);

        if (requested.thrp_qos_ipc_override != THREAD_QOS_UNSPECIFIED)
            next_qos = MAX(requested.thrp_qos_ipc_override, next_qos);
    }

    next.thep_qos = next_qos;

    /* A task clamp will result in an effective QoS even when requested is UNSPECIFIED */
    if (task_effective.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
        if (next.thep_qos != THREAD_QOS_UNSPECIFIED)
            next.thep_qos = MIN(task_effective.tep_qos_clamp, next.thep_qos);
        else
            next.thep_qos = task_effective.tep_qos_clamp;
    }

    /*
     * Extract outbound-promotion QoS before applying task ceiling or BG clamp
     * This allows QoS promotions to work properly even after the process is unclamped.
     */
    next.thep_qos_promote = next.thep_qos;

    /* The ceiling only applies to threads that are in the QoS world */
    if (task_effective.tep_qos_ceiling != THREAD_QOS_UNSPECIFIED &&
        next.thep_qos                  != THREAD_QOS_UNSPECIFIED) {
        next.thep_qos = MIN(task_effective.tep_qos_ceiling, next.thep_qos);
    }

    /* Apply the sync ipc qos override */
    if (requested.thrp_qos_sync_ipc_override != THREAD_QOS_UNSPECIFIED)
        next.thep_qos = MAX(requested.thrp_qos_sync_ipc_override, next.thep_qos);

    /*
     * The QoS relative priority is only applicable when the original programmer's
     * intended (requested) QoS is in effect. When the QoS is clamped (e.g.
     * USER_INITIATED-13REL clamped to UTILITY), the relative priority is not honored,
     * since otherwise it would be lower than unclamped threads. Similarly, in the
     * presence of boosting, the programmer doesn't know what other actors
     * are boosting the thread.
     */
    if ((requested.thrp_qos != THREAD_QOS_UNSPECIFIED) &&
        (requested.thrp_qos == next.thep_qos) &&
        (requested.thrp_qos_override == THREAD_QOS_UNSPECIFIED)) {
        next.thep_qos_relprio = requested.thrp_qos_relprio;
    } else {
        next.thep_qos_relprio = 0;
    }

    /* Calculate DARWIN_BG */
    boolean_t wants_darwinbg       = FALSE;
    boolean_t wants_all_sockets_bg = FALSE; /* Do I want my existing sockets to be bg */

    /*
     * If DARWIN_BG has been requested at either level, it's engaged.
     * darwinbg threads always create bg sockets,
     * but only some types of darwinbg change the sockets
     * after they're created
     */
    if (requested.thrp_int_darwinbg || requested.thrp_ext_darwinbg)
        wants_all_sockets_bg = wants_darwinbg = TRUE;

    if (requested.thrp_pidbind_bg)
        wants_all_sockets_bg = wants_darwinbg = TRUE;

    if (task_effective.tep_darwinbg)
        wants_darwinbg = TRUE;

    if (next.thep_qos == THREAD_QOS_BACKGROUND ||
        next.thep_qos == THREAD_QOS_MAINTENANCE)
        wants_darwinbg = TRUE;

    /* Calculate side effects of DARWIN_BG */

    if (wants_darwinbg)
        next.thep_darwinbg = 1;

    if (next.thep_darwinbg || task_effective.tep_new_sockets_bg)
        next.thep_new_sockets_bg = 1;

    /* Don't use task_effective.tep_all_sockets_bg here */
    if (wants_all_sockets_bg)
        next.thep_all_sockets_bg = 1;

    /* darwinbg implies background QOS (or lower) */
    if (next.thep_darwinbg &&
        (next.thep_qos > THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_UNSPECIFIED)) {
        next.thep_qos = THREAD_QOS_BACKGROUND;
        next.thep_qos_relprio = 0;
    }

    /* Calculate IO policy */

    int iopol = THROTTLE_LEVEL_TIER0;

    /* Factor in the task's IO policy */
    if (next.thep_darwinbg)
        iopol = MAX(iopol, task_effective.tep_bg_iotier);

    iopol = MAX(iopol, task_effective.tep_io_tier);

    /* Look up the associated IO tier value for the QoS class */
    iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.thep_qos]);

    iopol = MAX(iopol, requested.thrp_int_iotier);
    iopol = MAX(iopol, requested.thrp_ext_iotier);

    next.thep_io_tier = iopol;

    /*
     * If a QoS override is causing IO to go into a lower tier, we also set
     * the passive bit so that a thread doesn't end up stuck in its own throttle
     * window when the override goes away.
     */
    boolean_t qos_io_override_active = FALSE;
    if (thread_qos_policy_params.qos_iotier[next.thep_qos] <
        thread_qos_policy_params.qos_iotier[requested.thrp_qos])
        qos_io_override_active = TRUE;

    /* Calculate Passive IO policy */
    if (requested.thrp_ext_iopassive ||
        requested.thrp_int_iopassive ||
        qos_io_override_active ||
        task_effective.tep_io_passive)
        next.thep_io_passive = 1;

    /* Calculate timer QOS */
    uint32_t latency_qos = requested.thrp_latency_qos;

    latency_qos = MAX(latency_qos, task_effective.tep_latency_qos);
    latency_qos = MAX(latency_qos, thread_qos_policy_params.qos_latency_qos[next.thep_qos]);

    next.thep_latency_qos = latency_qos;

    /* Calculate throughput QOS */
    uint32_t through_qos = requested.thrp_through_qos;

    through_qos = MAX(through_qos, task_effective.tep_through_qos);
    through_qos = MAX(through_qos, thread_qos_policy_params.qos_through_qos[next.thep_qos]);

    next.thep_through_qos = through_qos;

    if (task_effective.tep_terminated || requested.thrp_terminated) {
        /* Shoot down the throttles that slow down exit or response to SIGTERM */
        next.thep_terminated  = 1;
        next.thep_darwinbg    = 0;
        next.thep_io_tier     = THROTTLE_LEVEL_TIER0;
        next.thep_qos         = THREAD_QOS_UNSPECIFIED;
        next.thep_latency_qos = LATENCY_QOS_TIER_UNSPECIFIED;
        next.thep_through_qos = THROUGHPUT_QOS_TIER_UNSPECIFIED;
    }

    /*
     * Swap out old policy for new policy
     */

    struct thread_effective_policy prev = thread->effective_policy;

    thread_update_qos_cpu_time_locked(thread);

    /* This is the point where the new values become visible to other threads */
    thread->effective_policy = next;

    /*
     * Pend updates that can't be done while holding the thread lock
     */

    if (prev.thep_all_sockets_bg != next.thep_all_sockets_bg)
        pend_token->tpt_update_sockets = 1;

    /* TODO: Doesn't this only need to be done if the throttle went up? */
    if (prev.thep_io_tier != next.thep_io_tier)
        pend_token->tpt_update_throttle = 1;

    /*
     * Check for the attributes that sfi_thread_classify() consults,
     * and trigger SFI re-evaluation.
     */
    if (prev.thep_qos      != next.thep_qos ||
        prev.thep_darwinbg != next.thep_darwinbg)
        pend_token->tpt_update_thread_sfi = 1;

    /*
     * Update other subsystems as necessary if something has changed
     */

    /* Check for the attributes that thread_recompute_priority() consults */
    if (prev.thep_qos != next.thep_qos ||
        prev.thep_qos_relprio != next.thep_qos_relprio ||
        prev.thep_qos_ui_is_urgent != next.thep_qos_ui_is_urgent ||
        prev.thep_terminated != next.thep_terminated ||
        pend_token->tpt_force_recompute_pri == 1 ||
        recompute_priority) {
        thread_recompute_priority(thread);
    }
}
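
/*
 * Example trace of the QoS resolution above (illustrative values only): with
 * requested.thrp_qos = THREAD_QOS_LEGACY, an IPC override of
 * THREAD_QOS_USER_INTERACTIVE and a task clamp of THREAD_QOS_UTILITY,
 * next_qos is first raised to USER_INTERACTIVE by the override, then pulled
 * down to UTILITY by tep_qos_clamp; thep_qos_promote records that post-clamp
 * value, and thep_qos_relprio is zeroed because the effective QoS no longer
 * equals the requested LEGACY tier.
 */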
/*
 * Initiate a thread policy state transition on a thread with its TID
 * Useful if you cannot guarantee the thread won't get terminated
 * Precondition: No locks are held
 * Will take task lock - using the non-tid variant is faster
 * if you already have a thread ref.
 */
void
proc_set_thread_policy_with_tid(task_t task,
                                uint64_t tid,
                                int category,
                                int flavor,
                                int value)
{
    /* takes task lock, returns ref'ed thread or NULL */
    thread_t thread = task_findtid(task, tid);

    if (thread == THREAD_NULL)
        return;

    proc_set_thread_policy(thread, category, flavor, value);

    thread_deallocate(thread);
}
/*
 * Initiate a thread policy transition on a thread
 * This path supports networking transitions (i.e. darwinbg transitions)
 * Precondition: No locks are held
 */
void
proc_set_thread_policy(thread_t thread,
                       int category,
                       int flavor,
                       int value)
{
    struct task_pend_token pend_token = {};

    thread_mtx_lock(thread);

    proc_set_thread_policy_locked(thread, category, flavor, value, 0, &pend_token);

    thread_mtx_unlock(thread);

    thread_policy_update_complete_unlocked(thread, &pend_token);
}
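
/*
 * Hypothetical usage sketch for the helper above (an illustration, not a
 * claim about existing call sites): engaging the terminated attribute so the
 * policy update strips a dying thread's throttles would look like
 *
 *   proc_set_thread_policy(thread, TASK_POLICY_ATTRIBUTE,
 *                          TASK_POLICY_TERMINATED, 1);
 *
 * which funnels through proc_set_thread_policy_locked() and then runs the
 * unlocked completion step for sockets, throttling and SFI.
 */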
/*
 * KPI for pthread kext to call to set thread base QoS values during a workq wakeup
 * May be called with interrupts disabled and workqueue/waitqueue/kqueue locks held
 *
 * Does NOT do update completion, so the thread MUST be in a safe place WRT
 * IO throttling and SFI.
 *
 * TODO: Can I assert 'it must be in a safe place'?
 */
kern_return_t
thread_set_workq_qos(thread_t thread,
                     int qos_tier,
                     int relprio) /* relprio is -16 to 0 */
{
    assert(qos_tier >= 0 && qos_tier <= THREAD_QOS_LAST);
    assert(relprio  <= 0 && relprio  >= THREAD_QOS_MIN_TIER_IMPORTANCE);

    if (!(qos_tier >= 0 && qos_tier <= THREAD_QOS_LAST))
        return KERN_FAILURE;
    if (!(relprio <= 0 && relprio >= THREAD_QOS_MIN_TIER_IMPORTANCE))
        return KERN_FAILURE;

    if (qos_tier == THREAD_QOS_UNSPECIFIED) {
        assert(relprio == 0);
        if (relprio != 0)
            return KERN_FAILURE;
    }

    assert(thread->static_param);
    if (!thread->static_param) {
        return KERN_FAILURE;
    }

    /* Concern: this doesn't hold the mutex... */
    //if (!thread->active)
    //    return KERN_TERMINATED;

    struct task_pend_token pend_token = {};

    proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO, qos_tier, -relprio, &pend_token);

    assert(pend_token.tpt_update_sockets == 0);
    /* we don't need to update throttle or sfi because pthread kext promises the thread is in a safe place */
    /* TODO: Do we need to update SFI to ensure it gets tagged with the AST? */

    return KERN_SUCCESS;
}
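
/*
 * Hypothetical caller sketch for the pthread-kext KPI above (illustration
 * only; "th" is a placeholder): on a workqueue wakeup the kext would pass the
 * new servicing QoS and an optional relative priority, e.g.
 *
 *   kern_return_t kr = thread_set_workq_qos(th, THREAD_QOS_USER_INITIATED, -4);
 *
 * Note the relprio argument is the userspace-style value in [-16, 0]; it is
 * negated before being stored, matching the TASK_POLICY_QOS_AND_RELPRIO
 * convention used throughout this file.
 */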
/*
 * Do the things that can't be done while holding a thread mutex.
 * These are set up to call back into thread policy to get the latest value,
 * so they don't have to be synchronized with the update.
 * The only required semantic is 'call this sometime after updating effective policy'
 *
 * Precondition: Thread mutex is not held
 *
 * This may be called with the task lock held, but in that case it won't be
 * called with tpt_update_sockets set.
 */
void
thread_policy_update_complete_unlocked(thread_t thread, task_pend_token_t pend_token)
{
#ifdef MACH_BSD
    if (pend_token->tpt_update_sockets)
        proc_apply_task_networkbg(thread->task->bsd_info, thread);
#endif /* MACH_BSD */

    if (pend_token->tpt_update_throttle)
        rethrottle_thread(thread->uthread);

    if (pend_token->tpt_update_thread_sfi)
        sfi_reevaluate(thread);
}
/*
 * Set and update thread policy
 * Thread mutex might be held
 */
static void
proc_set_thread_policy_locked(thread_t thread,
                              int      category,
                              int      flavor,
                              int      value,
                              int      value2,
                              task_pend_token_t pend_token)
{
    spl_t s = splsched();
    thread_lock(thread);

    proc_set_thread_policy_spinlocked(thread, category, flavor, value, value2, pend_token);

    thread_unlock(thread);
    splx(s);
}
/*
 * Set and update thread policy
 * Thread spinlock is held
 */
static void
proc_set_thread_policy_spinlocked(thread_t thread,
                                  int      category,
                                  int      flavor,
                                  int      value,
                                  int      value2,
                                  task_pend_token_t pend_token)
{
    KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                              (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_START,
                              thread_tid(thread), threquested_0(thread),
                              threquested_1(thread), value, 0);

    thread_set_requested_policy_spinlocked(thread, category, flavor, value, value2);

    thread_policy_update_spinlocked(thread, FALSE, pend_token);

    KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                              (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_END,
                              thread_tid(thread), threquested_0(thread),
                              threquested_1(thread), tpending(pend_token), 0);
}
/*
 * Set the requested state for a specific flavor to a specific value.
 */
static void
thread_set_requested_policy_spinlocked(thread_t thread,
                                       int      category,
                                       int      flavor,
                                       int      value,
                                       int      value2)
{
    int tier, passive;

    struct thread_requested_policy requested = thread->requested_policy;

    switch (flavor) {

    /* Category: EXTERNAL and INTERNAL, thread and task */

    case TASK_POLICY_DARWIN_BG:
        if (category == TASK_POLICY_EXTERNAL)
            requested.thrp_ext_darwinbg = value;
        else
            requested.thrp_int_darwinbg = value;
        break;

    case TASK_POLICY_IOPOL:
        proc_iopol_to_tier(value, &tier, &passive);
        if (category == TASK_POLICY_EXTERNAL) {
            requested.thrp_ext_iotier = tier;
            requested.thrp_ext_iopassive = passive;
        } else {
            requested.thrp_int_iotier = tier;
            requested.thrp_int_iopassive = passive;
        }
        break;

    case TASK_POLICY_IO:
        if (category == TASK_POLICY_EXTERNAL)
            requested.thrp_ext_iotier = value;
        else
            requested.thrp_int_iotier = value;
        break;

    case TASK_POLICY_PASSIVE_IO:
        if (category == TASK_POLICY_EXTERNAL)
            requested.thrp_ext_iopassive = value;
        else
            requested.thrp_int_iopassive = value;
        break;

    /* Category: ATTRIBUTE, thread only */

    case TASK_POLICY_PIDBIND_BG:
        assert(category == TASK_POLICY_ATTRIBUTE);
        requested.thrp_pidbind_bg = value;
        break;

    case TASK_POLICY_LATENCY_QOS:
        assert(category == TASK_POLICY_ATTRIBUTE);
        requested.thrp_latency_qos = value;
        break;

    case TASK_POLICY_THROUGH_QOS:
        assert(category == TASK_POLICY_ATTRIBUTE);
        requested.thrp_through_qos = value;
        break;

    case TASK_POLICY_QOS:
        assert(category == TASK_POLICY_ATTRIBUTE);
        requested.thrp_qos = value;
        break;

    case TASK_POLICY_QOS_OVERRIDE:
        assert(category == TASK_POLICY_ATTRIBUTE);
        requested.thrp_qos_override = value;
        break;

    case TASK_POLICY_QOS_AND_RELPRIO:
        assert(category == TASK_POLICY_ATTRIBUTE);
        requested.thrp_qos = value;
        requested.thrp_qos_relprio = value2;
        DTRACE_BOOST3(qos_set, uint64_t, thread->thread_id, int, requested.thrp_qos, int, requested.thrp_qos_relprio);
        break;

    case TASK_POLICY_QOS_PROMOTE:
        assert(category == TASK_POLICY_ATTRIBUTE);
        requested.thrp_qos_promote = value;
        break;

    case TASK_POLICY_QOS_IPC_OVERRIDE:
        assert(category == TASK_POLICY_ATTRIBUTE);
        requested.thrp_qos_ipc_override = value;
        break;

    case TASK_POLICY_QOS_SYNC_IPC_OVERRIDE:
        assert(category == TASK_POLICY_ATTRIBUTE);
        requested.thrp_qos_sync_ipc_override = value;
        break;

    case TASK_POLICY_TERMINATED:
        assert(category == TASK_POLICY_ATTRIBUTE);
        requested.thrp_terminated = value;
        break;

    default:
        panic("unknown task policy: %d %d %d", category, flavor, value);
        break;
    }

    thread->requested_policy = requested;
}
/*
 * Gets what you set. Effective values may be different.
 * Precondition: No locks are held
 */
int
proc_get_thread_policy(thread_t thread,
                       int      category,
                       int      flavor)
{
    int value = 0;
    thread_mtx_lock(thread);
    value = proc_get_thread_policy_locked(thread, category, flavor, NULL);
    thread_mtx_unlock(thread);
    return value;
}

static int
proc_get_thread_policy_locked(thread_t thread,
                              int      category,
                              int      flavor,
                              int*     value2)
{
    int value = 0;

    spl_t s = splsched();
    thread_lock(thread);

    value = thread_get_requested_policy_spinlocked(thread, category, flavor, value2);

    thread_unlock(thread);
    splx(s);

    return value;
}
/*
 * Gets what you set. Effective values may be different.
 */
static int
thread_get_requested_policy_spinlocked(thread_t thread,
                                       int      category,
                                       int      flavor,
                                       int*     value2)
{
    int value = 0;

    struct thread_requested_policy requested = thread->requested_policy;

    switch (flavor) {
    case TASK_POLICY_DARWIN_BG:
        if (category == TASK_POLICY_EXTERNAL)
            value = requested.thrp_ext_darwinbg;
        else
            value = requested.thrp_int_darwinbg;
        break;
    case TASK_POLICY_IOPOL:
        if (category == TASK_POLICY_EXTERNAL)
            value = proc_tier_to_iopol(requested.thrp_ext_iotier,
                                       requested.thrp_ext_iopassive);
        else
            value = proc_tier_to_iopol(requested.thrp_int_iotier,
                                       requested.thrp_int_iopassive);
        break;
    case TASK_POLICY_IO:
        if (category == TASK_POLICY_EXTERNAL)
            value = requested.thrp_ext_iotier;
        else
            value = requested.thrp_int_iotier;
        break;
    case TASK_POLICY_PASSIVE_IO:
        if (category == TASK_POLICY_EXTERNAL)
            value = requested.thrp_ext_iopassive;
        else
            value = requested.thrp_int_iopassive;
        break;
    case TASK_POLICY_QOS:
        assert(category == TASK_POLICY_ATTRIBUTE);
        value = requested.thrp_qos;
        break;
    case TASK_POLICY_QOS_OVERRIDE:
        assert(category == TASK_POLICY_ATTRIBUTE);
        value = requested.thrp_qos_override;
        break;
    case TASK_POLICY_LATENCY_QOS:
        assert(category == TASK_POLICY_ATTRIBUTE);
        value = requested.thrp_latency_qos;
        break;
    case TASK_POLICY_THROUGH_QOS:
        assert(category == TASK_POLICY_ATTRIBUTE);
        value = requested.thrp_through_qos;
        break;
    case TASK_POLICY_QOS_AND_RELPRIO:
        assert(category == TASK_POLICY_ATTRIBUTE);
        assert(value2 != NULL);
        value = requested.thrp_qos;
        *value2 = requested.thrp_qos_relprio;
        break;
    case TASK_POLICY_QOS_PROMOTE:
        assert(category == TASK_POLICY_ATTRIBUTE);
        value = requested.thrp_qos_promote;
        break;
    case TASK_POLICY_QOS_IPC_OVERRIDE:
        assert(category == TASK_POLICY_ATTRIBUTE);
        value = requested.thrp_qos_ipc_override;
        break;
    case TASK_POLICY_TERMINATED:
        assert(category == TASK_POLICY_ATTRIBUTE);
        value = requested.thrp_terminated;
        break;
    default:
        panic("unknown policy_flavor %d", flavor);
        break;
    }

    return value;
}
/*
 * Gets what is actually in effect, for subsystems which pull policy instead of receive updates.
 *
 * NOTE: This accessor does not take the task or thread lock.
 * Notifications of state updates need to be externally synchronized with state queries.
 * This routine *MUST* remain interrupt safe, as it is potentially invoked
 * within the context of a timer interrupt.
 *
 * TODO: I think we can get away with architecting this such that we don't need to look at the task ever.
 *      Is that a good idea? Maybe it's best to avoid evaluate-all-the-threads updates.
 *      I don't think that cost is worth not having the right answer.
 */
int
proc_get_effective_thread_policy(thread_t thread,
                                 int flavor)
{
    int value = 0;

    switch (flavor) {
    case TASK_POLICY_DARWIN_BG:
        /*
         * This call is used within the timer layer, as well as
         * prioritizing requests to the graphics system.
         * It also informs SFI and originator-bg-state.
         * Returns 1 for background mode, 0 for normal mode
         */
        value = thread->effective_policy.thep_darwinbg ? 1 : 0;
        break;
    case TASK_POLICY_IO:
        /*
         * The I/O system calls here to find out what throttling tier to apply to an operation.
         * Returns THROTTLE_LEVEL_* values
         */
        value = thread->effective_policy.thep_io_tier;
        if (thread->iotier_override != THROTTLE_LEVEL_NONE)
            value = MIN(value, thread->iotier_override);
        break;
    case TASK_POLICY_PASSIVE_IO:
        /*
         * The I/O system calls here to find out whether an operation should be passive.
         * (i.e. not cause operations with lower throttle tiers to be throttled)
         * Returns 1 for passive mode, 0 for normal mode
         *
         * If an override is causing IO to go into a lower tier, we also set
         * the passive bit so that a thread doesn't end up stuck in its own throttle
         * window when the override goes away.
         */
        value = thread->effective_policy.thep_io_passive ? 1 : 0;
        if (thread->iotier_override != THROTTLE_LEVEL_NONE &&
            thread->iotier_override < thread->effective_policy.thep_io_tier)
            value = 1;
        break;
    case TASK_POLICY_ALL_SOCKETS_BG:
        /*
         * do_background_socket() calls this to determine whether
         * it should change the thread's sockets
         * Returns 1 for background mode, 0 for normal mode
         * This consults both thread and task so un-DBGing a thread while the task is BG
         * doesn't get you out of the network throttle.
         */
        value = (thread->effective_policy.thep_all_sockets_bg ||
                 thread->task->effective_policy.tep_all_sockets_bg) ? 1 : 0;
        break;
    case TASK_POLICY_NEW_SOCKETS_BG:
        /*
         * socreate() calls this to determine if it should mark a new socket as background
         * Returns 1 for background mode, 0 for normal mode
         */
        value = thread->effective_policy.thep_new_sockets_bg ? 1 : 0;
        break;
    case TASK_POLICY_LATENCY_QOS:
        /*
         * timer arming calls into here to find out the timer coalescing level
         * Returns a latency QoS tier (0-6)
         */
        value = thread->effective_policy.thep_latency_qos;
        break;
    case TASK_POLICY_THROUGH_QOS:
        /*
         * This value is passed into the urgency callout from the scheduler
         * to the performance management subsystem.
         *
         * Returns a throughput QoS tier (0-6)
         */
        value = thread->effective_policy.thep_through_qos;
        break;
    case TASK_POLICY_QOS:
        /*
         * This is communicated to the performance management layer and SFI.
         *
         * Returns a QoS policy tier
         */
        value = thread->effective_policy.thep_qos;
        break;
    default:
        panic("unknown thread policy flavor %d", flavor);
        break;
    }

    return value;
}
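
/*
 * Pull-side usage sketch (a hypothetical call site, consistent with the
 * per-flavor comments above): the I/O layer asking which throttle tier to
 * apply for the current thread would do
 *
 *   int tier = proc_get_effective_thread_policy(current_thread(), TASK_POLICY_IO);
 *
 * and may additionally consult TASK_POLICY_PASSIVE_IO to decide whether the
 * operation should avoid pushing other tiers into their throttle windows.
 */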
/*
 * (integer_t) casts limit the number of bits we can fit here
 * this interface is deprecated and replaced by the _EXT struct ?
 */
static void
proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info)
{
    uint64_t bits = 0;
    struct thread_requested_policy requested = thread->requested_policy;

    bits |= (requested.thrp_int_darwinbg    ? POLICY_REQ_INT_DARWIN_BG  : 0);
    bits |= (requested.thrp_ext_darwinbg    ? POLICY_REQ_EXT_DARWIN_BG  : 0);
    bits |= (requested.thrp_int_iotier      ? (((uint64_t)requested.thrp_int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0);
    bits |= (requested.thrp_ext_iotier      ? (((uint64_t)requested.thrp_ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0);
    bits |= (requested.thrp_int_iopassive   ? POLICY_REQ_INT_PASSIVE_IO : 0);
    bits |= (requested.thrp_ext_iopassive   ? POLICY_REQ_EXT_PASSIVE_IO : 0);

    bits |= (requested.thrp_qos             ? (((uint64_t)requested.thrp_qos) << POLICY_REQ_TH_QOS_SHIFT) : 0);
    bits |= (requested.thrp_qos_override    ? (((uint64_t)requested.thrp_qos_override) << POLICY_REQ_TH_QOS_OVER_SHIFT) : 0);

    bits |= (requested.thrp_pidbind_bg      ? POLICY_REQ_PIDBIND_BG : 0);

    bits |= (requested.thrp_latency_qos     ? (((uint64_t)requested.thrp_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0);
    bits |= (requested.thrp_through_qos     ? (((uint64_t)requested.thrp_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0);

    info->requested = (integer_t) bits;
    bits = 0;

    struct thread_effective_policy effective = thread->effective_policy;

    bits |= (effective.thep_darwinbg        ? POLICY_EFF_DARWIN_BG : 0);

    bits |= (effective.thep_io_tier         ? (((uint64_t)effective.thep_io_tier) << POLICY_EFF_IO_TIER_SHIFT) : 0);
    bits |= (effective.thep_io_passive      ? POLICY_EFF_IO_PASSIVE : 0);
    bits |= (effective.thep_all_sockets_bg  ? POLICY_EFF_ALL_SOCKETS_BG : 0);
    bits |= (effective.thep_new_sockets_bg  ? POLICY_EFF_NEW_SOCKETS_BG : 0);

    bits |= (effective.thep_qos             ? (((uint64_t)effective.thep_qos) << POLICY_EFF_TH_QOS_SHIFT) : 0);

    bits |= (effective.thep_latency_qos     ? (((uint64_t)effective.thep_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0);
    bits |= (effective.thep_through_qos     ? (((uint64_t)effective.thep_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0);

    info->effective = (integer_t)bits;
}
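/*
 * Note: 'bits' is rebuilt separately for the requested and effective snapshots
 * so that each packed value is independently narrowed into the 32-bit integer_t
 * fields of thread_policy_state_t; anything above bit 31 is truncated by the cast.
 */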
/*
 * Sneakily trace either the task and thread requested
 * or just the thread requested, depending on if we have enough room.
 * We do have room on LP64. On LP32, we have to split it between two uintptr_t's.
 *
 *                              LP32            LP64
 * threquested_0(thread)        thread[0]       task[0]
 * threquested_1(thread)        thread[1]       thread[0]
 */

uintptr_t
threquested_0(thread_t thread)
{
    static_assert(sizeof(struct thread_requested_policy) == sizeof(uint64_t), "size invariant violated");

    uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;

    return raw[0];
}

uintptr_t
threquested_1(thread_t thread)
{
#if defined __LP64__
    return *(uintptr_t*)&thread->task->requested_policy;
#else
    uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
    return raw[1];
#endif
}

uintptr_t
theffective_0(thread_t thread)
{
    static_assert(sizeof(struct thread_effective_policy) == sizeof(uint64_t), "size invariant violated");

    uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
    return raw[0];
}

uintptr_t
theffective_1(thread_t thread)
{
#if defined __LP64__
    return *(uintptr_t*)&thread->task->effective_policy;
#else
    uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
    return raw[1];
#endif
}
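/*
 * A trace consumer reassembles the 64-bit policy words from these helpers:
 * on LP64 a whole struct fits in one uintptr_t, while on LP32 the _0 helpers
 * carry the low word and the _1 helpers carry the high word of the thread's
 * own policy, per the table above.
 */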
/*
 * Set an override on the thread which is consulted with a
 * higher priority than the task/thread policy. This should
 * only be set for temporary grants until the thread
 * returns to the userspace boundary
 *
 * We use atomic operations to swap in the override, with
 * the assumption that the thread itself can
 * read the override and clear it on return to userspace.
 *
 * No locking is performed, since it is acceptable to see
 * a stale override for one loop through throttle_lowpri_io().
 * However a thread reference must be held on the thread.
 */

void set_thread_iotier_override(thread_t thread, int policy)
{
    int current_override;

    /* Let most aggressive I/O policy win until user boundary */
    do {
        current_override = thread->iotier_override;

        if (current_override != THROTTLE_LEVEL_NONE)
            policy = MIN(current_override, policy);

        if (current_override == policy) {
            /* no effective change */
            return;
        }
    } while (!OSCompareAndSwap(current_override, policy, &thread->iotier_override));

    /*
     * Since the thread may be currently throttled,
     * re-evaluate tiers and potentially break out
     * of an msleep
     */
    rethrottle_thread(thread->uthread);
}
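/*
 * The compare-and-swap loop above re-reads iotier_override whenever another
 * updater races in between the load and the swap: OSCompareAndSwap fails if
 * the stored value no longer matches current_override, so the most aggressive
 * (numerically lowest) tier always wins without taking a lock.
 */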
/*
 * Userspace synchronization routines (like pthread mutexes, pthread reader-writer locks,
 * semaphores, dispatch_sync) may result in priority inversions where a higher priority
 * (i.e. scheduler priority, I/O tier, QoS tier) is waiting on a resource owned by a lower
 * priority thread. In these cases, we attempt to propagate the priority token, as long
 * as the subsystem informs us of the relationships between the threads. The userspace
 * synchronization subsystem should maintain the information of owner->resource and
 * resource->waiters itself.
 */

/*
 * This helper canonicalizes the resource/resource_type given the current qos_override_mode
 * in effect. Note that wildcards (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD) may need
 * to be handled specially in the future, but for now it's fine to slam
 * *resource to USER_ADDR_NULL even if it was previously a wildcard.
 */
static void canonicalize_resource_and_type(user_addr_t *resource, int *resource_type) {
    if (qos_override_mode == QOS_OVERRIDE_MODE_OVERHANG_PEAK || qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
        /* Map all input resource/type to a single one */
        *resource = USER_ADDR_NULL;
        *resource_type = THREAD_QOS_OVERRIDE_TYPE_UNKNOWN;
    } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE) {
        /* no transform */
    } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_IGNORE_DISPATCH) {
        /* Map all dispatch overrides to a single one, to avoid memory overhead */
        if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE) {
            *resource = USER_ADDR_NULL;
        }
    } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE) {
        /* Map all mutex overrides to a single one, to avoid memory overhead */
        if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX) {
            *resource = USER_ADDR_NULL;
        }
    }
}
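/*
 * In the OVERHANG_PEAK and IGNORE_OVERRIDE modes every override therefore
 * collapses onto a single per-thread bookkeeping entry, while the fine-grained
 * modes keep one entry per (resource, resource_type) pair, optionally folding
 * dispatch or mutex resources together to bound memory use.
 */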
/* This helper routine finds an existing override if known. Locking should be done by caller */
static struct thread_qos_override *
find_qos_override(thread_t thread,
                  user_addr_t resource,
                  int resource_type)
{
    struct thread_qos_override *override;

    override = thread->overrides;
    while (override) {
        if (override->override_resource == resource &&
            override->override_resource_type == resource_type) {
            return override;
        }

        override = override->override_next;
    }

    return NULL;
}
static void
find_and_decrement_qos_override(thread_t    thread,
                                user_addr_t resource,
                                int         resource_type,
                                boolean_t   reset,
                                struct thread_qos_override **free_override_list)
{
    struct thread_qos_override *override, *override_prev;

    override_prev = NULL;
    override = thread->overrides;
    while (override) {
        struct thread_qos_override *override_next = override->override_next;

        if ((THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD == resource || override->override_resource == resource) &&
            (THREAD_QOS_OVERRIDE_TYPE_WILDCARD == resource_type || override->override_resource_type == resource_type)) {

            if (reset) {
                override->override_contended_resource_count = 0;
            } else {
                override->override_contended_resource_count--;
            }

            if (override->override_contended_resource_count == 0) {
                if (override_prev == NULL) {
                    thread->overrides = override_next;
                } else {
                    override_prev->override_next = override_next;
                }

                /* Add to out-param for later zfree */
                override->override_next = *free_override_list;
                *free_override_list = override;
            } else {
                override_prev = override;
            }

            if (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD != resource) {
                return;
            }
        } else {
            override_prev = override;
        }

        override = override_next;
    }
}
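/*
 * Note the traversal contract above: for a specific resource the walk stops at
 * the first matching entry, whereas THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD keeps
 * scanning so that every entry of the requested type is decremented (or reset)
 * and queued on free_override_list for the caller to zfree.
 */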
/* This helper recalculates the current requested override using the policy selected at boot */
static int
calculate_requested_qos_override(thread_t thread)
{
    if (qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
        return THREAD_QOS_UNSPECIFIED;
    }

    /* iterate over all overrides and calculate MAX */
    struct thread_qos_override *override;
    int qos_override = THREAD_QOS_UNSPECIFIED;

    override = thread->overrides;
    while (override) {
        if (qos_override_mode != QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_IGNORE_DISPATCH ||
            override->override_resource_type != THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE) {
            qos_override = MAX(qos_override, override->override_qos);
        }

        override = override->override_next;
    }

    return qos_override;
}
/*
 * Returns:
 * - 0 on success
 * - EINVAL if some invalid input was passed
 * - EFAULT if user_lock_addr != NULL and needs to be faulted (userland has to
 *   fault it and retry)
 * - ESTALE if user_lock_addr != NULL &&
 *   ulock_owner_value_to_port_name(*user_lock_addr) != user_lock_owner
 */
static int
proc_thread_qos_add_override_internal(thread_t         thread,
                                      int              override_qos,
                                      boolean_t        first_override_for_resource,
                                      user_addr_t      resource,
                                      int              resource_type,
                                      user_addr_t      user_lock_addr,
                                      mach_port_name_t user_lock_owner)
{
    struct task_pend_token pend_token = {};
    int rc = 0;

    thread_mtx_lock(thread);

    KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_START,
                          thread_tid(thread), override_qos, first_override_for_resource ? 1 : 0, 0, 0);

    DTRACE_BOOST5(qos_add_override_pre, uint64_t, thread_tid(thread),
                  uint64_t, thread->requested_policy.thrp_qos,
                  uint64_t, thread->effective_policy.thep_qos,
                  int, override_qos, boolean_t, first_override_for_resource);

    struct thread_qos_override *override;
    struct thread_qos_override *override_new = NULL;
    int new_qos_override, prev_qos_override;
    int new_effective_qos;

    canonicalize_resource_and_type(&resource, &resource_type);

    override = find_qos_override(thread, resource, resource_type);
    if (first_override_for_resource && !override) {
        /* We need to allocate a new object. Drop the thread lock and
         * recheck afterwards in case someone else added the override
         */
        thread_mtx_unlock(thread);
        override_new = zalloc(thread_qos_override_zone);
        thread_mtx_lock(thread);
        override = find_qos_override(thread, resource, resource_type);
    }
    if (user_lock_addr) {
        uint64_t val;
        /* Workaround lack of explicit support for 'no-fault copyin'
         * <rdar://problem/24999882>, as disabling preemption prevents paging in
         * userspace memory
         */
        disable_preemption();
        rc = copyin_word(user_lock_addr, &val, sizeof(user_lock_owner));
        enable_preemption();
        if (rc == 0 && ulock_owner_value_to_port_name((uint32_t)val) != user_lock_owner) {
            rc = ESTALE;
        }
        if (rc) {
            prev_qos_override = proc_get_thread_policy_locked(thread,
                    TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);
            new_qos_override = prev_qos_override;
            new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);
            thread_mtx_unlock(thread);
            goto out;
        }
    }
    if (first_override_for_resource && override) {
        /* Someone else already allocated while the thread lock was dropped */
        override->override_contended_resource_count++;
    } else if (!override && override_new) {
        override = override_new;
        override_new = NULL;
        override->override_next = thread->overrides;
        /* since first_override_for_resource was TRUE */
        override->override_contended_resource_count = 1;
        override->override_resource = resource;
        override->override_resource_type = resource_type;
        override->override_qos = THREAD_QOS_UNSPECIFIED;
        thread->overrides = override;
    }

    if (override) {
        if (override->override_qos == THREAD_QOS_UNSPECIFIED)
            override->override_qos = override_qos;
        else
            override->override_qos = MAX(override->override_qos, override_qos);
    }

    /* Determine how to combine the various overrides into a single current
     * requested override
     */
    new_qos_override = calculate_requested_qos_override(thread);

    prev_qos_override = proc_get_thread_policy_locked(thread,
            TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);

    if (new_qos_override != prev_qos_override) {
        proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
                                      TASK_POLICY_QOS_OVERRIDE,
                                      new_qos_override, 0, &pend_token);
    }

    new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);

    thread_mtx_unlock(thread);

    thread_policy_update_complete_unlocked(thread, &pend_token);

out:
    if (override_new) {
        zfree(thread_qos_override_zone, override_new);
    }

    DTRACE_BOOST4(qos_add_override_post, int, prev_qos_override,
                  int, new_qos_override, int, new_effective_qos, int, rc);

    KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_END,
                          new_qos_override, resource, resource_type, 0, 0);

    return rc;
}
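/*
 * The zalloc above is deliberately done with the thread mutex dropped; if
 * another caller installs the same (resource, resource_type) entry in that
 * window, the freshly allocated object stays in override_new and is released
 * at 'out'.
 */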
int
proc_thread_qos_add_override_check_owner(thread_t thread,
                                         int override_qos,
                                         boolean_t first_override_for_resource,
                                         user_addr_t resource,
                                         int resource_type,
                                         user_addr_t user_lock_addr,
                                         mach_port_name_t user_lock_owner)
{
    return proc_thread_qos_add_override_internal(thread, override_qos,
            first_override_for_resource, resource, resource_type,
            user_lock_addr, user_lock_owner);
}
boolean_t
proc_thread_qos_add_override(task_t      task,
                             thread_t    thread,
                             uint64_t    tid,
                             int         override_qos,
                             boolean_t   first_override_for_resource,
                             user_addr_t resource,
                             int         resource_type)
{
    boolean_t has_thread_reference = FALSE;
    int rc = 0;

    if (thread == THREAD_NULL) {
        thread = task_findtid(task, tid);
        /* returns referenced thread */

        if (thread == THREAD_NULL) {
            KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_NONE,
                                  tid, 0, 0xdead, 0, 0);
            return FALSE;
        }
        has_thread_reference = TRUE;
    } else {
        assert(thread->task == task);
    }
    rc = proc_thread_qos_add_override_internal(thread, override_qos,
            first_override_for_resource, resource, resource_type, 0, 0);
    if (has_thread_reference) {
        thread_deallocate(thread);
    }

    return rc == 0;
}
static int
proc_thread_qos_remove_override_internal(thread_t    thread,
                                         user_addr_t resource,
                                         int         resource_type,
                                         boolean_t   reset,
                                         boolean_t   squash)
{
    struct task_pend_token pend_token = {};

    struct thread_qos_override *deferred_free_override_list = NULL;
    int new_qos_override, prev_qos_override, new_effective_qos, prev_qos;
    int new_qos = THREAD_QOS_UNSPECIFIED;

    thread_mtx_lock(thread);

    canonicalize_resource_and_type(&resource, &resource_type);

    find_and_decrement_qos_override(thread, resource, resource_type, reset, &deferred_free_override_list);

    KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_START,
                          thread_tid(thread), resource, reset, 0, 0);

    DTRACE_BOOST3(qos_remove_override_pre, uint64_t, thread_tid(thread),
                  uint64_t, thread->requested_policy.thrp_qos,
                  uint64_t, thread->effective_policy.thep_qos);

    /* Determine how to combine the various overrides into a single current requested override */
    new_qos_override = calculate_requested_qos_override(thread);

    spl_t s = splsched();
    thread_lock(thread);

    /*
     * The override chain and therefore the value of the current override is locked with thread mutex,
     * so we can do a get/set without races. However, the rest of thread policy is locked under the spinlock.
     * This means you can't change the current override from a spinlock-only setter.
     */
    prev_qos_override = thread_get_requested_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);

    if (squash) {
        int prev_ipc_override;
        int prev_override;

        /*
         * Remove the specified overrides, and set the current override as the new base QoS.
         * Return the new QoS value.
         */
        prev_ipc_override = thread_get_requested_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_IPC_OVERRIDE, NULL);
        prev_override = MAX(prev_qos_override, prev_ipc_override);

        prev_qos = thread_get_requested_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS, NULL);

        new_qos = MAX(prev_qos, prev_override);
        if (new_qos != prev_qos)
            proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS, new_qos, 0, &pend_token);
    }

    if (new_qos_override != prev_qos_override)
        proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, new_qos_override, 0, &pend_token);

    new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);

    thread_unlock(thread);
    splx(s);

    thread_mtx_unlock(thread);

    thread_policy_update_complete_unlocked(thread, &pend_token);

    while (deferred_free_override_list) {
        struct thread_qos_override *override_next = deferred_free_override_list->override_next;

        zfree(thread_qos_override_zone, deferred_free_override_list);
        deferred_free_override_list = override_next;
    }

    DTRACE_BOOST3(qos_remove_override_post, int, prev_qos_override,
                  int, new_qos_override, int, new_effective_qos);

    KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_END,
                          thread_tid(thread), squash, 0, 0, 0);

    return new_qos;
}
boolean_t
proc_thread_qos_remove_override(task_t      task,
                                thread_t    thread,
                                uint64_t    tid,
                                user_addr_t resource,
                                int         resource_type)
{
    boolean_t has_thread_reference = FALSE;

    if (thread == THREAD_NULL) {
        thread = task_findtid(task, tid);
        /* returns referenced thread */

        if (thread == THREAD_NULL) {
            KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_NONE,
                                  tid, 0, 0xdead, 0, 0);
            return FALSE;
        }
        has_thread_reference = TRUE;
    } else {
        assert(task == thread->task);
    }

    proc_thread_qos_remove_override_internal(thread, resource, resource_type, FALSE, FALSE);

    if (has_thread_reference)
        thread_deallocate(thread);

    return TRUE;
}
boolean_t
proc_thread_qos_reset_override(task_t      task,
                               thread_t    thread,
                               uint64_t    tid,
                               user_addr_t resource,
                               int         resource_type)
{
    boolean_t has_thread_reference = FALSE;

    if (thread == THREAD_NULL) {
        thread = task_findtid(task, tid);
        /* returns referenced thread */

        if (thread == THREAD_NULL) {
            KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_NONE,
                                  tid, 0, 0xdead, 0, 0);
            return FALSE;
        }
        has_thread_reference = TRUE;
    } else {
        assert(task == thread->task);
    }

    proc_thread_qos_remove_override_internal(thread, resource, resource_type, TRUE, FALSE);

    if (has_thread_reference)
        thread_deallocate(thread);

    return TRUE;
}
/*
 * Clears the requested overrides, and replaces the current QoS with the max
 * of the current QoS and the current override, then returns the new QoS.
 *
 * This is useful in order to reset overrides before parking a workqueue thread,
 * but avoid dropping priority and getting preempted right before parking.
 *
 * Called without any locks held.
 */
int
proc_thread_qos_squash_override(thread_t thread, user_addr_t resource, int resource_type)
{
    return proc_thread_qos_remove_override_internal(thread, resource, resource_type, TRUE, TRUE);
}
/* Deallocate before thread termination */
void proc_thread_qos_deallocate(thread_t thread)
{
    /*
     * There are no more references to this thread,
     * therefore this thread must not own any more locks,
     * therefore there must not be any more user promotions.
     */
    assert(thread->user_promotions == 0);
    assert(thread->requested_policy.thrp_qos_promote == THREAD_QOS_UNSPECIFIED);
    assert(thread->user_promotion_basepri == 0);

    /* This thread must have no more IPC overrides. */
    assert(thread->ipc_overrides == 0);
    assert(thread->requested_policy.thrp_qos_ipc_override == THREAD_QOS_UNSPECIFIED);
    assert(thread->sync_ipc_overrides == 0);
    assert(thread->requested_policy.thrp_qos_sync_ipc_override == THREAD_QOS_UNSPECIFIED);

    /*
     * Clear out any lingering override objects.
     */
    struct thread_qos_override *override;

    thread_mtx_lock(thread);
    override = thread->overrides;
    thread->overrides = NULL;
    thread->requested_policy.thrp_qos_override = THREAD_QOS_UNSPECIFIED;
    /* We don't need to re-evaluate thread policy here because the thread has already exited */
    thread_mtx_unlock(thread);

    while (override) {
        struct thread_qos_override *override_next = override->override_next;

        zfree(thread_qos_override_zone, override);
        override = override_next;
    }
}
/*
 * Set up the primordial thread's QoS
 */
void
task_set_main_thread_qos(task_t task, thread_t thread) {
    struct task_pend_token pend_token = {};

    assert(thread->task == task);

    thread_mtx_lock(thread);

    KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                              (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_START,
                              thread_tid(thread), threquested_0(thread), threquested_1(thread),
                              thread->requested_policy.thrp_qos, 0);

    int primordial_qos = task_compute_main_thread_qos(task);

    proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS,
                                  primordial_qos, 0, &pend_token);

    thread_mtx_unlock(thread);

    thread_policy_update_complete_unlocked(thread, &pend_token);

    KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                              (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_END,
                              thread_tid(thread), threquested_0(thread), threquested_1(thread),
                              primordial_qos, 0);
}
/*
 * KPI for pthread kext
 *
 * Return a good guess at what the initial manager QoS will be
 * Dispatch can override this in userspace if it so chooses
 */
int
task_get_default_manager_qos(task_t task)
{
    int primordial_qos = task_compute_main_thread_qos(task);

    if (primordial_qos == THREAD_QOS_LEGACY)
        primordial_qos = THREAD_QOS_USER_INITIATED;

    return primordial_qos;
}
/*
 * Promote thread with the user level properties of 'promoter'
 * Mutexes may be held, but it's OK to take the throttle lock
 *
 * if 'new_promotion' is TRUE, this is a new promotion.
 * if FALSE, we are updating an existing promotion.
 */
static void
thread_user_promotion_promote(thread_t  thread,
                              thread_t  promoter,
                              struct promote_token* promote_token,
                              boolean_t new_promotion)
{
    struct task_pend_token pend_token = {};

    uint32_t promoter_base_pri = 0, promoter_qos = THREAD_QOS_UNSPECIFIED;

    spl_t s = splsched();
    thread_lock(promoter);

    /*
     * We capture the 'promotion qos' here, which is captured
     * before task-level clamping.
     *
     * This means that if the process gets unclamped while a promotion
     * is in effect, the owning thread ends up with the correct QoS.
     *
     * This does NOT work correctly across processes, as the correct QoS
     * in one is not necessarily the correct QoS in another.
     * When we add support for multi-process ulock boosting, we need to
     * do something more complex.
     */
    promoter_qos = promoter->effective_policy.thep_qos_promote;

    /* TODO: extract 'effective unclamped base pri' instead */
    promoter_base_pri = promoter->base_pri;

    thread_unlock(promoter);
    splx(s);

    /* clamp out realtime to max user pri */
    promoter_base_pri = MIN(promoter_base_pri, MAXPRI_USER);

    /* add in the saved promotion token */
    assert(promote_token->pt_basepri <= MAXPRI_USER);

    promoter_base_pri = MAX(promoter_base_pri, promote_token->pt_basepri);
    promoter_qos = MAX(promoter_qos, promote_token->pt_qos);

    /* save the max for later */
    promote_token->pt_basepri = promoter_base_pri;
    promote_token->pt_qos = promoter_qos;

    s = splsched();
    thread_lock(thread);

    if (new_promotion) {
        if (thread->user_promotions == 0) {
            assert(thread->requested_policy.thrp_qos_promote == THREAD_QOS_UNSPECIFIED);
            assert(thread->user_promotion_basepri == 0);
        }

        thread->user_promotions++;
    } else {
        assert(thread->user_promotions > 0);
    }

    uint32_t thread_qos     = thread->requested_policy.thrp_qos_promote;
    uint32_t thread_basepri = thread->user_promotion_basepri;

    uint32_t new_qos     = MAX(thread_qos, promoter_qos);
    uint32_t new_basepri = MAX(thread_basepri, promoter_base_pri);

    /* TODO: Fast path the 'new is lower than effective' case to avoid full reevaluation */
    if (thread_qos != new_qos || thread_basepri != new_basepri) {

        thread->user_promotion_basepri = new_basepri;

        pend_token.tpt_force_recompute_pri = 1;

        proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
                                          TASK_POLICY_QOS_PROMOTE, new_qos,
                                          0, &pend_token);
    }

    thread_unlock(thread);
    splx(s);

    thread_policy_update_complete_unlocked(thread, &pend_token);
}
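/*
 * The promote_token acts as a running maximum: each promote call folds the
 * promoter's base priority and promotion QoS into the saved pt_basepri/pt_qos,
 * so repeated updates can only raise (never lower) the values applied to the
 * promoted thread.
 */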
/* Add a user promotion to thread */
void
thread_user_promotion_add(thread_t thread,
                          thread_t promoter,
                          struct promote_token* promote_token)
{
    thread_user_promotion_promote(thread, promoter, promote_token, TRUE);
}

/* Update an existing user promotion on thread */
void
thread_user_promotion_update(thread_t thread,
                             thread_t promoter,
                             struct promote_token* promote_token)
{
    thread_user_promotion_promote(thread, promoter, promote_token, FALSE);
}
/*
 * Drop a user promotion on thread
 * Mutexes may be held, but it's OK to take the throttle lock
 */
void
thread_user_promotion_drop(thread_t thread)
{
    struct task_pend_token pend_token = {};

    spl_t s = splsched();
    thread_lock(thread);

    assert(thread->user_promotions > 0);

    if (--thread->user_promotions == 0) {
        thread->requested_policy.thrp_qos_promote = THREAD_QOS_UNSPECIFIED;
        thread->user_promotion_basepri = 0;

        pend_token.tpt_force_recompute_pri = 1;

        proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
                                          TASK_POLICY_QOS_PROMOTE, THREAD_QOS_UNSPECIFIED,
                                          0, &pend_token);
    }

    thread_unlock(thread);
    splx(s);

    thread_policy_update_complete_unlocked(thread, &pend_token);
}
/*
 * Set the thread's QoS IPC override
 * Owned by the IPC subsystem
 *
 * May be called with spinlocks held, but not spinlocks
 * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
 *
 * One 'add' must be balanced by one 'drop'.
 * Between 'add' and 'drop', the override QoS value may be updated with an 'update'.
 * Before the thread is deallocated, there must be 0 remaining overrides.
 */
static void
thread_ipc_override(thread_t  thread,
                    uint32_t  qos_override,
                    boolean_t is_new_override)
{
    struct task_pend_token pend_token = {};
    boolean_t needs_update;

    spl_t s = splsched();
    thread_lock(thread);

    uint32_t old_override = thread->requested_policy.thrp_qos_ipc_override;

    assert(qos_override > THREAD_QOS_UNSPECIFIED);
    assert(qos_override < THREAD_QOS_LAST);
    if (is_new_override) {
        if (thread->ipc_overrides++ == 0) {
            /* This add is the first override for this thread */
            assert(old_override == THREAD_QOS_UNSPECIFIED);
        } else {
            /* There are already other overrides in effect for this thread */
            assert(old_override > THREAD_QOS_UNSPECIFIED);
        }
    } else {
        /* There must be at least one override (the previous add call) in effect */
        assert(thread->ipc_overrides > 0);
        assert(old_override > THREAD_QOS_UNSPECIFIED);
    }

    /*
     * We can't allow lowering if there are several IPC overrides because
     * the caller can't possibly know the whole truth
     */
    if (thread->ipc_overrides == 1) {
        needs_update = qos_override != old_override;
    } else {
        needs_update = qos_override > old_override;
    }

    if (needs_update) {
        proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
                                          TASK_POLICY_QOS_IPC_OVERRIDE,
                                          qos_override, 0, &pend_token);
        assert(pend_token.tpt_update_sockets == 0);
    }

    thread_unlock(thread);
    splx(s);

    /*
     * this is only safe after rethrottle_thread supports
     * being called from spinlock context
     */
    thread_policy_update_complete_unlocked(thread, &pend_token);
}
void
thread_add_ipc_override(thread_t thread,
                        uint32_t qos_override)
{
    thread_ipc_override(thread, qos_override, TRUE);
}

void
thread_update_ipc_override(thread_t thread,
                           uint32_t qos_override)
{
    thread_ipc_override(thread, qos_override, FALSE);
}

void
thread_drop_ipc_override(thread_t thread)
{
    struct task_pend_token pend_token = {};

    spl_t s = splsched();
    thread_lock(thread);

    assert(thread->ipc_overrides > 0);

    if (--thread->ipc_overrides == 0) {
        /*
         * There are no more overrides for this thread, so we should
         * clear out the saturated override value
         */
        proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
                                          TASK_POLICY_QOS_IPC_OVERRIDE, THREAD_QOS_UNSPECIFIED,
                                          0, &pend_token);
    }

    thread_unlock(thread);
    splx(s);

    /*
     * this is only safe after rethrottle_thread supports
     * being called from spinlock context
     */
    thread_policy_update_complete_unlocked(thread, &pend_token);
}
void
thread_add_sync_ipc_override(thread_t thread)
{
    struct task_pend_token pend_token = {};

    spl_t s = splsched();
    thread_lock(thread);

    uint32_t old_override __unused = thread->requested_policy.thrp_qos_sync_ipc_override;

    if (thread->sync_ipc_overrides++ == 0) {
        /* This add is the first override for this thread */
        assert(old_override == THREAD_QOS_UNSPECIFIED);
    } else {
        /* There are already other overrides in effect for this thread */
        assert(old_override == THREAD_QOS_USER_INTERACTIVE);
        thread_unlock(thread);
        splx(s);
        return;
    }

    uint32_t new_override = THREAD_QOS_USER_INTERACTIVE;

    proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
                                      TASK_POLICY_QOS_SYNC_IPC_OVERRIDE,
                                      new_override, 0, &pend_token);

    assert(pend_token.tpt_update_sockets == 0);

    thread_unlock(thread);
    splx(s);

    /*
     * this is only safe after rethrottle_thread supports
     * being called from spinlock context
     */
    thread_policy_update_complete_unlocked(thread, &pend_token);
}
void
thread_drop_sync_ipc_override(thread_t thread)
{
    struct task_pend_token pend_token = {};

    spl_t s = splsched();
    thread_lock(thread);

    assert(thread->sync_ipc_overrides > 0);

    if (--thread->sync_ipc_overrides == 0) {
        /*
         * There are no more overrides for this thread, so we should
         * clear out the saturated override value
         */
        proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
                                          TASK_POLICY_QOS_SYNC_IPC_OVERRIDE, THREAD_QOS_UNSPECIFIED,
                                          0, &pend_token);
    }

    thread_unlock(thread);
    splx(s);

    /*
     * this is only safe after rethrottle_thread supports
     * being called from spinlock context
     */
    thread_policy_update_complete_unlocked(thread, &pend_token);
}
/* Get current IPC override, may be called from spinlock context */
uint32_t
thread_get_ipc_override(thread_t thread)
{
    return proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_IPC_OVERRIDE, NULL);
}
/*
 * This function will promote the thread priority
 * since exec could block other threads calling
 * proc_find on the proc. This boost must be removed
 * via call to thread_clear_exec_promotion.
 */
void
thread_set_exec_promotion(thread_t thread)
{
    spl_t s;

    s = splsched();
    thread_lock(thread);

    assert((thread->sched_flags & TH_SFLAG_EXEC_PROMOTED) == 0);

    if (thread->sched_pri < EXEC_BOOST_PRIORITY ||
        !(thread->sched_flags & TH_SFLAG_EXEC_PROMOTED)) {
        KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_EXEC_PROMOTE) | DBG_FUNC_NONE,
                              (uintptr_t)thread_tid(thread),
                              thread->sched_pri, thread->base_pri,
                              EXEC_BOOST_PRIORITY, 0);
        thread->sched_flags |= TH_SFLAG_EXEC_PROMOTED;
        if (thread->sched_pri < EXEC_BOOST_PRIORITY)
            set_sched_pri(thread, EXEC_BOOST_PRIORITY);
    }

    thread_unlock(thread);
    splx(s);
}
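/*
 * Callers are expected to bracket the exec-critical section with
 * thread_set_exec_promotion()/thread_clear_exec_promotion(); the assert above
 * documents that the boost is never applied twice to the same thread.
 */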
/*
 * This function will clear the exec thread
 * promotion set on the thread by thread_set_exec_promotion.
 */
void
thread_clear_exec_promotion(thread_t thread)
{
    spl_t s;

    s = splsched();
    thread_lock(thread);
    assert(thread->sched_flags & TH_SFLAG_EXEC_PROMOTED);

    if (thread->sched_flags & TH_SFLAG_EXEC_PROMOTED) {
        thread->sched_flags &= ~TH_SFLAG_EXEC_PROMOTED;

        if (thread->sched_flags & TH_SFLAG_PROMOTED_MASK) {
            /* it still has other promotions (mutex/rw_lock) */
        } else if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
            KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_EXEC_DEMOTE) | DBG_FUNC_NONE,
                                  (uintptr_t)thread_tid(thread),
                                  thread->sched_pri,
                                  thread->base_pri,
                                  DEPRESSPRI, 0);
            set_sched_pri(thread, DEPRESSPRI);
        } else {
            KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_EXEC_DEMOTE) | DBG_FUNC_NONE,
                                  (uintptr_t)thread_tid(thread),
                                  thread->sched_pri,
                                  thread->base_pri,
                                  thread->base_pri, 0);
            thread_recompute_sched_pri(thread, FALSE);
        }
    }

    thread_unlock(thread);
    splx(s);
}