/*
 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <mach/mach_types.h>
#include <mach/thread_act_server.h>

#include <kern/kern_types.h>
#include <kern/processor.h>
#include <kern/thread.h>
#include <kern/affinity.h>
#include <mach/task_policy.h>
#include <kern/policy_internal.h>
#include <sys/errno.h>
#include <sys/ulock.h>

#include <mach/machine/sdt.h>

extern int    proc_selfpid(void);
extern char * proc_name_address(void *p);
extern void   rethrottle_thread(void * uthread);
#define QOS_EXTRACT(q)    ((q) & 0xff)

uint32_t qos_override_mode;
#define QOS_OVERRIDE_MODE_OVERHANG_PEAK 0
#define QOS_OVERRIDE_MODE_IGNORE_OVERRIDE 1
#define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE 2
#define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_IGNORE_DISPATCH 3
#define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE 4

extern zone_t thread_qos_override_zone;

static boolean_t
proc_thread_qos_remove_override_internal(thread_t thread, user_addr_t resource, int resource_type, boolean_t reset, boolean_t squash);
/*
 * THREAD_QOS_UNSPECIFIED is assigned the highest tier available, so it does not provide a limit
 * to threads that don't have a QoS class set.
 */
const qos_policy_params_t thread_qos_policy_params = {
    /*
     * This table defines the starting base priority of the thread,
     * which will be modified by the thread importance and the task max priority
     * before being applied.
     */
    .qos_pri[THREAD_QOS_UNSPECIFIED]                = 0, /* not consulted */
    .qos_pri[THREAD_QOS_USER_INTERACTIVE]           = BASEPRI_BACKGROUND, /* i.e. 46 */
    .qos_pri[THREAD_QOS_USER_INITIATED]             = BASEPRI_USER_INITIATED,
    .qos_pri[THREAD_QOS_LEGACY]                     = BASEPRI_DEFAULT,
    .qos_pri[THREAD_QOS_UTILITY]                    = BASEPRI_UTILITY,
    .qos_pri[THREAD_QOS_BACKGROUND]                 = MAXPRI_THROTTLE,
    .qos_pri[THREAD_QOS_MAINTENANCE]                = MAXPRI_THROTTLE,
    /*
     * This table defines the highest IO priority that a thread marked with this
     * QoS class can have.
     */
#if CONFIG_EMBEDDED
    .qos_iotier[THREAD_QOS_UNSPECIFIED]             = THROTTLE_LEVEL_TIER0,
    .qos_iotier[THREAD_QOS_USER_INTERACTIVE]        = THROTTLE_LEVEL_TIER0,
    .qos_iotier[THREAD_QOS_USER_INITIATED]          = THROTTLE_LEVEL_TIER0,
    .qos_iotier[THREAD_QOS_LEGACY]                  = THROTTLE_LEVEL_TIER0,
    .qos_iotier[THREAD_QOS_UTILITY]                 = THROTTLE_LEVEL_TIER0,
    .qos_iotier[THREAD_QOS_BACKGROUND]              = THROTTLE_LEVEL_TIER3,
    .qos_iotier[THREAD_QOS_MAINTENANCE]             = THROTTLE_LEVEL_TIER3,
#else
    .qos_iotier[THREAD_QOS_UNSPECIFIED]             = THROTTLE_LEVEL_TIER0,
    .qos_iotier[THREAD_QOS_USER_INTERACTIVE]        = THROTTLE_LEVEL_TIER0,
    .qos_iotier[THREAD_QOS_USER_INITIATED]          = THROTTLE_LEVEL_TIER0,
    .qos_iotier[THREAD_QOS_LEGACY]                  = THROTTLE_LEVEL_TIER0,
    .qos_iotier[THREAD_QOS_UTILITY]                 = THROTTLE_LEVEL_TIER1,
    .qos_iotier[THREAD_QOS_BACKGROUND]              = THROTTLE_LEVEL_TIER2, /* possibly overridden by bg_iotier */
    .qos_iotier[THREAD_QOS_MAINTENANCE]             = THROTTLE_LEVEL_TIER3,
#endif
    /*
     * This table defines the highest QoS level that
     * a thread marked with this QoS class can have.
     */
    .qos_through_qos[THREAD_QOS_UNSPECIFIED]        = QOS_EXTRACT(THROUGHPUT_QOS_TIER_UNSPECIFIED),
    .qos_through_qos[THREAD_QOS_USER_INTERACTIVE]   = QOS_EXTRACT(THROUGHPUT_QOS_TIER_0),
    .qos_through_qos[THREAD_QOS_USER_INITIATED]     = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
    .qos_through_qos[THREAD_QOS_LEGACY]             = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
    .qos_through_qos[THREAD_QOS_UTILITY]            = QOS_EXTRACT(THROUGHPUT_QOS_TIER_2),
    .qos_through_qos[THREAD_QOS_BACKGROUND]         = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
    .qos_through_qos[THREAD_QOS_MAINTENANCE]        = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),

    .qos_latency_qos[THREAD_QOS_UNSPECIFIED]        = QOS_EXTRACT(LATENCY_QOS_TIER_UNSPECIFIED),
    .qos_latency_qos[THREAD_QOS_USER_INTERACTIVE]   = QOS_EXTRACT(LATENCY_QOS_TIER_0),
    .qos_latency_qos[THREAD_QOS_USER_INITIATED]     = QOS_EXTRACT(LATENCY_QOS_TIER_1),
    .qos_latency_qos[THREAD_QOS_LEGACY]             = QOS_EXTRACT(LATENCY_QOS_TIER_1),
    .qos_latency_qos[THREAD_QOS_UTILITY]            = QOS_EXTRACT(LATENCY_QOS_TIER_3),
    .qos_latency_qos[THREAD_QOS_BACKGROUND]         = QOS_EXTRACT(LATENCY_QOS_TIER_3),
    .qos_latency_qos[THREAD_QOS_MAINTENANCE]        = QOS_EXTRACT(LATENCY_QOS_TIER_3),
};
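/*
 * Illustrative sketch (not part of the original file): for a thread whose
 * effective QoS is THREAD_QOS_UTILITY, this table yields a starting base
 * priority of BASEPRI_UTILITY, an IO ceiling of THROTTLE_LEVEL_TIER1
 * (TIER0 on CONFIG_EMBEDDED), and throughput/latency tiers of
 * QOS_EXTRACT(THROUGHPUT_QOS_TIER_2) and QOS_EXTRACT(LATENCY_QOS_TIER_3).
 */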
static void
thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode);

static int
thread_qos_scaled_relative_priority(int qos, int qos_relprio);

static void
proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info);

static void
proc_set_thread_policy_locked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);

static void
proc_set_thread_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);

static void
thread_set_requested_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2);

static int
thread_get_requested_policy_spinlocked(thread_t thread, int category, int flavor, int* value2);

static int
proc_get_thread_policy_locked(thread_t thread, int category, int flavor, int* value2);

static void
thread_policy_update_spinlocked(thread_t thread, boolean_t recompute_priority, task_pend_token_t pend_token);

static void
thread_policy_update_internal_spinlocked(thread_t thread, boolean_t recompute_priority, task_pend_token_t pend_token);
void
thread_policy_init(void) {
    if (PE_parse_boot_argn("qos_override_mode", &qos_override_mode, sizeof(qos_override_mode))) {
        printf("QOS override mode: 0x%08x\n", qos_override_mode);
    } else {
        qos_override_mode = QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE;
    }
}
boolean_t
thread_has_qos_policy(thread_t thread) {
    return (proc_get_thread_policy(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS) != THREAD_QOS_UNSPECIFIED) ? TRUE : FALSE;
}
void
thread_remove_qos_policy_locked(thread_t thread,
                                task_pend_token_t pend_token)
{
    __unused int prev_qos = thread->requested_policy.thrp_qos;

    DTRACE_PROC2(qos__remove, thread_t, thread, int, prev_qos);

    proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
                                  THREAD_QOS_UNSPECIFIED, 0, pend_token);
}
kern_return_t
thread_remove_qos_policy(thread_t thread)
{
    struct task_pend_token pend_token = {};

    thread_mtx_lock(thread);
    if (!thread->active) {
        thread_mtx_unlock(thread);
        return KERN_TERMINATED;
    }

    thread_remove_qos_policy_locked(thread, &pend_token);

    thread_mtx_unlock(thread);

    thread_policy_update_complete_unlocked(thread, &pend_token);

    return KERN_SUCCESS;
}
boolean_t
thread_is_static_param(thread_t thread)
{
    if (thread->static_param) {
        DTRACE_PROC1(qos__legacy__denied, thread_t, thread);
        return TRUE;
    }
    return FALSE;
}
/*
 * Relative priorities can range between 0REL and -15REL. These
 * map to QoS-specific ranges, to create non-overlapping priority
 * ranges.
 */
static int
thread_qos_scaled_relative_priority(int qos, int qos_relprio)
{
    int next_lower_qos;

    /* Fast path, since no validation or scaling is needed */
    if (qos_relprio == 0) return 0;

    switch (qos) {
        case THREAD_QOS_USER_INTERACTIVE:
            next_lower_qos = THREAD_QOS_USER_INITIATED;
            break;
        case THREAD_QOS_USER_INITIATED:
            next_lower_qos = THREAD_QOS_LEGACY;
            break;
        case THREAD_QOS_LEGACY:
            next_lower_qos = THREAD_QOS_UTILITY;
            break;
        case THREAD_QOS_UTILITY:
            next_lower_qos = THREAD_QOS_BACKGROUND;
            break;
        case THREAD_QOS_MAINTENANCE:
        case THREAD_QOS_BACKGROUND:
            next_lower_qos = THREAD_QOS_UNSPECIFIED;
            break;
        default:
            panic("Unrecognized QoS %d", qos);
            return 0;
    }

    int prio_range_max = thread_qos_policy_params.qos_pri[qos];
    int prio_range_min = next_lower_qos ? thread_qos_policy_params.qos_pri[next_lower_qos] : 0;

    /*
     * We now have the valid range that the scaled relative priority can map to. Note
     * that the lower bound is exclusive, but the upper bound is inclusive. If the
     * range is (21,31], 0REL should map to 31 and -15REL should map to 22. We use the
     * fact that the max relative priority is -15 and use ">>4" to divide by 16 and discard
     * the remainder.
     */
    int scaled_relprio = -(((prio_range_max - prio_range_min) * (-qos_relprio)) >> 4);

    return scaled_relprio;
}
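/*
 * Worked example (a sketch, assuming BASEPRI_USER_INITIATED == 37 and
 * BASEPRI_DEFAULT == 31 in the qos_pri table above): for
 * qos == THREAD_QOS_USER_INITIATED and qos_relprio == -8, the valid range is
 * (31, 37], so scaled_relprio = -(((37 - 31) * 8) >> 4) = -3, and
 * thread_recompute_priority() will derive a base priority of 37 - 3 = 34.
 */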
/*
 * flag set by -qos-policy-allow boot-arg to allow
 * testing thread qos policy from userspace
 */
boolean_t allow_qos_policy_set = FALSE;

kern_return_t
thread_policy_set(
    thread_t                thread,
    thread_policy_flavor_t  flavor,
    thread_policy_t         policy_info,
    mach_msg_type_number_t  count)
{
    thread_qos_policy_data_t req_qos;
    kern_return_t kr;

    req_qos.qos_tier = THREAD_QOS_UNSPECIFIED;

    if (thread == THREAD_NULL)
        return (KERN_INVALID_ARGUMENT);

    if (allow_qos_policy_set == FALSE) {
        if (thread_is_static_param(thread))
            return (KERN_POLICY_STATIC);

        if (flavor == THREAD_QOS_POLICY)
            return (KERN_INVALID_ARGUMENT);
    }

    /* Threads without static_param set reset their QoS when other policies are applied. */
    if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
        /* Store the existing tier, if we fail this call it is used to reset back. */
        req_qos.qos_tier = thread->requested_policy.thrp_qos;
        req_qos.tier_importance = thread->requested_policy.thrp_qos_relprio;

        kr = thread_remove_qos_policy(thread);
        if (kr != KERN_SUCCESS) {
            return kr;
        }
    }

    kr = thread_policy_set_internal(thread, flavor, policy_info, count);

    /* Return KERN_QOS_REMOVED instead of KERN_SUCCESS if we succeeded. */
    if (req_qos.qos_tier != THREAD_QOS_UNSPECIFIED) {
        if (kr != KERN_SUCCESS) {
            /* Reset back to our original tier as the set failed. */
            (void)thread_policy_set_internal(thread, THREAD_QOS_POLICY, (thread_policy_t)&req_qos, THREAD_QOS_POLICY_COUNT);
        }
    }

    return kr;
}
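/*
 * Illustrative sketch (not part of the original file): a caller that applies
 * a legacy policy to a thread which already has a requested QoS tier will
 * have that tier removed first, e.g. a userspace-style call such as
 *
 *     thread_precedence_policy_data_t prec = { .importance = 10 };
 *     thread_policy_set(thread_port, THREAD_PRECEDENCE_POLICY,
 *         (thread_policy_t)&prec, THREAD_PRECEDENCE_POLICY_COUNT);
 *
 * (thread_port shown only for illustration). If the subsequent set fails,
 * the tier saved in req_qos above is reapplied.
 */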
kern_return_t
thread_policy_set_internal(
    thread_t                thread,
    thread_policy_flavor_t  flavor,
    thread_policy_t         policy_info,
    mach_msg_type_number_t  count)
{
    kern_return_t result = KERN_SUCCESS;
    struct task_pend_token pend_token = {};

    thread_mtx_lock(thread);
    if (!thread->active) {
        thread_mtx_unlock(thread);

        return (KERN_TERMINATED);
    }

    switch (flavor) {

    case THREAD_EXTENDED_POLICY:
    {
        boolean_t timeshare = TRUE;

        if (count >= THREAD_EXTENDED_POLICY_COUNT) {
            thread_extended_policy_t info;

            info = (thread_extended_policy_t)policy_info;
            timeshare = info->timeshare;
        }

        sched_mode_t mode = (timeshare == TRUE) ? TH_MODE_TIMESHARE : TH_MODE_FIXED;

        spl_t s = splsched();
        thread_lock(thread);

        thread_set_user_sched_mode_and_recompute_pri(thread, mode);

        thread_unlock(thread);
        splx(s);

        pend_token.tpt_update_thread_sfi = 1;

        break;
    }
    case THREAD_TIME_CONSTRAINT_POLICY:
    {
        thread_time_constraint_policy_t info;

        if (count < THREAD_TIME_CONSTRAINT_POLICY_COUNT) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        info = (thread_time_constraint_policy_t)policy_info;
        if (info->constraint < info->computation ||
            info->computation > max_rt_quantum ||
            info->computation < min_rt_quantum) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        spl_t s = splsched();
        thread_lock(thread);

        thread->realtime.period = info->period;
        thread->realtime.computation = info->computation;
        thread->realtime.constraint = info->constraint;
        thread->realtime.preemptible = info->preemptible;

        thread_set_user_sched_mode_and_recompute_pri(thread, TH_MODE_REALTIME);

        thread_unlock(thread);
        splx(s);

        pend_token.tpt_update_thread_sfi = 1;

        break;
    }
    case THREAD_PRECEDENCE_POLICY:
    {
        thread_precedence_policy_t info;

        if (count < THREAD_PRECEDENCE_POLICY_COUNT) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }
        info = (thread_precedence_policy_t)policy_info;

        spl_t s = splsched();
        thread_lock(thread);

        thread->importance = info->importance;

        thread_recompute_priority(thread);

        thread_unlock(thread);
        splx(s);

        break;
    }
    case THREAD_AFFINITY_POLICY:
    {
        thread_affinity_policy_t info;

        if (!thread_affinity_is_supported()) {
            result = KERN_NOT_SUPPORTED;
            break;
        }
        if (count < THREAD_AFFINITY_POLICY_COUNT) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        info = (thread_affinity_policy_t) policy_info;
        /*
         * Unlock the thread mutex here and
         * return directly after calling thread_affinity_set().
         * This is necessary for correct lock ordering because
         * thread_affinity_set() takes the task lock.
         */
        thread_mtx_unlock(thread);
        return thread_affinity_set(thread, info->affinity_tag);
    }
#if CONFIG_EMBEDDED
    case THREAD_BACKGROUND_POLICY:
    {
        thread_background_policy_t info;

        if (count < THREAD_BACKGROUND_POLICY_COUNT) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        if (thread->task != current_task()) {
            result = KERN_PROTECTION_FAILURE;
            break;
        }

        info = (thread_background_policy_t) policy_info;

        int enable;

        if (info->priority == THREAD_BACKGROUND_POLICY_DARWIN_BG)
            enable = TASK_POLICY_ENABLE;
        else
            enable = TASK_POLICY_DISABLE;

        int category = (current_thread() == thread) ? TASK_POLICY_INTERNAL : TASK_POLICY_EXTERNAL;

        proc_set_thread_policy_locked(thread, category, TASK_POLICY_DARWIN_BG, enable, 0, &pend_token);

        break;
    }
#endif /* CONFIG_EMBEDDED */
    case THREAD_THROUGHPUT_QOS_POLICY:
    {
        thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
        thread_throughput_qos_t tqos;

        if (count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        if ((result = qos_throughput_policy_validate(info->thread_throughput_qos_tier)) != KERN_SUCCESS)
            break;

        tqos = qos_extract(info->thread_throughput_qos_tier);

        proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
                                      TASK_POLICY_THROUGH_QOS, tqos, 0, &pend_token);

        break;
    }
    case THREAD_LATENCY_QOS_POLICY:
    {
        thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
        thread_latency_qos_t lqos;

        if (count < THREAD_LATENCY_QOS_POLICY_COUNT) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        if ((result = qos_latency_policy_validate(info->thread_latency_qos_tier)) != KERN_SUCCESS)
            break;

        lqos = qos_extract(info->thread_latency_qos_tier);

        proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
                                      TASK_POLICY_LATENCY_QOS, lqos, 0, &pend_token);

        break;
    }
    case THREAD_QOS_POLICY:
    {
        thread_qos_policy_t info = (thread_qos_policy_t)policy_info;

        if (count < THREAD_QOS_POLICY_COUNT) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        if (info->qos_tier < 0 || info->qos_tier >= THREAD_QOS_LAST) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        if (info->tier_importance > 0 || info->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        if (info->qos_tier == THREAD_QOS_UNSPECIFIED && info->tier_importance != 0) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
                                      info->qos_tier, -info->tier_importance, &pend_token);

        break;
    }
551 result
= KERN_INVALID_ARGUMENT
;
555 thread_mtx_unlock(thread
);
557 thread_policy_update_complete_unlocked(thread
, &pend_token
);
/*
 * Note that there is no implemented difference between POLICY_RR and POLICY_FIFO.
 * Both result in FIXED mode scheduling.
 */
static sched_mode_t
convert_policy_to_sched_mode(integer_t policy) {
    switch (policy) {
        case POLICY_TIMESHARE:
            return TH_MODE_TIMESHARE;
        case POLICY_RR:
        case POLICY_FIFO:
            return TH_MODE_FIXED;
        default:
            panic("unexpected sched policy: %d", policy);
            return TH_MODE_NONE;
    }
}
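/*
 * For illustration (a sketch, not in the original file):
 * convert_policy_to_sched_mode(POLICY_TIMESHARE) yields TH_MODE_TIMESHARE,
 * while both POLICY_RR and POLICY_FIFO yield TH_MODE_FIXED, matching the
 * note above that the two fixed policies are not distinguished here.
 */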
/*
 * Called either with the thread mutex locked
 * or from the pthread kext in a 'safe place'.
 */
static kern_return_t
thread_set_mode_and_absolute_pri_internal(thread_t           thread,
                                          sched_mode_t       mode,
                                          integer_t          priority,
                                          task_pend_token_t  pend_token)
{
    kern_return_t kr = KERN_SUCCESS;

    spl_t s = splsched();
    thread_lock(thread);

    /* This path isn't allowed to change a thread out of realtime. */
    if ((thread->sched_mode == TH_MODE_REALTIME) ||
        (thread->saved_mode == TH_MODE_REALTIME)) {
        kr = KERN_FAILURE;
        goto unlock;
    }

    if (thread->policy_reset) {
        kr = KERN_SUCCESS;
        goto unlock;
    }

    sched_mode_t old_mode = thread->sched_mode;

    /*
     * Reverse engineer and apply the correct importance value
     * from the requested absolute priority value.
     *
     * TODO: Store the absolute priority value instead
     */

    if (priority >= thread->max_priority)
        priority = thread->max_priority - thread->task_priority;
    else if (priority >= MINPRI_KERNEL)
        priority -= MINPRI_KERNEL;
    else if (priority >= MINPRI_RESERVED)
        priority -= MINPRI_RESERVED;
    else
        priority -= BASEPRI_DEFAULT;

    priority += thread->task_priority;

    if (priority > thread->max_priority)
        priority = thread->max_priority;
    else if (priority < MINPRI)
        priority = MINPRI;

    thread->importance = priority - thread->task_priority;

    thread_set_user_sched_mode_and_recompute_pri(thread, mode);

    if (mode != old_mode)
        pend_token->tpt_update_thread_sfi = 1;

unlock:
    thread_unlock(thread);
    splx(s);

    return kr;
}
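/*
 * Worked example (a sketch, assuming typical priority constants:
 * BASEPRI_DEFAULT == 31, MINPRI_RESERVED == 64, MINPRI_KERNEL == 80,
 * MAXPRI_USER == 63): for a thread with task_priority == 31 and
 * max_priority == 63, a requested absolute priority of 40 takes the final
 * "else" branch above: 40 - 31 = 9, then 9 + 31 = 40, which is already
 * within [MINPRI, max_priority], so thread->importance becomes 40 - 31 = 9.
 */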
/*
 * KPI for pthread kext
 *
 * Set scheduling policy & absolute priority for thread
 * May be called from waitqueue callout context with spinlocks held
 * Thread mutex lock is not held
 */
kern_return_t
thread_set_workq_pri(thread_t  thread,
                     integer_t priority,
                     integer_t policy)
{
    struct task_pend_token pend_token = {};
    sched_mode_t mode = convert_policy_to_sched_mode(policy);

    assert(thread->static_param);
    if (!thread->static_param)
        return KERN_FAILURE;

    /* Concern: this doesn't hold the mutex... */
    if (!thread->active)
        return KERN_TERMINATED;

    kern_return_t kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority, &pend_token);

    if (pend_token.tpt_update_thread_sfi)
        sfi_reevaluate(thread);

    return kr;
}
/*
 * thread_set_mode_and_absolute_pri:
 *
 * Set scheduling policy & absolute priority for thread, for deprecated
 * thread_set_policy and thread_policy interfaces.
 *
 * Called with nothing locked.
 */
kern_return_t
thread_set_mode_and_absolute_pri(thread_t  thread,
                                 integer_t policy,
                                 integer_t priority)
{
    kern_return_t kr = KERN_SUCCESS;
    struct task_pend_token pend_token = {};

    sched_mode_t mode = convert_policy_to_sched_mode(policy);

    thread_mtx_lock(thread);

    if (!thread->active) {
        kr = KERN_TERMINATED;
        goto unlock;
    }

    if (thread_is_static_param(thread)) {
        kr = KERN_POLICY_STATIC;
        goto unlock;
    }

    /* Setting legacy policies on threads kills the current QoS */
    if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED)
        thread_remove_qos_policy_locked(thread, &pend_token);

    kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority, &pend_token);

unlock:
    thread_mtx_unlock(thread);

    thread_policy_update_complete_unlocked(thread, &pend_token);

    return kr;
}
/*
 * Set the thread's requested mode and recompute priority
 * Called with thread mutex and thread locked
 *
 * TODO: Mitigate potential problems caused by moving thread to end of runq
 * whenever its priority is recomputed
 *      Only remove when it actually changes? Attempt to re-insert at appropriate location?
 */
static void
thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode)
{
    if (thread->policy_reset)
        return;

    boolean_t removed = thread_run_queue_remove(thread);

    /*
     * TODO: Instead of having saved mode, have 'user mode' and 'true mode'.
     * That way there's zero confusion over which the user wants
     * and which the kernel wants.
     */
    if (thread->sched_flags & TH_SFLAG_DEMOTED_MASK)
        thread->saved_mode = mode;
    else
        sched_set_thread_mode(thread, mode);

    thread_recompute_priority(thread);

    if (removed)
        thread_run_queue_reinsert(thread, SCHED_TAILQ);
}
/* called at splsched with thread lock locked */
static void
thread_update_qos_cpu_time_locked(thread_t thread)
{
    task_t task = thread->task;
    uint64_t timer_sum, timer_delta;

    /*
     * This is only as accurate as the distance between
     * last context switch (embedded) or last user/kernel boundary transition (desktop)
     * because user_timer and system_timer are only updated then.
     *
     * TODO: Consider running a thread_timer_event operation here to update it first.
     *       Maybe doable with interrupts disabled from current thread.
     *       If the thread is on a different core, may not be easy to get right.
     *
     * TODO: There should be a function for this in timer.c
     */

    timer_sum = timer_grab(&thread->user_timer);
    timer_sum += timer_grab(&thread->system_timer);
    timer_delta = timer_sum - thread->vtimer_qos_save;

    thread->vtimer_qos_save = timer_sum;

    uint64_t* task_counter = NULL;

    /* Update the task-level qos stats atomically, because we don't have the task lock. */
    switch (thread->effective_policy.thep_qos) {
        case THREAD_QOS_DEFAULT:          task_counter = &task->cpu_time_qos_stats.cpu_time_qos_default;          break;
        case THREAD_QOS_MAINTENANCE:      task_counter = &task->cpu_time_qos_stats.cpu_time_qos_maintenance;      break;
        case THREAD_QOS_BACKGROUND:       task_counter = &task->cpu_time_qos_stats.cpu_time_qos_background;       break;
        case THREAD_QOS_UTILITY:          task_counter = &task->cpu_time_qos_stats.cpu_time_qos_utility;          break;
        case THREAD_QOS_LEGACY:           task_counter = &task->cpu_time_qos_stats.cpu_time_qos_legacy;           break;
        case THREAD_QOS_USER_INITIATED:   task_counter = &task->cpu_time_qos_stats.cpu_time_qos_user_initiated;   break;
        case THREAD_QOS_USER_INTERACTIVE: task_counter = &task->cpu_time_qos_stats.cpu_time_qos_user_interactive; break;
        default:
            panic("unknown effective QoS: %d", thread->effective_policy.thep_qos);
    }

    OSAddAtomic64(timer_delta, task_counter);
}
/*
 * called with no thread locks held
 * may hold task lock
 */
void
thread_update_qos_cpu_time(thread_t thread)
{
    thread_mtx_lock(thread);

    spl_t s = splsched();
    thread_lock(thread);

    thread_update_qos_cpu_time_locked(thread);

    thread_unlock(thread);
    splx(s);

    thread_mtx_unlock(thread);
}
/*
 * Calculate base priority from thread attributes, and set it on the thread
 *
 * Called with thread_lock and thread mutex held.
 */
void
thread_recompute_priority(
    thread_t    thread)
{
    integer_t   priority;

    if (thread->policy_reset)
        return;

    if (thread->sched_mode == TH_MODE_REALTIME) {
        sched_set_thread_base_priority(thread, BASEPRI_RTQUEUES);
        return;
    } else if (thread->effective_policy.thep_qos != THREAD_QOS_UNSPECIFIED) {
        int qos = thread->effective_policy.thep_qos;
        int qos_ui_is_urgent = thread->effective_policy.thep_qos_ui_is_urgent;
        int qos_relprio = -(thread->effective_policy.thep_qos_relprio); /* stored in task policy inverted */
        int qos_scaled_relprio;

        assert(qos >= 0 && qos < THREAD_QOS_LAST);
        assert(qos_relprio <= 0 && qos_relprio >= THREAD_QOS_MIN_TIER_IMPORTANCE);

        priority = thread_qos_policy_params.qos_pri[qos];
        qos_scaled_relprio = thread_qos_scaled_relative_priority(qos, qos_relprio);

        if (qos == THREAD_QOS_USER_INTERACTIVE && qos_ui_is_urgent == 1) {
            /* Bump priority 46 to 47 when in a frontmost app */
            qos_scaled_relprio += 1;
        }

        /* TODO: factor in renice priority here? */

        priority += qos_scaled_relprio;
    } else {
        if (thread->importance > MAXPRI)
            priority = MAXPRI;
        else if (thread->importance < -MAXPRI)
            priority = -MAXPRI;
        else
            priority = thread->importance;

        priority += thread->task_priority;
    }

    priority = MAX(priority, thread->user_promotion_basepri);

    /*
     * Clamp priority back into the allowed range for this task.
     * The initial priority value could be out of this range due to:
     *      Task clamped to BG or Utility (max-pri is 4, or 20)
     *      Task is user task (max-pri is 63)
     *      Task is kernel task (max-pri is 95)
     * Note that thread->importance is user-settable to any integer
     * via THREAD_PRECEDENCE_POLICY.
     */
    if (priority > thread->max_priority)
        priority = thread->max_priority;
    else if (priority < MINPRI)
        priority = MINPRI;

    if (thread->saved_mode == TH_MODE_REALTIME &&
        thread->sched_flags & TH_SFLAG_FAILSAFE)
        priority = DEPRESSPRI;

    if (thread->effective_policy.thep_terminated == TRUE) {
        /*
         * We temporarily want to override the expected priority to
         * ensure that the thread exits in a timely manner.
         * Note that this is allowed to exceed thread->max_priority
         * so that the thread is no longer clamped to background
         * during the final exit phase.
         */
        if (priority < thread->task_priority)
            priority = thread->task_priority;
        if (priority < BASEPRI_DEFAULT)
            priority = BASEPRI_DEFAULT;
    }

#if CONFIG_EMBEDDED
    /* No one can have a base priority less than MAXPRI_THROTTLE */
    if (priority < MAXPRI_THROTTLE)
        priority = MAXPRI_THROTTLE;
#endif /* CONFIG_EMBEDDED */

    sched_set_thread_base_priority(thread, priority);
}
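/*
 * Worked example (a sketch, assuming BASEPRI_UTILITY == 20 and
 * MAXPRI_THROTTLE == 4 as in the qos_pri table): a THREAD_QOS_UTILITY thread
 * whose thep_qos_relprio is stored as 4 (i.e. a requested -4REL) gets
 * priority = 20 + thread_qos_scaled_relative_priority(UTILITY, -4)
 *          = 20 + (-(((20 - 4) * 4) >> 4)) = 20 - 4 = 16,
 * which is then clamped to [MINPRI, thread->max_priority] before being
 * applied via sched_set_thread_base_priority().
 */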
907 /* Called with the task lock held, but not the thread mutex or spinlock */
909 thread_policy_update_tasklocked(
912 integer_t max_priority
,
913 task_pend_token_t pend_token
)
915 thread_mtx_lock(thread
);
917 if (!thread
->active
|| thread
->policy_reset
) {
918 thread_mtx_unlock(thread
);
922 spl_t s
= splsched();
926 integer_t old_max_priority
= thread
->max_priority
;
928 thread
->task_priority
= priority
;
929 thread
->max_priority
= max_priority
;
933 * When backgrounding a thread, iOS has the semantic that
934 * realtime and fixed priority threads should be demoted
935 * to timeshare background threads.
937 * On OSX, realtime and fixed priority threads don't lose their mode.
939 * TODO: Do this inside the thread policy update routine in order to avoid double
940 * remove/reinsert for a runnable thread
942 if ((max_priority
<= MAXPRI_THROTTLE
) && (old_max_priority
> MAXPRI_THROTTLE
)) {
943 sched_thread_mode_demote(thread
, TH_SFLAG_THROTTLED
);
944 } else if ((max_priority
> MAXPRI_THROTTLE
) && (old_max_priority
<= MAXPRI_THROTTLE
)) {
945 sched_thread_mode_undemote(thread
, TH_SFLAG_THROTTLED
);
947 #endif /* CONFIG_EMBEDDED */
949 thread_policy_update_spinlocked(thread
, TRUE
, pend_token
);
951 thread_unlock(thread
);
954 thread_mtx_unlock(thread
);
958 * Reset thread to default state in preparation for termination
959 * Called with thread mutex locked
961 * Always called on current thread, so we don't need a run queue remove
969 assert(thread
== current_thread());
974 if (thread
->sched_flags
& TH_SFLAG_FAILSAFE
)
975 sched_thread_mode_undemote(thread
, TH_SFLAG_FAILSAFE
);
977 if (thread
->sched_flags
& TH_SFLAG_THROTTLED
)
978 sched_thread_mode_undemote(thread
, TH_SFLAG_THROTTLED
);
980 /* At this point, the various demotions should be inactive */
981 assert(!(thread
->sched_flags
& TH_SFLAG_DEMOTED_MASK
));
982 assert(!(thread
->sched_flags
& TH_SFLAG_THROTTLED
));
983 assert(!(thread
->sched_flags
& TH_SFLAG_DEPRESSED_MASK
));
985 /* Reset thread back to task-default basepri and mode */
986 sched_mode_t newmode
= SCHED(initial_thread_sched_mode
)(thread
->task
);
988 sched_set_thread_mode(thread
, newmode
);
990 thread
->importance
= 0;
992 /* Prevent further changes to thread base priority or mode */
993 thread
->policy_reset
= 1;
995 sched_set_thread_base_priority(thread
, thread
->task_priority
);
997 thread_unlock(thread
);
1004 thread_policy_flavor_t flavor
,
1005 thread_policy_t policy_info
,
1006 mach_msg_type_number_t
*count
,
1007 boolean_t
*get_default
)
1009 kern_return_t result
= KERN_SUCCESS
;
1011 if (thread
== THREAD_NULL
)
1012 return (KERN_INVALID_ARGUMENT
);
1014 thread_mtx_lock(thread
);
1015 if (!thread
->active
) {
1016 thread_mtx_unlock(thread
);
1018 return (KERN_TERMINATED
);
1023 case THREAD_EXTENDED_POLICY
:
1025 boolean_t timeshare
= TRUE
;
1027 if (!(*get_default
)) {
1028 spl_t s
= splsched();
1029 thread_lock(thread
);
1031 if ( (thread
->sched_mode
!= TH_MODE_REALTIME
) &&
1032 (thread
->saved_mode
!= TH_MODE_REALTIME
) ) {
1033 if (!(thread
->sched_flags
& TH_SFLAG_DEMOTED_MASK
))
1034 timeshare
= (thread
->sched_mode
== TH_MODE_TIMESHARE
) != 0;
1036 timeshare
= (thread
->saved_mode
== TH_MODE_TIMESHARE
) != 0;
1039 *get_default
= TRUE
;
1041 thread_unlock(thread
);
1045 if (*count
>= THREAD_EXTENDED_POLICY_COUNT
) {
1046 thread_extended_policy_t info
;
1048 info
= (thread_extended_policy_t
)policy_info
;
1049 info
->timeshare
= timeshare
;
1055 case THREAD_TIME_CONSTRAINT_POLICY
:
1057 thread_time_constraint_policy_t info
;
1059 if (*count
< THREAD_TIME_CONSTRAINT_POLICY_COUNT
) {
1060 result
= KERN_INVALID_ARGUMENT
;
1064 info
= (thread_time_constraint_policy_t
)policy_info
;
1066 if (!(*get_default
)) {
1067 spl_t s
= splsched();
1068 thread_lock(thread
);
1070 if ( (thread
->sched_mode
== TH_MODE_REALTIME
) ||
1071 (thread
->saved_mode
== TH_MODE_REALTIME
) ) {
1072 info
->period
= thread
->realtime
.period
;
1073 info
->computation
= thread
->realtime
.computation
;
1074 info
->constraint
= thread
->realtime
.constraint
;
1075 info
->preemptible
= thread
->realtime
.preemptible
;
1078 *get_default
= TRUE
;
1080 thread_unlock(thread
);
1086 info
->computation
= default_timeshare_computation
;
1087 info
->constraint
= default_timeshare_constraint
;
1088 info
->preemptible
= TRUE
;
1094 case THREAD_PRECEDENCE_POLICY
:
1096 thread_precedence_policy_t info
;
1098 if (*count
< THREAD_PRECEDENCE_POLICY_COUNT
) {
1099 result
= KERN_INVALID_ARGUMENT
;
1103 info
= (thread_precedence_policy_t
)policy_info
;
1105 if (!(*get_default
)) {
1106 spl_t s
= splsched();
1107 thread_lock(thread
);
1109 info
->importance
= thread
->importance
;
1111 thread_unlock(thread
);
1115 info
->importance
= 0;
1120 case THREAD_AFFINITY_POLICY
:
1122 thread_affinity_policy_t info
;
1124 if (!thread_affinity_is_supported()) {
1125 result
= KERN_NOT_SUPPORTED
;
1128 if (*count
< THREAD_AFFINITY_POLICY_COUNT
) {
1129 result
= KERN_INVALID_ARGUMENT
;
1133 info
= (thread_affinity_policy_t
)policy_info
;
1135 if (!(*get_default
))
1136 info
->affinity_tag
= thread_affinity_get(thread
);
1138 info
->affinity_tag
= THREAD_AFFINITY_TAG_NULL
;
1143 case THREAD_POLICY_STATE
:
1145 thread_policy_state_t info
;
1147 if (*count
< THREAD_POLICY_STATE_COUNT
) {
1148 result
= KERN_INVALID_ARGUMENT
;
1152 /* Only root can get this info */
1153 if (current_task()->sec_token
.val
[0] != 0) {
1154 result
= KERN_PROTECTION_FAILURE
;
1158 info
= (thread_policy_state_t
)(void*)policy_info
;
1160 if (!(*get_default
)) {
1163 spl_t s
= splsched();
1164 thread_lock(thread
);
1166 info
->flags
|= (thread
->static_param
? THREAD_POLICY_STATE_FLAG_STATIC_PARAM
: 0);
1168 info
->thps_requested_policy
= *(uint64_t*)(void*)(&thread
->requested_policy
);
1169 info
->thps_effective_policy
= *(uint64_t*)(void*)(&thread
->effective_policy
);
1171 info
->thps_user_promotions
= thread
->user_promotions
;
1172 info
->thps_user_promotion_basepri
= thread
->user_promotion_basepri
;
1173 info
->thps_ipc_overrides
= thread
->ipc_overrides
;
1175 proc_get_thread_policy_bitfield(thread
, info
);
1177 thread_unlock(thread
);
1180 info
->requested
= 0;
1181 info
->effective
= 0;
1188 case THREAD_LATENCY_QOS_POLICY
:
1190 thread_latency_qos_policy_t info
= (thread_latency_qos_policy_t
) policy_info
;
1191 thread_latency_qos_t plqos
;
1193 if (*count
< THREAD_LATENCY_QOS_POLICY_COUNT
) {
1194 result
= KERN_INVALID_ARGUMENT
;
1201 plqos
= proc_get_thread_policy_locked(thread
, TASK_POLICY_ATTRIBUTE
, TASK_POLICY_LATENCY_QOS
, NULL
);
1204 info
->thread_latency_qos_tier
= qos_latency_policy_package(plqos
);
1208 case THREAD_THROUGHPUT_QOS_POLICY
:
1210 thread_throughput_qos_policy_t info
= (thread_throughput_qos_policy_t
) policy_info
;
1211 thread_throughput_qos_t ptqos
;
1213 if (*count
< THREAD_THROUGHPUT_QOS_POLICY_COUNT
) {
1214 result
= KERN_INVALID_ARGUMENT
;
1221 ptqos
= proc_get_thread_policy_locked(thread
, TASK_POLICY_ATTRIBUTE
, TASK_POLICY_THROUGH_QOS
, NULL
);
1224 info
->thread_throughput_qos_tier
= qos_throughput_policy_package(ptqos
);
1228 case THREAD_QOS_POLICY
:
1230 thread_qos_policy_t info
= (thread_qos_policy_t
)policy_info
;
1232 if (*count
< THREAD_QOS_POLICY_COUNT
) {
1233 result
= KERN_INVALID_ARGUMENT
;
1237 if (!(*get_default
)) {
1238 int relprio_value
= 0;
1239 info
->qos_tier
= proc_get_thread_policy_locked(thread
, TASK_POLICY_ATTRIBUTE
,
1240 TASK_POLICY_QOS_AND_RELPRIO
, &relprio_value
);
1242 info
->tier_importance
= -relprio_value
;
1244 info
->qos_tier
= THREAD_QOS_UNSPECIFIED
;
1245 info
->tier_importance
= 0;
1252 result
= KERN_INVALID_ARGUMENT
;
1256 thread_mtx_unlock(thread
);
1262 thread_policy_create(thread_t thread
)
1264 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE
,
1265 (IMPORTANCE_CODE(IMP_UPDATE
, (IMP_UPDATE_TASK_CREATE
| TASK_POLICY_THREAD
))) | DBG_FUNC_START
,
1266 thread_tid(thread
), theffective_0(thread
),
1267 theffective_1(thread
), thread
->base_pri
, 0);
1269 /* We pass a pend token but ignore it */
1270 struct task_pend_token pend_token
= {};
1272 thread_policy_update_internal_spinlocked(thread
, TRUE
, &pend_token
);
1274 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE
,
1275 (IMPORTANCE_CODE(IMP_UPDATE
, (IMP_UPDATE_TASK_CREATE
| TASK_POLICY_THREAD
))) | DBG_FUNC_END
,
1276 thread_tid(thread
), theffective_0(thread
),
1277 theffective_1(thread
), thread
->base_pri
, 0);
1281 thread_policy_update_spinlocked(thread_t thread
, boolean_t recompute_priority
, task_pend_token_t pend_token
)
1283 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE
,
1284 (IMPORTANCE_CODE(IMP_UPDATE
, TASK_POLICY_THREAD
) | DBG_FUNC_START
),
1285 thread_tid(thread
), theffective_0(thread
),
1286 theffective_1(thread
), thread
->base_pri
, 0);
1288 thread_policy_update_internal_spinlocked(thread
, recompute_priority
, pend_token
);
1290 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE
,
1291 (IMPORTANCE_CODE(IMP_UPDATE
, TASK_POLICY_THREAD
)) | DBG_FUNC_END
,
1292 thread_tid(thread
), theffective_0(thread
),
1293 theffective_1(thread
), thread
->base_pri
, 0);
1299 * One thread state update function TO RULE THEM ALL
1301 * This function updates the thread effective policy fields
1302 * and pushes the results to the relevant subsystems.
1304 * Returns TRUE if a pended action needs to be run.
1306 * Called with thread spinlock locked, task may be locked, thread mutex may be locked
1309 thread_policy_update_internal_spinlocked(thread_t thread
, boolean_t recompute_priority
,
1310 task_pend_token_t pend_token
)
1314 * Gather requested policy and effective task state
1317 struct thread_requested_policy requested
= thread
->requested_policy
;
1318 struct task_effective_policy task_effective
= thread
->task
->effective_policy
;
1322 * Calculate new effective policies from requested policy, task and thread state
1324 * Don't change requested, it won't take effect
1327 struct thread_effective_policy next
= {};
1329 next
.thep_qos_ui_is_urgent
= task_effective
.tep_qos_ui_is_urgent
;
1331 uint32_t next_qos
= requested
.thrp_qos
;
1333 if (requested
.thrp_qos
!= THREAD_QOS_UNSPECIFIED
) {
1334 if (requested
.thrp_qos_override
!= THREAD_QOS_UNSPECIFIED
)
1335 next_qos
= MAX(requested
.thrp_qos_override
, next_qos
);
1337 if (requested
.thrp_qos_promote
!= THREAD_QOS_UNSPECIFIED
)
1338 next_qos
= MAX(requested
.thrp_qos_promote
, next_qos
);
1340 if (requested
.thrp_qos_ipc_override
!= THREAD_QOS_UNSPECIFIED
)
1341 next_qos
= MAX(requested
.thrp_qos_ipc_override
, next_qos
);
1344 next
.thep_qos
= next_qos
;
1346 /* A task clamp will result in an effective QoS even when requested is UNSPECIFIED */
1347 if (task_effective
.tep_qos_clamp
!= THREAD_QOS_UNSPECIFIED
) {
1348 if (next
.thep_qos
!= THREAD_QOS_UNSPECIFIED
)
1349 next
.thep_qos
= MIN(task_effective
.tep_qos_clamp
, next
.thep_qos
);
1351 next
.thep_qos
= task_effective
.tep_qos_clamp
;
1355 * Extract outbound-promotion QoS before applying task ceiling or BG clamp
1356 * This allows QoS promotions to work properly even after the process is unclamped.
1358 next
.thep_qos_promote
= next
.thep_qos
;
1360 /* The ceiling only applies to threads that are in the QoS world */
1361 if (task_effective
.tep_qos_ceiling
!= THREAD_QOS_UNSPECIFIED
&&
1362 next
.thep_qos
!= THREAD_QOS_UNSPECIFIED
) {
1363 next
.thep_qos
= MIN(task_effective
.tep_qos_ceiling
, next
.thep_qos
);
1366 /* Apply the sync ipc qos override */
1367 if (requested
.thrp_qos_sync_ipc_override
!= THREAD_QOS_UNSPECIFIED
)
1368 next
.thep_qos
= MAX(requested
.thrp_qos_sync_ipc_override
, next
.thep_qos
);
1371 * The QoS relative priority is only applicable when the original programmer's
1372 * intended (requested) QoS is in effect. When the QoS is clamped (e.g.
1373 * USER_INITIATED-13REL clamped to UTILITY), the relative priority is not honored,
1374 * since otherwise it would be lower than unclamped threads. Similarly, in the
1375 * presence of boosting, the programmer doesn't know what other actors
1376 * are boosting the thread.
1378 if ((requested
.thrp_qos
!= THREAD_QOS_UNSPECIFIED
) &&
1379 (requested
.thrp_qos
== next
.thep_qos
) &&
1380 (requested
.thrp_qos_override
== THREAD_QOS_UNSPECIFIED
)) {
1381 next
.thep_qos_relprio
= requested
.thrp_qos_relprio
;
1383 next
.thep_qos_relprio
= 0;
1386 /* Calculate DARWIN_BG */
1387 boolean_t wants_darwinbg
= FALSE
;
1388 boolean_t wants_all_sockets_bg
= FALSE
; /* Do I want my existing sockets to be bg */
1391 * If DARWIN_BG has been requested at either level, it's engaged.
1392 * darwinbg threads always create bg sockets,
1393 * but only some types of darwinbg change the sockets
1394 * after they're created
1396 if (requested
.thrp_int_darwinbg
|| requested
.thrp_ext_darwinbg
)
1397 wants_all_sockets_bg
= wants_darwinbg
= TRUE
;
1399 if (requested
.thrp_pidbind_bg
)
1400 wants_all_sockets_bg
= wants_darwinbg
= TRUE
;
1402 if (task_effective
.tep_darwinbg
)
1403 wants_darwinbg
= TRUE
;
1405 if (next
.thep_qos
== THREAD_QOS_BACKGROUND
||
1406 next
.thep_qos
== THREAD_QOS_MAINTENANCE
)
1407 wants_darwinbg
= TRUE
;
1409 /* Calculate side effects of DARWIN_BG */
1412 next
.thep_darwinbg
= 1;
1414 if (next
.thep_darwinbg
|| task_effective
.tep_new_sockets_bg
)
1415 next
.thep_new_sockets_bg
= 1;
1417 /* Don't use task_effective.tep_all_sockets_bg here */
1418 if (wants_all_sockets_bg
)
1419 next
.thep_all_sockets_bg
= 1;
1421 /* darwinbg implies background QOS (or lower) */
1422 if (next
.thep_darwinbg
&&
1423 (next
.thep_qos
> THREAD_QOS_BACKGROUND
|| next
.thep_qos
== THREAD_QOS_UNSPECIFIED
)) {
1424 next
.thep_qos
= THREAD_QOS_BACKGROUND
;
1425 next
.thep_qos_relprio
= 0;
1428 /* Calculate IO policy */
1430 int iopol
= THROTTLE_LEVEL_TIER0
;
1432 /* Factor in the task's IO policy */
1433 if (next
.thep_darwinbg
)
1434 iopol
= MAX(iopol
, task_effective
.tep_bg_iotier
);
1436 iopol
= MAX(iopol
, task_effective
.tep_io_tier
);
1438 /* Look up the associated IO tier value for the QoS class */
1439 iopol
= MAX(iopol
, thread_qos_policy_params
.qos_iotier
[next
.thep_qos
]);
1441 iopol
= MAX(iopol
, requested
.thrp_int_iotier
);
1442 iopol
= MAX(iopol
, requested
.thrp_ext_iotier
);
1444 next
.thep_io_tier
= iopol
;
1447 * If a QoS override is causing IO to go into a lower tier, we also set
1448 * the passive bit so that a thread doesn't end up stuck in its own throttle
1449 * window when the override goes away.
1451 boolean_t qos_io_override_active
= FALSE
;
1452 if (thread_qos_policy_params
.qos_iotier
[next
.thep_qos
] <
1453 thread_qos_policy_params
.qos_iotier
[requested
.thrp_qos
])
1454 qos_io_override_active
= TRUE
;
1456 /* Calculate Passive IO policy */
1457 if (requested
.thrp_ext_iopassive
||
1458 requested
.thrp_int_iopassive
||
1459 qos_io_override_active
||
1460 task_effective
.tep_io_passive
)
1461 next
.thep_io_passive
= 1;
1463 /* Calculate timer QOS */
1464 uint32_t latency_qos
= requested
.thrp_latency_qos
;
1466 latency_qos
= MAX(latency_qos
, task_effective
.tep_latency_qos
);
1467 latency_qos
= MAX(latency_qos
, thread_qos_policy_params
.qos_latency_qos
[next
.thep_qos
]);
1469 next
.thep_latency_qos
= latency_qos
;
1471 /* Calculate throughput QOS */
1472 uint32_t through_qos
= requested
.thrp_through_qos
;
1474 through_qos
= MAX(through_qos
, task_effective
.tep_through_qos
);
1475 through_qos
= MAX(through_qos
, thread_qos_policy_params
.qos_through_qos
[next
.thep_qos
]);
1477 next
.thep_through_qos
= through_qos
;
1479 if (task_effective
.tep_terminated
|| requested
.thrp_terminated
) {
1480 /* Shoot down the throttles that slow down exit or response to SIGTERM */
1481 next
.thep_terminated
= 1;
1482 next
.thep_darwinbg
= 0;
1483 next
.thep_io_tier
= THROTTLE_LEVEL_TIER0
;
1484 next
.thep_qos
= THREAD_QOS_UNSPECIFIED
;
1485 next
.thep_latency_qos
= LATENCY_QOS_TIER_UNSPECIFIED
;
1486 next
.thep_through_qos
= THROUGHPUT_QOS_TIER_UNSPECIFIED
;
1491 * Swap out old policy for new policy
1494 struct thread_effective_policy prev
= thread
->effective_policy
;
1496 thread_update_qos_cpu_time_locked(thread
);
1498 /* This is the point where the new values become visible to other threads */
1499 thread
->effective_policy
= next
;
1503 * Pend updates that can't be done while holding the thread lock
1506 if (prev
.thep_all_sockets_bg
!= next
.thep_all_sockets_bg
)
1507 pend_token
->tpt_update_sockets
= 1;
1509 /* TODO: Doesn't this only need to be done if the throttle went up? */
1510 if (prev
.thep_io_tier
!= next
.thep_io_tier
)
1511 pend_token
->tpt_update_throttle
= 1;
1514 * Check for the attributes that sfi_thread_classify() consults,
1515 * and trigger SFI re-evaluation.
1517 if (prev
.thep_qos
!= next
.thep_qos
||
1518 prev
.thep_darwinbg
!= next
.thep_darwinbg
)
1519 pend_token
->tpt_update_thread_sfi
= 1;
1523 * Update other subsystems as necessary if something has changed
1526 /* Check for the attributes that thread_recompute_priority() consults */
1527 if (prev
.thep_qos
!= next
.thep_qos
||
1528 prev
.thep_qos_relprio
!= next
.thep_qos_relprio
||
1529 prev
.thep_qos_ui_is_urgent
!= next
.thep_qos_ui_is_urgent
||
1530 prev
.thep_terminated
!= next
.thep_terminated
||
1531 pend_token
->tpt_force_recompute_pri
== 1 ||
1532 recompute_priority
) {
1533 thread_recompute_priority(thread
);
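/*
 * Illustrative sketch of the arbitration above (not part of the original
 * file): with requested thrp_qos = THREAD_QOS_UTILITY, a thrp_qos_override
 * of THREAD_QOS_USER_INITIATED, and a task QoS ceiling of THREAD_QOS_UTILITY,
 * next_qos = MAX(UTILITY, USER_INITIATED) = USER_INITIATED;
 * thep_qos_promote is captured as USER_INITIATED before the ceiling is
 * applied, and the ceiling then brings thep_qos back down to UTILITY.
 */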
1539 * Initiate a thread policy state transition on a thread with its TID
1540 * Useful if you cannot guarantee the thread won't get terminated
1541 * Precondition: No locks are held
1542 * Will take task lock - using the non-tid variant is faster
1543 * if you already have a thread ref.
1546 proc_set_thread_policy_with_tid(task_t task
,
1552 /* takes task lock, returns ref'ed thread or NULL */
1553 thread_t thread
= task_findtid(task
, tid
);
1555 if (thread
== THREAD_NULL
)
1558 proc_set_thread_policy(thread
, category
, flavor
, value
);
1560 thread_deallocate(thread
);
1564 * Initiate a thread policy transition on a thread
1565 * This path supports networking transitions (i.e. darwinbg transitions)
1566 * Precondition: No locks are held
1569 proc_set_thread_policy(thread_t thread
,
1574 struct task_pend_token pend_token
= {};
1576 thread_mtx_lock(thread
);
1578 proc_set_thread_policy_locked(thread
, category
, flavor
, value
, 0, &pend_token
);
1580 thread_mtx_unlock(thread
);
1582 thread_policy_update_complete_unlocked(thread
, &pend_token
);
1586 * KPI for pthread kext to call to set thread base QoS values during a workq wakeup
1587 * May be called with interrupts disabled and workqueue/waitqueue/kqueue locks held
1589 * Does NOT do update completion, so the thread MUST be in a safe place WRT
1590 * IO throttling and SFI.
1592 * TODO: Can I assert 'it must be in a safe place'?
1595 thread_set_workq_qos(thread_t thread
,
1597 int relprio
) /* relprio is -16 to 0 */
1599 assert(qos_tier
>= 0 && qos_tier
<= THREAD_QOS_LAST
);
1600 assert(relprio
<= 0 && relprio
>= THREAD_QOS_MIN_TIER_IMPORTANCE
);
1602 if (!(qos_tier
>= 0 && qos_tier
<= THREAD_QOS_LAST
))
1603 return KERN_FAILURE
;
1604 if (!(relprio
<= 0 && relprio
>= THREAD_QOS_MIN_TIER_IMPORTANCE
))
1605 return KERN_FAILURE
;
1607 if (qos_tier
== THREAD_QOS_UNSPECIFIED
) {
1608 assert(relprio
== 0);
1610 return KERN_FAILURE
;
1613 assert(thread
->static_param
);
1614 if (!thread
->static_param
) {
1615 return KERN_FAILURE
;
1618 /* Concern: this doesn't hold the mutex... */
1619 //if (!thread->active)
1620 // return KERN_TERMINATED;
1622 struct task_pend_token pend_token
= {};
1624 proc_set_thread_policy_locked(thread
, TASK_POLICY_ATTRIBUTE
, TASK_POLICY_QOS_AND_RELPRIO
, qos_tier
, -relprio
, &pend_token
);
1626 assert(pend_token
.tpt_update_sockets
== 0);
1627 /* we don't need to update throttle or sfi because pthread kext promises the thread is in a safe place */
1628 /* TODO: Do we need to update SFI to ensure it gets tagged with the AST? */
1630 return KERN_SUCCESS
;
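/*
 * Illustrative sketch (not part of the original file): a pthread workqueue
 * wakeup that wants THREAD_QOS_UTILITY with relative priority -2 would call
 * thread_set_workq_qos(thread, THREAD_QOS_UTILITY, -2); the value is stored
 * negated (as +2) in thrp_qos_relprio via TASK_POLICY_QOS_AND_RELPRIO and
 * negated again when thread_recompute_priority() consumes it.
 */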
1635 * Do the things that can't be done while holding a thread mutex.
1636 * These are set up to call back into thread policy to get the latest value,
1637 * so they don't have to be synchronized with the update.
1638 * The only required semantic is 'call this sometime after updating effective policy'
1640 * Precondition: Thread mutex is not held
1642 * This may be called with the task lock held, but in that case it won't be
1643 * called with tpt_update_sockets set.
1646 thread_policy_update_complete_unlocked(thread_t thread
, task_pend_token_t pend_token
)
1649 if (pend_token
->tpt_update_sockets
)
1650 proc_apply_task_networkbg(thread
->task
->bsd_info
, thread
);
1651 #endif /* MACH_BSD */
1653 if (pend_token
->tpt_update_throttle
)
1654 rethrottle_thread(thread
->uthread
);
1656 if (pend_token
->tpt_update_thread_sfi
)
1657 sfi_reevaluate(thread
);
1661 * Set and update thread policy
1662 * Thread mutex might be held
1665 proc_set_thread_policy_locked(thread_t thread
,
1670 task_pend_token_t pend_token
)
1672 spl_t s
= splsched();
1673 thread_lock(thread
);
1675 proc_set_thread_policy_spinlocked(thread
, category
, flavor
, value
, value2
, pend_token
);
1677 thread_unlock(thread
);
1682 * Set and update thread policy
1683 * Thread spinlock is held
1686 proc_set_thread_policy_spinlocked(thread_t thread
,
1691 task_pend_token_t pend_token
)
1693 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE
,
1694 (IMPORTANCE_CODE(flavor
, (category
| TASK_POLICY_THREAD
))) | DBG_FUNC_START
,
1695 thread_tid(thread
), threquested_0(thread
),
1696 threquested_1(thread
), value
, 0);
1698 thread_set_requested_policy_spinlocked(thread
, category
, flavor
, value
, value2
);
1700 thread_policy_update_spinlocked(thread
, FALSE
, pend_token
);
1702 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE
,
1703 (IMPORTANCE_CODE(flavor
, (category
| TASK_POLICY_THREAD
))) | DBG_FUNC_END
,
1704 thread_tid(thread
), threquested_0(thread
),
1705 threquested_1(thread
), tpending(pend_token
), 0);
1709 * Set the requested state for a specific flavor to a specific value.
1712 thread_set_requested_policy_spinlocked(thread_t thread
,
1720 struct thread_requested_policy requested
= thread
->requested_policy
;
1724 /* Category: EXTERNAL and INTERNAL, thread and task */
1726 case TASK_POLICY_DARWIN_BG
:
1727 if (category
== TASK_POLICY_EXTERNAL
)
1728 requested
.thrp_ext_darwinbg
= value
;
1730 requested
.thrp_int_darwinbg
= value
;
1733 case TASK_POLICY_IOPOL
:
1734 proc_iopol_to_tier(value
, &tier
, &passive
);
1735 if (category
== TASK_POLICY_EXTERNAL
) {
1736 requested
.thrp_ext_iotier
= tier
;
1737 requested
.thrp_ext_iopassive
= passive
;
1739 requested
.thrp_int_iotier
= tier
;
1740 requested
.thrp_int_iopassive
= passive
;
1744 case TASK_POLICY_IO
:
1745 if (category
== TASK_POLICY_EXTERNAL
)
1746 requested
.thrp_ext_iotier
= value
;
1748 requested
.thrp_int_iotier
= value
;
1751 case TASK_POLICY_PASSIVE_IO
:
1752 if (category
== TASK_POLICY_EXTERNAL
)
1753 requested
.thrp_ext_iopassive
= value
;
1755 requested
.thrp_int_iopassive
= value
;
1758 /* Category: ATTRIBUTE, thread only */
1760 case TASK_POLICY_PIDBIND_BG
:
1761 assert(category
== TASK_POLICY_ATTRIBUTE
);
1762 requested
.thrp_pidbind_bg
= value
;
1765 case TASK_POLICY_LATENCY_QOS
:
1766 assert(category
== TASK_POLICY_ATTRIBUTE
);
1767 requested
.thrp_latency_qos
= value
;
1770 case TASK_POLICY_THROUGH_QOS
:
1771 assert(category
== TASK_POLICY_ATTRIBUTE
);
1772 requested
.thrp_through_qos
= value
;
1775 case TASK_POLICY_QOS
:
1776 assert(category
== TASK_POLICY_ATTRIBUTE
);
1777 requested
.thrp_qos
= value
;
1780 case TASK_POLICY_QOS_OVERRIDE
:
1781 assert(category
== TASK_POLICY_ATTRIBUTE
);
1782 requested
.thrp_qos_override
= value
;
1785 case TASK_POLICY_QOS_AND_RELPRIO
:
1786 assert(category
== TASK_POLICY_ATTRIBUTE
);
1787 requested
.thrp_qos
= value
;
1788 requested
.thrp_qos_relprio
= value2
;
1789 DTRACE_BOOST3(qos_set
, uint64_t, thread
->thread_id
, int, requested
.thrp_qos
, int, requested
.thrp_qos_relprio
);
1792 case TASK_POLICY_QOS_PROMOTE
:
1793 assert(category
== TASK_POLICY_ATTRIBUTE
);
1794 requested
.thrp_qos_promote
= value
;
1797 case TASK_POLICY_QOS_IPC_OVERRIDE
:
1798 assert(category
== TASK_POLICY_ATTRIBUTE
);
1799 requested
.thrp_qos_ipc_override
= value
;
1802 case TASK_POLICY_QOS_SYNC_IPC_OVERRIDE
:
1803 assert(category
== TASK_POLICY_ATTRIBUTE
);
1804 requested
.thrp_qos_sync_ipc_override
= value
;
1807 case TASK_POLICY_TERMINATED
:
1808 assert(category
== TASK_POLICY_ATTRIBUTE
);
1809 requested
.thrp_terminated
= value
;
1813 panic("unknown task policy: %d %d %d", category
, flavor
, value
);
1817 thread
->requested_policy
= requested
;
1821 * Gets what you set. Effective values may be different.
1822 * Precondition: No locks are held
1825 proc_get_thread_policy(thread_t thread
,
1830 thread_mtx_lock(thread
);
1831 value
= proc_get_thread_policy_locked(thread
, category
, flavor
, NULL
);
1832 thread_mtx_unlock(thread
);
1837 proc_get_thread_policy_locked(thread_t thread
,
1844 spl_t s
= splsched();
1845 thread_lock(thread
);
1847 value
= thread_get_requested_policy_spinlocked(thread
, category
, flavor
, value2
);
1849 thread_unlock(thread
);
1856 * Gets what you set. Effective values may be different.
1859 thread_get_requested_policy_spinlocked(thread_t thread
,
1866 struct thread_requested_policy requested
= thread
->requested_policy
;
1869 case TASK_POLICY_DARWIN_BG
:
1870 if (category
== TASK_POLICY_EXTERNAL
)
1871 value
= requested
.thrp_ext_darwinbg
;
1873 value
= requested
.thrp_int_darwinbg
;
1875 case TASK_POLICY_IOPOL
:
1876 if (category
== TASK_POLICY_EXTERNAL
)
1877 value
= proc_tier_to_iopol(requested
.thrp_ext_iotier
,
1878 requested
.thrp_ext_iopassive
);
1880 value
= proc_tier_to_iopol(requested
.thrp_int_iotier
,
1881 requested
.thrp_int_iopassive
);
1883 case TASK_POLICY_IO
:
1884 if (category
== TASK_POLICY_EXTERNAL
)
1885 value
= requested
.thrp_ext_iotier
;
1887 value
= requested
.thrp_int_iotier
;
1889 case TASK_POLICY_PASSIVE_IO
:
1890 if (category
== TASK_POLICY_EXTERNAL
)
1891 value
= requested
.thrp_ext_iopassive
;
1893 value
= requested
.thrp_int_iopassive
;
1895 case TASK_POLICY_QOS
:
1896 assert(category
== TASK_POLICY_ATTRIBUTE
);
1897 value
= requested
.thrp_qos
;
1899 case TASK_POLICY_QOS_OVERRIDE
:
1900 assert(category
== TASK_POLICY_ATTRIBUTE
);
1901 value
= requested
.thrp_qos_override
;
1903 case TASK_POLICY_LATENCY_QOS
:
1904 assert(category
== TASK_POLICY_ATTRIBUTE
);
1905 value
= requested
.thrp_latency_qos
;
1907 case TASK_POLICY_THROUGH_QOS
:
1908 assert(category
== TASK_POLICY_ATTRIBUTE
);
1909 value
= requested
.thrp_through_qos
;
1911 case TASK_POLICY_QOS_AND_RELPRIO
:
1912 assert(category
== TASK_POLICY_ATTRIBUTE
);
1913 assert(value2
!= NULL
);
1914 value
= requested
.thrp_qos
;
1915 *value2
= requested
.thrp_qos_relprio
;
1917 case TASK_POLICY_QOS_PROMOTE
:
1918 assert(category
== TASK_POLICY_ATTRIBUTE
);
1919 value
= requested
.thrp_qos_promote
;
1921 case TASK_POLICY_QOS_IPC_OVERRIDE
:
1922 assert(category
== TASK_POLICY_ATTRIBUTE
);
1923 value
= requested
.thrp_qos_ipc_override
;
1925 case TASK_POLICY_TERMINATED
:
1926 assert(category
== TASK_POLICY_ATTRIBUTE
);
1927 value
= requested
.thrp_terminated
;
1931 panic("unknown policy_flavor %d", flavor
);
1939 * Gets what is actually in effect, for subsystems which pull policy instead of receive updates.
1941 * NOTE: This accessor does not take the task or thread lock.
1942 * Notifications of state updates need to be externally synchronized with state queries.
1943 * This routine *MUST* remain interrupt safe, as it is potentially invoked
1944 * within the context of a timer interrupt.
1946 * TODO: I think we can get away with architecting this such that we don't need to look at the task ever.
1947 * Is that a good idea? Maybe it's best to avoid evaluate-all-the-threads updates.
1948 * I don't think that cost is worth not having the right answer.
1951 proc_get_effective_thread_policy(thread_t thread
,
1957 case TASK_POLICY_DARWIN_BG
:
1959 * This call is used within the timer layer, as well as
1960 * prioritizing requests to the graphics system.
1961 * It also informs SFI and originator-bg-state.
1962 * Returns 1 for background mode, 0 for normal mode
1965 value
= thread
->effective_policy
.thep_darwinbg
? 1 : 0;
1967 case TASK_POLICY_IO
:
1969 * The I/O system calls here to find out what throttling tier to apply to an operation.
1970 * Returns THROTTLE_LEVEL_* values
1972 value
= thread
->effective_policy
.thep_io_tier
;
1973 if (thread
->iotier_override
!= THROTTLE_LEVEL_NONE
)
1974 value
= MIN(value
, thread
->iotier_override
);
1976 case TASK_POLICY_PASSIVE_IO
:
1978 * The I/O system calls here to find out whether an operation should be passive.
1979 * (i.e. not cause operations with lower throttle tiers to be throttled)
1980 * Returns 1 for passive mode, 0 for normal mode
1982 * If an override is causing IO to go into a lower tier, we also set
1983 * the passive bit so that a thread doesn't end up stuck in its own throttle
1984 * window when the override goes away.
1986 value
= thread
->effective_policy
.thep_io_passive
? 1 : 0;
1987 if (thread
->iotier_override
!= THROTTLE_LEVEL_NONE
&&
1988 thread
->iotier_override
< thread
->effective_policy
.thep_io_tier
)
1991 case TASK_POLICY_ALL_SOCKETS_BG
:
1993 * do_background_socket() calls this to determine whether
1994 * it should change the thread's sockets
1995 * Returns 1 for background mode, 0 for normal mode
1996 * This consults both thread and task so un-DBGing a thread while the task is BG
1997 * doesn't get you out of the network throttle.
1999 value
= (thread
->effective_policy
.thep_all_sockets_bg
||
2000 thread
->task
->effective_policy
.tep_all_sockets_bg
) ? 1 : 0;
2002 case TASK_POLICY_NEW_SOCKETS_BG
:
2004 * socreate() calls this to determine if it should mark a new socket as background
2005 * Returns 1 for background mode, 0 for normal mode
2007 value
= thread
->effective_policy
.thep_new_sockets_bg
? 1 : 0;
2009 case TASK_POLICY_LATENCY_QOS
:
2011 * timer arming calls into here to find out the timer coalescing level
2012 * Returns a latency QoS tier (0-6)
2014 value
= thread
->effective_policy
.thep_latency_qos
;
2016 case TASK_POLICY_THROUGH_QOS
:
2018 * This value is passed into the urgency callout from the scheduler
2019 * to the performance management subsystem.
2021 * Returns a throughput QoS tier (0-6)
2023 value
= thread
->effective_policy
.thep_through_qos
;
2025 case TASK_POLICY_QOS
:
2027 * This is communicated to the performance management layer and SFI.
2029 * Returns a QoS policy tier
2031 value
= thread
->effective_policy
.thep_qos
;
2034 panic("unknown thread policy flavor %d", flavor
);
/*
 * (integer_t) casts limit the number of bits we can fit here
 * this interface is deprecated and replaced by the _EXT struct?
 */
static void
proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info)
{
	uint64_t bits = 0;
	struct thread_requested_policy requested = thread->requested_policy;

	bits |= (requested.thrp_int_darwinbg    ? POLICY_REQ_INT_DARWIN_BG  : 0);
	bits |= (requested.thrp_ext_darwinbg    ? POLICY_REQ_EXT_DARWIN_BG  : 0);
	bits |= (requested.thrp_int_iotier      ? (((uint64_t)requested.thrp_int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0);
	bits |= (requested.thrp_ext_iotier      ? (((uint64_t)requested.thrp_ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0);
	bits |= (requested.thrp_int_iopassive   ? POLICY_REQ_INT_PASSIVE_IO : 0);
	bits |= (requested.thrp_ext_iopassive   ? POLICY_REQ_EXT_PASSIVE_IO : 0);

	bits |= (requested.thrp_qos             ? (((uint64_t)requested.thrp_qos) << POLICY_REQ_TH_QOS_SHIFT) : 0);
	bits |= (requested.thrp_qos_override    ? (((uint64_t)requested.thrp_qos_override) << POLICY_REQ_TH_QOS_OVER_SHIFT) : 0);

	bits |= (requested.thrp_pidbind_bg      ? POLICY_REQ_PIDBIND_BG : 0);

	bits |= (requested.thrp_latency_qos     ? (((uint64_t)requested.thrp_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0);
	bits |= (requested.thrp_through_qos     ? (((uint64_t)requested.thrp_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0);

	info->requested = (integer_t) bits;
	bits = 0;

	struct thread_effective_policy effective = thread->effective_policy;

	bits |= (effective.thep_darwinbg        ? POLICY_EFF_DARWIN_BG : 0);

	bits |= (effective.thep_io_tier         ? (((uint64_t)effective.thep_io_tier) << POLICY_EFF_IO_TIER_SHIFT) : 0);
	bits |= (effective.thep_io_passive      ? POLICY_EFF_IO_PASSIVE : 0);
	bits |= (effective.thep_all_sockets_bg  ? POLICY_EFF_ALL_SOCKETS_BG : 0);
	bits |= (effective.thep_new_sockets_bg  ? POLICY_EFF_NEW_SOCKETS_BG : 0);

	bits |= (effective.thep_qos             ? (((uint64_t)effective.thep_qos) << POLICY_EFF_TH_QOS_SHIFT) : 0);

	bits |= (effective.thep_latency_qos     ? (((uint64_t)effective.thep_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0);
	bits |= (effective.thep_through_qos     ? (((uint64_t)effective.thep_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0);

	info->effective = (integer_t)bits;
}
/*
 * Sneakily trace either the task and thread requested
 * or just the thread requested, depending on if we have enough room.
 * We do have room on LP64. On LP32, we have to split it between two uintptr_t's.
 *
 *                                  LP32            LP64
 * threquested_0(thread)            thread[0]       task[0]
 * threquested_1(thread)            thread[1]       thread[0]
 */

uintptr_t
threquested_0(thread_t thread)
{
	static_assert(sizeof(struct thread_requested_policy) == sizeof(uint64_t), "size invariant violated");

	uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;

	return raw[0];
}

uintptr_t
threquested_1(thread_t thread)
{
#if defined __LP64__
	return *(uintptr_t*)&thread->task->requested_policy;
#else
	uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
	return raw[1];
#endif
}

uintptr_t
theffective_0(thread_t thread)
{
	static_assert(sizeof(struct thread_effective_policy) == sizeof(uint64_t), "size invariant violated");

	uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
	return raw[0];
}

uintptr_t
theffective_1(thread_t thread)
{
#if defined __LP64__
	return *(uintptr_t*)&thread->task->effective_policy;
#else
	uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
	return raw[1];
#endif
}
/*
 * Set an override on the thread which is consulted with a
 * higher priority than the task/thread policy. This should
 * only be set for temporary grants until the thread
 * returns to the userspace boundary
 *
 * We use atomic operations to swap in the override, with
 * the assumption that the thread itself can
 * read the override and clear it on return to userspace.
 *
 * No locking is performed, since it is acceptable to see
 * a stale override for one loop through throttle_lowpri_io().
 * However a thread reference must be held on the thread.
 */

void set_thread_iotier_override(thread_t thread, int policy)
{
	int current_override;

	/* Let most aggressive I/O policy win until user boundary */
	do {
		current_override = thread->iotier_override;

		if (current_override != THROTTLE_LEVEL_NONE)
			policy = MIN(current_override, policy);

		if (current_override == policy) {
			/* no effective change */
			return;
		}
	} while (!OSCompareAndSwap(current_override, policy, &thread->iotier_override));

	/*
	 * Since the thread may be currently throttled,
	 * re-evaluate tiers and potentially break out
	 * of its throttle window early
	 */
	rethrottle_thread(thread->uthread);
}
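
/*
 * Illustrative sketch (hypothetical caller): a waiter at a higher I/O tier
 * grants a temporary tier boost to the owner of a contended resource. Only
 * set_thread_iotier_override() and THROTTLE_LEVEL_TIER0 are real; the owner
 * lookup is assumed.
 *
 *	thread_t owner = lookup_resource_owner(resource);	// hypothetical
 *	set_thread_iotier_override(owner, THROTTLE_LEVEL_TIER0);
 *	// The owner observes and clears the override on its way back to userspace.
 */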
/*
 * Userspace synchronization routines (like pthread mutexes, pthread reader-writer locks,
 * semaphores, dispatch_sync) may result in priority inversions where a higher priority
 * thread (i.e. scheduler priority, I/O tier, QoS tier) is waiting on a resource owned by
 * a lower priority thread. In these cases, we attempt to propagate the priority token, as
 * long as the subsystem informs us of the relationships between the threads. The userspace
 * synchronization subsystem should maintain the information of owner->resource and
 * resource->waiters itself.
 */

/*
 * This helper canonicalizes the resource/resource_type given the current qos_override_mode
 * in effect. Note that wildcards (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD) may need
 * to be handled specially in the future, but for now it's fine to slam
 * *resource to USER_ADDR_NULL even if it was previously a wildcard.
 */
static void canonicalize_resource_and_type(user_addr_t *resource, int *resource_type) {
	if (qos_override_mode == QOS_OVERRIDE_MODE_OVERHANG_PEAK || qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
		/* Map all input resource/type to a single one */
		*resource = USER_ADDR_NULL;
		*resource_type = THREAD_QOS_OVERRIDE_TYPE_UNKNOWN;
	} else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE) {
		/* no transform */
	} else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_IGNORE_DISPATCH) {
		/* Map all dispatch overrides to a single one, to avoid memory overhead */
		if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE) {
			*resource = USER_ADDR_NULL;
		}
	} else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE) {
		/* Map all mutex overrides to a single one, to avoid memory overhead */
		if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX) {
			*resource = USER_ADDR_NULL;
		}
	}
}
/* This helper routine finds an existing override if known. Locking should be done by caller */
static struct thread_qos_override *
find_qos_override(thread_t    thread,
                  user_addr_t resource,
                  int         resource_type)
{
	struct thread_qos_override *override;

	override = thread->overrides;
	while (override) {
		if (override->override_resource == resource &&
		    override->override_resource_type == resource_type) {
			return override;
		}

		override = override->override_next;
	}

	return NULL;
}
static void
find_and_decrement_qos_override(thread_t    thread,
                                user_addr_t resource,
                                int         resource_type,
                                boolean_t   reset,
                                struct thread_qos_override **free_override_list)
{
	struct thread_qos_override *override, *override_prev;

	override_prev = NULL;
	override = thread->overrides;
	while (override) {
		struct thread_qos_override *override_next = override->override_next;

		if ((THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD == resource || override->override_resource == resource) &&
		    (THREAD_QOS_OVERRIDE_TYPE_WILDCARD == resource_type || override->override_resource_type == resource_type)) {

			if (reset) {
				override->override_contended_resource_count = 0;
			} else {
				override->override_contended_resource_count--;
			}

			if (override->override_contended_resource_count == 0) {
				if (override_prev == NULL) {
					thread->overrides = override_next;
				} else {
					override_prev->override_next = override_next;
				}

				/* Add to out-param for later zfree */
				override->override_next = *free_override_list;
				*free_override_list = override;
			} else {
				override_prev = override;
			}

			if (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD != resource) {
				return;
			}
		} else {
			override_prev = override;
		}

		override = override_next;
	}
}
/* This helper recalculates the current requested override using the policy selected at boot */
static int
calculate_requested_qos_override(thread_t thread)
{
	if (qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
		return THREAD_QOS_UNSPECIFIED;
	}

	/* iterate over all overrides and calculate MAX */
	struct thread_qos_override *override;
	int qos_override = THREAD_QOS_UNSPECIFIED;

	override = thread->overrides;
	while (override) {
		if (qos_override_mode != QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_IGNORE_DISPATCH ||
		    override->override_resource_type != THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE) {
			qos_override = MAX(qos_override, override->override_qos);
		}

		override = override->override_next;
	}

	return qos_override;
}
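
/*
 * Worked example (illustrative only): under
 * QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_IGNORE_DISPATCH, a thread
 * holding a pthread-mutex override at THREAD_QOS_UTILITY and a dispatch
 * asynchronous override at THREAD_QOS_USER_INITIATED ends up with a requested
 * override of THREAD_QOS_UTILITY, because the dispatch entry is skipped by
 * the loop above. Under QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE the MAX of
 * both, THREAD_QOS_USER_INITIATED, would be used instead.
 */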
/*
 * Returns:
 * - 0 on success
 * - EINVAL if some invalid input was passed
 * - EFAULT if user_lock_addr != NULL and needs to be faulted (userland has to
 *   fault and retry)
 * - ESTALE if user_lock_addr != NULL &&
 *   ulock_owner_value_to_port_name(*user_lock_addr) != user_lock_owner
 */
static int
proc_thread_qos_add_override_internal(thread_t         thread,
                                      int              override_qos,
                                      boolean_t        first_override_for_resource,
                                      user_addr_t      resource,
                                      int              resource_type,
                                      user_addr_t      user_lock_addr,
                                      mach_port_name_t user_lock_owner)
{
	struct task_pend_token pend_token = {};
	int rc = 0;

	thread_mtx_lock(thread);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_START,
	                      thread_tid(thread), override_qos, first_override_for_resource ? 1 : 0, 0, 0);

	DTRACE_BOOST5(qos_add_override_pre, uint64_t, thread_tid(thread),
	              uint64_t, thread->requested_policy.thrp_qos,
	              uint64_t, thread->effective_policy.thep_qos,
	              int, override_qos, boolean_t, first_override_for_resource);

	struct thread_qos_override *override;
	struct thread_qos_override *override_new = NULL;
	int new_qos_override, prev_qos_override;
	int new_effective_qos;

	canonicalize_resource_and_type(&resource, &resource_type);

	override = find_qos_override(thread, resource, resource_type);
	if (first_override_for_resource && !override) {
		/* We need to allocate a new object. Drop the thread lock and
		 * recheck afterwards in case someone else added the override
		 */
		thread_mtx_unlock(thread);
		override_new = zalloc(thread_qos_override_zone);
		thread_mtx_lock(thread);
		override = find_qos_override(thread, resource, resource_type);
	}
	if (user_lock_addr) {
		uint64_t val;
		/* Workaround lack of explicit support for 'no-fault copyin'
		 * <rdar://problem/24999882>, as disabling preemption prevents paging in
		 */
		disable_preemption();
		rc = copyin_word(user_lock_addr, &val, sizeof(user_lock_owner));
		enable_preemption();
		if (rc == 0 && ulock_owner_value_to_port_name((uint32_t)val) != user_lock_owner) {
			rc = ESTALE;
		}
		if (rc) {
			prev_qos_override = proc_get_thread_policy_locked(thread,
			        TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);
			new_qos_override = prev_qos_override;
			new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);
			thread_mtx_unlock(thread);
			goto out;
		}
	}
	if (first_override_for_resource && override) {
		/* Someone else already allocated while the thread lock was dropped */
		override->override_contended_resource_count++;
	} else if (!override && override_new) {
		override = override_new;
		override_new = NULL;
		override->override_next = thread->overrides;
		/* since first_override_for_resource was TRUE */
		override->override_contended_resource_count = 1;
		override->override_resource = resource;
		override->override_resource_type = resource_type;
		override->override_qos = THREAD_QOS_UNSPECIFIED;
		thread->overrides = override;
	}

	if (override) {
		if (override->override_qos == THREAD_QOS_UNSPECIFIED)
			override->override_qos = override_qos;
		else
			override->override_qos = MAX(override->override_qos, override_qos);
	}

	/* Determine how to combine the various overrides into a single current
	 * requested override
	 */
	new_qos_override = calculate_requested_qos_override(thread);

	prev_qos_override = proc_get_thread_policy_locked(thread,
	        TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);

	if (new_qos_override != prev_qos_override) {
		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
		                              TASK_POLICY_QOS_OVERRIDE,
		                              new_qos_override, 0, &pend_token);
	}

	new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);

	thread_mtx_unlock(thread);

	thread_policy_update_complete_unlocked(thread, &pend_token);

out:
	if (override_new) {
		zfree(thread_qos_override_zone, override_new);
	}

	DTRACE_BOOST4(qos_add_override_post, int, prev_qos_override,
	              int, new_qos_override, int, new_effective_qos, int, rc);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_END,
	                      new_qos_override, resource, resource_type, 0, 0);

	return rc;
}
int
proc_thread_qos_add_override_check_owner(thread_t         thread,
                                         int              override_qos,
                                         boolean_t        first_override_for_resource,
                                         user_addr_t      resource,
                                         int              resource_type,
                                         user_addr_t      user_lock_addr,
                                         mach_port_name_t user_lock_owner)
{
	return proc_thread_qos_add_override_internal(thread, override_qos,
	        first_override_for_resource, resource, resource_type,
	        user_lock_addr, user_lock_owner);
}
boolean_t
proc_thread_qos_add_override(task_t      task,
                             thread_t    thread,
                             uint64_t    tid,
                             int         override_qos,
                             boolean_t   first_override_for_resource,
                             user_addr_t resource,
                             int         resource_type)
{
	boolean_t has_thread_reference = FALSE;
	int rc = 0;

	if (thread == THREAD_NULL) {
		thread = task_findtid(task, tid);
		/* returns referenced thread */

		if (thread == THREAD_NULL) {
			KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_NONE,
			                      tid, 0, 0xdead, 0, 0);
			return FALSE;
		}
		has_thread_reference = TRUE;
	} else {
		assert(thread->task == task);
	}

	rc = proc_thread_qos_add_override_internal(thread, override_qos,
	        first_override_for_resource, resource, resource_type, 0, 0);

	if (has_thread_reference) {
		thread_deallocate(thread);
	}

	return rc == 0;
}
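
/*
 * Illustrative sketch (hypothetical caller, e.g. the pthread kext handling a
 * contended userspace mutex): add an override for the owning thread by tid,
 * then remove it once the resource is released. Only
 * proc_thread_qos_add_override() / proc_thread_qos_remove_override() and
 * THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX are real; owner_tid and mutex_addr
 * are assumed.
 *
 *	proc_thread_qos_add_override(task, THREAD_NULL, owner_tid,
 *	        THREAD_QOS_USER_INITIATED, TRUE,
 *	        mutex_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
 *	// ... owner runs boosted until the mutex is released ...
 *	proc_thread_qos_remove_override(task, THREAD_NULL, owner_tid,
 *	        mutex_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
 */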
static int
proc_thread_qos_remove_override_internal(thread_t    thread,
                                         user_addr_t resource,
                                         int         resource_type,
                                         boolean_t   reset,
                                         boolean_t   squash)
{
	struct task_pend_token pend_token = {};

	struct thread_qos_override *deferred_free_override_list = NULL;
	int new_qos_override, prev_qos_override, new_effective_qos, prev_qos;
	int new_qos = THREAD_QOS_UNSPECIFIED;

	thread_mtx_lock(thread);

	canonicalize_resource_and_type(&resource, &resource_type);

	find_and_decrement_qos_override(thread, resource, resource_type, reset, &deferred_free_override_list);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_START,
	                      thread_tid(thread), resource, reset, 0, 0);

	DTRACE_BOOST3(qos_remove_override_pre, uint64_t, thread_tid(thread),
	              uint64_t, thread->requested_policy.thrp_qos,
	              uint64_t, thread->effective_policy.thep_qos);

	/* Determine how to combine the various overrides into a single current requested override */
	new_qos_override = calculate_requested_qos_override(thread);

	spl_t s = splsched();
	thread_lock(thread);

	/*
	 * The override chain and therefore the value of the current override is locked with thread mutex,
	 * so we can do a get/set without races. However, the rest of thread policy is locked under the spinlock.
	 * This means you can't change the current override from a spinlock-only setter.
	 */
	prev_qos_override = thread_get_requested_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);

	if (squash) {
		int prev_ipc_override;
		int prev_override;

		/*
		 * Remove the specified overrides, and set the current override as the new base QoS.
		 * Return the new QoS value.
		 */
		prev_ipc_override = thread_get_requested_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_IPC_OVERRIDE, NULL);
		prev_override = MAX(prev_qos_override, prev_ipc_override);

		prev_qos = thread_get_requested_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS, NULL);

		new_qos = MAX(prev_qos, prev_override);
		if (new_qos != prev_qos)
			proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS, new_qos, 0, &pend_token);
	}

	if (new_qos_override != prev_qos_override)
		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, new_qos_override, 0, &pend_token);

	new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);

	thread_unlock(thread);
	splx(s);

	thread_mtx_unlock(thread);

	thread_policy_update_complete_unlocked(thread, &pend_token);

	while (deferred_free_override_list) {
		struct thread_qos_override *override_next = deferred_free_override_list->override_next;

		zfree(thread_qos_override_zone, deferred_free_override_list);
		deferred_free_override_list = override_next;
	}

	DTRACE_BOOST3(qos_remove_override_post, int, prev_qos_override,
	              int, new_qos_override, int, new_effective_qos);

	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_END,
	                      thread_tid(thread), squash, 0, 0, 0);

	return new_qos;
}
int
proc_thread_qos_remove_override(task_t      task,
                                thread_t    thread,
                                uint64_t    tid,
                                user_addr_t resource,
                                int         resource_type)
{
	boolean_t has_thread_reference = FALSE;

	if (thread == THREAD_NULL) {
		thread = task_findtid(task, tid);
		/* returns referenced thread */

		if (thread == THREAD_NULL) {
			KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_NONE,
			                      tid, 0, 0xdead, 0, 0);
			return 0;
		}
		has_thread_reference = TRUE;
	} else {
		assert(task == thread->task);
	}

	proc_thread_qos_remove_override_internal(thread, resource, resource_type, FALSE, FALSE);

	if (has_thread_reference)
		thread_deallocate(thread);

	return 0;
}
int
proc_thread_qos_reset_override(task_t      task,
                               thread_t    thread,
                               uint64_t    tid,
                               user_addr_t resource,
                               int         resource_type)
{
	boolean_t has_thread_reference = FALSE;

	if (thread == THREAD_NULL) {
		thread = task_findtid(task, tid);
		/* returns referenced thread */

		if (thread == THREAD_NULL) {
			KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_NONE,
			                      tid, 0, 0xdead, 0, 0);
			return 0;
		}
		has_thread_reference = TRUE;
	} else {
		assert(task == thread->task);
	}

	proc_thread_qos_remove_override_internal(thread, resource, resource_type, TRUE, FALSE);

	if (has_thread_reference)
		thread_deallocate(thread);

	return 0;
}
/*
 * Clears the requested overrides, and replaces the current QoS with the max
 * of the current QoS and the current override, then returns the new QoS.
 *
 * This is useful in order to reset overrides before parking a workqueue thread,
 * but avoid dropping priority and getting preempted right before parking.
 *
 * Called without any locks held.
 */
int
proc_thread_qos_squash_override(thread_t thread, user_addr_t resource, int resource_type)
{
	return proc_thread_qos_remove_override_internal(thread, resource, resource_type, TRUE, TRUE);
}
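
/*
 * Illustrative sketch (hypothetical caller): before parking a workqueue
 * thread, squash any remaining overrides so the thread keeps its boosted QoS
 * as the new base instead of dropping priority and getting preempted while it
 * parks. Only proc_thread_qos_squash_override() and the wildcard constants
 * are real; the surrounding flow is assumed.
 *
 *	int new_qos = proc_thread_qos_squash_override(thread,
 *	        THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD,
 *	        THREAD_QOS_OVERRIDE_TYPE_WILDCARD);
 *	// new_qos is the thread's QoS after the squash; park at that QoS.
 */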
/* Deallocate before thread termination */
void proc_thread_qos_deallocate(thread_t thread)
{
	/*
	 * There are no more references to this thread,
	 * therefore this thread must not own any more locks,
	 * therefore there must not be any more user promotions.
	 */
	assert(thread->user_promotions == 0);
	assert(thread->requested_policy.thrp_qos_promote == THREAD_QOS_UNSPECIFIED);
	assert(thread->user_promotion_basepri == 0);

	/* This thread must have no more IPC overrides. */
	assert(thread->ipc_overrides == 0);
	assert(thread->requested_policy.thrp_qos_ipc_override == THREAD_QOS_UNSPECIFIED);
	assert(thread->sync_ipc_overrides == 0);
	assert(thread->requested_policy.thrp_qos_sync_ipc_override == THREAD_QOS_UNSPECIFIED);

	/*
	 * Clear out any lingering override objects.
	 */
	struct thread_qos_override *override;

	thread_mtx_lock(thread);
	override = thread->overrides;
	thread->overrides = NULL;
	thread->requested_policy.thrp_qos_override = THREAD_QOS_UNSPECIFIED;
	/* We don't need to re-evaluate thread policy here because the thread has already exited */
	thread_mtx_unlock(thread);

	while (override) {
		struct thread_qos_override *override_next = override->override_next;

		zfree(thread_qos_override_zone, override);
		override = override_next;
	}
}
/*
 * Set up the primordial thread's QoS
 */
void
task_set_main_thread_qos(task_t task, thread_t thread) {
	struct task_pend_token pend_token = {};

	assert(thread->task == task);

	thread_mtx_lock(thread);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	                          (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_START,
	                          thread_tid(thread), threquested_0(thread), threquested_1(thread),
	                          thread->requested_policy.thrp_qos, 0);

	int primordial_qos = task_compute_main_thread_qos(task);

	proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS,
	                              primordial_qos, 0, &pend_token);

	thread_mtx_unlock(thread);

	thread_policy_update_complete_unlocked(thread, &pend_token);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	                          (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_END,
	                          thread_tid(thread), threquested_0(thread), threquested_1(thread),
	                          primordial_qos, 0);
}
/*
 * KPI for pthread kext
 *
 * Return a good guess at what the initial manager QoS will be
 * Dispatch can override this in userspace if it so chooses
 */
int
task_get_default_manager_qos(task_t task)
{
	int primordial_qos = task_compute_main_thread_qos(task);

	if (primordial_qos == THREAD_QOS_LEGACY)
		primordial_qos = THREAD_QOS_USER_INITIATED;

	return primordial_qos;
}
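
/*
 * Illustrative sketch (hypothetical caller in the pthread kext): pick the QoS
 * for a newly created workqueue manager thread from the task's primordial
 * QoS. Only task_get_default_manager_qos() is real; the rest is assumed.
 *
 *	int manager_qos = task_get_default_manager_qos(task);
 *	// Dispatch may later override this choice from userspace.
 *	workq_setup_manager_thread(task, manager_qos);	// hypothetical
 */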
/*
 * Promote thread with the user level properties of 'promoter'
 * Mutexes may be held, but it's OK to take the throttle lock
 *
 * if 'new_promotion' is TRUE, this is a new promotion.
 * if FALSE, we are updating an existing promotion.
 */
static void
thread_user_promotion_promote(thread_t  thread,
                              thread_t  promoter,
                              struct promote_token* promote_token,
                              boolean_t new_promotion)
{
	struct task_pend_token pend_token = {};

	uint32_t promoter_base_pri = 0, promoter_qos = THREAD_QOS_UNSPECIFIED;

	spl_t s = splsched();
	thread_lock(promoter);

	/*
	 * We capture the 'promotion qos' here, which is captured
	 * before task-level clamping.
	 *
	 * This means that if the process gets unclamped while a promotion
	 * is in effect, the owning thread ends up with the correct QoS.
	 *
	 * This does NOT work correctly across processes, as the correct QoS
	 * in one is not necessarily the correct QoS in another.
	 * When we add support for multi-process ulock boosting, we need to
	 * do something more complex.
	 */
	promoter_qos = promoter->effective_policy.thep_qos_promote;

	/* TODO: extract 'effective unclamped base pri' instead */
	promoter_base_pri = promoter->base_pri;

	thread_unlock(promoter);
	splx(s);

	/* clamp out realtime to max user pri */
	promoter_base_pri = MIN(promoter_base_pri, MAXPRI_USER);

	/* add in the saved promotion token */
	assert(promote_token->pt_basepri <= MAXPRI_USER);

	promoter_base_pri = MAX(promoter_base_pri, promote_token->pt_basepri);
	promoter_qos = MAX(promoter_qos, promote_token->pt_qos);

	/* save the max for later */
	promote_token->pt_basepri = promoter_base_pri;
	promote_token->pt_qos = promoter_qos;

	s = splsched();
	thread_lock(thread);

	if (new_promotion) {
		if (thread->user_promotions == 0) {
			assert(thread->requested_policy.thrp_qos_promote == THREAD_QOS_UNSPECIFIED);
			assert(thread->user_promotion_basepri == 0);
		}

		thread->user_promotions++;
	} else {
		assert(thread->user_promotions > 0);
	}

	uint32_t thread_qos     = thread->requested_policy.thrp_qos_promote;
	uint32_t thread_basepri = thread->user_promotion_basepri;

	uint32_t new_qos     = MAX(thread_qos, promoter_qos);
	uint32_t new_basepri = MAX(thread_basepri, promoter_base_pri);

	/* TODO: Fast path the 'new is lower than effective' case to avoid full reevaluation */
	if (thread_qos != new_qos || thread_basepri != new_basepri) {

		thread->user_promotion_basepri = new_basepri;

		pend_token.tpt_force_recompute_pri = 1;

		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
		                                  TASK_POLICY_QOS_PROMOTE, new_qos,
		                                  0, &pend_token);
	}

	thread_unlock(thread);
	splx(s);

	thread_policy_update_complete_unlocked(thread, &pend_token);
}
/* Add a user promotion to thread */
void
thread_user_promotion_add(thread_t thread,
                          thread_t promoter,
                          struct promote_token* promote_token)
{
	thread_user_promotion_promote(thread, promoter, promote_token, TRUE);
}

/* Update an existing user promotion on thread */
void
thread_user_promotion_update(thread_t thread,
                             thread_t promoter,
                             struct promote_token* promote_token)
{
	thread_user_promotion_promote(thread, promoter, promote_token, FALSE);
}
/*
 * Drop a user promotion on thread
 * Mutexes may be held, but it's OK to take the throttle lock
 */
void
thread_user_promotion_drop(thread_t thread)
{
	struct task_pend_token pend_token = {};

	spl_t s = splsched();
	thread_lock(thread);

	assert(thread->user_promotions > 0);

	if (--thread->user_promotions == 0) {
		thread->requested_policy.thrp_qos_promote = THREAD_QOS_UNSPECIFIED;
		thread->user_promotion_basepri = 0;

		pend_token.tpt_force_recompute_pri = 1;

		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
		                                  TASK_POLICY_QOS_PROMOTE, THREAD_QOS_UNSPECIFIED,
		                                  0, &pend_token);
	}

	thread_unlock(thread);
	splx(s);

	thread_policy_update_complete_unlocked(thread, &pend_token);
}
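
/*
 * Illustrative sketch (hypothetical ulock-style caller): a waiter promotes
 * the lock owner, refreshes the promotion if another waiter arrives, and the
 * owner's promotion is dropped on unlock. One add must be balanced by one
 * drop; the promote_token accumulates the max base priority / QoS seen so
 * far. Only the three thread_user_promotion_* calls and the pt_basepri /
 * pt_qos fields are real; the locking context is assumed.
 *
 *	struct promote_token token = { .pt_basepri = 0, .pt_qos = 0 };
 *	thread_user_promotion_add(owner, current_thread(), &token);
 *	// ... another, more important waiter shows up ...
 *	thread_user_promotion_update(owner, other_waiter, &token);
 *	// ... owner releases the lock ...
 *	thread_user_promotion_drop(owner);
 */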
/*
 * Set the thread's QoS IPC override
 * Owned by the IPC subsystem
 *
 * May be called with spinlocks held, but not spinlocks
 * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
 *
 * One 'add' must be balanced by one 'drop'.
 * Between 'add' and 'drop', the override QoS value may be updated with an 'update'.
 * Before the thread is deallocated, there must be 0 remaining overrides.
 */
static void
thread_ipc_override(thread_t  thread,
                    uint32_t  qos_override,
                    boolean_t is_new_override)
{
	struct task_pend_token pend_token = {};
	boolean_t needs_update;

	spl_t s = splsched();
	thread_lock(thread);

	uint32_t old_override = thread->requested_policy.thrp_qos_ipc_override;

	assert(qos_override > THREAD_QOS_UNSPECIFIED);
	assert(qos_override < THREAD_QOS_LAST);
	if (is_new_override) {
		if (thread->ipc_overrides++ == 0) {
			/* This add is the first override for this thread */
			assert(old_override == THREAD_QOS_UNSPECIFIED);
		} else {
			/* There are already other overrides in effect for this thread */
			assert(old_override > THREAD_QOS_UNSPECIFIED);
		}
	} else {
		/* There must be at least one override (the previous add call) in effect */
		assert(thread->ipc_overrides > 0);
		assert(old_override > THREAD_QOS_UNSPECIFIED);
	}

	/*
	 * We can't allow lowering if there are several IPC overrides because
	 * the caller can't possibly know the whole truth
	 */
	if (thread->ipc_overrides == 1) {
		needs_update = qos_override != old_override;
	} else {
		needs_update = qos_override > old_override;
	}

	if (needs_update) {
		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
		                                  TASK_POLICY_QOS_IPC_OVERRIDE,
		                                  qos_override, 0, &pend_token);
		assert(pend_token.tpt_update_sockets == 0);
	}

	thread_unlock(thread);
	splx(s);

	/*
	 * this is only safe after rethrottle_thread supports
	 * being called from spinlock context
	 */
	thread_policy_update_complete_unlocked(thread, &pend_token);
}
void
thread_add_ipc_override(thread_t thread,
                        uint32_t qos_override)
{
	thread_ipc_override(thread, qos_override, TRUE);
}

void
thread_update_ipc_override(thread_t thread,
                           uint32_t qos_override)
{
	thread_ipc_override(thread, qos_override, FALSE);
}

void
thread_drop_ipc_override(thread_t thread)
{
	struct task_pend_token pend_token = {};

	spl_t s = splsched();
	thread_lock(thread);

	assert(thread->ipc_overrides > 0);

	if (--thread->ipc_overrides == 0) {
		/*
		 * There are no more overrides for this thread, so we should
		 * clear out the saturated override value
		 */
		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
		                                  TASK_POLICY_QOS_IPC_OVERRIDE, THREAD_QOS_UNSPECIFIED,
		                                  0, &pend_token);
	}

	thread_unlock(thread);
	splx(s);

	/*
	 * this is only safe after rethrottle_thread supports
	 * being called from spinlock context
	 */
	thread_policy_update_complete_unlocked(thread, &pend_token);
}
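
/*
 * Illustrative sketch (hypothetical IPC-layer caller): boost the thread that
 * will handle a message, raise the boost if a more important message arrives,
 * and drop it once the work is done. One add is balanced by one drop. Only
 * the three thread_*_ipc_override() calls are real; the message flow is
 * assumed.
 *
 *	thread_add_ipc_override(dest_thread, THREAD_QOS_USER_INITIATED);
 *	// ... a higher-QoS message is enqueued for the same thread ...
 *	thread_update_ipc_override(dest_thread, THREAD_QOS_USER_INTERACTIVE);
 *	// ... the message has been handled ...
 *	thread_drop_ipc_override(dest_thread);
 */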
void
thread_add_sync_ipc_override(thread_t thread)
{
	struct task_pend_token pend_token = {};

	spl_t s = splsched();
	thread_lock(thread);

	uint32_t old_override __unused = thread->requested_policy.thrp_qos_sync_ipc_override;

	if (thread->sync_ipc_overrides++ == 0) {
		/* This add is the first override for this thread */
		assert(old_override == THREAD_QOS_UNSPECIFIED);
	} else {
		/* There are already other overrides in effect for this thread */
		assert(old_override == THREAD_QOS_USER_INTERACTIVE);
		thread_unlock(thread);
		splx(s);
		return;
	}

	uint32_t new_override = THREAD_QOS_USER_INTERACTIVE;

	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	                                  TASK_POLICY_QOS_SYNC_IPC_OVERRIDE,
	                                  new_override, 0, &pend_token);

	assert(pend_token.tpt_update_sockets == 0);

	thread_unlock(thread);
	splx(s);

	/*
	 * this is only safe after rethrottle_thread supports
	 * being called from spinlock context
	 */
	thread_policy_update_complete_unlocked(thread, &pend_token);
}
void
thread_drop_sync_ipc_override(thread_t thread)
{
	struct task_pend_token pend_token = {};

	spl_t s = splsched();
	thread_lock(thread);

	assert(thread->sync_ipc_overrides > 0);

	if (--thread->sync_ipc_overrides == 0) {
		/*
		 * There are no more overrides for this thread, so we should
		 * clear out the saturated override value
		 */
		proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
		                                  TASK_POLICY_QOS_SYNC_IPC_OVERRIDE, THREAD_QOS_UNSPECIFIED,
		                                  0, &pend_token);
	}

	thread_unlock(thread);
	splx(s);

	/*
	 * this is only safe after rethrottle_thread supports
	 * being called from spinlock context
	 */
	thread_policy_update_complete_unlocked(thread, &pend_token);
}
/* Get current IPC override, may be called from spinlock context */
uint32_t
thread_get_ipc_override(thread_t thread)
{
	return proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_IPC_OVERRIDE, NULL);
}
/*
 * This function will promote the thread priority
 * since exec could block other threads calling
 * proc_find on the proc. This boost must be removed
 * via call to thread_clear_exec_promotion.
 */
void
thread_set_exec_promotion(thread_t thread)
{
	spl_t s;

	s = splsched();
	thread_lock(thread);

	assert((thread->sched_flags & TH_SFLAG_EXEC_PROMOTED) == 0);

	if (thread->sched_pri < EXEC_BOOST_PRIORITY ||
	    !(thread->sched_flags & TH_SFLAG_EXEC_PROMOTED)) {
		KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_EXEC_PROMOTE) | DBG_FUNC_NONE,
		                      (uintptr_t)thread_tid(thread),
		                      thread->sched_pri, thread->base_pri,
		                      EXEC_BOOST_PRIORITY, 0);
		thread->sched_flags |= TH_SFLAG_EXEC_PROMOTED;
		if (thread->sched_pri < EXEC_BOOST_PRIORITY)
			set_sched_pri(thread, EXEC_BOOST_PRIORITY);
	}

	thread_unlock(thread);
	splx(s);
}
/*
 * This function will clear the exec thread
 * promotion set on the thread by thread_set_exec_promotion.
 */
void
thread_clear_exec_promotion(thread_t thread)
{
	spl_t s;

	s = splsched();
	thread_lock(thread);
	assert(thread->sched_flags & TH_SFLAG_EXEC_PROMOTED);

	if (thread->sched_flags & TH_SFLAG_EXEC_PROMOTED) {
		thread->sched_flags &= ~TH_SFLAG_EXEC_PROMOTED;

		if (thread->sched_flags & TH_SFLAG_PROMOTED_MASK) {
			/* it still has other promotions (mutex/rw_lock) */
		} else if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
			KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_EXEC_DEMOTE) | DBG_FUNC_NONE,
			                      (uintptr_t)thread_tid(thread),
			                      thread->sched_pri,
			                      thread->base_pri,
			                      DEPRESSPRI, 0);
			set_sched_pri(thread, DEPRESSPRI);
		} else {
			KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_EXEC_DEMOTE) | DBG_FUNC_NONE,
			                      (uintptr_t)thread_tid(thread),
			                      thread->sched_pri,
			                      thread->base_pri,
			                      thread->base_pri, 0);
			thread_recompute_sched_pri(thread, FALSE);
		}
	}

	thread_unlock(thread);
	splx(s);
}
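
/*
 * Illustrative sketch (hypothetical exec path): the boost is strictly paired
 * around the window where exec would otherwise block proc_find() callers.
 * Only the two functions above are real; the surrounding exec logic is
 * assumed.
 *
 *	thread_set_exec_promotion(current_thread());
 *	// ... perform the exec work that holds up proc_find() callers ...
 *	thread_clear_exec_promotion(current_thread());
 */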