apple/xnu.git (xnu-4903.270.47) - osfmk/kern/thread_policy.c
1 /*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <mach/mach_types.h>
30 #include <mach/thread_act_server.h>
31
32 #include <kern/kern_types.h>
33 #include <kern/processor.h>
34 #include <kern/thread.h>
35 #include <kern/affinity.h>
36 #include <mach/task_policy.h>
37 #include <kern/sfi.h>
38 #include <kern/policy_internal.h>
39 #include <sys/errno.h>
40 #include <sys/ulock.h>
41
42 #include <mach/machine/sdt.h>
43
44 #ifdef MACH_BSD
45 extern int proc_selfpid(void);
46 extern char * proc_name_address(void *p);
47 extern void rethrottle_thread(void * uthread);
48 #endif /* MACH_BSD */
49
50 #define QOS_EXTRACT(q) ((q) & 0xff)
51
52 uint32_t qos_override_mode;
53 #define QOS_OVERRIDE_MODE_OVERHANG_PEAK 0
54 #define QOS_OVERRIDE_MODE_IGNORE_OVERRIDE 1
55 #define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE 2
56 #define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE 3
57
58 extern zone_t thread_qos_override_zone;
59
60 static void
61 proc_thread_qos_remove_override_internal(thread_t thread, user_addr_t resource, int resource_type, boolean_t reset);
62
63 /*
64 * THREAD_QOS_UNSPECIFIED is assigned the highest tier available, so it does not provide a limit
65 * to threads that don't have a QoS class set.
66 */
67 const qos_policy_params_t thread_qos_policy_params = {
68 /*
69 * This table defines the starting base priority of the thread,
70 * which will be modified by the thread importance and the task max priority
71 * before being applied.
72 */
73 .qos_pri[THREAD_QOS_UNSPECIFIED] = 0, /* not consulted */
74 .qos_pri[THREAD_QOS_USER_INTERACTIVE] = BASEPRI_BACKGROUND, /* i.e. 46 */
75 .qos_pri[THREAD_QOS_USER_INITIATED] = BASEPRI_USER_INITIATED,
76 .qos_pri[THREAD_QOS_LEGACY] = BASEPRI_DEFAULT,
77 .qos_pri[THREAD_QOS_UTILITY] = BASEPRI_UTILITY,
78 .qos_pri[THREAD_QOS_BACKGROUND] = MAXPRI_THROTTLE,
79 .qos_pri[THREAD_QOS_MAINTENANCE] = MAXPRI_THROTTLE,
80
81 /*
82 * This table defines the highest IO priority that a thread marked with this
83 * QoS class can have.
84 */
85 .qos_iotier[THREAD_QOS_UNSPECIFIED] = THROTTLE_LEVEL_TIER0,
86 .qos_iotier[THREAD_QOS_USER_INTERACTIVE] = THROTTLE_LEVEL_TIER0,
87 .qos_iotier[THREAD_QOS_USER_INITIATED] = THROTTLE_LEVEL_TIER0,
88 .qos_iotier[THREAD_QOS_LEGACY] = THROTTLE_LEVEL_TIER0,
89 .qos_iotier[THREAD_QOS_UTILITY] = THROTTLE_LEVEL_TIER1,
90 .qos_iotier[THREAD_QOS_BACKGROUND] = THROTTLE_LEVEL_TIER2, /* possibly overridden by bg_iotier */
91 .qos_iotier[THREAD_QOS_MAINTENANCE] = THROTTLE_LEVEL_TIER3,
92
93 /*
94 * These tables define the highest throughput and latency QoS tiers
95 * that a thread marked with this QoS class can have.
96 */
97
98 .qos_through_qos[THREAD_QOS_UNSPECIFIED] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_UNSPECIFIED),
99 .qos_through_qos[THREAD_QOS_USER_INTERACTIVE] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_0),
100 .qos_through_qos[THREAD_QOS_USER_INITIATED] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
101 .qos_through_qos[THREAD_QOS_LEGACY] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
102 .qos_through_qos[THREAD_QOS_UTILITY] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_2),
103 .qos_through_qos[THREAD_QOS_BACKGROUND] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
104 .qos_through_qos[THREAD_QOS_MAINTENANCE] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
105
106 .qos_latency_qos[THREAD_QOS_UNSPECIFIED] = QOS_EXTRACT(LATENCY_QOS_TIER_UNSPECIFIED),
107 .qos_latency_qos[THREAD_QOS_USER_INTERACTIVE] = QOS_EXTRACT(LATENCY_QOS_TIER_0),
108 .qos_latency_qos[THREAD_QOS_USER_INITIATED] = QOS_EXTRACT(LATENCY_QOS_TIER_1),
109 .qos_latency_qos[THREAD_QOS_LEGACY] = QOS_EXTRACT(LATENCY_QOS_TIER_1),
110 .qos_latency_qos[THREAD_QOS_UTILITY] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
111 .qos_latency_qos[THREAD_QOS_BACKGROUND] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
112 .qos_latency_qos[THREAD_QOS_MAINTENANCE] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
113 };
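/*
 * Illustrative reading of the tables above (assuming BASEPRI_DEFAULT == 31
 * from osfmk/kern/sched.h): a THREAD_QOS_LEGACY thread starts at base
 * priority 31, may issue THROTTLE_LEVEL_TIER0 IO, and is capped at
 * throughput tier 1 and latency tier 1.
 */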
114
115 static void
116 thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode);
117
118 static int
119 thread_qos_scaled_relative_priority(int qos, int qos_relprio);
120
121 static void
122 proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info);
123
124 static void
125 proc_set_thread_policy_locked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
126
127 static void
128 proc_set_thread_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
129
130 static void
131 thread_set_requested_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2);
132
133 static int
134 thread_get_requested_policy_spinlocked(thread_t thread, int category, int flavor, int* value2);
135
136 static int
137 proc_get_thread_policy_locked(thread_t thread, int category, int flavor, int* value2);
138
139 static void
140 thread_policy_update_spinlocked(thread_t thread, boolean_t recompute_priority, task_pend_token_t pend_token);
141
142 static void
143 thread_policy_update_internal_spinlocked(thread_t thread, boolean_t recompute_priority, task_pend_token_t pend_token);
144
145 void
146 thread_policy_init(void)
147 {
148 if (PE_parse_boot_argn("qos_override_mode", &qos_override_mode, sizeof(qos_override_mode))) {
149 printf("QOS override mode: 0x%08x\n", qos_override_mode);
150 } else {
151 qos_override_mode = QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE;
152 }
153 }
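/*
 * Illustrative usage: booting with "qos_override_mode=2" in boot-args selects
 * QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE; with no boot-arg, the default
 * chosen above (FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE) is used.
 */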
154
155 boolean_t
156 thread_has_qos_policy(thread_t thread)
157 {
158 return (proc_get_thread_policy(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS) != THREAD_QOS_UNSPECIFIED) ? TRUE : FALSE;
159 }
160
161
162 static void
163 thread_remove_qos_policy_locked(thread_t thread,
164 task_pend_token_t pend_token)
165 {
166 __unused int prev_qos = thread->requested_policy.thrp_qos;
167
168 DTRACE_PROC2(qos__remove, thread_t, thread, int, prev_qos);
169
170 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
171 THREAD_QOS_UNSPECIFIED, 0, pend_token);
172 }
173
174 kern_return_t
175 thread_remove_qos_policy(thread_t thread)
176 {
177 struct task_pend_token pend_token = {};
178
179 thread_mtx_lock(thread);
180 if (!thread->active) {
181 thread_mtx_unlock(thread);
182 return KERN_TERMINATED;
183 }
184
185 thread_remove_qos_policy_locked(thread, &pend_token);
186
187 thread_mtx_unlock(thread);
188
189 thread_policy_update_complete_unlocked(thread, &pend_token);
190
191 return KERN_SUCCESS;
192 }
193
194
195 boolean_t
196 thread_is_static_param(thread_t thread)
197 {
198 if (thread->static_param) {
199 DTRACE_PROC1(qos__legacy__denied, thread_t, thread);
200 return TRUE;
201 }
202 return FALSE;
203 }
204
205 /*
206 * Relative priorities can range between 0REL and -15REL. These
207 * map to QoS-specific ranges, to create non-overlapping priority
208 * ranges.
209 */
210 static int
211 thread_qos_scaled_relative_priority(int qos, int qos_relprio)
212 {
213 int next_lower_qos;
214
215 /* Fast path, since no validation or scaling is needed */
216 if (qos_relprio == 0) {
217 return 0;
218 }
219
220 switch (qos) {
221 case THREAD_QOS_USER_INTERACTIVE:
222 next_lower_qos = THREAD_QOS_USER_INITIATED;
223 break;
224 case THREAD_QOS_USER_INITIATED:
225 next_lower_qos = THREAD_QOS_LEGACY;
226 break;
227 case THREAD_QOS_LEGACY:
228 next_lower_qos = THREAD_QOS_UTILITY;
229 break;
230 case THREAD_QOS_UTILITY:
231 next_lower_qos = THREAD_QOS_BACKGROUND;
232 break;
233 case THREAD_QOS_MAINTENANCE:
234 case THREAD_QOS_BACKGROUND:
235 next_lower_qos = 0;
236 break;
237 default:
238 panic("Unrecognized QoS %d", qos);
239 return 0;
240 }
241
242 int prio_range_max = thread_qos_policy_params.qos_pri[qos];
243 int prio_range_min = next_lower_qos ? thread_qos_policy_params.qos_pri[next_lower_qos] : 0;
244
245 /*
246 * We now have the valid range that the scaled relative priority can map to. Note
247 * that the lower bound is exclusive, but the upper bound is inclusive. If the
248 * range is (21,31], 0REL should map to 31 and -15REL should map to 22. We use the
249 * fact that the max relative priority is -15 and use ">>4" to divide by 16 and discard
250 * remainder.
251 */
252 int scaled_relprio = -(((prio_range_max - prio_range_min) * (-qos_relprio)) >> 4);
253
254 return scaled_relprio;
255 }
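/*
 * Worked example (illustrative, assuming BASEPRI_DEFAULT == 31 and
 * BASEPRI_UTILITY == 20 from osfmk/kern/sched.h): a THREAD_QOS_LEGACY thread
 * at -15REL has the range (20, 31], so
 *     scaled_relprio = -(((31 - 20) * 15) >> 4) = -(165 >> 4) = -10
 * and the thread lands at base priority 21, just above the UTILITY band.
 */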
256
257 /*
258 * flag set by -qos-policy-allow boot-arg to allow
259 * testing thread qos policy from userspace
260 */
261 boolean_t allow_qos_policy_set = FALSE;
262
263 kern_return_t
264 thread_policy_set(
265 thread_t thread,
266 thread_policy_flavor_t flavor,
267 thread_policy_t policy_info,
268 mach_msg_type_number_t count)
269 {
270 thread_qos_policy_data_t req_qos;
271 kern_return_t kr;
272
273 req_qos.qos_tier = THREAD_QOS_UNSPECIFIED;
274
275 if (thread == THREAD_NULL) {
276 return KERN_INVALID_ARGUMENT;
277 }
278
279 if (allow_qos_policy_set == FALSE) {
280 if (thread_is_static_param(thread)) {
281 return KERN_POLICY_STATIC;
282 }
283
284 if (flavor == THREAD_QOS_POLICY) {
285 return KERN_INVALID_ARGUMENT;
286 }
287 }
288
289 /* Threads without static_param set reset their QoS when other policies are applied. */
290 if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
291 /* Store the existing tier, if we fail this call it is used to reset back. */
292 req_qos.qos_tier = thread->requested_policy.thrp_qos;
293 req_qos.tier_importance = thread->requested_policy.thrp_qos_relprio;
294
295 kr = thread_remove_qos_policy(thread);
296 if (kr != KERN_SUCCESS) {
297 return kr;
298 }
299 }
300
301 kr = thread_policy_set_internal(thread, flavor, policy_info, count);
302
303 /* If we stripped an existing QoS policy above, restore it when the new policy set fails. */
304 if (req_qos.qos_tier != THREAD_QOS_UNSPECIFIED) {
305 if (kr != KERN_SUCCESS) {
306 /* Reset back to our original tier as the set failed. */
307 (void)thread_policy_set_internal(thread, THREAD_QOS_POLICY, (thread_policy_t)&req_qos, THREAD_QOS_POLICY_COUNT);
308 }
309 }
310
311 return kr;
312 }
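/*
 * Illustrative userspace call sequence (a sketch, not part of this file),
 * reaching this routine through the MIG-generated thread_policy_set() stub;
 * the millisecond values are only an example:
 *
 *     mach_timebase_info_data_t tb;
 *     mach_timebase_info(&tb);
 *     uint64_t ms = (1000000ull * tb.denom) / tb.numer;  // 1 ms in abstime
 *     thread_time_constraint_policy_data_t rt = {
 *         .period      = (uint32_t)(10 * ms),
 *         .computation = (uint32_t)(2 * ms),
 *         .constraint  = (uint32_t)(4 * ms),
 *         .preemptible = TRUE,
 *     };
 *     kern_return_t kr = thread_policy_set(mach_thread_self(),
 *         THREAD_TIME_CONSTRAINT_POLICY, (thread_policy_t)&rt,
 *         THREAD_TIME_CONSTRAINT_POLICY_COUNT);
 *
 * Note the validation below: constraint must be >= computation, and
 * computation must lie between min_rt_quantum and max_rt_quantum.
 */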
313
314 kern_return_t
315 thread_policy_set_internal(
316 thread_t thread,
317 thread_policy_flavor_t flavor,
318 thread_policy_t policy_info,
319 mach_msg_type_number_t count)
320 {
321 kern_return_t result = KERN_SUCCESS;
322 struct task_pend_token pend_token = {};
323
324 thread_mtx_lock(thread);
325 if (!thread->active) {
326 thread_mtx_unlock(thread);
327
328 return KERN_TERMINATED;
329 }
330
331 switch (flavor) {
332 case THREAD_EXTENDED_POLICY:
333 {
334 boolean_t timeshare = TRUE;
335
336 if (count >= THREAD_EXTENDED_POLICY_COUNT) {
337 thread_extended_policy_t info;
338
339 info = (thread_extended_policy_t)policy_info;
340 timeshare = info->timeshare;
341 }
342
343 sched_mode_t mode = (timeshare == TRUE) ? TH_MODE_TIMESHARE : TH_MODE_FIXED;
344
345 spl_t s = splsched();
346 thread_lock(thread);
347
348 thread_set_user_sched_mode_and_recompute_pri(thread, mode);
349
350 thread_unlock(thread);
351 splx(s);
352
353 pend_token.tpt_update_thread_sfi = 1;
354
355 break;
356 }
357
358 case THREAD_TIME_CONSTRAINT_POLICY:
359 {
360 thread_time_constraint_policy_t info;
361
362 if (count < THREAD_TIME_CONSTRAINT_POLICY_COUNT) {
363 result = KERN_INVALID_ARGUMENT;
364 break;
365 }
366
367 info = (thread_time_constraint_policy_t)policy_info;
368 if (info->constraint < info->computation ||
369 info->computation > max_rt_quantum ||
370 info->computation < min_rt_quantum) {
371 result = KERN_INVALID_ARGUMENT;
372 break;
373 }
374
375 spl_t s = splsched();
376 thread_lock(thread);
377
378 thread->realtime.period = info->period;
379 thread->realtime.computation = info->computation;
380 thread->realtime.constraint = info->constraint;
381 thread->realtime.preemptible = info->preemptible;
382
383 thread_set_user_sched_mode_and_recompute_pri(thread, TH_MODE_REALTIME);
384
385 thread_unlock(thread);
386 splx(s);
387
388 pend_token.tpt_update_thread_sfi = 1;
389
390 break;
391 }
392
393 case THREAD_PRECEDENCE_POLICY:
394 {
395 thread_precedence_policy_t info;
396
397 if (count < THREAD_PRECEDENCE_POLICY_COUNT) {
398 result = KERN_INVALID_ARGUMENT;
399 break;
400 }
401 info = (thread_precedence_policy_t)policy_info;
402
403 spl_t s = splsched();
404 thread_lock(thread);
405
406 thread->importance = info->importance;
407
408 thread_recompute_priority(thread);
409
410 thread_unlock(thread);
411 splx(s);
412
413 break;
414 }
415
416 case THREAD_AFFINITY_POLICY:
417 {
418 thread_affinity_policy_t info;
419
420 if (!thread_affinity_is_supported()) {
421 result = KERN_NOT_SUPPORTED;
422 break;
423 }
424 if (count < THREAD_AFFINITY_POLICY_COUNT) {
425 result = KERN_INVALID_ARGUMENT;
426 break;
427 }
428
429 info = (thread_affinity_policy_t) policy_info;
430 /*
431 * Unlock the thread mutex here and
432 * return directly after calling thread_affinity_set().
433 * This is necessary for correct lock ordering because
434 * thread_affinity_set() takes the task lock.
435 */
436 thread_mtx_unlock(thread);
437 return thread_affinity_set(thread, info->affinity_tag);
438 }
439
440 #if CONFIG_EMBEDDED
441 case THREAD_BACKGROUND_POLICY:
442 {
443 thread_background_policy_t info;
444
445 if (count < THREAD_BACKGROUND_POLICY_COUNT) {
446 result = KERN_INVALID_ARGUMENT;
447 break;
448 }
449
450 if (thread->task != current_task()) {
451 result = KERN_PROTECTION_FAILURE;
452 break;
453 }
454
455 info = (thread_background_policy_t) policy_info;
456
457 int enable;
458
459 if (info->priority == THREAD_BACKGROUND_POLICY_DARWIN_BG) {
460 enable = TASK_POLICY_ENABLE;
461 } else {
462 enable = TASK_POLICY_DISABLE;
463 }
464
465 int category = (current_thread() == thread) ? TASK_POLICY_INTERNAL : TASK_POLICY_EXTERNAL;
466
467 proc_set_thread_policy_locked(thread, category, TASK_POLICY_DARWIN_BG, enable, 0, &pend_token);
468
469 break;
470 }
471 #endif /* CONFIG_EMBEDDED */
472
473 case THREAD_THROUGHPUT_QOS_POLICY:
474 {
475 thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
476 thread_throughput_qos_t tqos;
477
478 if (count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
479 result = KERN_INVALID_ARGUMENT;
480 break;
481 }
482
483 if ((result = qos_throughput_policy_validate(info->thread_throughput_qos_tier)) != KERN_SUCCESS) {
484 break;
485 }
486
487 tqos = qos_extract(info->thread_throughput_qos_tier);
488
489 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
490 TASK_POLICY_THROUGH_QOS, tqos, 0, &pend_token);
491
492 break;
493 }
494
495 case THREAD_LATENCY_QOS_POLICY:
496 {
497 thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
498 thread_latency_qos_t lqos;
499
500 if (count < THREAD_LATENCY_QOS_POLICY_COUNT) {
501 result = KERN_INVALID_ARGUMENT;
502 break;
503 }
504
505 if ((result = qos_latency_policy_validate(info->thread_latency_qos_tier)) != KERN_SUCCESS) {
506 break;
507 }
508
509 lqos = qos_extract(info->thread_latency_qos_tier);
510
511 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
512 TASK_POLICY_LATENCY_QOS, lqos, 0, &pend_token);
513
514 break;
515 }
516
517 case THREAD_QOS_POLICY:
518 {
519 thread_qos_policy_t info = (thread_qos_policy_t)policy_info;
520
521 if (count < THREAD_QOS_POLICY_COUNT) {
522 result = KERN_INVALID_ARGUMENT;
523 break;
524 }
525
526 if (info->qos_tier < 0 || info->qos_tier >= THREAD_QOS_LAST) {
527 result = KERN_INVALID_ARGUMENT;
528 break;
529 }
530
531 if (info->tier_importance > 0 || info->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
532 result = KERN_INVALID_ARGUMENT;
533 break;
534 }
535
536 if (info->qos_tier == THREAD_QOS_UNSPECIFIED && info->tier_importance != 0) {
537 result = KERN_INVALID_ARGUMENT;
538 break;
539 }
540
541 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
542 info->qos_tier, -info->tier_importance, &pend_token);
543
544 break;
545 }
546
547 default:
548 result = KERN_INVALID_ARGUMENT;
549 break;
550 }
551
552 thread_mtx_unlock(thread);
553
554 thread_policy_update_complete_unlocked(thread, &pend_token);
555
556 return result;
557 }
558
559 /*
560 * Note that there is no implemented difference between POLICY_RR and POLICY_FIFO.
561 * Both result in FIXED mode scheduling.
562 */
563 static sched_mode_t
564 convert_policy_to_sched_mode(integer_t policy)
565 {
566 switch (policy) {
567 case POLICY_TIMESHARE:
568 return TH_MODE_TIMESHARE;
569 case POLICY_RR:
570 case POLICY_FIFO:
571 return TH_MODE_FIXED;
572 default:
573 panic("unexpected sched policy: %d", policy);
574 return TH_MODE_NONE;
575 }
576 }
577
578 /*
579 * Called either with the thread mutex locked
580 * or from the pthread kext in a 'safe place'.
581 */
582 static kern_return_t
583 thread_set_mode_and_absolute_pri_internal(thread_t thread,
584 sched_mode_t mode,
585 integer_t priority,
586 task_pend_token_t pend_token)
587 {
588 kern_return_t kr = KERN_SUCCESS;
589
590 spl_t s = splsched();
591 thread_lock(thread);
592
593 /* This path isn't allowed to change a thread out of realtime. */
594 if ((thread->sched_mode == TH_MODE_REALTIME) ||
595 (thread->saved_mode == TH_MODE_REALTIME)) {
596 kr = KERN_FAILURE;
597 goto unlock;
598 }
599
600 if (thread->policy_reset) {
601 kr = KERN_SUCCESS;
602 goto unlock;
603 }
604
605 sched_mode_t old_mode = thread->sched_mode;
606
607 /*
608 * Reverse engineer and apply the correct importance value
609 * from the requested absolute priority value.
610 *
611 * TODO: Store the absolute priority value instead
612 */
613
614 if (priority >= thread->max_priority) {
615 priority = thread->max_priority - thread->task_priority;
616 } else if (priority >= MINPRI_KERNEL) {
617 priority -= MINPRI_KERNEL;
618 } else if (priority >= MINPRI_RESERVED) {
619 priority -= MINPRI_RESERVED;
620 } else {
621 priority -= BASEPRI_DEFAULT;
622 }
623
624 priority += thread->task_priority;
625
626 if (priority > thread->max_priority) {
627 priority = thread->max_priority;
628 } else if (priority < MINPRI) {
629 priority = MINPRI;
630 }
631
632 thread->importance = priority - thread->task_priority;
633
634 thread_set_user_sched_mode_and_recompute_pri(thread, mode);
635
636 if (mode != old_mode) {
637 pend_token->tpt_update_thread_sfi = 1;
638 }
639
640 unlock:
641 thread_unlock(thread);
642 splx(s);
643
644 return kr;
645 }
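/*
 * Worked example (illustrative, assuming MINPRI_RESERVED == 64,
 * MINPRI_KERNEL == 80 and BASEPRI_DEFAULT == 31): a user thread with
 * task_priority == 31 and max_priority == 63 asking for absolute priority 40
 * takes the final branch above: 40 - 31 = 9 becomes the importance, and the
 * recomputed base priority is 31 + 9 = 40, within [MINPRI, max_priority].
 */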
646
647 uint8_t
648 thread_workq_pri_for_qos(thread_qos_t qos)
649 {
650 assert(qos < THREAD_QOS_LAST);
651 return (uint8_t)thread_qos_policy_params.qos_pri[qos];
652 }
653
654 thread_qos_t
655 thread_workq_qos_for_pri(int priority)
656 {
657 int qos;
658 if (priority > thread_qos_policy_params.qos_pri[THREAD_QOS_USER_INTERACTIVE]) {
659 // indicate that workq should map >UI threads to workq's
660 // internal notation for above-UI work.
661 return THREAD_QOS_UNSPECIFIED;
662 }
663 for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
664 // map a given priority up to the next nearest qos band.
665 if (thread_qos_policy_params.qos_pri[qos - 1] < priority) {
666 return qos;
667 }
668 }
669 return THREAD_QOS_MAINTENANCE;
670 }
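/*
 * Illustrative mapping (assuming the sched.h values BASEPRI_USER_INITIATED == 37
 * and BASEPRI_DEFAULT == 31): priority 35 is above the LEGACY base but not
 * above the USER_INITIATED base, so it maps up to THREAD_QOS_USER_INITIATED;
 * priority 48 exceeds the USER_INTERACTIVE base (46) and returns
 * THREAD_QOS_UNSPECIFIED to denote above-UI work.
 */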
671
672 /*
673 * private interface for pthread workqueues
674 *
675 * Set scheduling policy & absolute priority for thread
676 * May be called with spinlocks held
677 * Thread mutex lock is not held
678 */
679 void
680 thread_reset_workq_qos(thread_t thread, uint32_t qos)
681 {
682 struct task_pend_token pend_token = {};
683
684 assert(qos < THREAD_QOS_LAST);
685
686 spl_t s = splsched();
687 thread_lock(thread);
688
689 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
690 TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
691 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
692 TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED, 0,
693 &pend_token);
694
695 assert(pend_token.tpt_update_sockets == 0);
696
697 thread_unlock(thread);
698 splx(s);
699
700 thread_policy_update_complete_unlocked(thread, &pend_token);
701 }
702
703 /*
704 * private interface for pthread workqueues
705 *
706 * Set scheduling policy & absolute priority for thread
707 * May be called with spinlocks held
708 * Thread mutex lock is held
709 */
710 void
711 thread_set_workq_override(thread_t thread, uint32_t qos)
712 {
713 struct task_pend_token pend_token = {};
714
715 assert(qos < THREAD_QOS_LAST);
716
717 spl_t s = splsched();
718 thread_lock(thread);
719
720 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
721 TASK_POLICY_QOS_WORKQ_OVERRIDE, qos, 0, &pend_token);
722
723 assert(pend_token.tpt_update_sockets == 0);
724
725 thread_unlock(thread);
726 splx(s);
727
728 thread_policy_update_complete_unlocked(thread, &pend_token);
729 }
730
731 /*
732 * private interface for pthread workqueues
733 *
734 * Set scheduling policy & absolute priority for thread
735 * May be called with spinlocks held
736 * Thread mutex lock is not held
737 */
738 void
739 thread_set_workq_pri(thread_t thread,
740 thread_qos_t qos,
741 integer_t priority,
742 integer_t policy)
743 {
744 struct task_pend_token pend_token = {};
745 sched_mode_t mode = convert_policy_to_sched_mode(policy);
746
747 assert(qos < THREAD_QOS_LAST);
748 assert(thread->static_param);
749
750 if (!thread->static_param || !thread->active) {
751 return;
752 }
753
754 spl_t s = splsched();
755 thread_lock(thread);
756
757 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
758 TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
759 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
760 TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED,
761 0, &pend_token);
762
763 thread_unlock(thread);
764 splx(s);
765
766 /* Concern: this doesn't hold the mutex... */
767
768 __assert_only kern_return_t kr;
769 kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority,
770 &pend_token);
771 assert(kr == KERN_SUCCESS);
772
773 if (pend_token.tpt_update_thread_sfi) {
774 sfi_reevaluate(thread);
775 }
776 }
777
778 /*
779 * thread_set_mode_and_absolute_pri:
780 *
781 * Set scheduling policy & absolute priority for thread, for deprecated
782 * thread_set_policy and thread_policy interfaces.
783 *
784 * Called with nothing locked.
785 */
786 kern_return_t
787 thread_set_mode_and_absolute_pri(thread_t thread,
788 integer_t policy,
789 integer_t priority)
790 {
791 kern_return_t kr = KERN_SUCCESS;
792 struct task_pend_token pend_token = {};
793
794 sched_mode_t mode = convert_policy_to_sched_mode(policy);
795
796 thread_mtx_lock(thread);
797
798 if (!thread->active) {
799 kr = KERN_TERMINATED;
800 goto unlock;
801 }
802
803 if (thread_is_static_param(thread)) {
804 kr = KERN_POLICY_STATIC;
805 goto unlock;
806 }
807
808 /* Setting legacy policies on threads kills the current QoS */
809 if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
810 thread_remove_qos_policy_locked(thread, &pend_token);
811 }
812
813 kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority, &pend_token);
814
815 unlock:
816 thread_mtx_unlock(thread);
817
818 thread_policy_update_complete_unlocked(thread, &pend_token);
819
820 return kr;
821 }
822
823 /*
824 * Set the thread's requested mode and recompute priority
825 * Called with thread mutex and thread locked
826 *
827 * TODO: Mitigate potential problems caused by moving thread to end of runq
828 * whenever its priority is recomputed
829 * Only remove when it actually changes? Attempt to re-insert at appropriate location?
830 */
831 static void
832 thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode)
833 {
834 if (thread->policy_reset) {
835 return;
836 }
837
838 boolean_t removed = thread_run_queue_remove(thread);
839
840 /*
841 * TODO: Instead of having saved mode, have 'user mode' and 'true mode'.
842 * That way there's zero confusion over which the user wants
843 * and which the kernel wants.
844 */
845 if (thread->sched_flags & TH_SFLAG_DEMOTED_MASK) {
846 thread->saved_mode = mode;
847 } else {
848 sched_set_thread_mode(thread, mode);
849 }
850
851 thread_recompute_priority(thread);
852
853 if (removed) {
854 thread_run_queue_reinsert(thread, SCHED_TAILQ);
855 }
856 }
857
858 /* called at splsched with thread lock locked */
859 static void
860 thread_update_qos_cpu_time_locked(thread_t thread)
861 {
862 task_t task = thread->task;
863 uint64_t timer_sum, timer_delta;
864
865 /*
866 * This is only as accurate as the distance between
867 * last context switch (embedded) or last user/kernel boundary transition (desktop)
868 * because user_timer and system_timer are only updated then.
869 *
870 * TODO: Consider running a timer_update operation here to update it first.
871 * Maybe doable with interrupts disabled from current thread.
872 * If the thread is on a different core, may not be easy to get right.
873 *
874 * TODO: There should be a function for this in timer.c
875 */
876
877 timer_sum = timer_grab(&thread->user_timer);
878 timer_sum += timer_grab(&thread->system_timer);
879 timer_delta = timer_sum - thread->vtimer_qos_save;
880
881 thread->vtimer_qos_save = timer_sum;
882
883 uint64_t* task_counter = NULL;
884
885 /* Update the task-level effective and requested qos stats atomically, because we don't have the task lock. */
886 switch (thread->effective_policy.thep_qos) {
887 case THREAD_QOS_UNSPECIFIED: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_default; break;
888 case THREAD_QOS_MAINTENANCE: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_maintenance; break;
889 case THREAD_QOS_BACKGROUND: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_background; break;
890 case THREAD_QOS_UTILITY: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_utility; break;
891 case THREAD_QOS_LEGACY: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_legacy; break;
892 case THREAD_QOS_USER_INITIATED: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_initiated; break;
893 case THREAD_QOS_USER_INTERACTIVE: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_interactive; break;
894 default:
895 panic("unknown effective QoS: %d", thread->effective_policy.thep_qos);
896 }
897
898 OSAddAtomic64(timer_delta, task_counter);
899
900 /* Update the task-level qos stats atomically, because we don't have the task lock. */
901 switch (thread->requested_policy.thrp_qos) {
902 case THREAD_QOS_UNSPECIFIED: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_default; break;
903 case THREAD_QOS_MAINTENANCE: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_maintenance; break;
904 case THREAD_QOS_BACKGROUND: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_background; break;
905 case THREAD_QOS_UTILITY: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_utility; break;
906 case THREAD_QOS_LEGACY: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_legacy; break;
907 case THREAD_QOS_USER_INITIATED: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_initiated; break;
908 case THREAD_QOS_USER_INTERACTIVE: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_interactive; break;
909 default:
910 panic("unknown requested QoS: %d", thread->requested_policy.thrp_qos);
911 }
912
913 OSAddAtomic64(timer_delta, task_counter);
914 }
915
916 /*
917 * called with no thread locks held
918 * may hold task lock
919 */
920 void
921 thread_update_qos_cpu_time(thread_t thread)
922 {
923 thread_mtx_lock(thread);
924
925 spl_t s = splsched();
926 thread_lock(thread);
927
928 thread_update_qos_cpu_time_locked(thread);
929
930 thread_unlock(thread);
931 splx(s);
932
933 thread_mtx_unlock(thread);
934 }
935
936 /*
937 * Calculate base priority from thread attributes, and set it on the thread
938 *
939 * Called with thread_lock and thread mutex held.
940 */
941 void
942 thread_recompute_priority(
943 thread_t thread)
944 {
945 integer_t priority;
946
947 if (thread->policy_reset) {
948 return;
949 }
950
951 if (thread->sched_mode == TH_MODE_REALTIME) {
952 sched_set_thread_base_priority(thread, BASEPRI_RTQUEUES);
953 return;
954 } else if (thread->effective_policy.thep_qos != THREAD_QOS_UNSPECIFIED) {
955 int qos = thread->effective_policy.thep_qos;
956 int qos_ui_is_urgent = thread->effective_policy.thep_qos_ui_is_urgent;
957 int qos_relprio = -(thread->effective_policy.thep_qos_relprio); /* stored in task policy inverted */
958 int qos_scaled_relprio;
959
960 assert(qos >= 0 && qos < THREAD_QOS_LAST);
961 assert(qos_relprio <= 0 && qos_relprio >= THREAD_QOS_MIN_TIER_IMPORTANCE);
962
963 priority = thread_qos_policy_params.qos_pri[qos];
964 qos_scaled_relprio = thread_qos_scaled_relative_priority(qos, qos_relprio);
965
966 if (qos == THREAD_QOS_USER_INTERACTIVE && qos_ui_is_urgent == 1) {
967 /* Bump priority 46 to 47 when in a frontmost app */
968 qos_scaled_relprio += 1;
969 }
970
971 /* TODO: factor in renice priority here? */
972
973 priority += qos_scaled_relprio;
974 } else {
975 if (thread->importance > MAXPRI) {
976 priority = MAXPRI;
977 } else if (thread->importance < -MAXPRI) {
978 priority = -MAXPRI;
979 } else {
980 priority = thread->importance;
981 }
982
983 priority += thread->task_priority;
984 }
985
986 priority = MAX(priority, thread->user_promotion_basepri);
987
988 /*
989 * Clamp priority back into the allowed range for this task.
990 * The initial priority value could be out of this range due to:
991 * Task clamped to BG or Utility (max-pri is 4, or 20)
992 * Task is user task (max-pri is 63)
993 * Task is kernel task (max-pri is 95)
994 * Note that thread->importance is user-settable to any integer
995 * via THREAD_PRECEDENCE_POLICY.
996 */
997 if (priority > thread->max_priority) {
998 priority = thread->max_priority;
999 } else if (priority < MINPRI) {
1000 priority = MINPRI;
1001 }
1002
1003 if (thread->saved_mode == TH_MODE_REALTIME &&
1004 thread->sched_flags & TH_SFLAG_FAILSAFE) {
1005 priority = DEPRESSPRI;
1006 }
1007
1008 if (thread->effective_policy.thep_terminated == TRUE) {
1009 /*
1010 * We temporarily want to override the expected priority to
1011 * ensure that the thread exits in a timely manner.
1012 * Note that this is allowed to exceed thread->max_priority
1013 * so that the thread is no longer clamped to background
1014 * during the final exit phase.
1015 */
1016 if (priority < thread->task_priority) {
1017 priority = thread->task_priority;
1018 }
1019 if (priority < BASEPRI_DEFAULT) {
1020 priority = BASEPRI_DEFAULT;
1021 }
1022 }
1023
1024 #if CONFIG_EMBEDDED
1025 /* No one can have a base priority less than MAXPRI_THROTTLE */
1026 if (priority < MAXPRI_THROTTLE) {
1027 priority = MAXPRI_THROTTLE;
1028 }
1029 #endif /* CONFIG_EMBEDDED */
1030
1031 sched_set_thread_base_priority(thread, priority);
1032 }
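/*
 * Worked example (illustrative, assuming BASEPRI_UTILITY == 20 and
 * MAXPRI_THROTTLE == 4): an effective UTILITY thread with relprio -4 starts
 * from qos_pri[UTILITY] == 20; the scaled relative priority over the (4, 20]
 * range is -(((20 - 4) * 4) >> 4) == -4, giving base priority 16 before the
 * max_priority / MINPRI clamps are applied.
 */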
1033
1034 /* Called with the task lock held, but not the thread mutex or spinlock */
1035 void
1036 thread_policy_update_tasklocked(
1037 thread_t thread,
1038 integer_t priority,
1039 integer_t max_priority,
1040 task_pend_token_t pend_token)
1041 {
1042 thread_mtx_lock(thread);
1043
1044 if (!thread->active || thread->policy_reset) {
1045 thread_mtx_unlock(thread);
1046 return;
1047 }
1048
1049 spl_t s = splsched();
1050 thread_lock(thread);
1051
1052 __unused
1053 integer_t old_max_priority = thread->max_priority;
1054
1055 thread->task_priority = priority;
1056 thread->max_priority = max_priority;
1057
1058 #if CONFIG_EMBEDDED
1059 /*
1060 * When backgrounding a thread, iOS has the semantic that
1061 * realtime and fixed priority threads should be demoted
1062 * to timeshare background threads.
1063 *
1064 * On OSX, realtime and fixed priority threads don't lose their mode.
1065 *
1066 * TODO: Do this inside the thread policy update routine in order to avoid double
1067 * remove/reinsert for a runnable thread
1068 */
1069 if ((max_priority <= MAXPRI_THROTTLE) && (old_max_priority > MAXPRI_THROTTLE)) {
1070 sched_thread_mode_demote(thread, TH_SFLAG_THROTTLED);
1071 } else if ((max_priority > MAXPRI_THROTTLE) && (old_max_priority <= MAXPRI_THROTTLE)) {
1072 sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
1073 }
1074 #endif /* CONFIG_EMBEDDED */
1075
1076 thread_policy_update_spinlocked(thread, TRUE, pend_token);
1077
1078 thread_unlock(thread);
1079 splx(s);
1080
1081 thread_mtx_unlock(thread);
1082 }
1083
1084 /*
1085 * Reset thread to default state in preparation for termination
1086 * Called with thread mutex locked
1087 *
1088 * Always called on current thread, so we don't need a run queue remove
1089 */
1090 void
1091 thread_policy_reset(
1092 thread_t thread)
1093 {
1094 spl_t s;
1095
1096 assert(thread == current_thread());
1097
1098 s = splsched();
1099 thread_lock(thread);
1100
1101 if (thread->sched_flags & TH_SFLAG_FAILSAFE) {
1102 sched_thread_mode_undemote(thread, TH_SFLAG_FAILSAFE);
1103 }
1104
1105 if (thread->sched_flags & TH_SFLAG_THROTTLED) {
1106 sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
1107 }
1108
1109 /* At this point, the various demotions should be inactive */
1110 assert(!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK));
1111 assert(!(thread->sched_flags & TH_SFLAG_THROTTLED));
1112 assert(!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK));
1113
1114 /* Reset thread back to task-default basepri and mode */
1115 sched_mode_t newmode = SCHED(initial_thread_sched_mode)(thread->task);
1116
1117 sched_set_thread_mode(thread, newmode);
1118
1119 thread->importance = 0;
1120
1121 /* Prevent further changes to thread base priority or mode */
1122 thread->policy_reset = 1;
1123
1124 sched_set_thread_base_priority(thread, thread->task_priority);
1125
1126 thread_unlock(thread);
1127 splx(s);
1128 }
1129
1130 kern_return_t
1131 thread_policy_get(
1132 thread_t thread,
1133 thread_policy_flavor_t flavor,
1134 thread_policy_t policy_info,
1135 mach_msg_type_number_t *count,
1136 boolean_t *get_default)
1137 {
1138 kern_return_t result = KERN_SUCCESS;
1139
1140 if (thread == THREAD_NULL) {
1141 return KERN_INVALID_ARGUMENT;
1142 }
1143
1144 thread_mtx_lock(thread);
1145 if (!thread->active) {
1146 thread_mtx_unlock(thread);
1147
1148 return KERN_TERMINATED;
1149 }
1150
1151 switch (flavor) {
1152 case THREAD_EXTENDED_POLICY:
1153 {
1154 boolean_t timeshare = TRUE;
1155
1156 if (!(*get_default)) {
1157 spl_t s = splsched();
1158 thread_lock(thread);
1159
1160 if ((thread->sched_mode != TH_MODE_REALTIME) &&
1161 (thread->saved_mode != TH_MODE_REALTIME)) {
1162 if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK)) {
1163 timeshare = (thread->sched_mode == TH_MODE_TIMESHARE) != 0;
1164 } else {
1165 timeshare = (thread->saved_mode == TH_MODE_TIMESHARE) != 0;
1166 }
1167 } else {
1168 *get_default = TRUE;
1169 }
1170
1171 thread_unlock(thread);
1172 splx(s);
1173 }
1174
1175 if (*count >= THREAD_EXTENDED_POLICY_COUNT) {
1176 thread_extended_policy_t info;
1177
1178 info = (thread_extended_policy_t)policy_info;
1179 info->timeshare = timeshare;
1180 }
1181
1182 break;
1183 }
1184
1185 case THREAD_TIME_CONSTRAINT_POLICY:
1186 {
1187 thread_time_constraint_policy_t info;
1188
1189 if (*count < THREAD_TIME_CONSTRAINT_POLICY_COUNT) {
1190 result = KERN_INVALID_ARGUMENT;
1191 break;
1192 }
1193
1194 info = (thread_time_constraint_policy_t)policy_info;
1195
1196 if (!(*get_default)) {
1197 spl_t s = splsched();
1198 thread_lock(thread);
1199
1200 if ((thread->sched_mode == TH_MODE_REALTIME) ||
1201 (thread->saved_mode == TH_MODE_REALTIME)) {
1202 info->period = thread->realtime.period;
1203 info->computation = thread->realtime.computation;
1204 info->constraint = thread->realtime.constraint;
1205 info->preemptible = thread->realtime.preemptible;
1206 } else {
1207 *get_default = TRUE;
1208 }
1209
1210 thread_unlock(thread);
1211 splx(s);
1212 }
1213
1214 if (*get_default) {
1215 info->period = 0;
1216 info->computation = default_timeshare_computation;
1217 info->constraint = default_timeshare_constraint;
1218 info->preemptible = TRUE;
1219 }
1220
1221 break;
1222 }
1223
1224 case THREAD_PRECEDENCE_POLICY:
1225 {
1226 thread_precedence_policy_t info;
1227
1228 if (*count < THREAD_PRECEDENCE_POLICY_COUNT) {
1229 result = KERN_INVALID_ARGUMENT;
1230 break;
1231 }
1232
1233 info = (thread_precedence_policy_t)policy_info;
1234
1235 if (!(*get_default)) {
1236 spl_t s = splsched();
1237 thread_lock(thread);
1238
1239 info->importance = thread->importance;
1240
1241 thread_unlock(thread);
1242 splx(s);
1243 } else {
1244 info->importance = 0;
1245 }
1246
1247 break;
1248 }
1249
1250 case THREAD_AFFINITY_POLICY:
1251 {
1252 thread_affinity_policy_t info;
1253
1254 if (!thread_affinity_is_supported()) {
1255 result = KERN_NOT_SUPPORTED;
1256 break;
1257 }
1258 if (*count < THREAD_AFFINITY_POLICY_COUNT) {
1259 result = KERN_INVALID_ARGUMENT;
1260 break;
1261 }
1262
1263 info = (thread_affinity_policy_t)policy_info;
1264
1265 if (!(*get_default)) {
1266 info->affinity_tag = thread_affinity_get(thread);
1267 } else {
1268 info->affinity_tag = THREAD_AFFINITY_TAG_NULL;
1269 }
1270
1271 break;
1272 }
1273
1274 case THREAD_POLICY_STATE:
1275 {
1276 thread_policy_state_t info;
1277
1278 if (*count < THREAD_POLICY_STATE_COUNT) {
1279 result = KERN_INVALID_ARGUMENT;
1280 break;
1281 }
1282
1283 /* Only root can get this info */
1284 if (current_task()->sec_token.val[0] != 0) {
1285 result = KERN_PROTECTION_FAILURE;
1286 break;
1287 }
1288
1289 info = (thread_policy_state_t)(void*)policy_info;
1290
1291 if (!(*get_default)) {
1292 info->flags = 0;
1293
1294 spl_t s = splsched();
1295 thread_lock(thread);
1296
1297 info->flags |= (thread->static_param ? THREAD_POLICY_STATE_FLAG_STATIC_PARAM : 0);
1298
1299 info->thps_requested_policy = *(uint64_t*)(void*)(&thread->requested_policy);
1300 info->thps_effective_policy = *(uint64_t*)(void*)(&thread->effective_policy);
1301
1302 info->thps_user_promotions = 0;
1303 info->thps_user_promotion_basepri = thread->user_promotion_basepri;
1304 info->thps_ipc_overrides = thread->ipc_overrides;
1305
1306 proc_get_thread_policy_bitfield(thread, info);
1307
1308 thread_unlock(thread);
1309 splx(s);
1310 } else {
1311 info->requested = 0;
1312 info->effective = 0;
1313 info->pending = 0;
1314 }
1315
1316 break;
1317 }
1318
1319 case THREAD_LATENCY_QOS_POLICY:
1320 {
1321 thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
1322 thread_latency_qos_t plqos;
1323
1324 if (*count < THREAD_LATENCY_QOS_POLICY_COUNT) {
1325 result = KERN_INVALID_ARGUMENT;
1326 break;
1327 }
1328
1329 if (*get_default) {
1330 plqos = 0;
1331 } else {
1332 plqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_LATENCY_QOS, NULL);
1333 }
1334
1335 info->thread_latency_qos_tier = qos_latency_policy_package(plqos);
1336 }
1337 break;
1338
1339 case THREAD_THROUGHPUT_QOS_POLICY:
1340 {
1341 thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
1342 thread_throughput_qos_t ptqos;
1343
1344 if (*count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
1345 result = KERN_INVALID_ARGUMENT;
1346 break;
1347 }
1348
1349 if (*get_default) {
1350 ptqos = 0;
1351 } else {
1352 ptqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_THROUGH_QOS, NULL);
1353 }
1354
1355 info->thread_throughput_qos_tier = qos_throughput_policy_package(ptqos);
1356 }
1357 break;
1358
1359 case THREAD_QOS_POLICY:
1360 {
1361 thread_qos_policy_t info = (thread_qos_policy_t)policy_info;
1362
1363 if (*count < THREAD_QOS_POLICY_COUNT) {
1364 result = KERN_INVALID_ARGUMENT;
1365 break;
1366 }
1367
1368 if (!(*get_default)) {
1369 int relprio_value = 0;
1370 info->qos_tier = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
1371 TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
1372
1373 info->tier_importance = -relprio_value;
1374 } else {
1375 info->qos_tier = THREAD_QOS_UNSPECIFIED;
1376 info->tier_importance = 0;
1377 }
1378
1379 break;
1380 }
1381
1382 default:
1383 result = KERN_INVALID_ARGUMENT;
1384 break;
1385 }
1386
1387 thread_mtx_unlock(thread);
1388
1389 return result;
1390 }
1391
1392 void
1393 thread_policy_create(thread_t thread)
1394 {
1395 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1396 (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_START,
1397 thread_tid(thread), theffective_0(thread),
1398 theffective_1(thread), thread->base_pri, 0);
1399
1400 /* We pass a pend token but ignore it */
1401 struct task_pend_token pend_token = {};
1402
1403 thread_policy_update_internal_spinlocked(thread, TRUE, &pend_token);
1404
1405 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1406 (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_END,
1407 thread_tid(thread), theffective_0(thread),
1408 theffective_1(thread), thread->base_pri, 0);
1409 }
1410
1411 static void
1412 thread_policy_update_spinlocked(thread_t thread, boolean_t recompute_priority, task_pend_token_t pend_token)
1413 {
1414 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1415 (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD) | DBG_FUNC_START),
1416 thread_tid(thread), theffective_0(thread),
1417 theffective_1(thread), thread->base_pri, 0);
1418
1419 thread_policy_update_internal_spinlocked(thread, recompute_priority, pend_token);
1420
1421 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1422 (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD)) | DBG_FUNC_END,
1423 thread_tid(thread), theffective_0(thread),
1424 theffective_1(thread), thread->base_pri, 0);
1425 }
1426
1427
1428
1429 /*
1430 * One thread state update function TO RULE THEM ALL
1431 *
1432 * This function updates the thread effective policy fields
1433 * and pushes the results to the relevant subsystems.
1434 *
1435 * Pended actions that must be run later are flagged in the caller-supplied pend_token.
1436 *
1437 * Called with thread spinlock locked, task may be locked, thread mutex may be locked
1438 */
1439 static void
1440 thread_policy_update_internal_spinlocked(thread_t thread, boolean_t recompute_priority,
1441 task_pend_token_t pend_token)
1442 {
1443 /*
1444 * Step 1:
1445 * Gather requested policy and effective task state
1446 */
1447
1448 struct thread_requested_policy requested = thread->requested_policy;
1449 struct task_effective_policy task_effective = thread->task->effective_policy;
1450
1451 /*
1452 * Step 2:
1453 * Calculate new effective policies from requested policy, task and thread state
1454 * Rules:
1455 * Don't change requested, it won't take effect
1456 */
1457
1458 struct thread_effective_policy next = {};
1459
1460 next.thep_qos_ui_is_urgent = task_effective.tep_qos_ui_is_urgent;
1461
1462 uint32_t next_qos = requested.thrp_qos;
1463
1464 if (requested.thrp_qos != THREAD_QOS_UNSPECIFIED) {
1465 next_qos = MAX(requested.thrp_qos_override, next_qos);
1466 next_qos = MAX(requested.thrp_qos_promote, next_qos);
1467 next_qos = MAX(requested.thrp_qos_ipc_override, next_qos);
1468 next_qos = MAX(requested.thrp_qos_workq_override, next_qos);
1469 }
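/*
 * Illustrative case: a requested LEGACY thread with a workqueue override of
 * USER_INITIATED resolves to next_qos == USER_INITIATED here, while a thread
 * whose requested QoS is UNSPECIFIED ignores the overrides above entirely.
 */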
1470
1471 next.thep_qos = next_qos;
1472
1473 /* A task clamp will result in an effective QoS even when requested is UNSPECIFIED */
1474 if (task_effective.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
1475 if (next.thep_qos != THREAD_QOS_UNSPECIFIED) {
1476 next.thep_qos = MIN(task_effective.tep_qos_clamp, next.thep_qos);
1477 } else {
1478 next.thep_qos = task_effective.tep_qos_clamp;
1479 }
1480 }
1481
1482 /*
1483 * Extract outbound-promotion QoS before applying task ceiling or BG clamp
1484 * This allows QoS promotions to work properly even after the process is unclamped.
1485 */
1486 next.thep_qos_promote = next.thep_qos;
1487
1488 /* The ceiling only applies to threads that are in the QoS world */
1489 if (task_effective.tep_qos_ceiling != THREAD_QOS_UNSPECIFIED &&
1490 next.thep_qos != THREAD_QOS_UNSPECIFIED) {
1491 next.thep_qos = MIN(task_effective.tep_qos_ceiling, next.thep_qos);
1492 }
1493
1494 /* The requested sync ipc qos override is expected to never be set here */
1495 assert(requested.thrp_qos_sync_ipc_override == THREAD_QOS_UNSPECIFIED);
1496
1497 /*
1498 * The QoS relative priority is only applicable when the original programmer's
1499 * intended (requested) QoS is in effect. When the QoS is clamped (e.g.
1500 * USER_INITIATED-13REL clamped to UTILITY), the relative priority is not honored,
1501 * since otherwise it would be lower than unclamped threads. Similarly, in the
1502 * presence of boosting, the programmer doesn't know what other actors
1503 * are boosting the thread.
1504 */
1505 if ((requested.thrp_qos != THREAD_QOS_UNSPECIFIED) &&
1506 (requested.thrp_qos == next.thep_qos) &&
1507 (requested.thrp_qos_override == THREAD_QOS_UNSPECIFIED)) {
1508 next.thep_qos_relprio = requested.thrp_qos_relprio;
1509 } else {
1510 next.thep_qos_relprio = 0;
1511 }
1512
1513 /* Calculate DARWIN_BG */
1514 boolean_t wants_darwinbg = FALSE;
1515 boolean_t wants_all_sockets_bg = FALSE; /* Do I want my existing sockets to be bg */
1516
1517 /*
1518 * If DARWIN_BG has been requested at either level, it's engaged.
1519 * darwinbg threads always create bg sockets,
1520 * but only some types of darwinbg change the sockets
1521 * after they're created
1522 */
1523 if (requested.thrp_int_darwinbg || requested.thrp_ext_darwinbg) {
1524 wants_all_sockets_bg = wants_darwinbg = TRUE;
1525 }
1526
1527 if (requested.thrp_pidbind_bg) {
1528 wants_all_sockets_bg = wants_darwinbg = TRUE;
1529 }
1530
1531 if (task_effective.tep_darwinbg) {
1532 wants_darwinbg = TRUE;
1533 }
1534
1535 if (next.thep_qos == THREAD_QOS_BACKGROUND ||
1536 next.thep_qos == THREAD_QOS_MAINTENANCE) {
1537 wants_darwinbg = TRUE;
1538 }
1539
1540 /* Calculate side effects of DARWIN_BG */
1541
1542 if (wants_darwinbg) {
1543 next.thep_darwinbg = 1;
1544 }
1545
1546 if (next.thep_darwinbg || task_effective.tep_new_sockets_bg) {
1547 next.thep_new_sockets_bg = 1;
1548 }
1549
1550 /* Don't use task_effective.tep_all_sockets_bg here */
1551 if (wants_all_sockets_bg) {
1552 next.thep_all_sockets_bg = 1;
1553 }
1554
1555 /* darwinbg implies background QOS (or lower) */
1556 if (next.thep_darwinbg &&
1557 (next.thep_qos > THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_UNSPECIFIED)) {
1558 next.thep_qos = THREAD_QOS_BACKGROUND;
1559 next.thep_qos_relprio = 0;
1560 }
1561
1562 /* Calculate IO policy */
1563
1564 int iopol = THROTTLE_LEVEL_TIER0;
1565
1566 /* Factor in the task's IO policy */
1567 if (next.thep_darwinbg) {
1568 iopol = MAX(iopol, task_effective.tep_bg_iotier);
1569 }
1570
1571 iopol = MAX(iopol, task_effective.tep_io_tier);
1572
1573 /* Look up the associated IO tier value for the QoS class */
1574 iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.thep_qos]);
1575
1576 iopol = MAX(iopol, requested.thrp_int_iotier);
1577 iopol = MAX(iopol, requested.thrp_ext_iotier);
1578
1579 next.thep_io_tier = iopol;
1580
1581 /*
1582 * If a QoS override is causing IO to go into a lower tier, we also set
1583 * the passive bit so that a thread doesn't end up stuck in its own throttle
1584 * window when the override goes away.
1585 */
1586 boolean_t qos_io_override_active = FALSE;
1587 if (thread_qos_policy_params.qos_iotier[next.thep_qos] <
1588 thread_qos_policy_params.qos_iotier[requested.thrp_qos]) {
1589 qos_io_override_active = TRUE;
1590 }
1591
1592 /* Calculate Passive IO policy */
1593 if (requested.thrp_ext_iopassive ||
1594 requested.thrp_int_iopassive ||
1595 qos_io_override_active ||
1596 task_effective.tep_io_passive) {
1597 next.thep_io_passive = 1;
1598 }
1599
1600 /* Calculate timer QOS */
1601 uint32_t latency_qos = requested.thrp_latency_qos;
1602
1603 latency_qos = MAX(latency_qos, task_effective.tep_latency_qos);
1604 latency_qos = MAX(latency_qos, thread_qos_policy_params.qos_latency_qos[next.thep_qos]);
1605
1606 next.thep_latency_qos = latency_qos;
1607
1608 /* Calculate throughput QOS */
1609 uint32_t through_qos = requested.thrp_through_qos;
1610
1611 through_qos = MAX(through_qos, task_effective.tep_through_qos);
1612 through_qos = MAX(through_qos, thread_qos_policy_params.qos_through_qos[next.thep_qos]);
1613
1614 next.thep_through_qos = through_qos;
1615
1616 if (task_effective.tep_terminated || requested.thrp_terminated) {
1617 /* Shoot down the throttles that slow down exit or response to SIGTERM */
1618 next.thep_terminated = 1;
1619 next.thep_darwinbg = 0;
1620 next.thep_io_tier = THROTTLE_LEVEL_TIER0;
1621 next.thep_qos = THREAD_QOS_UNSPECIFIED;
1622 next.thep_latency_qos = LATENCY_QOS_TIER_UNSPECIFIED;
1623 next.thep_through_qos = THROUGHPUT_QOS_TIER_UNSPECIFIED;
1624 }
1625
1626 /*
1627 * Step 3:
1628 * Swap out old policy for new policy
1629 */
1630
1631 struct thread_effective_policy prev = thread->effective_policy;
1632
1633 thread_update_qos_cpu_time_locked(thread);
1634
1635 /* This is the point where the new values become visible to other threads */
1636 thread->effective_policy = next;
1637
1638 /*
1639 * Step 4:
1640 * Pend updates that can't be done while holding the thread lock
1641 */
1642
1643 if (prev.thep_all_sockets_bg != next.thep_all_sockets_bg) {
1644 pend_token->tpt_update_sockets = 1;
1645 }
1646
1647 /* TODO: Doesn't this only need to be done if the throttle went up? */
1648 if (prev.thep_io_tier != next.thep_io_tier) {
1649 pend_token->tpt_update_throttle = 1;
1650 }
1651
1652 /*
1653 * Check for the attributes that sfi_thread_classify() consults,
1654 * and trigger SFI re-evaluation.
1655 */
1656 if (prev.thep_qos != next.thep_qos ||
1657 prev.thep_darwinbg != next.thep_darwinbg) {
1658 pend_token->tpt_update_thread_sfi = 1;
1659 }
1660
1661 /*
1662 * Step 5:
1663 * Update other subsystems as necessary if something has changed
1664 */
1665
1666 /* Check for the attributes that thread_recompute_priority() consults */
1667 if (prev.thep_qos != next.thep_qos ||
1668 prev.thep_qos_relprio != next.thep_qos_relprio ||
1669 prev.thep_qos_ui_is_urgent != next.thep_qos_ui_is_urgent ||
1670 prev.thep_terminated != next.thep_terminated ||
1671 pend_token->tpt_force_recompute_pri == 1 ||
1672 recompute_priority) {
1673 thread_recompute_priority(thread);
1674 }
1675 }
1676
1677
1678 /*
1679 * Initiate a thread policy state transition on a thread with its TID
1680 * Useful if you cannot guarantee the thread won't get terminated
1681 * Precondition: No locks are held
1682 * Will take task lock - using the non-tid variant is faster
1683 * if you already have a thread ref.
1684 */
1685 void
1686 proc_set_thread_policy_with_tid(task_t task,
1687 uint64_t tid,
1688 int category,
1689 int flavor,
1690 int value)
1691 {
1692 /* takes task lock, returns ref'ed thread or NULL */
1693 thread_t thread = task_findtid(task, tid);
1694
1695 if (thread == THREAD_NULL) {
1696 return;
1697 }
1698
1699 proc_set_thread_policy(thread, category, flavor, value);
1700
1701 thread_deallocate(thread);
1702 }
1703
1704 /*
1705 * Initiate a thread policy transition on a thread
1706 * This path supports networking transitions (i.e. darwinbg transitions)
1707 * Precondition: No locks are held
1708 */
1709 void
1710 proc_set_thread_policy(thread_t thread,
1711 int category,
1712 int flavor,
1713 int value)
1714 {
1715 struct task_pend_token pend_token = {};
1716
1717 thread_mtx_lock(thread);
1718
1719 proc_set_thread_policy_locked(thread, category, flavor, value, 0, &pend_token);
1720
1721 thread_mtx_unlock(thread);
1722
1723 thread_policy_update_complete_unlocked(thread, &pend_token);
1724 }
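/*
 * Illustrative in-kernel caller (hypothetical): marking a thread as
 * terminated so the exit path can shed BG throttles would look like
 *
 *     proc_set_thread_policy(thread, TASK_POLICY_ATTRIBUTE,
 *         TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);
 */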
1725
1726 /*
1727 * Do the things that can't be done while holding a thread mutex.
1728 * These are set up to call back into thread policy to get the latest value,
1729 * so they don't have to be synchronized with the update.
1730 * The only required semantic is 'call this sometime after updating effective policy'
1731 *
1732 * Precondition: Thread mutex is not held
1733 *
1734 * This may be called with the task lock held, but in that case it won't be
1735 * called with tpt_update_sockets set.
1736 */
1737 void
1738 thread_policy_update_complete_unlocked(thread_t thread, task_pend_token_t pend_token)
1739 {
1740 #ifdef MACH_BSD
1741 if (pend_token->tpt_update_sockets) {
1742 proc_apply_task_networkbg(thread->task->bsd_info, thread);
1743 }
1744 #endif /* MACH_BSD */
1745
1746 if (pend_token->tpt_update_throttle) {
1747 rethrottle_thread(thread->uthread);
1748 }
1749
1750 if (pend_token->tpt_update_thread_sfi) {
1751 sfi_reevaluate(thread);
1752 }
1753 }
1754
1755 /*
1756 * Set and update thread policy
1757 * Thread mutex might be held
1758 */
1759 static void
1760 proc_set_thread_policy_locked(thread_t thread,
1761 int category,
1762 int flavor,
1763 int value,
1764 int value2,
1765 task_pend_token_t pend_token)
1766 {
1767 spl_t s = splsched();
1768 thread_lock(thread);
1769
1770 proc_set_thread_policy_spinlocked(thread, category, flavor, value, value2, pend_token);
1771
1772 thread_unlock(thread);
1773 splx(s);
1774 }
1775
1776 /*
1777 * Set and update thread policy
1778 * Thread spinlock is held
1779 */
1780 static void
1781 proc_set_thread_policy_spinlocked(thread_t thread,
1782 int category,
1783 int flavor,
1784 int value,
1785 int value2,
1786 task_pend_token_t pend_token)
1787 {
1788 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1789 (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_START,
1790 thread_tid(thread), threquested_0(thread),
1791 threquested_1(thread), value, 0);
1792
1793 thread_set_requested_policy_spinlocked(thread, category, flavor, value, value2);
1794
1795 thread_policy_update_spinlocked(thread, FALSE, pend_token);
1796
1797 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1798 (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_END,
1799 thread_tid(thread), threquested_0(thread),
1800 threquested_1(thread), tpending(pend_token), 0);
1801 }
1802
1803 /*
1804 * Set the requested state for a specific flavor to a specific value.
1805 */
1806 static void
1807 thread_set_requested_policy_spinlocked(thread_t thread,
1808 int category,
1809 int flavor,
1810 int value,
1811 int value2)
1812 {
1813 int tier, passive;
1814
1815 struct thread_requested_policy requested = thread->requested_policy;
1816
1817 switch (flavor) {
1818 /* Category: EXTERNAL and INTERNAL, thread and task */
1819
1820 case TASK_POLICY_DARWIN_BG:
1821 if (category == TASK_POLICY_EXTERNAL) {
1822 requested.thrp_ext_darwinbg = value;
1823 } else {
1824 requested.thrp_int_darwinbg = value;
1825 }
1826 break;
1827
1828 case TASK_POLICY_IOPOL:
1829 proc_iopol_to_tier(value, &tier, &passive);
1830 if (category == TASK_POLICY_EXTERNAL) {
1831 requested.thrp_ext_iotier = tier;
1832 requested.thrp_ext_iopassive = passive;
1833 } else {
1834 requested.thrp_int_iotier = tier;
1835 requested.thrp_int_iopassive = passive;
1836 }
1837 break;
1838
1839 case TASK_POLICY_IO:
1840 if (category == TASK_POLICY_EXTERNAL) {
1841 requested.thrp_ext_iotier = value;
1842 } else {
1843 requested.thrp_int_iotier = value;
1844 }
1845 break;
1846
1847 case TASK_POLICY_PASSIVE_IO:
1848 if (category == TASK_POLICY_EXTERNAL) {
1849 requested.thrp_ext_iopassive = value;
1850 } else {
1851 requested.thrp_int_iopassive = value;
1852 }
1853 break;
1854
1855 /* Category: ATTRIBUTE, thread only */
1856
1857 case TASK_POLICY_PIDBIND_BG:
1858 assert(category == TASK_POLICY_ATTRIBUTE);
1859 requested.thrp_pidbind_bg = value;
1860 break;
1861
1862 case TASK_POLICY_LATENCY_QOS:
1863 assert(category == TASK_POLICY_ATTRIBUTE);
1864 requested.thrp_latency_qos = value;
1865 break;
1866
1867 case TASK_POLICY_THROUGH_QOS:
1868 assert(category == TASK_POLICY_ATTRIBUTE);
1869 requested.thrp_through_qos = value;
1870 break;
1871
1872 case TASK_POLICY_QOS:
1873 assert(category == TASK_POLICY_ATTRIBUTE);
1874 requested.thrp_qos = value;
1875 break;
1876
1877 case TASK_POLICY_QOS_OVERRIDE:
1878 assert(category == TASK_POLICY_ATTRIBUTE);
1879 requested.thrp_qos_override = value;
1880 break;
1881
1882 case TASK_POLICY_QOS_AND_RELPRIO:
1883 assert(category == TASK_POLICY_ATTRIBUTE);
1884 requested.thrp_qos = value;
1885 requested.thrp_qos_relprio = value2;
1886 DTRACE_BOOST3(qos_set, uint64_t, thread->thread_id, int, requested.thrp_qos, int, requested.thrp_qos_relprio);
1887 break;
1888
1889 case TASK_POLICY_QOS_WORKQ_OVERRIDE:
1890 assert(category == TASK_POLICY_ATTRIBUTE);
1891 requested.thrp_qos_workq_override = value;
1892 break;
1893
1894 case TASK_POLICY_QOS_PROMOTE:
1895 assert(category == TASK_POLICY_ATTRIBUTE);
1896 requested.thrp_qos_promote = value;
1897 break;
1898
1899 case TASK_POLICY_QOS_IPC_OVERRIDE:
1900 assert(category == TASK_POLICY_ATTRIBUTE);
1901 requested.thrp_qos_ipc_override = value;
1902 break;
1903
1904 case TASK_POLICY_TERMINATED:
1905 assert(category == TASK_POLICY_ATTRIBUTE);
1906 requested.thrp_terminated = value;
1907 break;
1908
1909 default:
1910 panic("unknown task policy: %d %d %d", category, flavor, value);
1911 break;
1912 }
1913
1914 thread->requested_policy = requested;
1915 }
1916
1917 /*
1918 * Gets what you set. Effective values may be different.
1919 * Precondition: No locks are held
1920 */
1921 int
1922 proc_get_thread_policy(thread_t thread,
1923 int category,
1924 int flavor)
1925 {
1926 int value = 0;
1927 thread_mtx_lock(thread);
1928 value = proc_get_thread_policy_locked(thread, category, flavor, NULL);
1929 thread_mtx_unlock(thread);
1930 return value;
1931 }
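
/*
 * Illustrative sketch (not part of the original source): how an in-kernel
 * caller might read back a requested thread-policy attribute using the
 * getter above. Kept under #if 0; the choice of category and flavor below
 * is only an example.
 */
#if 0 /* example sketch only */
static void
example_read_requested_policy(thread_t thread)
{
	/* Requested (not effective) externally-applied DARWIN_BG state: 1 or 0 */
	int ext_bg = proc_get_thread_policy(thread, TASK_POLICY_EXTERNAL, TASK_POLICY_DARWIN_BG);

	/* Requested QoS class for the thread */
	int qos = proc_get_thread_policy(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS);

	(void)ext_bg;
	(void)qos;
}
#endif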
1932
1933 static int
1934 proc_get_thread_policy_locked(thread_t thread,
1935 int category,
1936 int flavor,
1937 int* value2)
1938 {
1939 int value = 0;
1940
1941 spl_t s = splsched();
1942 thread_lock(thread);
1943
1944 value = thread_get_requested_policy_spinlocked(thread, category, flavor, value2);
1945
1946 thread_unlock(thread);
1947 splx(s);
1948
1949 return value;
1950 }
1951
1952 /*
1953 * Gets what you set. Effective values may be different.
1954 */
1955 static int
1956 thread_get_requested_policy_spinlocked(thread_t thread,
1957 int category,
1958 int flavor,
1959 int* value2)
1960 {
1961 int value = 0;
1962
1963 struct thread_requested_policy requested = thread->requested_policy;
1964
1965 switch (flavor) {
1966 case TASK_POLICY_DARWIN_BG:
1967 if (category == TASK_POLICY_EXTERNAL) {
1968 value = requested.thrp_ext_darwinbg;
1969 } else {
1970 value = requested.thrp_int_darwinbg;
1971 }
1972 break;
1973 case TASK_POLICY_IOPOL:
1974 if (category == TASK_POLICY_EXTERNAL) {
1975 value = proc_tier_to_iopol(requested.thrp_ext_iotier,
1976 requested.thrp_ext_iopassive);
1977 } else {
1978 value = proc_tier_to_iopol(requested.thrp_int_iotier,
1979 requested.thrp_int_iopassive);
1980 }
1981 break;
1982 case TASK_POLICY_IO:
1983 if (category == TASK_POLICY_EXTERNAL) {
1984 value = requested.thrp_ext_iotier;
1985 } else {
1986 value = requested.thrp_int_iotier;
1987 }
1988 break;
1989 case TASK_POLICY_PASSIVE_IO:
1990 if (category == TASK_POLICY_EXTERNAL) {
1991 value = requested.thrp_ext_iopassive;
1992 } else {
1993 value = requested.thrp_int_iopassive;
1994 }
1995 break;
1996 case TASK_POLICY_QOS:
1997 assert(category == TASK_POLICY_ATTRIBUTE);
1998 value = requested.thrp_qos;
1999 break;
2000 case TASK_POLICY_QOS_OVERRIDE:
2001 assert(category == TASK_POLICY_ATTRIBUTE);
2002 value = requested.thrp_qos_override;
2003 break;
2004 case TASK_POLICY_LATENCY_QOS:
2005 assert(category == TASK_POLICY_ATTRIBUTE);
2006 value = requested.thrp_latency_qos;
2007 break;
2008 case TASK_POLICY_THROUGH_QOS:
2009 assert(category == TASK_POLICY_ATTRIBUTE);
2010 value = requested.thrp_through_qos;
2011 break;
2012 case TASK_POLICY_QOS_WORKQ_OVERRIDE:
2013 assert(category == TASK_POLICY_ATTRIBUTE);
2014 value = requested.thrp_qos_workq_override;
2015 break;
2016 case TASK_POLICY_QOS_AND_RELPRIO:
2017 assert(category == TASK_POLICY_ATTRIBUTE);
2018 assert(value2 != NULL);
2019 value = requested.thrp_qos;
2020 *value2 = requested.thrp_qos_relprio;
2021 break;
2022 case TASK_POLICY_QOS_PROMOTE:
2023 assert(category == TASK_POLICY_ATTRIBUTE);
2024 value = requested.thrp_qos_promote;
2025 break;
2026 case TASK_POLICY_QOS_IPC_OVERRIDE:
2027 assert(category == TASK_POLICY_ATTRIBUTE);
2028 value = requested.thrp_qos_ipc_override;
2029 break;
2030 case TASK_POLICY_TERMINATED:
2031 assert(category == TASK_POLICY_ATTRIBUTE);
2032 value = requested.thrp_terminated;
2033 break;
2034
2035 default:
2036 panic("unknown policy_flavor %d", flavor);
2037 break;
2038 }
2039
2040 return value;
2041 }
2042
2043 /*
2044 * Gets what is actually in effect, for subsystems which pull policy instead of receiving updates.
2045 *
2046 * NOTE: This accessor does not take the task or thread lock.
2047 * Notifications of state updates need to be externally synchronized with state queries.
2048 * This routine *MUST* remain interrupt safe, as it is potentially invoked
2049 * within the context of a timer interrupt.
2050 *
2051 * TODO: I think we can get away with architecting this such that we don't need to look at the task ever.
2052 * Is that a good idea? Maybe it's best to avoid evaluate-all-the-threads updates.
2053 * I don't think that cost is worth not having the right answer.
2054 */
2055 int
2056 proc_get_effective_thread_policy(thread_t thread,
2057 int flavor)
2058 {
2059 int value = 0;
2060
2061 switch (flavor) {
2062 case TASK_POLICY_DARWIN_BG:
2063 /*
2064 * This call is used within the timer layer, as well as for
2065 * prioritizing requests to the graphics system.
2066 * It also informs SFI and originator-bg-state.
2067 * Returns 1 for background mode, 0 for normal mode
2068 */
2069
2070 value = thread->effective_policy.thep_darwinbg ? 1 : 0;
2071 break;
2072 case TASK_POLICY_IO:
2073 /*
2074 * The I/O system calls here to find out what throttling tier to apply to an operation.
2075 * Returns THROTTLE_LEVEL_* values
2076 */
2077 value = thread->effective_policy.thep_io_tier;
2078 if (thread->iotier_override != THROTTLE_LEVEL_NONE) {
2079 value = MIN(value, thread->iotier_override);
2080 }
2081 break;
2082 case TASK_POLICY_PASSIVE_IO:
2083 /*
2084 * The I/O system calls here to find out whether an operation should be passive.
2085 * (i.e. not cause operations with lower throttle tiers to be throttled)
2086 * Returns 1 for passive mode, 0 for normal mode
2087 *
2088 * If an override is causing IO to go into a lower tier, we also set
2089 * the passive bit so that a thread doesn't end up stuck in its own throttle
2090 * window when the override goes away.
2091 */
2092 value = thread->effective_policy.thep_io_passive ? 1 : 0;
2093 if (thread->iotier_override != THROTTLE_LEVEL_NONE &&
2094 thread->iotier_override < thread->effective_policy.thep_io_tier) {
2095 value = 1;
2096 }
2097 break;
2098 case TASK_POLICY_ALL_SOCKETS_BG:
2099 /*
2100 * do_background_socket() calls this to determine whether
2101 * it should change the thread's sockets
2102 * Returns 1 for background mode, 0 for normal mode
2103 * This consults both thread and task so un-DBGing a thread while the task is BG
2104 * doesn't get you out of the network throttle.
2105 */
2106 value = (thread->effective_policy.thep_all_sockets_bg ||
2107 thread->task->effective_policy.tep_all_sockets_bg) ? 1 : 0;
2108 break;
2109 case TASK_POLICY_NEW_SOCKETS_BG:
2110 /*
2111 * socreate() calls this to determine if it should mark a new socket as background
2112 * Returns 1 for background mode, 0 for normal mode
2113 */
2114 value = thread->effective_policy.thep_new_sockets_bg ? 1 : 0;
2115 break;
2116 case TASK_POLICY_LATENCY_QOS:
2117 /*
2118 * timer arming calls into here to find out the timer coalescing level
2119 * Returns a latency QoS tier (0-6)
2120 */
2121 value = thread->effective_policy.thep_latency_qos;
2122 break;
2123 case TASK_POLICY_THROUGH_QOS:
2124 /*
2125 * This value is passed into the urgency callout from the scheduler
2126 * to the performance management subsystem.
2127 *
2128 * Returns a throughput QoS tier (0-6)
2129 */
2130 value = thread->effective_policy.thep_through_qos;
2131 break;
2132 case TASK_POLICY_QOS:
2133 /*
2134 * This is communicated to the performance management layer and SFI.
2135 *
2136 * Returns a QoS policy tier
2137 */
2138 value = thread->effective_policy.thep_qos;
2139 break;
2140 default:
2141 panic("unknown thread policy flavor %d", flavor);
2142 break;
2143 }
2144
2145 return value;
2146 }
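
/*
 * Illustrative sketch (not part of the original source): how a subsystem
 * that pulls policy, e.g. an I/O issuing path, might query the effective
 * throttle tier and passivity for the current thread. No locks are taken,
 * mirroring the lock-free contract documented above.
 */
#if 0 /* example sketch only */
static void
example_query_effective_io_policy(void)
{
	thread_t self = current_thread();

	/* THROTTLE_LEVEL_* tier to apply to the operation */
	int tier = proc_get_effective_thread_policy(self, TASK_POLICY_IO);

	/* 1 if the I/O should be issued passively, 0 otherwise */
	int passive = proc_get_effective_thread_policy(self, TASK_POLICY_PASSIVE_IO);

	(void)tier;
	(void)passive;
}
#endif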
2147
2148
2149 /*
2150 * (integer_t) casts limit the number of bits we can fit here.
2151 * This interface is deprecated and presumably superseded by the _EXT struct.
2152 */
2153 static void
2154 proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info)
2155 {
2156 uint64_t bits = 0;
2157 struct thread_requested_policy requested = thread->requested_policy;
2158
2159 bits |= (requested.thrp_int_darwinbg ? POLICY_REQ_INT_DARWIN_BG : 0);
2160 bits |= (requested.thrp_ext_darwinbg ? POLICY_REQ_EXT_DARWIN_BG : 0);
2161 bits |= (requested.thrp_int_iotier ? (((uint64_t)requested.thrp_int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0);
2162 bits |= (requested.thrp_ext_iotier ? (((uint64_t)requested.thrp_ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0);
2163 bits |= (requested.thrp_int_iopassive ? POLICY_REQ_INT_PASSIVE_IO : 0);
2164 bits |= (requested.thrp_ext_iopassive ? POLICY_REQ_EXT_PASSIVE_IO : 0);
2165
2166 bits |= (requested.thrp_qos ? (((uint64_t)requested.thrp_qos) << POLICY_REQ_TH_QOS_SHIFT) : 0);
2167 bits |= (requested.thrp_qos_override ? (((uint64_t)requested.thrp_qos_override) << POLICY_REQ_TH_QOS_OVER_SHIFT) : 0);
2168
2169 bits |= (requested.thrp_pidbind_bg ? POLICY_REQ_PIDBIND_BG : 0);
2170
2171 bits |= (requested.thrp_latency_qos ? (((uint64_t)requested.thrp_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0);
2172 bits |= (requested.thrp_through_qos ? (((uint64_t)requested.thrp_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0);
2173
2174 info->requested = (integer_t) bits;
2175 bits = 0;
2176
2177 struct thread_effective_policy effective = thread->effective_policy;
2178
2179 bits |= (effective.thep_darwinbg ? POLICY_EFF_DARWIN_BG : 0);
2180
2181 bits |= (effective.thep_io_tier ? (((uint64_t)effective.thep_io_tier) << POLICY_EFF_IO_TIER_SHIFT) : 0);
2182 bits |= (effective.thep_io_passive ? POLICY_EFF_IO_PASSIVE : 0);
2183 bits |= (effective.thep_all_sockets_bg ? POLICY_EFF_ALL_SOCKETS_BG : 0);
2184 bits |= (effective.thep_new_sockets_bg ? POLICY_EFF_NEW_SOCKETS_BG : 0);
2185
2186 bits |= (effective.thep_qos ? (((uint64_t)effective.thep_qos) << POLICY_EFF_TH_QOS_SHIFT) : 0);
2187
2188 bits |= (effective.thep_latency_qos ? (((uint64_t)effective.thep_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0);
2189 bits |= (effective.thep_through_qos ? (((uint64_t)effective.thep_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0);
2190
2191 info->effective = (integer_t)bits;
2192 bits = 0;
2193
2194 info->pending = 0;
2195 }
2196
2197 /*
2198 * Sneakily trace either the task and thread requested
2199 * or just the thread requested, depending on whether we have enough room.
2200 * We do have room on LP64. On LP32, we have to split it between two uintptr_t's.
2201 *
2202 * LP32 LP64
2203 * threquested_0(thread) thread[0] task[0]
2204 * threquested_1(thread) thread[1] thread[0]
2205 *
2206 */
2207
2208 uintptr_t
2209 threquested_0(thread_t thread)
2210 {
2211 static_assert(sizeof(struct thread_requested_policy) == sizeof(uint64_t), "size invariant violated");
2212
2213 uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
2214
2215 return raw[0];
2216 }
2217
2218 uintptr_t
2219 threquested_1(thread_t thread)
2220 {
2221 #if defined __LP64__
2222 return *(uintptr_t*)&thread->task->requested_policy;
2223 #else
2224 uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
2225 return raw[1];
2226 #endif
2227 }
2228
2229 uintptr_t
2230 theffective_0(thread_t thread)
2231 {
2232 static_assert(sizeof(struct thread_effective_policy) == sizeof(uint64_t), "size invariant violated");
2233
2234 uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
2235 return raw[0];
2236 }
2237
2238 uintptr_t
2239 theffective_1(thread_t thread)
2240 {
2241 #if defined __LP64__
2242 return *(uintptr_t*)&thread->task->effective_policy;
2243 #else
2244 uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
2245 return raw[1];
2246 #endif
2247 }
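
/*
 * Illustrative sketch (not part of the original source): these accessors are
 * intended as raw tracepoint arguments, mirroring the existing
 * KERNEL_DEBUG_CONSTANT_IST call sites earlier in this file. The debug code
 * of 0 below is a placeholder, not a real trace code.
 */
#if 0 /* example sketch only */
static void
example_trace_policy_words(thread_t thread)
{
	KERNEL_DEBUG_CONSTANT(0 /* placeholder trace code */ | DBG_FUNC_NONE,
	    thread_tid(thread),
	    threquested_0(thread), threquested_1(thread),
	    theffective_0(thread), 0);
}
#endif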
2248
2249
2250 /*
2251 * Set an override on the thread which takes precedence
2252 * over the task/thread policy. This should
2253 * only be set for temporary grants until the thread
2254 * returns to the userspace boundary
2255 *
2256 * We use atomic operations to swap in the override, with
2257 * the assumption that the thread itself can
2258 * read the override and clear it on return to userspace.
2259 *
2260 * No locking is performed, since it is acceptable to see
2261 * a stale override for one loop through throttle_lowpri_io().
2262 * However a thread reference must be held on the thread.
2263 */
2264
2265 void
2266 set_thread_iotier_override(thread_t thread, int policy)
2267 {
2268 int current_override;
2269
2270 /* Let most aggressive I/O policy win until user boundary */
2271 do {
2272 current_override = thread->iotier_override;
2273
2274 if (current_override != THROTTLE_LEVEL_NONE) {
2275 policy = MIN(current_override, policy);
2276 }
2277
2278 if (current_override == policy) {
2279 /* no effective change */
2280 return;
2281 }
2282 } while (!OSCompareAndSwap(current_override, policy, &thread->iotier_override));
2283
2284 /*
2285 * Since the thread may be currently throttled,
2286 * re-evaluate tiers and potentially break out
2287 * of an msleep
2288 */
2289 rethrottle_thread(thread->uthread);
2290 }
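
/*
 * Illustrative sketch (not part of the original source): a hypothetical
 * kernel path granting a temporary I/O tier boost to a thread it holds a
 * reference on, as described above. The override is expected to be cleared
 * by the thread on its way back to userspace.
 */
#if 0 /* example sketch only */
static void
example_grant_iotier_override(thread_t thread)
{
	/* Most aggressive tier wins; THROTTLE_LEVEL_TIER0 is the highest */
	set_thread_iotier_override(thread, THROTTLE_LEVEL_TIER0);
}
#endif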
2291
2292 /*
2293 * Userspace synchronization routines (like pthread mutexes, pthread reader-writer locks,
2294 * semaphores, dispatch_sync) may result in priority inversions where a higher priority thread
2295 * (i.e. in scheduler priority, I/O tier, or QoS tier) is waiting on a resource owned by a lower
2296 * priority thread. In these cases, we attempt to propagate the priority token, as long
2297 * as the subsystem informs us of the relationships between the threads. The userspace
2298 * synchronization subsystem should maintain the information of owner->resource and
2299 * resource->waiters itself.
2300 */
2301
2302 /*
2303 * This helper canonicalizes the resource/resource_type given the current qos_override_mode
2304 * in effect. Note that wildcards (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD) may need
2305 * to be handled specially in the future, but for now it's fine to slam
2306 * *resource to USER_ADDR_NULL even if it was previously a wildcard.
2307 */
2308 static void
2309 canonicalize_resource_and_type(user_addr_t *resource, int *resource_type)
2310 {
2311 if (qos_override_mode == QOS_OVERRIDE_MODE_OVERHANG_PEAK || qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2312 /* Map all input resource/type to a single one */
2313 *resource = USER_ADDR_NULL;
2314 *resource_type = THREAD_QOS_OVERRIDE_TYPE_UNKNOWN;
2315 } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE) {
2316 /* no transform */
2317 } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE) {
2318 /* Map all mutex overrides to a single one, to avoid memory overhead */
2319 if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX) {
2320 *resource = USER_ADDR_NULL;
2321 }
2322 }
2323 }
2324
2325 /* This helper routine finds an existing override, if any. Locking should be done by the caller */
2326 static struct thread_qos_override *
2327 find_qos_override(thread_t thread,
2328 user_addr_t resource,
2329 int resource_type)
2330 {
2331 struct thread_qos_override *override;
2332
2333 override = thread->overrides;
2334 while (override) {
2335 if (override->override_resource == resource &&
2336 override->override_resource_type == resource_type) {
2337 return override;
2338 }
2339
2340 override = override->override_next;
2341 }
2342
2343 return NULL;
2344 }
2345
2346 static void
2347 find_and_decrement_qos_override(thread_t thread,
2348 user_addr_t resource,
2349 int resource_type,
2350 boolean_t reset,
2351 struct thread_qos_override **free_override_list)
2352 {
2353 struct thread_qos_override *override, *override_prev;
2354
2355 override_prev = NULL;
2356 override = thread->overrides;
2357 while (override) {
2358 struct thread_qos_override *override_next = override->override_next;
2359
2360 if ((THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD == resource || override->override_resource == resource) &&
2361 (THREAD_QOS_OVERRIDE_TYPE_WILDCARD == resource_type || override->override_resource_type == resource_type)) {
2362 if (reset) {
2363 override->override_contended_resource_count = 0;
2364 } else {
2365 override->override_contended_resource_count--;
2366 }
2367
2368 if (override->override_contended_resource_count == 0) {
2369 if (override_prev == NULL) {
2370 thread->overrides = override_next;
2371 } else {
2372 override_prev->override_next = override_next;
2373 }
2374
2375 /* Add to out-param for later zfree */
2376 override->override_next = *free_override_list;
2377 *free_override_list = override;
2378 } else {
2379 override_prev = override;
2380 }
2381
2382 if (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD != resource) {
2383 return;
2384 }
2385 } else {
2386 override_prev = override;
2387 }
2388
2389 override = override_next;
2390 }
2391 }
2392
2393 /* This helper recalculates the current requested override using the policy selected at boot */
2394 static int
2395 calculate_requested_qos_override(thread_t thread)
2396 {
2397 if (qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2398 return THREAD_QOS_UNSPECIFIED;
2399 }
2400
2401 /* iterate over all overrides and calculate MAX */
2402 struct thread_qos_override *override;
2403 int qos_override = THREAD_QOS_UNSPECIFIED;
2404
2405 override = thread->overrides;
2406 while (override) {
2407 qos_override = MAX(qos_override, override->override_qos);
2408 override = override->override_next;
2409 }
2410
2411 return qos_override;
2412 }
2413
2414 /*
2415 * Returns:
2416 * - 0 on success
2417 * - EINVAL if some invalid input was passed
2418 */
2419 static int
2420 proc_thread_qos_add_override_internal(thread_t thread,
2421 int override_qos,
2422 boolean_t first_override_for_resource,
2423 user_addr_t resource,
2424 int resource_type)
2425 {
2426 struct task_pend_token pend_token = {};
2427 int rc = 0;
2428
2429 thread_mtx_lock(thread);
2430
2431 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_START,
2432 thread_tid(thread), override_qos, first_override_for_resource ? 1 : 0, 0, 0);
2433
2434 DTRACE_BOOST5(qos_add_override_pre, uint64_t, thread_tid(thread),
2435 uint64_t, thread->requested_policy.thrp_qos,
2436 uint64_t, thread->effective_policy.thep_qos,
2437 int, override_qos, boolean_t, first_override_for_resource);
2438
2439 struct thread_qos_override *override;
2440 struct thread_qos_override *override_new = NULL;
2441 int new_qos_override, prev_qos_override;
2442 int new_effective_qos;
2443
2444 canonicalize_resource_and_type(&resource, &resource_type);
2445
2446 override = find_qos_override(thread, resource, resource_type);
2447 if (first_override_for_resource && !override) {
2448 /* We need to allocate a new object. Drop the thread mutex and
2449 * recheck afterwards in case someone else added the override
2450 */
2451 thread_mtx_unlock(thread);
2452 override_new = zalloc(thread_qos_override_zone);
2453 thread_mtx_lock(thread);
2454 override = find_qos_override(thread, resource, resource_type);
2455 }
2456 if (first_override_for_resource && override) {
2457 /* Someone else already allocated while the thread mutex was dropped */
2458 override->override_contended_resource_count++;
2459 } else if (!override && override_new) {
2460 override = override_new;
2461 override_new = NULL;
2462 override->override_next = thread->overrides;
2463 /* since first_override_for_resource was TRUE */
2464 override->override_contended_resource_count = 1;
2465 override->override_resource = resource;
2466 override->override_resource_type = resource_type;
2467 override->override_qos = THREAD_QOS_UNSPECIFIED;
2468 thread->overrides = override;
2469 }
2470
2471 if (override) {
2472 if (override->override_qos == THREAD_QOS_UNSPECIFIED) {
2473 override->override_qos = override_qos;
2474 } else {
2475 override->override_qos = MAX(override->override_qos, override_qos);
2476 }
2477 }
2478
2479 /* Determine how to combine the various overrides into a single current
2480 * requested override
2481 */
2482 new_qos_override = calculate_requested_qos_override(thread);
2483
2484 prev_qos_override = proc_get_thread_policy_locked(thread,
2485 TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);
2486
2487 if (new_qos_override != prev_qos_override) {
2488 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
2489 TASK_POLICY_QOS_OVERRIDE,
2490 new_qos_override, 0, &pend_token);
2491 }
2492
2493 new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);
2494
2495 thread_mtx_unlock(thread);
2496
2497 thread_policy_update_complete_unlocked(thread, &pend_token);
2498
2499 if (override_new) {
2500 zfree(thread_qos_override_zone, override_new);
2501 }
2502
2503 DTRACE_BOOST4(qos_add_override_post, int, prev_qos_override,
2504 int, new_qos_override, int, new_effective_qos, int, rc);
2505
2506 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_END,
2507 new_qos_override, resource, resource_type, 0, 0);
2508
2509 return rc;
2510 }
2511
2512 int
2513 proc_thread_qos_add_override(task_t task,
2514 thread_t thread,
2515 uint64_t tid,
2516 int override_qos,
2517 boolean_t first_override_for_resource,
2518 user_addr_t resource,
2519 int resource_type)
2520 {
2521 boolean_t has_thread_reference = FALSE;
2522 int rc = 0;
2523
2524 if (thread == THREAD_NULL) {
2525 thread = task_findtid(task, tid);
2526 /* returns referenced thread */
2527
2528 if (thread == THREAD_NULL) {
2529 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_NONE,
2530 tid, 0, 0xdead, 0, 0);
2531 return ESRCH;
2532 }
2533 has_thread_reference = TRUE;
2534 } else {
2535 assert(thread->task == task);
2536 }
2537 rc = proc_thread_qos_add_override_internal(thread, override_qos,
2538 first_override_for_resource, resource, resource_type);
2539 if (has_thread_reference) {
2540 thread_deallocate(thread);
2541 }
2542
2543 return rc;
2544 }
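
/*
 * Illustrative sketch (not part of the original source): how the userspace
 * synchronization support path might boost the owner of a contended pthread
 * mutex. The owner tid and mutex address are assumed to be supplied by the
 * caller; first_override_for_resource is TRUE only for the first waiter.
 */
#if 0 /* example sketch only */
static int
example_boost_mutex_owner(task_t task, uint64_t owner_tid, user_addr_t mutex_addr)
{
	return proc_thread_qos_add_override(task, THREAD_NULL, owner_tid,
	    THREAD_QOS_USER_INITIATED, TRUE,
	    mutex_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
}
#endif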
2545
2546 static void
2547 proc_thread_qos_remove_override_internal(thread_t thread,
2548 user_addr_t resource,
2549 int resource_type,
2550 boolean_t reset)
2551 {
2552 struct task_pend_token pend_token = {};
2553
2554 struct thread_qos_override *deferred_free_override_list = NULL;
2555 int new_qos_override, prev_qos_override, new_effective_qos;
2556
2557 thread_mtx_lock(thread);
2558
2559 canonicalize_resource_and_type(&resource, &resource_type);
2560
2561 find_and_decrement_qos_override(thread, resource, resource_type, reset, &deferred_free_override_list);
2562
2563 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_START,
2564 thread_tid(thread), resource, reset, 0, 0);
2565
2566 DTRACE_BOOST3(qos_remove_override_pre, uint64_t, thread_tid(thread),
2567 uint64_t, thread->requested_policy.thrp_qos,
2568 uint64_t, thread->effective_policy.thep_qos);
2569
2570 /* Determine how to combine the various overrides into a single current requested override */
2571 new_qos_override = calculate_requested_qos_override(thread);
2572
2573 spl_t s = splsched();
2574 thread_lock(thread);
2575
2576 /*
2577 * The override chain, and therefore the value of the current override, is protected by the thread mutex,
2578 * so we can do a get/set without races. However, the rest of thread policy is locked under the spinlock.
2579 * This means you can't change the current override from a spinlock-only setter.
2580 */
2581 prev_qos_override = thread_get_requested_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);
2582
2583 if (new_qos_override != prev_qos_override) {
2584 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, new_qos_override, 0, &pend_token);
2585 }
2586
2587 new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);
2588
2589 thread_unlock(thread);
2590 splx(s);
2591
2592 thread_mtx_unlock(thread);
2593
2594 thread_policy_update_complete_unlocked(thread, &pend_token);
2595
2596 while (deferred_free_override_list) {
2597 struct thread_qos_override *override_next = deferred_free_override_list->override_next;
2598
2599 zfree(thread_qos_override_zone, deferred_free_override_list);
2600 deferred_free_override_list = override_next;
2601 }
2602
2603 DTRACE_BOOST3(qos_remove_override_post, int, prev_qos_override,
2604 int, new_qos_override, int, new_effective_qos);
2605
2606 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_END,
2607 thread_tid(thread), 0, 0, 0, 0);
2608 }
2609
2610 int
2611 proc_thread_qos_remove_override(task_t task,
2612 thread_t thread,
2613 uint64_t tid,
2614 user_addr_t resource,
2615 int resource_type)
2616 {
2617 boolean_t has_thread_reference = FALSE;
2618
2619 if (thread == THREAD_NULL) {
2620 thread = task_findtid(task, tid);
2621 /* returns referenced thread */
2622
2623 if (thread == THREAD_NULL) {
2624 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_NONE,
2625 tid, 0, 0xdead, 0, 0);
2626 return ESRCH;
2627 }
2628 has_thread_reference = TRUE;
2629 } else {
2630 assert(task == thread->task);
2631 }
2632
2633 proc_thread_qos_remove_override_internal(thread, resource, resource_type, FALSE);
2634
2635 if (has_thread_reference) {
2636 thread_deallocate(thread);
2637 }
2638
2639 return 0;
2640 }
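
/*
 * Illustrative sketch (not part of the original source): the matching
 * removal once the contended resource is released. Passing the wildcard
 * resource/type would instead match every override on the thread.
 */
#if 0 /* example sketch only */
static int
example_unboost_mutex_owner(task_t task, uint64_t owner_tid, user_addr_t mutex_addr)
{
	return proc_thread_qos_remove_override(task, THREAD_NULL, owner_tid,
	    mutex_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
}
#endif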
2641
2642 /* Deallocate before thread termination */
2643 void
2644 proc_thread_qos_deallocate(thread_t thread)
2645 {
2646 /* This thread must have no more IPC overrides. */
2647 assert(thread->ipc_overrides == 0);
2648 assert(thread->requested_policy.thrp_qos_ipc_override == THREAD_QOS_UNSPECIFIED);
2649 assert(thread->sync_ipc_overrides == 0);
2650 assert(thread->requested_policy.thrp_qos_sync_ipc_override == THREAD_QOS_UNSPECIFIED);
2651
2652 /*
2653 * Clear out any lingering override objects.
2654 */
2655 struct thread_qos_override *override;
2656
2657 thread_mtx_lock(thread);
2658 override = thread->overrides;
2659 thread->overrides = NULL;
2660 thread->requested_policy.thrp_qos_override = THREAD_QOS_UNSPECIFIED;
2661 /* We don't need to re-evaluate thread policy here because the thread has already exited */
2662 thread_mtx_unlock(thread);
2663
2664 while (override) {
2665 struct thread_qos_override *override_next = override->override_next;
2666
2667 zfree(thread_qos_override_zone, override);
2668 override = override_next;
2669 }
2670 }
2671
2672 /*
2673 * Set up the primordial thread's QoS
2674 */
2675 void
2676 task_set_main_thread_qos(task_t task, thread_t thread)
2677 {
2678 struct task_pend_token pend_token = {};
2679
2680 assert(thread->task == task);
2681
2682 thread_mtx_lock(thread);
2683
2684 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2685 (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_START,
2686 thread_tid(thread), threquested_0(thread), threquested_1(thread),
2687 thread->requested_policy.thrp_qos, 0);
2688
2689 int primordial_qos = task_compute_main_thread_qos(task);
2690
2691 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS,
2692 primordial_qos, 0, &pend_token);
2693
2694 thread_mtx_unlock(thread);
2695
2696 thread_policy_update_complete_unlocked(thread, &pend_token);
2697
2698 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2699 (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_END,
2700 thread_tid(thread), threquested_0(thread), threquested_1(thread),
2701 primordial_qos, 0);
2702 }
2703
2704 /*
2705 * KPI for pthread kext
2706 *
2707 * Return a good guess at what the initial manager QoS will be
2708 * Dispatch can override this in userspace if it so chooses
2709 */
2710 int
2711 task_get_default_manager_qos(task_t task)
2712 {
2713 int primordial_qos = task_compute_main_thread_qos(task);
2714
2715 if (primordial_qos == THREAD_QOS_LEGACY) {
2716 primordial_qos = THREAD_QOS_USER_INITIATED;
2717 }
2718
2719 return primordial_qos;
2720 }
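
/*
 * Illustrative sketch (not part of the original source): how the pthread
 * kext side of this KPI might consume the hint when bringing up a workqueue
 * manager thread for a task.
 */
#if 0 /* example sketch only */
static int
example_default_manager_qos(task_t task)
{
	/* THREAD_QOS_LEGACY is promoted to USER_INITIATED by the KPI above */
	return task_get_default_manager_qos(task);
}
#endif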
2721
2722 /*
2723 * Check if the user promotion on the thread has changed
2724 * and apply it.
2725 *
2726 * thread locked on entry, might drop the thread lock
2727 * and reacquire it.
2728 */
2729 boolean_t
2730 thread_recompute_user_promotion_locked(thread_t thread)
2731 {
2732 boolean_t needs_update = FALSE;
2733 struct task_pend_token pend_token = {};
2734 int user_promotion_basepri = MIN(thread_get_inheritor_turnstile_priority(thread), MAXPRI_USER);
2735 int old_base_pri = thread->base_pri;
2736 thread_qos_t qos_promotion;
2737
2738 /* Check if user promotion has changed */
2739 if (thread->user_promotion_basepri == user_promotion_basepri) {
2740 return needs_update;
2741 } else {
2742 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2743 (TURNSTILE_CODE(TURNSTILE_PRIORITY_OPERATIONS, (THREAD_USER_PROMOTION_CHANGE))) | DBG_FUNC_NONE,
2744 thread_tid(thread),
2745 user_promotion_basepri,
2746 thread->user_promotion_basepri,
2747 0, 0);
2748 }
2749
2750 /* Update the user promotion base pri */
2751 thread->user_promotion_basepri = user_promotion_basepri;
2752 pend_token.tpt_force_recompute_pri = 1;
2753
2754 if (user_promotion_basepri <= MAXPRI_THROTTLE) {
2755 qos_promotion = THREAD_QOS_UNSPECIFIED;
2756 } else {
2757 qos_promotion = thread_user_promotion_qos_for_pri(user_promotion_basepri);
2758 }
2759
2760 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
2761 TASK_POLICY_QOS_PROMOTE, qos_promotion, 0, &pend_token);
2762
2763 if (thread_get_waiting_turnstile(thread) &&
2764 thread->base_pri != old_base_pri) {
2765 needs_update = TRUE;
2766 }
2767
2768 thread_unlock(thread);
2769
2770 thread_policy_update_complete_unlocked(thread, &pend_token);
2771
2772 thread_lock(thread);
2773
2774 return needs_update;
2775 }
2776
2777 /*
2778 * Convert the thread's user promotion base priority to a QoS class, for threads in the QoS world.
2779 * For priorities above the UI QoS base priority, the QoS is clamped to UI.
2780 */
2781 thread_qos_t
2782 thread_user_promotion_qos_for_pri(int priority)
2783 {
2784 int qos;
2785 for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
2786 if (thread_qos_policy_params.qos_pri[qos] <= priority) {
2787 return qos;
2788 }
2789 }
2790 return THREAD_QOS_MAINTENANCE;
2791 }
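
/*
 * Illustrative sketch (not part of the original source): given the qos_pri
 * table at the top of this file, a turnstile inheritor priority of
 * BASEPRI_DEFAULT maps to THREAD_QOS_LEGACY, the highest QoS class whose
 * base priority does not exceed it.
 */
#if 0 /* example sketch only */
static void
example_promotion_qos_mapping(void)
{
	thread_qos_t qos = thread_user_promotion_qos_for_pri(BASEPRI_DEFAULT);
	assert(qos == THREAD_QOS_LEGACY);
}
#endif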
2792
2793 /*
2794 * Set the thread's QoS IPC override
2795 * Owned by the IPC subsystem
2796 *
2797 * May be called with spinlocks held, but not spinlocks
2798 * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
2799 *
2800 * One 'add' must be balanced by one 'drop'.
2801 * Between 'add' and 'drop', the override QoS value may be updated with an 'update'.
2802 * Before the thread is deallocated, there must be 0 remaining overrides.
2803 */
2804 static void
2805 thread_ipc_override(thread_t thread,
2806 uint32_t qos_override,
2807 boolean_t is_new_override)
2808 {
2809 struct task_pend_token pend_token = {};
2810 boolean_t needs_update;
2811
2812 spl_t s = splsched();
2813 thread_lock(thread);
2814
2815 uint32_t old_override = thread->requested_policy.thrp_qos_ipc_override;
2816
2817 assert(qos_override > THREAD_QOS_UNSPECIFIED);
2818 assert(qos_override < THREAD_QOS_LAST);
2819
2820 if (is_new_override) {
2821 if (thread->ipc_overrides++ == 0) {
2822 /* This add is the first override for this thread */
2823 assert(old_override == THREAD_QOS_UNSPECIFIED);
2824 } else {
2825 /* There are already other overrides in effect for this thread */
2826 assert(old_override > THREAD_QOS_UNSPECIFIED);
2827 }
2828 } else {
2829 /* There must be at least one override (the previous add call) in effect */
2830 assert(thread->ipc_overrides > 0);
2831 assert(old_override > THREAD_QOS_UNSPECIFIED);
2832 }
2833
2834 /*
2835 * We can't allow lowering if there are several IPC overrides because
2836 * the caller can't possibly know the whole truth
2837 */
2838 if (thread->ipc_overrides == 1) {
2839 needs_update = qos_override != old_override;
2840 } else {
2841 needs_update = qos_override > old_override;
2842 }
2843
2844 if (needs_update) {
2845 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
2846 TASK_POLICY_QOS_IPC_OVERRIDE,
2847 qos_override, 0, &pend_token);
2848 assert(pend_token.tpt_update_sockets == 0);
2849 }
2850
2851 thread_unlock(thread);
2852 splx(s);
2853
2854 thread_policy_update_complete_unlocked(thread, &pend_token);
2855 }
2856
2857 void
2858 thread_add_ipc_override(thread_t thread,
2859 uint32_t qos_override)
2860 {
2861 thread_ipc_override(thread, qos_override, TRUE);
2862 }
2863
2864 void
2865 thread_update_ipc_override(thread_t thread,
2866 uint32_t qos_override)
2867 {
2868 thread_ipc_override(thread, qos_override, FALSE);
2869 }
2870
2871 void
2872 thread_drop_ipc_override(thread_t thread)
2873 {
2874 struct task_pend_token pend_token = {};
2875
2876 spl_t s = splsched();
2877 thread_lock(thread);
2878
2879 assert(thread->ipc_overrides > 0);
2880
2881 if (--thread->ipc_overrides == 0) {
2882 /*
2883 * There are no more overrides for this thread, so we should
2884 * clear out the saturated override value
2885 */
2886
2887 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
2888 TASK_POLICY_QOS_IPC_OVERRIDE, THREAD_QOS_UNSPECIFIED,
2889 0, &pend_token);
2890 }
2891
2892 thread_unlock(thread);
2893 splx(s);
2894
2895 thread_policy_update_complete_unlocked(thread, &pend_token);
2896 }
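
/*
 * Illustrative sketch (not part of the original source): the add/update/drop
 * lifecycle described above, as the IPC subsystem might drive it while a
 * boosting message is in flight for a thread.
 */
#if 0 /* example sketch only */
static void
example_ipc_override_lifecycle(thread_t thread)
{
	/* First boost for this thread: 'add' */
	thread_add_ipc_override(thread, THREAD_QOS_USER_INITIATED);

	/* A later, higher-QoS message may raise the saturated value: 'update' */
	thread_update_ipc_override(thread, THREAD_QOS_USER_INTERACTIVE);

	/* Balanced by exactly one 'drop' per 'add' */
	thread_drop_ipc_override(thread);
}
#endif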
2897
2898 /* Get current requested qos / relpri, may be called from spinlock context */
2899 thread_qos_t
2900 thread_get_requested_qos(thread_t thread, int *relpri)
2901 {
2902 int relprio_value = 0;
2903 thread_qos_t qos;
2904
2905 qos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
2906 TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
2907 if (relpri) {
2908 *relpri = -relprio_value;
2909 }
2910 return qos;
2911 }
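
/*
 * Illustrative sketch (not part of the original source): reading the
 * requested QoS and relative priority for the current thread. Per the
 * accessor above, the relative priority is returned negated from the
 * stored thrp_qos_relprio value.
 */
#if 0 /* example sketch only */
static void
example_read_requested_qos(void)
{
	int relpri = 0;
	thread_qos_t qos = thread_get_requested_qos(current_thread(), &relpri);

	(void)qos;
	(void)relpri;
}
#endif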
2912
2913 /*
2914 * This function will promote the thread priority
2915 * since exec could block other threads calling
2916 * proc_find on the proc. This boost must be removed
2917 * via a call to thread_clear_exec_promotion.
2918 *
2919 * This should be replaced with a generic 'priority inheriting gate' mechanism (24194397)
2920 */
2921 void
2922 thread_set_exec_promotion(thread_t thread)
2923 {
2924 spl_t s = splsched();
2925 thread_lock(thread);
2926
2927 sched_thread_promote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
2928
2929 thread_unlock(thread);
2930 splx(s);
2931 }
2932
2933 /*
2934 * This function will clear the exec thread
2935 * promotion set on the thread by thread_set_exec_promotion.
2936 */
2937 void
2938 thread_clear_exec_promotion(thread_t thread)
2939 {
2940 spl_t s = splsched();
2941 thread_lock(thread);
2942
2943 sched_thread_unpromote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
2944
2945 thread_unlock(thread);
2946 splx(s);
2947 }
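
/*
 * Illustrative sketch (not part of the original source): the promotion is
 * expected to bracket the exec critical section; do_exec_work() below is a
 * hypothetical placeholder for the real exec path.
 */
#if 0 /* example sketch only */
static void
example_exec_promotion(thread_t self)
{
	thread_set_exec_promotion(self);
	/* ... do_exec_work() would run here (hypothetical placeholder) ... */
	thread_clear_exec_promotion(self);
}
#endif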