osfmk/kern/thread_policy.c

   1 /*
   2  * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28
  29 #include <mach/mach_types.h>
  30 #include <mach/thread_act_server.h>
  31
  32 #include <kern/kern_types.h>
  33 #include <kern/processor.h>
  34 #include <kern/thread.h>
  35 #include <kern/affinity.h>
  36 #include <mach/task_policy.h>
  37 #include <kern/sfi.h>
  38 #include <kern/policy_internal.h>
  39 #include <sys/errno.h>
  40 #include <sys/ulock.h>
  41
  42 #include <mach/machine/sdt.h>
  43
  44 #ifdef MACH_BSD
  45 extern int      proc_selfpid(void);
  46 extern char *   proc_name_address(void *p);
  47 extern void     rethrottle_thread(void * uthread);
  48 #endif /* MACH_BSD */
  49
  50 #define QOS_EXTRACT(q)        ((q) & 0xff)
  51
  52 uint32_t qos_override_mode;
  53 #define QOS_OVERRIDE_MODE_OVERHANG_PEAK 0
  54 #define QOS_OVERRIDE_MODE_IGNORE_OVERRIDE 1
  55 #define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE 2
  56 #define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE 3
  57
  58 extern zone_t thread_qos_override_zone;
  59
  60 static void
  61 proc_thread_qos_remove_override_internal(thread_t thread, user_addr_t resource, int resource_type, boolean_t reset);
  62
  63 /*
  64  * THREAD_QOS_UNSPECIFIED is assigned the highest tier available, so it does not provide a limit
  65  * to threads that don't have a QoS class set.
  66  */
  67 const qos_policy_params_t thread_qos_policy_params = {
  68         /*
  69          * This table defines the starting base priority of the thread,
  70          * which will be modified by the thread importance and the task max priority
  71          * before being applied.
  72          */
  73         .qos_pri[THREAD_QOS_UNSPECIFIED]                = 0, /* not consulted */
  74         .qos_pri[THREAD_QOS_USER_INTERACTIVE]           = BASEPRI_BACKGROUND, /* i.e. 46 */
  75         .qos_pri[THREAD_QOS_USER_INITIATED]             = BASEPRI_USER_INITIATED,
  76         .qos_pri[THREAD_QOS_LEGACY]                     = BASEPRI_DEFAULT,
  77         .qos_pri[THREAD_QOS_UTILITY]                    = BASEPRI_UTILITY,
  78         .qos_pri[THREAD_QOS_BACKGROUND]                 = MAXPRI_THROTTLE,
  79         .qos_pri[THREAD_QOS_MAINTENANCE]                = MAXPRI_THROTTLE,
  80
  81         /*
  82          * This table defines the highest IO priority that a thread marked with this
  83          * QoS class can have.
  84          */
  85         .qos_iotier[THREAD_QOS_UNSPECIFIED]             = THROTTLE_LEVEL_TIER0,
  86         .qos_iotier[THREAD_QOS_USER_INTERACTIVE]        = THROTTLE_LEVEL_TIER0,
  87         .qos_iotier[THREAD_QOS_USER_INITIATED]          = THROTTLE_LEVEL_TIER0,
  88         .qos_iotier[THREAD_QOS_LEGACY]                  = THROTTLE_LEVEL_TIER0,
  89         .qos_iotier[THREAD_QOS_UTILITY]                 = THROTTLE_LEVEL_TIER1,
  90         .qos_iotier[THREAD_QOS_BACKGROUND]              = THROTTLE_LEVEL_TIER2, /* possibly overridden by bg_iotier */
  91         .qos_iotier[THREAD_QOS_MAINTENANCE]             = THROTTLE_LEVEL_TIER3,
  92
  93         /*
  94          * This table defines the highest QoS level that
  95          * a thread marked with this QoS class can have.
  96          */
  97
  98         .qos_through_qos[THREAD_QOS_UNSPECIFIED]        = QOS_EXTRACT(THROUGHPUT_QOS_TIER_UNSPECIFIED),
  99         .qos_through_qos[THREAD_QOS_USER_INTERACTIVE]   = QOS_EXTRACT(THROUGHPUT_QOS_TIER_0),
 100         .qos_through_qos[THREAD_QOS_USER_INITIATED]     = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
 101         .qos_through_qos[THREAD_QOS_LEGACY]             = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
 102         .qos_through_qos[THREAD_QOS_UTILITY]            = QOS_EXTRACT(THROUGHPUT_QOS_TIER_2),
 103         .qos_through_qos[THREAD_QOS_BACKGROUND]         = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
 104         .qos_through_qos[THREAD_QOS_MAINTENANCE]        = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
 105
 106         .qos_latency_qos[THREAD_QOS_UNSPECIFIED]        = QOS_EXTRACT(LATENCY_QOS_TIER_UNSPECIFIED),
 107         .qos_latency_qos[THREAD_QOS_USER_INTERACTIVE]   = QOS_EXTRACT(LATENCY_QOS_TIER_0),
 108         .qos_latency_qos[THREAD_QOS_USER_INITIATED]     = QOS_EXTRACT(LATENCY_QOS_TIER_1),
 109         .qos_latency_qos[THREAD_QOS_LEGACY]             = QOS_EXTRACT(LATENCY_QOS_TIER_1),
 110         .qos_latency_qos[THREAD_QOS_UTILITY]            = QOS_EXTRACT(LATENCY_QOS_TIER_3),
 111         .qos_latency_qos[THREAD_QOS_BACKGROUND]         = QOS_EXTRACT(LATENCY_QOS_TIER_3),
 112         .qos_latency_qos[THREAD_QOS_MAINTENANCE]        = QOS_EXTRACT(LATENCY_QOS_TIER_3),
 113 };
 114
 115 static void
 116 thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode);
 117
 118 static int
 119 thread_qos_scaled_relative_priority(int qos, int qos_relprio);
 120
 121 static void
 122 proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info);
 123
 124 static void
 125 proc_set_thread_policy_locked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
 126
 127 static void
 128 proc_set_thread_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
 129
 130 static void
 131 thread_set_requested_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2);
 132
 133 static int
 134 thread_get_requested_policy_spinlocked(thread_t thread, int category, int flavor, int* value2);
 135
 136 static int
 137 proc_get_thread_policy_locked(thread_t thread, int category, int flavor, int* value2);
 138
 139 static void
 140 thread_policy_update_spinlocked(thread_t thread, boolean_t recompute_priority, task_pend_token_t pend_token);
 141
 142 static void
 143 thread_policy_update_internal_spinlocked(thread_t thread, boolean_t recompute_priority, task_pend_token_t pend_token);
 144
 145 void
 146 thread_policy_init(void) {
 147         if (PE_parse_boot_argn("qos_override_mode", &qos_override_mode, sizeof(qos_override_mode))) {
 148                 printf("QOS override mode: 0x%08x\n", qos_override_mode);
 149         } else {
 150                 qos_override_mode = QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE;
 151         }
 152 }
 153
 154 boolean_t
 155 thread_has_qos_policy(thread_t thread) {
 156         return (proc_get_thread_policy(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS) != THREAD_QOS_UNSPECIFIED) ? TRUE : FALSE;
 157 }
 158
 159
 160 static void
 161 thread_remove_qos_policy_locked(thread_t thread,
 162                                 task_pend_token_t pend_token)
 163 {
 164
 165         __unused int prev_qos = thread->requested_policy.thrp_qos;
 166
 167         DTRACE_PROC2(qos__remove, thread_t, thread, int, prev_qos);
 168
 169         proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
 170                                       THREAD_QOS_UNSPECIFIED, 0, pend_token);
 171 }
 172
 173 kern_return_t
 174 thread_remove_qos_policy(thread_t thread)
 175 {
 176         struct task_pend_token pend_token = {};
 177
 178         thread_mtx_lock(thread);
 179         if (!thread->active) {
 180                 thread_mtx_unlock(thread);
 181                 return KERN_TERMINATED;
 182         }
 183
 184         thread_remove_qos_policy_locked(thread, &pend_token);
 185
 186         thread_mtx_unlock(thread);
 187
 188         thread_policy_update_complete_unlocked(thread, &pend_token);
 189
 190         return KERN_SUCCESS;
 191 }
 192
 193
 194 boolean_t
 195 thread_is_static_param(thread_t thread)
 196 {
 197         if (thread->static_param) {
 198                 DTRACE_PROC1(qos__legacy__denied, thread_t, thread);
 199                 return TRUE;
 200         }
 201         return FALSE;
 202 }
 203
 204 /*
 205  * Relative priorities can range between 0REL and -15REL. These
 206  * map to QoS-specific ranges, to create non-overlapping priority
 207  * ranges.
 208  */
 209 static int
 210 thread_qos_scaled_relative_priority(int qos, int qos_relprio)
 211 {
 212         int next_lower_qos;
 213
 214         /* Fast path, since no validation or scaling is needed */
 215         if (qos_relprio == 0) return 0;
 216
 217         switch (qos) {
 218                 case THREAD_QOS_USER_INTERACTIVE:
 219                         next_lower_qos = THREAD_QOS_USER_INITIATED;
 220                         break;
 221                 case THREAD_QOS_USER_INITIATED:
 222                         next_lower_qos = THREAD_QOS_LEGACY;
 223                         break;
 224                 case THREAD_QOS_LEGACY:
 225                         next_lower_qos = THREAD_QOS_UTILITY;
 226                         break;
 227                 case THREAD_QOS_UTILITY:
 228                         next_lower_qos = THREAD_QOS_BACKGROUND;
 229                         break;
 230                 case THREAD_QOS_MAINTENANCE:
 231                 case THREAD_QOS_BACKGROUND:
 232                         next_lower_qos = 0;
 233                         break;
 234                 default:
 235                         panic("Unrecognized QoS %d", qos);
 236                         return 0;
 237         }
 238
 239         int prio_range_max = thread_qos_policy_params.qos_pri[qos];
 240         int prio_range_min = next_lower_qos ? thread_qos_policy_params.qos_pri[next_lower_qos] : 0;
 241
 242         /*
 243          * We now have the valid range that the scaled relative priority can map to. Note
 244          * that the lower bound is exclusive, but the upper bound is inclusive. If the
 245          * range is (21,31], 0REL should map to 31 and -15REL should map to 22. We use the
 246          * fact that the max relative priority is -15 and use ">>4" to divide by 16 and discard
 247          * remainder.
 248          */
 249         int scaled_relprio = -(((prio_range_max - prio_range_min) * (-qos_relprio)) >> 4);
 250
 251         return scaled_relprio;
 252 }
 253
 254 /*
 255  * flag set by -qos-policy-allow boot-arg to allow
 256  * testing thread qos policy from userspace
 257  */
 258 boolean_t allow_qos_policy_set = FALSE;
 259
 260 kern_return_t
 261 thread_policy_set(
 262         thread_t                                thread,
 263         thread_policy_flavor_t  flavor,
 264         thread_policy_t                 policy_info,
 265         mach_msg_type_number_t  count)
 266 {
 267         thread_qos_policy_data_t req_qos;
 268         kern_return_t kr;
 269
 270         req_qos.qos_tier = THREAD_QOS_UNSPECIFIED;
 271
 272         if (thread == THREAD_NULL)
 273                 return (KERN_INVALID_ARGUMENT);
 274
 275         if (allow_qos_policy_set == FALSE) {
 276                 if (thread_is_static_param(thread))
 277                         return (KERN_POLICY_STATIC);
 278
 279                 if (flavor == THREAD_QOS_POLICY)
 280                         return (KERN_INVALID_ARGUMENT);
 281         }
 282
 283         /* Threads without static_param set reset their QoS when other policies are applied. */
 284         if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
 285                 /* Store the existing tier, if we fail this call it is used to reset back. */
 286                 req_qos.qos_tier = thread->requested_policy.thrp_qos;
 287                 req_qos.tier_importance = thread->requested_policy.thrp_qos_relprio;
 288
 289                 kr = thread_remove_qos_policy(thread);
 290                 if (kr != KERN_SUCCESS) {
 291                         return kr;
 292                 }
 293         }
 294
 295         kr = thread_policy_set_internal(thread, flavor, policy_info, count);
 296
 297         /* Return KERN_QOS_REMOVED instead of KERN_SUCCESS if we succeeded. */
 298         if (req_qos.qos_tier != THREAD_QOS_UNSPECIFIED) {
 299                 if (kr != KERN_SUCCESS) {
 300                         /* Reset back to our original tier as the set failed. */
 301                         (void)thread_policy_set_internal(thread, THREAD_QOS_POLICY, (thread_policy_t)&req_qos, THREAD_QOS_POLICY_COUNT);
 302                 }
 303         }
 304
 305         return kr;
 306 }
 307
 308 kern_return_t
 309 thread_policy_set_internal(
 310                            thread_t                     thread,
 311                            thread_policy_flavor_t       flavor,
 312                            thread_policy_t              policy_info,
 313                            mach_msg_type_number_t       count)
 314 {
 315         kern_return_t result = KERN_SUCCESS;
 316         struct task_pend_token pend_token = {};
 317
 318         thread_mtx_lock(thread);
 319         if (!thread->active) {
 320                 thread_mtx_unlock(thread);
 321
 322                 return (KERN_TERMINATED);
 323         }
 324
 325         switch (flavor) {
 326
 327         case THREAD_EXTENDED_POLICY:
 328         {
 329                 boolean_t timeshare = TRUE;
 330
 331                 if (count >= THREAD_EXTENDED_POLICY_COUNT) {
 332                         thread_extended_policy_t info;
 333
 334                         info = (thread_extended_policy_t)policy_info;
 335                         timeshare = info->timeshare;
 336                 }
 337
 338                 sched_mode_t mode = (timeshare == TRUE) ? TH_MODE_TIMESHARE : TH_MODE_FIXED;
 339
 340                 spl_t s = splsched();
 341                 thread_lock(thread);
 342
 343                 thread_set_user_sched_mode_and_recompute_pri(thread, mode);
 344
 345                 thread_unlock(thread);
 346                 splx(s);
 347
 348                 pend_token.tpt_update_thread_sfi = 1;
 349
 350                 break;
 351         }
 352
 353         case THREAD_TIME_CONSTRAINT_POLICY:
 354         {
 355                 thread_time_constraint_policy_t info;
 356
 357                 if (count < THREAD_TIME_CONSTRAINT_POLICY_COUNT) {
 358                         result = KERN_INVALID_ARGUMENT;
 359                         break;
 360                 }
 361
 362                 info = (thread_time_constraint_policy_t)policy_info;
 363                 if (info->constraint  < info->computation   ||
 364                     info->computation > max_rt_quantum      ||
 365                     info->computation < min_rt_quantum      ) {
 366                         result = KERN_INVALID_ARGUMENT;
 367                         break;
 368                 }
 369
 370                 spl_t s = splsched();
 371                 thread_lock(thread);
 372
 373                 thread->realtime.period         = info->period;
 374                 thread->realtime.computation    = info->computation;
 375                 thread->realtime.constraint     = info->constraint;
 376                 thread->realtime.preemptible    = info->preemptible;
 377
 378                 thread_set_user_sched_mode_and_recompute_pri(thread, TH_MODE_REALTIME);
 379
 380                 thread_unlock(thread);
 381                 splx(s);
 382
 383                 pend_token.tpt_update_thread_sfi = 1;
 384
 385                 break;
 386         }
 387
 388         case THREAD_PRECEDENCE_POLICY:
 389         {
 390                 thread_precedence_policy_t info;
 391
 392                 if (count < THREAD_PRECEDENCE_POLICY_COUNT) {
 393                         result = KERN_INVALID_ARGUMENT;
 394                         break;
 395                 }
 396                 info = (thread_precedence_policy_t)policy_info;
 397
 398                 spl_t s = splsched();
 399                 thread_lock(thread);
 400
 401                 thread->importance = info->importance;
 402
 403                 thread_recompute_priority(thread);
 404
 405                 thread_unlock(thread);
 406                 splx(s);
 407
 408                 break;
 409         }
 410
 411         case THREAD_AFFINITY_POLICY:
 412         {
 413                 thread_affinity_policy_t info;
 414
 415                 if (!thread_affinity_is_supported()) {
 416                         result = KERN_NOT_SUPPORTED;
 417                         break;
 418                 }
 419                 if (count < THREAD_AFFINITY_POLICY_COUNT) {
 420                         result = KERN_INVALID_ARGUMENT;
 421                         break;
 422                 }
 423
 424                 info = (thread_affinity_policy_t) policy_info;
 425                 /*
 426                  * Unlock the thread mutex here and
 427                  * return directly after calling thread_affinity_set().
 428                  * This is necessary for correct lock ordering because
 429                  * thread_affinity_set() takes the task lock.
 430                  */
 431                 thread_mtx_unlock(thread);
 432                 return thread_affinity_set(thread, info->affinity_tag);
 433         }
 434
 435 #if CONFIG_EMBEDDED
 436         case THREAD_BACKGROUND_POLICY:
 437         {
 438                 thread_background_policy_t info;
 439
 440                 if (count < THREAD_BACKGROUND_POLICY_COUNT) {
 441                         result = KERN_INVALID_ARGUMENT;
 442                         break;
 443                 }
 444
 445                 if (thread->task != current_task()) {
 446                         result = KERN_PROTECTION_FAILURE;
 447                         break;
 448                 }
 449
 450                 info = (thread_background_policy_t) policy_info;
 451
 452                 int enable;
 453
 454                 if (info->priority == THREAD_BACKGROUND_POLICY_DARWIN_BG)
 455                         enable = TASK_POLICY_ENABLE;
 456                 else
 457                         enable = TASK_POLICY_DISABLE;
 458
 459                 int category = (current_thread() == thread) ? TASK_POLICY_INTERNAL : TASK_POLICY_EXTERNAL;
 460
 461                 proc_set_thread_policy_locked(thread, category, TASK_POLICY_DARWIN_BG, enable, 0, &pend_token);
 462
 463                 break;
 464         }
 465 #endif /* CONFIG_EMBEDDED */
 466
 467         case THREAD_THROUGHPUT_QOS_POLICY:
 468         {
 469                 thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
 470                 thread_throughput_qos_t tqos;
 471
 472                 if (count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
 473                         result = KERN_INVALID_ARGUMENT;
 474                         break;
 475                 }
 476
 477                 if ((result = qos_throughput_policy_validate(info->thread_throughput_qos_tier)) != KERN_SUCCESS)
 478                         break;
 479
 480                 tqos = qos_extract(info->thread_throughput_qos_tier);
 481
 482                 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
 483                                               TASK_POLICY_THROUGH_QOS, tqos, 0, &pend_token);
 484
 485                 break;
 486         }
 487
 488         case THREAD_LATENCY_QOS_POLICY:
 489         {
 490                 thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
 491                 thread_latency_qos_t lqos;
 492
 493                 if (count < THREAD_LATENCY_QOS_POLICY_COUNT) {
 494                         result = KERN_INVALID_ARGUMENT;
 495                         break;
 496                 }
 497
 498                 if ((result = qos_latency_policy_validate(info->thread_latency_qos_tier)) != KERN_SUCCESS)
 499                         break;
 500
 501                 lqos = qos_extract(info->thread_latency_qos_tier);
 502
 503                 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
 504                                               TASK_POLICY_LATENCY_QOS, lqos, 0, &pend_token);
 505
 506                 break;
 507         }
 508
 509         case THREAD_QOS_POLICY:
 510         {
 511                 thread_qos_policy_t info = (thread_qos_policy_t)policy_info;
 512
 513                 if (count < THREAD_QOS_POLICY_COUNT) {
 514                         result = KERN_INVALID_ARGUMENT;
 515                         break;
 516                 }
 517
 518                 if (info->qos_tier < 0 || info->qos_tier >= THREAD_QOS_LAST) {
 519                         result = KERN_INVALID_ARGUMENT;
 520                         break;
 521                 }
 522
 523                 if (info->tier_importance > 0 || info->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
 524                         result = KERN_INVALID_ARGUMENT;
 525                         break;
 526                 }
 527
 528                 if (info->qos_tier == THREAD_QOS_UNSPECIFIED && info->tier_importance != 0) {
 529                         result = KERN_INVALID_ARGUMENT;
 530                         break;
 531                 }
 532
 533                 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
 534                                               info->qos_tier, -info->tier_importance, &pend_token);
 535
 536                 break;
 537         }
 538
 539         default:
 540                 result = KERN_INVALID_ARGUMENT;
 541                 break;
 542         }
 543
 544         thread_mtx_unlock(thread);
 545
 546         thread_policy_update_complete_unlocked(thread, &pend_token);
 547
 548         return (result);
 549 }
 550
 551 /*
 552  * Note that there is no implemented difference between POLICY_RR and POLICY_FIFO.
 553  * Both result in FIXED mode scheduling.
 554  */
 555 static sched_mode_t
 556 convert_policy_to_sched_mode(integer_t policy) {
 557         switch (policy) {
 558                 case POLICY_TIMESHARE:
 559                         return TH_MODE_TIMESHARE;
 560                 case POLICY_RR:
 561                 case POLICY_FIFO:
 562                         return TH_MODE_FIXED;
 563                 default:
 564                         panic("unexpected sched policy: %d", policy);
 565                         return TH_MODE_NONE;
 566         }
 567 }
 568
 569 /*
 570  * Called either with the thread mutex locked
 571  * or from the pthread kext in a 'safe place'.
 572  */
 573 static kern_return_t
 574 thread_set_mode_and_absolute_pri_internal(thread_t              thread,
 575                                           sched_mode_t          mode,
 576                                           integer_t             priority,
 577                                           task_pend_token_t     pend_token)
 578 {
 579         kern_return_t kr = KERN_SUCCESS;
 580
 581         spl_t s = splsched();
 582         thread_lock(thread);
 583
 584         /* This path isn't allowed to change a thread out of realtime. */
 585         if ((thread->sched_mode == TH_MODE_REALTIME) ||
 586             (thread->saved_mode == TH_MODE_REALTIME)) {
 587                 kr = KERN_FAILURE;
 588                 goto unlock;
 589         }
 590
 591         if (thread->policy_reset) {
 592                 kr = KERN_SUCCESS;
 593                 goto unlock;
 594         }
 595
 596         sched_mode_t old_mode = thread->sched_mode;
 597
 598         /*
 599          * Reverse engineer and apply the correct importance value
 600          * from the requested absolute priority value.
 601          *
 602          * TODO: Store the absolute priority value instead
 603          */
 604
 605         if (priority >= thread->max_priority)
 606                 priority = thread->max_priority - thread->task_priority;
 607         else if (priority >= MINPRI_KERNEL)
 608                 priority -=  MINPRI_KERNEL;
 609         else if (priority >= MINPRI_RESERVED)
 610                 priority -=  MINPRI_RESERVED;
 611         else
 612                 priority -= BASEPRI_DEFAULT;
 613
 614         priority += thread->task_priority;
 615
 616         if (priority > thread->max_priority)
 617                 priority = thread->max_priority;
 618         else if (priority < MINPRI)
 619                 priority = MINPRI;
 620
 621         thread->importance = priority - thread->task_priority;
 622
 623         thread_set_user_sched_mode_and_recompute_pri(thread, mode);
 624
 625         if (mode != old_mode)
 626                 pend_token->tpt_update_thread_sfi = 1;
 627
 628 unlock:
 629         thread_unlock(thread);
 630         splx(s);
 631
 632         return kr;
 633 }
 634
 635 uint8_t
 636 thread_workq_pri_for_qos(thread_qos_t qos)
 637 {
 638         assert(qos < THREAD_QOS_LAST);
 639         return (uint8_t)thread_qos_policy_params.qos_pri[qos];
 640 }
 641
 642 thread_qos_t
 643 thread_workq_qos_for_pri(int priority)
 644 {
 645         int qos;
 646         if (priority > thread_qos_policy_params.qos_pri[THREAD_QOS_USER_INTERACTIVE]) {
 647                 // indicate that workq should map >UI threads to workq's
 648                 // internal notation for above-UI work.
 649                 return THREAD_QOS_UNSPECIFIED;
 650         }
 651         for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
 652                 // map a given priority up to the next nearest qos band.
 653                 if (thread_qos_policy_params.qos_pri[qos - 1] < priority) {
 654                         return qos;
 655                 }
 656         }
 657         return THREAD_QOS_MAINTENANCE;
 658 }
 659
 660 /*
 661  * private interface for pthread workqueues
 662  *
 663  * Set scheduling policy & absolute priority for thread
 664  * May be called with spinlocks held
 665  * Thread mutex lock is not held
 666  */
 667 void
 668 thread_reset_workq_qos(thread_t thread, uint32_t qos)
 669 {
 670         struct task_pend_token pend_token = {};
 671
 672         assert(qos < THREAD_QOS_LAST);
 673
 674         spl_t s = splsched();
 675         thread_lock(thread);
 676
 677         proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
 678                         TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
 679         proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
 680                         TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED, 0,
 681                         &pend_token);
 682
 683         assert(pend_token.tpt_update_sockets == 0);
 684
 685         thread_unlock(thread);
 686         splx(s);
 687
 688         thread_policy_update_complete_unlocked(thread, &pend_token);
 689 }
 690
 691 /*
 692  * private interface for pthread workqueues
 693  *
 694  * Set scheduling policy & absolute priority for thread
 695  * May be called with spinlocks held
 696  * Thread mutex lock is held
 697  */
 698 void
 699 thread_set_workq_override(thread_t thread, uint32_t qos)
 700 {
 701         struct task_pend_token pend_token = {};
 702
 703         assert(qos < THREAD_QOS_LAST);
 704
 705         spl_t s = splsched();
 706         thread_lock(thread);
 707
 708         proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
 709                         TASK_POLICY_QOS_WORKQ_OVERRIDE, qos, 0, &pend_token);
 710
 711         assert(pend_token.tpt_update_sockets == 0);
 712
 713         thread_unlock(thread);
 714         splx(s);
 715
 716         thread_policy_update_complete_unlocked(thread, &pend_token);
 717 }
 718
 719 /*
 720  * private interface for pthread workqueues
 721  *
 722  * Set scheduling policy & absolute priority for thread
 723  * May be called with spinlocks held
 724  * Thread mutex lock is not held
 725  */
 726 void
 727 thread_set_workq_pri(thread_t  thread,
 728                      thread_qos_t qos,
 729                      integer_t priority,
 730                      integer_t policy)
 731 {
 732         struct task_pend_token pend_token = {};
 733         sched_mode_t mode = convert_policy_to_sched_mode(policy);
 734
 735         assert(qos < THREAD_QOS_LAST);
 736         assert(thread->static_param);
 737
 738         if (!thread->static_param || !thread->active)
 739                 return;
 740
 741         spl_t s = splsched();
 742         thread_lock(thread);
 743
 744         proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
 745                         TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
 746         proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
 747                         TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED,
 748                         0, &pend_token);
 749
 750         thread_unlock(thread);
 751         splx(s);
 752
 753         /* Concern: this doesn't hold the mutex... */
 754
 755         __assert_only kern_return_t kr;
 756         kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority,
 757                         &pend_token);
 758         assert(kr == KERN_SUCCESS);
 759
 760         if (pend_token.tpt_update_thread_sfi)
 761                 sfi_reevaluate(thread);
 762 }
 763
 764 /*
 765  * thread_set_mode_and_absolute_pri:
 766  *
 767  * Set scheduling policy & absolute priority for thread, for deprecated
 768  * thread_set_policy and thread_policy interfaces.
 769  *
 770  * Called with nothing locked.
 771  */
 772 kern_return_t
 773 thread_set_mode_and_absolute_pri(thread_t   thread,
 774                                  integer_t  policy,
 775                                  integer_t  priority)
 776 {
 777         kern_return_t kr = KERN_SUCCESS;
 778         struct task_pend_token pend_token = {};
 779
 780         sched_mode_t mode = convert_policy_to_sched_mode(policy);
 781
 782         thread_mtx_lock(thread);
 783
 784         if (!thread->active) {
 785                 kr = KERN_TERMINATED;
 786                 goto unlock;
 787         }
 788
 789         if (thread_is_static_param(thread)) {
 790                 kr = KERN_POLICY_STATIC;
 791                 goto unlock;
 792         }
 793
 794         /* Setting legacy policies on threads kills the current QoS */
 795         if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED)
 796                 thread_remove_qos_policy_locked(thread, &pend_token);
 797
 798         kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority, &pend_token);
 799
 800 unlock:
 801         thread_mtx_unlock(thread);
 802
 803         thread_policy_update_complete_unlocked(thread, &pend_token);
 804
 805         return (kr);
 806 }
 807
 808 /*
 809  * Set the thread's requested mode and recompute priority
 810  * Called with thread mutex and thread locked
 811  *
 812  * TODO: Mitigate potential problems caused by moving thread to end of runq
 813  * whenever its priority is recomputed
 814  *      Only remove when it actually changes? Attempt to re-insert at appropriate location?
 815  */
 816 static void
 817 thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode)
 818 {
 819         if (thread->policy_reset)
 820                 return;
 821
 822         boolean_t removed = thread_run_queue_remove(thread);
 823
 824         /*
 825          * TODO: Instead of having saved mode, have 'user mode' and 'true mode'.
 826          * That way there's zero confusion over which the user wants
 827          * and which the kernel wants.
 828          */
 829         if (thread->sched_flags & TH_SFLAG_DEMOTED_MASK)
 830                 thread->saved_mode = mode;
 831         else
 832                 sched_set_thread_mode(thread, mode);
 833
 834         thread_recompute_priority(thread);
 835
 836         if (removed)
 837                 thread_run_queue_reinsert(thread, SCHED_TAILQ);
 838 }
 839
 840 /* called at splsched with thread lock locked */
 841 static void
 842 thread_update_qos_cpu_time_locked(thread_t thread)
 843 {
 844         task_t task = thread->task;
 845         uint64_t timer_sum, timer_delta;
 846
 847         /*
 848          * This is only as accurate as the distance between
 849          * last context switch (embedded) or last user/kernel boundary transition (desktop)
 850          * because user_timer and system_timer are only updated then.
 851          *
 852          * TODO: Consider running a timer_update operation here to update it first.
 853          *       Maybe doable with interrupts disabled from current thread.
 854          *       If the thread is on a different core, may not be easy to get right.
 855          *
 856          * TODO: There should be a function for this in timer.c
 857          */
 858
 859         timer_sum = timer_grab(&thread->user_timer);
 860         timer_sum += timer_grab(&thread->system_timer);
 861         timer_delta = timer_sum - thread->vtimer_qos_save;
 862
 863         thread->vtimer_qos_save = timer_sum;
 864
 865         uint64_t* task_counter = NULL;
 866
 867         /* Update the task-level effective and requested qos stats atomically, because we don't have the task lock. */
 868         switch (thread->effective_policy.thep_qos) {
 869                 case THREAD_QOS_UNSPECIFIED:        task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_default;          break;
 870                 case THREAD_QOS_MAINTENANCE:        task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_maintenance;      break;
 871                 case THREAD_QOS_BACKGROUND:         task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_background;       break;
 872                 case THREAD_QOS_UTILITY:            task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_utility;          break;
 873                 case THREAD_QOS_LEGACY:             task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_legacy;           break;
 874                 case THREAD_QOS_USER_INITIATED:     task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_initiated;   break;
 875                 case THREAD_QOS_USER_INTERACTIVE:   task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_interactive; break;
 876                 default:
 877                         panic("unknown effective QoS: %d", thread->effective_policy.thep_qos);
 878         }
 879
 880         OSAddAtomic64(timer_delta, task_counter);
 881
 882         /* Update the task-level qos stats atomically, because we don't have the task lock. */
 883         switch (thread->requested_policy.thrp_qos) {
 884                 case THREAD_QOS_UNSPECIFIED:        task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_default;          break;
 885                 case THREAD_QOS_MAINTENANCE:        task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_maintenance;      break;
 886                 case THREAD_QOS_BACKGROUND:         task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_background;       break;
 887                 case THREAD_QOS_UTILITY:            task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_utility;          break;
 888                 case THREAD_QOS_LEGACY:             task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_legacy;           break;
 889                 case THREAD_QOS_USER_INITIATED:     task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_initiated;   break;
 890                 case THREAD_QOS_USER_INTERACTIVE:   task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_interactive; break;
 891                 default:
 892                         panic("unknown requested QoS: %d", thread->requested_policy.thrp_qos);
 893         }
 894
 895         OSAddAtomic64(timer_delta, task_counter);
 896 }
 897
 898 /*
 899  * called with no thread locks held
 900  * may hold task lock
 901  */
 902 void
 903 thread_update_qos_cpu_time(thread_t thread)
 904 {
 905         thread_mtx_lock(thread);
 906
 907         spl_t s = splsched();
 908         thread_lock(thread);
 909
 910         thread_update_qos_cpu_time_locked(thread);
 911
 912         thread_unlock(thread);
 913         splx(s);
 914
 915         thread_mtx_unlock(thread);
 916 }
 917
 918 /*
 919  * Calculate base priority from thread attributes, and set it on the thread
 920  *
 921  * Called with thread_lock and thread mutex held.
 922  */
 923 void
 924 thread_recompute_priority(
 925         thread_t                thread)
 926 {
 927         integer_t               priority;
 928
 929         if (thread->policy_reset)
 930                 return;
 931
 932         if (thread->sched_mode == TH_MODE_REALTIME) {
 933                 sched_set_thread_base_priority(thread, BASEPRI_RTQUEUES);
 934                 return;
 935         } else if (thread->effective_policy.thep_qos != THREAD_QOS_UNSPECIFIED) {
 936                 int qos = thread->effective_policy.thep_qos;
 937                 int qos_ui_is_urgent = thread->effective_policy.thep_qos_ui_is_urgent;
 938                 int qos_relprio = -(thread->effective_policy.thep_qos_relprio); /* stored in task policy inverted */
 939                 int qos_scaled_relprio;
 940
 941                 assert(qos >= 0 && qos < THREAD_QOS_LAST);
 942                 assert(qos_relprio <= 0 && qos_relprio >= THREAD_QOS_MIN_TIER_IMPORTANCE);
 943
 944                 priority = thread_qos_policy_params.qos_pri[qos];
 945                 qos_scaled_relprio = thread_qos_scaled_relative_priority(qos, qos_relprio);
 946
 947                 if (qos == THREAD_QOS_USER_INTERACTIVE && qos_ui_is_urgent == 1) {
 948                         /* Bump priority 46 to 47 when in a frontmost app */
 949                         qos_scaled_relprio += 1;
 950                 }
 951
 952                 /* TODO: factor in renice priority here? */
 953
 954                 priority += qos_scaled_relprio;
 955         } else {
 956                 if (thread->importance > MAXPRI)
 957                         priority = MAXPRI;
 958                 else if (thread->importance < -MAXPRI)
 959                         priority = -MAXPRI;
 960                 else
 961                         priority = thread->importance;
 962
 963                 priority += thread->task_priority;
 964         }
 965
 966         priority = MAX(priority, thread->user_promotion_basepri);
 967
 968         /*
 969          * Clamp priority back into the allowed range for this task.
 970          *  The initial priority value could be out of this range due to:
 971          *      Task clamped to BG or Utility (max-pri is 4, or 20)
 972          *      Task is user task (max-pri is 63)
 973          *      Task is kernel task (max-pri is 95)
 974          * Note that thread->importance is user-settable to any integer
 975          * via THREAD_PRECEDENCE_POLICY.
 976          */
 977         if (priority > thread->max_priority)
 978                 priority = thread->max_priority;
 979         else if (priority < MINPRI)
 980                 priority = MINPRI;
 981
 982         if (thread->saved_mode == TH_MODE_REALTIME &&
 983             thread->sched_flags & TH_SFLAG_FAILSAFE)
 984                 priority = DEPRESSPRI;
 985
 986         if (thread->effective_policy.thep_terminated == TRUE) {
 987                 /*
 988                  * We temporarily want to override the expected priority to
 989                  * ensure that the thread exits in a timely manner.
 990                  * Note that this is allowed to exceed thread->max_priority
 991                  * so that the thread is no longer clamped to background
 992                  * during the final exit phase.
 993                  */
 994                 if (priority < thread->task_priority)
 995                         priority = thread->task_priority;
 996                 if (priority < BASEPRI_DEFAULT)
 997                         priority = BASEPRI_DEFAULT;
 998         }
 999
1000 #if CONFIG_EMBEDDED
1001         /* No one can have a base priority less than MAXPRI_THROTTLE */
1002         if (priority < MAXPRI_THROTTLE)
1003                 priority = MAXPRI_THROTTLE;
1004 #endif /* CONFIG_EMBEDDED */
1005
1006         sched_set_thread_base_priority(thread, priority);
1007 }
1008
1009 /* Called with the task lock held, but not the thread mutex or spinlock */
1010 void
1011 thread_policy_update_tasklocked(
1012                                 thread_t           thread,
1013                                 integer_t          priority,
1014                                 integer_t          max_priority,
1015                                 task_pend_token_t  pend_token)
1016 {
1017         thread_mtx_lock(thread);
1018
1019         if (!thread->active || thread->policy_reset) {
1020                 thread_mtx_unlock(thread);
1021                 return;
1022         }
1023
1024         spl_t s = splsched();
1025         thread_lock(thread);
1026
1027         __unused
1028         integer_t old_max_priority = thread->max_priority;
1029
1030         thread->task_priority = priority;
1031         thread->max_priority = max_priority;
1032
1033 #if CONFIG_EMBEDDED
1034         /*
1035          * When backgrounding a thread, iOS has the semantic that
1036          * realtime and fixed priority threads should be demoted
1037          * to timeshare background threads.
1038          *
1039          * On OSX, realtime and fixed priority threads don't lose their mode.
1040          *
1041          * TODO: Do this inside the thread policy update routine in order to avoid double
1042          * remove/reinsert for a runnable thread
1043          */
1044         if ((max_priority <= MAXPRI_THROTTLE) && (old_max_priority > MAXPRI_THROTTLE)) {
1045                 sched_thread_mode_demote(thread, TH_SFLAG_THROTTLED);
1046         } else if ((max_priority > MAXPRI_THROTTLE) && (old_max_priority <= MAXPRI_THROTTLE)) {
1047                 sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
1048         }
1049 #endif /* CONFIG_EMBEDDED */
1050
1051         thread_policy_update_spinlocked(thread, TRUE, pend_token);
1052
1053         thread_unlock(thread);
1054         splx(s);
1055
1056         thread_mtx_unlock(thread);
1057 }
1058
1059 /*
1060  * Reset thread to default state in preparation for termination
1061  * Called with thread mutex locked
1062  *
1063  * Always called on current thread, so we don't need a run queue remove
1064  */
1065 void
1066 thread_policy_reset(
1067         thread_t                thread)
1068 {
1069         spl_t           s;
1070
1071         assert(thread == current_thread());
1072
1073         s = splsched();
1074         thread_lock(thread);
1075
1076         if (thread->sched_flags & TH_SFLAG_FAILSAFE)
1077                 sched_thread_mode_undemote(thread, TH_SFLAG_FAILSAFE);
1078
1079         if (thread->sched_flags & TH_SFLAG_THROTTLED)
1080                 sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
1081
1082         /* At this point, the various demotions should be inactive */
1083         assert(!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK));
1084         assert(!(thread->sched_flags & TH_SFLAG_THROTTLED));
1085         assert(!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK));
1086
1087         /* Reset thread back to task-default basepri and mode  */
1088         sched_mode_t newmode = SCHED(initial_thread_sched_mode)(thread->task);
1089
1090         sched_set_thread_mode(thread, newmode);
1091
1092         thread->importance = 0;
1093
1094         /* Prevent further changes to thread base priority or mode */
1095         thread->policy_reset = 1;
1096
1097         sched_set_thread_base_priority(thread, thread->task_priority);
1098
1099         thread_unlock(thread);
1100         splx(s);
1101 }
1102
1103 kern_return_t
1104 thread_policy_get(
1105         thread_t                                thread,
1106         thread_policy_flavor_t  flavor,
1107         thread_policy_t                 policy_info,
1108         mach_msg_type_number_t  *count,
1109         boolean_t                               *get_default)
1110 {
1111         kern_return_t                   result = KERN_SUCCESS;
1112
1113         if (thread == THREAD_NULL)
1114                 return (KERN_INVALID_ARGUMENT);
1115
1116         thread_mtx_lock(thread);
1117         if (!thread->active) {
1118                 thread_mtx_unlock(thread);
1119
1120                 return (KERN_TERMINATED);
1121         }
1122
1123         switch (flavor) {
1124
1125         case THREAD_EXTENDED_POLICY:
1126         {
1127                 boolean_t               timeshare = TRUE;
1128
1129                 if (!(*get_default)) {
1130                         spl_t s = splsched();
1131                         thread_lock(thread);
1132
1133                         if (     (thread->sched_mode != TH_MODE_REALTIME)       &&
1134                                          (thread->saved_mode != TH_MODE_REALTIME)                       ) {
1135                                 if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK))
1136                                         timeshare = (thread->sched_mode == TH_MODE_TIMESHARE) != 0;
1137                                 else
1138                                         timeshare = (thread->saved_mode == TH_MODE_TIMESHARE) != 0;
1139                         }
1140                         else
1141                                 *get_default = TRUE;
1142
1143                         thread_unlock(thread);
1144                         splx(s);
1145                 }
1146
1147                 if (*count >= THREAD_EXTENDED_POLICY_COUNT) {
1148                         thread_extended_policy_t        info;
1149
1150                         info = (thread_extended_policy_t)policy_info;
1151                         info->timeshare = timeshare;
1152                 }
1153
1154                 break;
1155         }
1156
1157         case THREAD_TIME_CONSTRAINT_POLICY:
1158         {
1159                 thread_time_constraint_policy_t         info;
1160
1161                 if (*count < THREAD_TIME_CONSTRAINT_POLICY_COUNT) {
1162                         result = KERN_INVALID_ARGUMENT;
1163                         break;
1164                 }
1165
1166                 info = (thread_time_constraint_policy_t)policy_info;
1167
1168                 if (!(*get_default)) {
1169                         spl_t s = splsched();
1170                         thread_lock(thread);
1171
1172                         if (    (thread->sched_mode == TH_MODE_REALTIME)        ||
1173                                         (thread->saved_mode == TH_MODE_REALTIME)                ) {
1174                                 info->period = thread->realtime.period;
1175                                 info->computation = thread->realtime.computation;
1176                                 info->constraint = thread->realtime.constraint;
1177                                 info->preemptible = thread->realtime.preemptible;
1178                         }
1179                         else
1180                                 *get_default = TRUE;
1181
1182                         thread_unlock(thread);
1183                         splx(s);
1184                 }
1185
1186                 if (*get_default) {
1187                         info->period = 0;
1188                         info->computation = default_timeshare_computation;
1189                         info->constraint = default_timeshare_constraint;
1190                         info->preemptible = TRUE;
1191                 }
1192
1193                 break;
1194         }
1195
1196         case THREAD_PRECEDENCE_POLICY:
1197         {
1198                 thread_precedence_policy_t              info;
1199
1200                 if (*count < THREAD_PRECEDENCE_POLICY_COUNT) {
1201                         result = KERN_INVALID_ARGUMENT;
1202                         break;
1203                 }
1204
1205                 info = (thread_precedence_policy_t)policy_info;
1206
1207                 if (!(*get_default)) {
1208                         spl_t s = splsched();
1209                         thread_lock(thread);
1210
1211                         info->importance = thread->importance;
1212
1213                         thread_unlock(thread);
1214                         splx(s);
1215                 }
1216                 else
1217                         info->importance = 0;
1218
1219                 break;
1220         }
1221
1222         case THREAD_AFFINITY_POLICY:
1223         {
1224                 thread_affinity_policy_t                info;
1225
1226                 if (!thread_affinity_is_supported()) {
1227                         result = KERN_NOT_SUPPORTED;
1228                         break;
1229                 }
1230                 if (*count < THREAD_AFFINITY_POLICY_COUNT) {
1231                         result = KERN_INVALID_ARGUMENT;
1232                         break;
1233                 }
1234
1235                 info = (thread_affinity_policy_t)policy_info;
1236
1237                 if (!(*get_default))
1238                         info->affinity_tag = thread_affinity_get(thread);
1239                 else
1240                         info->affinity_tag = THREAD_AFFINITY_TAG_NULL;
1241
1242                 break;
1243         }
1244
1245         case THREAD_POLICY_STATE:
1246         {
1247                 thread_policy_state_t           info;
1248
1249                 if (*count < THREAD_POLICY_STATE_COUNT) {
1250                         result = KERN_INVALID_ARGUMENT;
1251                         break;
1252                 }
1253
1254                 /* Only root can get this info */
1255                 if (current_task()->sec_token.val[0] != 0) {
1256                         result = KERN_PROTECTION_FAILURE;
1257                         break;
1258                 }
1259
1260                 info = (thread_policy_state_t)(void*)policy_info;
1261
1262                 if (!(*get_default)) {
1263                         info->flags = 0;
1264
1265                         spl_t s = splsched();
1266                         thread_lock(thread);
1267
1268                         info->flags |= (thread->static_param ? THREAD_POLICY_STATE_FLAG_STATIC_PARAM : 0);
1269
1270                         info->thps_requested_policy = *(uint64_t*)(void*)(&thread->requested_policy);
1271                         info->thps_effective_policy = *(uint64_t*)(void*)(&thread->effective_policy);
1272
1273                         info->thps_user_promotions          = 0;
1274                         info->thps_user_promotion_basepri   = thread->user_promotion_basepri;
1275                         info->thps_ipc_overrides            = thread->ipc_overrides;
1276
1277                         proc_get_thread_policy_bitfield(thread, info);
1278
1279                         thread_unlock(thread);
1280                         splx(s);
1281                 } else {
1282                         info->requested = 0;
1283                         info->effective = 0;
1284                         info->pending = 0;
1285                 }
1286
1287                 break;
1288         }
1289
1290         case THREAD_LATENCY_QOS_POLICY:
1291         {
1292                 thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
1293                 thread_latency_qos_t plqos;
1294
1295                 if (*count < THREAD_LATENCY_QOS_POLICY_COUNT) {
1296                         result = KERN_INVALID_ARGUMENT;
1297                         break;
1298                 }
1299
1300                 if (*get_default) {
1301                         plqos = 0;
1302                 } else {
1303                         plqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_LATENCY_QOS, NULL);
1304                 }
1305
1306                 info->thread_latency_qos_tier = qos_latency_policy_package(plqos);
1307         }
1308         break;
1309
1310         case THREAD_THROUGHPUT_QOS_POLICY:
1311         {
1312                 thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
1313                 thread_throughput_qos_t ptqos;
1314
1315                 if (*count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
1316                         result = KERN_INVALID_ARGUMENT;
1317                         break;
1318                 }
1319
1320                 if (*get_default) {
1321                         ptqos = 0;
1322                 } else {
1323                         ptqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_THROUGH_QOS, NULL);
1324                 }
1325
1326                 info->thread_throughput_qos_tier = qos_throughput_policy_package(ptqos);
1327         }
1328         break;
1329
1330         case THREAD_QOS_POLICY:
1331         {
1332                 thread_qos_policy_t info = (thread_qos_policy_t)policy_info;
1333
1334                 if (*count < THREAD_QOS_POLICY_COUNT) {
1335                         result = KERN_INVALID_ARGUMENT;
1336                         break;
1337                 }
1338
1339                 if (!(*get_default)) {
1340                         int relprio_value = 0;
1341                         info->qos_tier = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
1342                                                                        TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
1343
1344                         info->tier_importance = -relprio_value;
1345                 } else {
1346                         info->qos_tier = THREAD_QOS_UNSPECIFIED;
1347                         info->tier_importance = 0;
1348                 }
1349
1350                 break;
1351         }
1352
1353         default:
1354                 result = KERN_INVALID_ARGUMENT;
1355                 break;
1356         }
1357
1358         thread_mtx_unlock(thread);
1359
1360         return (result);
1361 }
1362
1363 void
1364 thread_policy_create(thread_t thread)
1365 {
1366         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1367                                   (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_START,
1368                                   thread_tid(thread), theffective_0(thread),
1369                                   theffective_1(thread), thread->base_pri, 0);
1370
1371         /* We pass a pend token but ignore it */
1372         struct task_pend_token pend_token = {};
1373
1374         thread_policy_update_internal_spinlocked(thread, TRUE, &pend_token);
1375
1376         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1377                                   (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_END,
1378                                   thread_tid(thread), theffective_0(thread),
1379                                   theffective_1(thread), thread->base_pri, 0);
1380 }
1381
1382 static void
1383 thread_policy_update_spinlocked(thread_t thread, boolean_t recompute_priority, task_pend_token_t pend_token)
1384 {
1385         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1386                                   (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD) | DBG_FUNC_START),
1387                                   thread_tid(thread), theffective_0(thread),
1388                                   theffective_1(thread), thread->base_pri, 0);
1389
1390         thread_policy_update_internal_spinlocked(thread, recompute_priority, pend_token);
1391
1392         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1393                                   (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD)) | DBG_FUNC_END,
1394                                   thread_tid(thread), theffective_0(thread),
1395                                   theffective_1(thread), thread->base_pri, 0);
1396 }
1397
1398
1399
1400 /*
1401  * One thread state update function TO RULE THEM ALL
1402  *
1403  * This function updates the thread effective policy fields
1404  * and pushes the results to the relevant subsystems.
1405  *
1406  * Does not return a value; pended actions that need to be run are recorded in pend_token.
1407  *
1408  * Called with thread spinlock locked, task may be locked, thread mutex may be locked
1409  */
1410 static void
1411 thread_policy_update_internal_spinlocked(thread_t thread, boolean_t recompute_priority,
1412                                          task_pend_token_t pend_token)
1413 {
1414         /*
1415          * Step 1:
1416          *  Gather requested policy and effective task state
1417          */
1418
1419         struct thread_requested_policy requested = thread->requested_policy;
1420         struct task_effective_policy task_effective = thread->task->effective_policy;
1421
1422         /*
1423          * Step 2:
1424          *  Calculate new effective policies from requested policy, task and thread state
1425          *  Rules:
1426          *      Don't change requested, it won't take effect
1427          */
1428
1429         struct thread_effective_policy next = {};
1430
1431         next.thep_qos_ui_is_urgent = task_effective.tep_qos_ui_is_urgent;
1432
1433         uint32_t next_qos = requested.thrp_qos;
1434
1435         if (requested.thrp_qos != THREAD_QOS_UNSPECIFIED) {
1436                 next_qos = MAX(requested.thrp_qos_override, next_qos);
1437                 next_qos = MAX(requested.thrp_qos_promote, next_qos);
1438                 next_qos = MAX(requested.thrp_qos_ipc_override, next_qos);
1439                 next_qos = MAX(requested.thrp_qos_workq_override, next_qos);
1440         }
1441
1442         next.thep_qos = next_qos;
1443
1444         /* A task clamp will result in an effective QoS even when requested is UNSPECIFIED */
1445         if (task_effective.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
1446                 if (next.thep_qos != THREAD_QOS_UNSPECIFIED)
1447                         next.thep_qos = MIN(task_effective.tep_qos_clamp, next.thep_qos);
1448                 else
1449                         next.thep_qos = task_effective.tep_qos_clamp;
1450         }
1451
1452         /*
1453          * Extract outbound-promotion QoS before applying task ceiling or BG clamp
1454          * This allows QoS promotions to work properly even after the process is unclamped.
1455          */
1456         next.thep_qos_promote = next.thep_qos;
1457
1458         /* The ceiling only applies to threads that are in the QoS world */
1459         if (task_effective.tep_qos_ceiling != THREAD_QOS_UNSPECIFIED &&
1460             next.thep_qos                  != THREAD_QOS_UNSPECIFIED) {
1461                 next.thep_qos = MIN(task_effective.tep_qos_ceiling, next.thep_qos);
1462         }
1463
1464         /* Apply the sync ipc qos override */
1465         assert(requested.thrp_qos_sync_ipc_override == THREAD_QOS_UNSPECIFIED);
1466
1467         /*
1468          * The QoS relative priority is only applicable when the original programmer's
1469          * intended (requested) QoS is in effect. When the QoS is clamped (e.g.
1470          * USER_INITIATED-13REL clamped to UTILITY), the relative priority is not honored,
1471          * since otherwise it would be lower than unclamped threads. Similarly, in the
1472          * presence of boosting, the programmer doesn't know what other actors
1473          * are boosting the thread.
1474          */
1475         if ((requested.thrp_qos != THREAD_QOS_UNSPECIFIED) &&
1476             (requested.thrp_qos == next.thep_qos) &&
1477             (requested.thrp_qos_override == THREAD_QOS_UNSPECIFIED)) {
1478                 next.thep_qos_relprio = requested.thrp_qos_relprio;
1479         } else {
1480                 next.thep_qos_relprio = 0;
1481         }
1482
1483         /* Calculate DARWIN_BG */
1484         boolean_t wants_darwinbg        = FALSE;
1485         boolean_t wants_all_sockets_bg  = FALSE; /* Do I want my existing sockets to be bg */
1486
1487         /*
1488          * If DARWIN_BG has been requested at either level, it's engaged.
1489          * darwinbg threads always create bg sockets,
1490          * but only some types of darwinbg change the sockets
1491          * after they're created
1492          */
1493         if (requested.thrp_int_darwinbg || requested.thrp_ext_darwinbg)
1494                 wants_all_sockets_bg = wants_darwinbg = TRUE;
1495
1496         if (requested.thrp_pidbind_bg)
1497                 wants_all_sockets_bg = wants_darwinbg = TRUE;
1498
1499         if (task_effective.tep_darwinbg)
1500                 wants_darwinbg = TRUE;
1501
1502         if (next.thep_qos == THREAD_QOS_BACKGROUND ||
1503             next.thep_qos == THREAD_QOS_MAINTENANCE)
1504                 wants_darwinbg = TRUE;
1505
1506         /* Calculate side effects of DARWIN_BG */
1507
1508         if (wants_darwinbg)
1509                 next.thep_darwinbg = 1;
1510
1511         if (next.thep_darwinbg || task_effective.tep_new_sockets_bg)
1512                 next.thep_new_sockets_bg = 1;
1513
1514         /* Don't use task_effective.tep_all_sockets_bg here */
1515         if (wants_all_sockets_bg)
1516                 next.thep_all_sockets_bg = 1;
1517
1518         /* darwinbg implies background QOS (or lower) */
1519         if (next.thep_darwinbg &&
1520             (next.thep_qos > THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_UNSPECIFIED)) {
1521                 next.thep_qos = THREAD_QOS_BACKGROUND;
1522                 next.thep_qos_relprio = 0;
1523         }
1524
1525         /* Calculate IO policy */
1526
1527         int iopol = THROTTLE_LEVEL_TIER0;
1528
1529         /* Factor in the task's IO policy */
1530         if (next.thep_darwinbg)
1531                 iopol = MAX(iopol, task_effective.tep_bg_iotier);
1532
1533         iopol = MAX(iopol, task_effective.tep_io_tier);
1534
1535         /* Look up the associated IO tier value for the QoS class */
1536         iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.thep_qos]);
1537
1538         iopol = MAX(iopol, requested.thrp_int_iotier);
1539         iopol = MAX(iopol, requested.thrp_ext_iotier);
1540
1541         next.thep_io_tier = iopol;
1542
1543         /*
1544          * If a QoS override is causing IO to go into a lower tier, we also set
1545          * the passive bit so that a thread doesn't end up stuck in its own throttle
1546          * window when the override goes away.
1547          */
1548         boolean_t qos_io_override_active = FALSE;
1549         if (thread_qos_policy_params.qos_iotier[next.thep_qos] <
1550             thread_qos_policy_params.qos_iotier[requested.thrp_qos])
1551                 qos_io_override_active = TRUE;
1552
1553         /* Calculate Passive IO policy */
1554         if (requested.thrp_ext_iopassive    ||
1555             requested.thrp_int_iopassive    ||
1556             qos_io_override_active          ||
1557             task_effective.tep_io_passive   )
1558                 next.thep_io_passive = 1;
1559
1560         /* Calculate timer QOS */
1561         uint32_t latency_qos = requested.thrp_latency_qos;
1562
1563         latency_qos = MAX(latency_qos, task_effective.tep_latency_qos);
1564         latency_qos = MAX(latency_qos, thread_qos_policy_params.qos_latency_qos[next.thep_qos]);
1565
1566         next.thep_latency_qos = latency_qos;
1567
1568         /* Calculate throughput QOS */
1569         uint32_t through_qos = requested.thrp_through_qos;
1570
1571         through_qos = MAX(through_qos, task_effective.tep_through_qos);
1572         through_qos = MAX(through_qos, thread_qos_policy_params.qos_through_qos[next.thep_qos]);
1573
1574         next.thep_through_qos = through_qos;
1575
1576         if (task_effective.tep_terminated || requested.thrp_terminated) {
1577                 /* Shoot down the throttles that slow down exit or response to SIGTERM */
1578                 next.thep_terminated    = 1;
1579                 next.thep_darwinbg      = 0;
1580                 next.thep_io_tier       = THROTTLE_LEVEL_TIER0;
1581                 next.thep_qos           = THREAD_QOS_UNSPECIFIED;
1582                 next.thep_latency_qos   = LATENCY_QOS_TIER_UNSPECIFIED;
1583                 next.thep_through_qos   = THROUGHPUT_QOS_TIER_UNSPECIFIED;
1584         }
1585
1586         /*
1587          * Step 3:
1588          *  Swap out old policy for new policy
1589          */
1590
1591         struct thread_effective_policy prev = thread->effective_policy;
1592
1593         thread_update_qos_cpu_time_locked(thread);
1594
1595         /* This is the point where the new values become visible to other threads */
1596         thread->effective_policy = next;
1597
1598         /*
1599          * Step 4:
1600          *  Pend updates that can't be done while holding the thread lock
1601          */
1602
1603         if (prev.thep_all_sockets_bg != next.thep_all_sockets_bg)
1604                 pend_token->tpt_update_sockets = 1;
1605
1606         /* TODO: Doesn't this only need to be done if the throttle went up? */
1607         if (prev.thep_io_tier != next.thep_io_tier)
1608                 pend_token->tpt_update_throttle = 1;
1609
1610         /*
1611          * Check for the attributes that sfi_thread_classify() consults,
1612          *  and trigger SFI re-evaluation.
1613          */
1614         if (prev.thep_qos      != next.thep_qos         ||
1615             prev.thep_darwinbg != next.thep_darwinbg    )
1616                 pend_token->tpt_update_thread_sfi = 1;
1617
1618         /*
1619          * Step 5:
1620          *  Update other subsystems as necessary if something has changed
1621          */
1622
1623         /* Check for the attributes that thread_recompute_priority() consults */
1624         if (prev.thep_qos               != next.thep_qos                ||
1625             prev.thep_qos_relprio       != next.thep_qos_relprio        ||
1626             prev.thep_qos_ui_is_urgent  != next.thep_qos_ui_is_urgent   ||
1627             prev.thep_terminated        != next.thep_terminated         ||
1628             pend_token->tpt_force_recompute_pri == 1                    ||
1629             recompute_priority) {
1630                 thread_recompute_priority(thread);
1631         }
1632 }
1633
1634
1635 /*
1636  * Initiate a thread policy state transition on a thread with its TID
1637  * Useful if you cannot guarantee the thread won't get terminated
1638  * Precondition: No locks are held
1639  * Will take task lock - using the non-tid variant is faster
1640  * if you already have a thread ref.
1641  */
1642 void
1643 proc_set_thread_policy_with_tid(task_t     task,
1644                                 uint64_t   tid,
1645                                 int        category,
1646                                 int        flavor,
1647                                 int        value)
1648 {
1649         /* takes task lock, returns ref'ed thread or NULL */
1650         thread_t thread = task_findtid(task, tid);
1651
1652         if (thread == THREAD_NULL)
1653                 return;
1654
1655         proc_set_thread_policy(thread, category, flavor, value);
1656
1657         thread_deallocate(thread);
1658 }
1659
1660 /*
1661  * Initiate a thread policy transition on a thread
1662  * This path supports networking transitions (i.e. darwinbg transitions)
1663  * Precondition: No locks are held
1664  */
1665 void
1666 proc_set_thread_policy(thread_t   thread,
1667                        int        category,
1668                        int        flavor,
1669                        int        value)
1670 {
1671         struct task_pend_token pend_token = {};
1672
1673         thread_mtx_lock(thread);
1674
1675         proc_set_thread_policy_locked(thread, category, flavor, value, 0, &pend_token);
1676
1677         thread_mtx_unlock(thread);
1678
1679         thread_policy_update_complete_unlocked(thread, &pend_token);
1680 }
1681
1682 /*
1683  * Do the things that can't be done while holding a thread mutex.
1684  * These are set up to call back into thread policy to get the latest value,
1685  * so they don't have to be synchronized with the update.
1686  * The only required semantic is 'call this sometime after updating effective policy'
1687  *
1688  * Precondition: Thread mutex is not held
1689  *
1690  * This may be called with the task lock held, but in that case it won't be
1691  * called with tpt_update_sockets set.
1692  */
1693 void
1694 thread_policy_update_complete_unlocked(thread_t thread, task_pend_token_t pend_token)
1695 {
1696 #ifdef MACH_BSD
1697         if (pend_token->tpt_update_sockets)
1698                 proc_apply_task_networkbg(thread->task->bsd_info, thread);
1699 #endif /* MACH_BSD */
1700
1701         if (pend_token->tpt_update_throttle)
1702                 rethrottle_thread(thread->uthread);
1703
1704         if (pend_token->tpt_update_thread_sfi)
1705                 sfi_reevaluate(thread);
1706 }
1707
1708 /*
1709  * Set and update thread policy
1710  * Thread mutex might be held
1711  */
1712 static void
1713 proc_set_thread_policy_locked(thread_t          thread,
1714                               int               category,
1715                               int               flavor,
1716                               int               value,
1717                               int               value2,
1718                               task_pend_token_t pend_token)
1719 {
1720         spl_t s = splsched();
1721         thread_lock(thread);
1722
1723         proc_set_thread_policy_spinlocked(thread, category, flavor, value, value2, pend_token);
1724
1725         thread_unlock(thread);
1726         splx(s);
1727 }
1728
1729 /*
1730  * Set and update thread policy
1731  * Thread spinlock is held
1732  */
1733 static void
1734 proc_set_thread_policy_spinlocked(thread_t          thread,
1735                                   int               category,
1736                                   int               flavor,
1737                                   int               value,
1738                                   int               value2,
1739                                   task_pend_token_t pend_token)
1740 {
1741         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1742                                   (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_START,
1743                                   thread_tid(thread), threquested_0(thread),
1744                                   threquested_1(thread), value, 0);
1745
1746         thread_set_requested_policy_spinlocked(thread, category, flavor, value, value2);
1747
1748         thread_policy_update_spinlocked(thread, FALSE, pend_token);
1749
1750         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1751                                   (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_END,
1752                                   thread_tid(thread), threquested_0(thread),
1753                                   threquested_1(thread), tpending(pend_token), 0);
1754 }
1755
1756 /*
1757  * Set the requested state for a specific flavor to a specific value.
1758  */
1759 static void
1760 thread_set_requested_policy_spinlocked(thread_t     thread,
1761                                        int          category,
1762                                        int          flavor,
1763                                        int          value,
1764                                        int          value2)
1765 {
1766         int tier, passive;
1767
1768         struct thread_requested_policy requested = thread->requested_policy;
1769
1770         switch (flavor) {
1771
1772         /* Category: EXTERNAL and INTERNAL, thread and task */
1773
1774                 case TASK_POLICY_DARWIN_BG:
1775                         if (category == TASK_POLICY_EXTERNAL)
1776                                 requested.thrp_ext_darwinbg = value;
1777                         else
1778                                 requested.thrp_int_darwinbg = value;
1779                         break;
1780
1781                 case TASK_POLICY_IOPOL:
1782                         proc_iopol_to_tier(value, &tier, &passive);
1783                         if (category == TASK_POLICY_EXTERNAL) {
1784                                 requested.thrp_ext_iotier  = tier;
1785                                 requested.thrp_ext_iopassive = passive;
1786                         } else {
1787                                 requested.thrp_int_iotier  = tier;
1788                                 requested.thrp_int_iopassive = passive;
1789                         }
1790                         break;
1791
1792                 case TASK_POLICY_IO:
1793                         if (category == TASK_POLICY_EXTERNAL)
1794                                 requested.thrp_ext_iotier = value;
1795                         else
1796                                 requested.thrp_int_iotier = value;
1797                         break;
1798
1799                 case TASK_POLICY_PASSIVE_IO:
1800                         if (category == TASK_POLICY_EXTERNAL)
1801                                 requested.thrp_ext_iopassive = value;
1802                         else
1803                                 requested.thrp_int_iopassive = value;
1804                         break;
1805
1806         /* Category: ATTRIBUTE, thread only */
1807
1808                 case TASK_POLICY_PIDBIND_BG:
1809                         assert(category == TASK_POLICY_ATTRIBUTE);
1810                         requested.thrp_pidbind_bg = value;
1811                         break;
1812
1813                 case TASK_POLICY_LATENCY_QOS:
1814                         assert(category == TASK_POLICY_ATTRIBUTE);
1815                         requested.thrp_latency_qos = value;
1816                         break;
1817
1818                 case TASK_POLICY_THROUGH_QOS:
1819                         assert(category == TASK_POLICY_ATTRIBUTE);
1820                         requested.thrp_through_qos = value;
1821                         break;
1822
1823                 case TASK_POLICY_QOS:
1824                         assert(category == TASK_POLICY_ATTRIBUTE);
1825                         requested.thrp_qos = value;
1826                         break;
1827
1828                 case TASK_POLICY_QOS_OVERRIDE:
1829                         assert(category == TASK_POLICY_ATTRIBUTE);
1830                         requested.thrp_qos_override = value;
1831                         break;
1832
1833                 case TASK_POLICY_QOS_AND_RELPRIO:
1834                         assert(category == TASK_POLICY_ATTRIBUTE);
1835                         requested.thrp_qos = value;
1836                         requested.thrp_qos_relprio = value2;
1837                         DTRACE_BOOST3(qos_set, uint64_t, thread->thread_id, int, requested.thrp_qos, int, requested.thrp_qos_relprio);
1838                         break;
1839
1840                 case TASK_POLICY_QOS_WORKQ_OVERRIDE:
1841                         assert(category == TASK_POLICY_ATTRIBUTE);
1842                         requested.thrp_qos_workq_override = value;
1843                         break;
1844
1845                 case TASK_POLICY_QOS_PROMOTE:
1846                         assert(category == TASK_POLICY_ATTRIBUTE);
1847                         requested.thrp_qos_promote = value;
1848                         break;
1849
1850                 case TASK_POLICY_QOS_IPC_OVERRIDE:
1851                         assert(category == TASK_POLICY_ATTRIBUTE);
1852                         requested.thrp_qos_ipc_override = value;
1853                         break;
1854
1855                 case TASK_POLICY_TERMINATED:
1856                         assert(category == TASK_POLICY_ATTRIBUTE);
1857                         requested.thrp_terminated = value;
1858                         break;
1859
1860                 default:
1861                         panic("unknown task policy: %d %d %d", category, flavor, value);
1862                         break;
1863         }
1864
1865         thread->requested_policy = requested;
1866 }
1867
1868 /*
1869  * Gets what you set. Effective values may be different.
1870  * Precondition: No locks are held
1871  */
1872 int
1873 proc_get_thread_policy(thread_t   thread,
1874                        int        category,
1875                        int        flavor)
1876 {
1877         int value = 0;
1878         thread_mtx_lock(thread);
1879         value = proc_get_thread_policy_locked(thread, category, flavor, NULL);
1880         thread_mtx_unlock(thread);
1881         return value;
1882 }
1883
1884 static int
1885 proc_get_thread_policy_locked(thread_t   thread,
1886                               int        category,
1887                               int        flavor,
1888                               int*       value2)
1889 {
1890         int value = 0;
1891
1892         spl_t s = splsched();
1893         thread_lock(thread);
1894
1895         value = thread_get_requested_policy_spinlocked(thread, category, flavor, value2);
1896
1897         thread_unlock(thread);
1898         splx(s);
1899
1900         return value;
1901 }
1902
1903 /*
1904  * Gets what you set. Effective values may be different.
1905  */
1906 static int
1907 thread_get_requested_policy_spinlocked(thread_t thread,
1908                                        int      category,
1909                                        int      flavor,
1910                                        int*     value2)
1911 {
1912         int value = 0;
1913
1914         struct thread_requested_policy requested = thread->requested_policy;
1915
1916         switch (flavor) {
1917                 case TASK_POLICY_DARWIN_BG:
1918                         if (category == TASK_POLICY_EXTERNAL)
1919                                 value = requested.thrp_ext_darwinbg;
1920                         else
1921                                 value = requested.thrp_int_darwinbg;
1922                         break;
1923                 case TASK_POLICY_IOPOL:
1924                         if (category == TASK_POLICY_EXTERNAL)
1925                                 value = proc_tier_to_iopol(requested.thrp_ext_iotier,
1926                                                            requested.thrp_ext_iopassive);
1927                         else
1928                                 value = proc_tier_to_iopol(requested.thrp_int_iotier,
1929                                                            requested.thrp_int_iopassive);
1930                         break;
1931                 case TASK_POLICY_IO:
1932                         if (category == TASK_POLICY_EXTERNAL)
1933                                 value = requested.thrp_ext_iotier;
1934                         else
1935                                 value = requested.thrp_int_iotier;
1936                         break;
1937                 case TASK_POLICY_PASSIVE_IO:
1938                         if (category == TASK_POLICY_EXTERNAL)
1939                                 value = requested.thrp_ext_iopassive;
1940                         else
1941                                 value = requested.thrp_int_iopassive;
1942                         break;
1943                 case TASK_POLICY_QOS:
1944                         assert(category == TASK_POLICY_ATTRIBUTE);
1945                         value = requested.thrp_qos;
1946                         break;
1947                 case TASK_POLICY_QOS_OVERRIDE:
1948                         assert(category == TASK_POLICY_ATTRIBUTE);
1949                         value = requested.thrp_qos_override;
1950                         break;
1951                 case TASK_POLICY_LATENCY_QOS:
1952                         assert(category == TASK_POLICY_ATTRIBUTE);
1953                         value = requested.thrp_latency_qos;
1954                         break;
1955                 case TASK_POLICY_THROUGH_QOS:
1956                         assert(category == TASK_POLICY_ATTRIBUTE);
1957                         value = requested.thrp_through_qos;
1958                         break;
1959                 case TASK_POLICY_QOS_WORKQ_OVERRIDE:
1960                         assert(category == TASK_POLICY_ATTRIBUTE);
1961                         value = requested.thrp_qos_workq_override;
1962                         break;
1963                 case TASK_POLICY_QOS_AND_RELPRIO:
1964                         assert(category == TASK_POLICY_ATTRIBUTE);
1965                         assert(value2 != NULL);
1966                         value = requested.thrp_qos;
1967                         *value2 = requested.thrp_qos_relprio;
1968                         break;
1969                 case TASK_POLICY_QOS_PROMOTE:
1970                         assert(category == TASK_POLICY_ATTRIBUTE);
1971                         value = requested.thrp_qos_promote;
1972                         break;
1973                 case TASK_POLICY_QOS_IPC_OVERRIDE:
1974                         assert(category == TASK_POLICY_ATTRIBUTE);
1975                         value = requested.thrp_qos_ipc_override;
1976                         break;
1977                 case TASK_POLICY_TERMINATED:
1978                         assert(category == TASK_POLICY_ATTRIBUTE);
1979                         value = requested.thrp_terminated;
1980                         break;
1981
1982                 default:
1983                         panic("unknown policy_flavor %d", flavor);
1984                         break;
1985         }
1986
1987         return value;
1988 }
1989
1990 /*
1991  * Gets what is actually in effect, for subsystems which pull policy instead of receive updates.
1992  *
1993  * NOTE: This accessor does not take the task or thread lock.
1994  * Notifications of state updates need to be externally synchronized with state queries.
1995  * This routine *MUST* remain interrupt safe, as it is potentially invoked
1996  * within the context of a timer interrupt.
1997  *
1998  * TODO: I think we can get away with architecting this such that we don't need to look at the task ever.
1999  *      Is that a good idea? Maybe it's best to avoid evaluate-all-the-threads updates.
2000  *      I don't think that cost is worth not having the right answer.
2001  */
2002 int
2003 proc_get_effective_thread_policy(thread_t thread,
2004                                  int      flavor)
2005 {
2006         int value = 0;
2007
2008         switch (flavor) {
2009                 case TASK_POLICY_DARWIN_BG:
2010                         /*
2011                          * This call is used within the timer layer, as well as
2012                          * prioritizing requests to the graphics system.
2013                          * It also informs SFI and originator-bg-state.
2014                          * Returns 1 for background mode, 0 for normal mode
2015                          */
2016
2017                         value = thread->effective_policy.thep_darwinbg ? 1 : 0;
2018                         break;
2019                 case TASK_POLICY_IO:
2020                         /*
2021                          * The I/O system calls here to find out what throttling tier to apply to an operation.
2022                          * Returns THROTTLE_LEVEL_* values
2023                          */
2024                         value = thread->effective_policy.thep_io_tier;
2025                         if (thread->iotier_override != THROTTLE_LEVEL_NONE)
2026                                 value = MIN(value, thread->iotier_override);
2027                         break;
2028                 case TASK_POLICY_PASSIVE_IO:
2029                         /*
2030                          * The I/O system calls here to find out whether an operation should be passive.
2031                          * (i.e. not cause operations with lower throttle tiers to be throttled)
2032                          * Returns 1 for passive mode, 0 for normal mode
2033                          *
2034                          * If an override is causing IO to go into a lower tier, we also set
2035                          * the passive bit so that a thread doesn't end up stuck in its own throttle
2036                          * window when the override goes away.
2037                          */
2038                         value = thread->effective_policy.thep_io_passive ? 1 : 0;
2039                         if (thread->iotier_override != THROTTLE_LEVEL_NONE &&
2040                             thread->iotier_override < thread->effective_policy.thep_io_tier)
2041                                 value = 1;
2042                         break;
2043                 case TASK_POLICY_ALL_SOCKETS_BG:
2044                         /*
2045                          * do_background_socket() calls this to determine whether
2046                          * it should change the thread's sockets
2047                          * Returns 1 for background mode, 0 for normal mode
2048                          * This consults both thread and task so un-DBGing a thread while the task is BG
2049                          * doesn't get you out of the network throttle.
2050                          */
2051                         value = (thread->effective_policy.thep_all_sockets_bg ||
2052                                  thread->task->effective_policy.tep_all_sockets_bg) ? 1 : 0;
2053                         break;
2054                 case TASK_POLICY_NEW_SOCKETS_BG:
2055                         /*
2056                          * socreate() calls this to determine if it should mark a new socket as background
2057                          * Returns 1 for background mode, 0 for normal mode
2058                          */
2059                         value = thread->effective_policy.thep_new_sockets_bg ? 1 : 0;
2060                         break;
2061                 case TASK_POLICY_LATENCY_QOS:
2062                         /*
2063                          * timer arming calls into here to find out the timer coalescing level
2064                          * Returns a latency QoS tier (0-6)
2065                          */
2066                         value = thread->effective_policy.thep_latency_qos;
2067                         break;
2068                 case TASK_POLICY_THROUGH_QOS:
2069                         /*
2070                          * This value is passed into the urgency callout from the scheduler
2071                          * to the performance management subsystem.
2072                          *
2073                          * Returns a throughput QoS tier (0-6)
2074                          */
2075                         value = thread->effective_policy.thep_through_qos;
2076                         break;
2077                 case TASK_POLICY_QOS:
2078                         /*
2079                          * This is communicated to the performance management layer and SFI.
2080                          *
2081                          * Returns a QoS policy tier
2082                          */
2083                         value = thread->effective_policy.thep_qos;
2084                         break;
2085                 default:
2086                         panic("unknown thread policy flavor %d", flavor);
2087                         break;
2088         }
2089
2090         return value;
2091 }
2092
2093
2094 /*
2095  * (integer_t) casts limit the number of bits we can fit here
2096  * this interface is deprecated and replaced by the _EXT struct ?
2097  */
2098 static void
2099 proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info)
2100 {
2101         uint64_t bits = 0;
2102         struct thread_requested_policy requested = thread->requested_policy;
2103
2104         bits |= (requested.thrp_int_darwinbg    ? POLICY_REQ_INT_DARWIN_BG  : 0);
2105         bits |= (requested.thrp_ext_darwinbg    ? POLICY_REQ_EXT_DARWIN_BG  : 0);
2106         bits |= (requested.thrp_int_iotier      ? (((uint64_t)requested.thrp_int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0);
2107         bits |= (requested.thrp_ext_iotier      ? (((uint64_t)requested.thrp_ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0);
2108         bits |= (requested.thrp_int_iopassive   ? POLICY_REQ_INT_PASSIVE_IO : 0);
2109         bits |= (requested.thrp_ext_iopassive   ? POLICY_REQ_EXT_PASSIVE_IO : 0);
2110
2111         bits |= (requested.thrp_qos             ? (((uint64_t)requested.thrp_qos) << POLICY_REQ_TH_QOS_SHIFT) : 0);
2112         bits |= (requested.thrp_qos_override    ? (((uint64_t)requested.thrp_qos_override) << POLICY_REQ_TH_QOS_OVER_SHIFT)   : 0);
2113
2114         bits |= (requested.thrp_pidbind_bg      ? POLICY_REQ_PIDBIND_BG     : 0);
2115
2116         bits |= (requested.thrp_latency_qos     ? (((uint64_t)requested.thrp_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0);
2117         bits |= (requested.thrp_through_qos     ? (((uint64_t)requested.thrp_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0);
2118
2119         info->requested = (integer_t) bits;
2120         bits = 0;
2121
2122         struct thread_effective_policy effective = thread->effective_policy;
2123
2124         bits |= (effective.thep_darwinbg        ? POLICY_EFF_DARWIN_BG      : 0);
2125
2126         bits |= (effective.thep_io_tier         ? (((uint64_t)effective.thep_io_tier) << POLICY_EFF_IO_TIER_SHIFT) : 0);
2127         bits |= (effective.thep_io_passive      ? POLICY_EFF_IO_PASSIVE     : 0);
2128         bits |= (effective.thep_all_sockets_bg  ? POLICY_EFF_ALL_SOCKETS_BG : 0);
2129         bits |= (effective.thep_new_sockets_bg  ? POLICY_EFF_NEW_SOCKETS_BG : 0);
2130
2131         bits |= (effective.thep_qos             ? (((uint64_t)effective.thep_qos) << POLICY_EFF_TH_QOS_SHIFT) : 0);
2132
2133         bits |= (effective.thep_latency_qos     ? (((uint64_t)effective.thep_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0);
2134         bits |= (effective.thep_through_qos     ? (((uint64_t)effective.thep_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0);
2135
2136         info->effective = (integer_t)bits;
2137         bits = 0;
2138
2139         info->pending = 0;
2140 }
2141
2142 /*
2143  * Sneakily trace either the task and thread requested
2144  * or just the thread requested, depending on if we have enough room.
2145  * We do have room on LP64. On LP32, we have to split it between two uintptr_t's.
2146  *
2147  *                                LP32            LP64
2148  * threquested_0(thread)          thread[0]       task[0]
2149  * threquested_1(thread)          thread[1]       thread[0]
2150  *
2151  */
2152
2153 uintptr_t
2154 threquested_0(thread_t thread)
2155 {
2156         static_assert(sizeof(struct thread_requested_policy) == sizeof(uint64_t), "size invariant violated");
2157
2158         uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
2159
2160         return raw[0];
2161 }
2162
2163 uintptr_t
2164 threquested_1(thread_t thread)
2165 {
2166 #if defined __LP64__
2167         return *(uintptr_t*)&thread->task->requested_policy;
2168 #else
2169         uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
2170         return raw[1];
2171 #endif
2172 }
2173
2174 uintptr_t
2175 theffective_0(thread_t thread)
2176 {
2177         static_assert(sizeof(struct thread_effective_policy) == sizeof(uint64_t), "size invariant violated");
2178
2179         uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
2180         return raw[0];
2181 }
2182
2183 uintptr_t
2184 theffective_1(thread_t thread)
2185 {
2186 #if defined __LP64__
2187         return *(uintptr_t*)&thread->task->effective_policy;
2188 #else
2189         uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
2190         return raw[1];
2191 #endif
2192 }
2193
2194
2195 /*
2196  * Set an override on the thread which is consulted with a
2197  * higher priority than the task/thread policy. This should
2198  * only be set for temporary grants until the thread
2199  * returns to the userspace boundary
2200  *
2201  * We use atomic operations to swap in the override, with
2202  * the assumption that the thread itself can
2203  * read the override and clear it on return to userspace.
2204  *
2205  * No locking is performed, since it is acceptable to see
2206  * a stale override for one loop through throttle_lowpri_io().
2207  * However a thread reference must be held on the thread.
2208  */
2209
2210 void set_thread_iotier_override(thread_t thread, int policy)
2211 {
2212         int current_override;
2213
2214         /* Let most aggressive I/O policy win until user boundary */
2215         do {
2216                 current_override = thread->iotier_override;
2217
2218                 if (current_override != THROTTLE_LEVEL_NONE)
2219                         policy = MIN(current_override, policy);
2220
2221                 if (current_override == policy) {
2222                         /* no effective change */
2223                         return;
2224                 }
2225         } while (!OSCompareAndSwap(current_override, policy, &thread->iotier_override));
2226
2227         /*
2228          * Since the thread may be currently throttled,
2229          * re-evaluate tiers and potentially break out
2230          * of an msleep
2231          */
2232         rethrottle_thread(thread->uthread);
2233 }
2234
2235 /*
2236  * Userspace synchronization routines (like pthread mutexes, pthread reader-writer locks,
2237  * semaphores, dispatch_sync) may result in priority inversions where a higher priority
2238  * (i.e. scheduler priority, I/O tier, QoS tier) is waiting on a resource owned by a lower
2239  * priority thread. In these cases, we attempt to propagate the priority token, as long
2240  * as the subsystem informs us of the relationships between the threads. The userspace
2241  * synchronization subsystem should maintain the information of owner->resource and
2242  * resource->waiters itself.
2243  */
2244
2245 /*
2246  * This helper canonicalizes the resource/resource_type given the current qos_override_mode
2247  * in effect. Note that wildcards (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD) may need
2248  * to be handled specially in the future, but for now it's fine to slam
2249  * *resource to USER_ADDR_NULL even if it was previously a wildcard.
2250  */
2251 static void canonicalize_resource_and_type(user_addr_t *resource, int *resource_type) {
2252         if (qos_override_mode == QOS_OVERRIDE_MODE_OVERHANG_PEAK || qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2253                 /* Map all input resource/type to a single one */
2254                 *resource = USER_ADDR_NULL;
2255                 *resource_type = THREAD_QOS_OVERRIDE_TYPE_UNKNOWN;
2256         } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE) {
2257                 /* no transform */
2258         } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE) {
2259                 /* Map all mutex overrides to a single one, to avoid memory overhead */
2260                 if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX) {
2261                         *resource = USER_ADDR_NULL;
2262                 }
2263         }
2264 }
2265
2266 /* This helper routine finds an existing override if known. Locking should be done by caller */
2267 static struct thread_qos_override *
2268 find_qos_override(thread_t thread,
2269                   user_addr_t resource,
2270                   int resource_type)
2271 {
2272         struct thread_qos_override *override;
2273
2274         override = thread->overrides;
2275         while (override) {
2276                 if (override->override_resource == resource &&
2277                     override->override_resource_type == resource_type) {
2278                         return override;
2279                 }
2280
2281                 override = override->override_next;
2282         }
2283
2284         return NULL;
2285 }
2286
2287 static void
2288 find_and_decrement_qos_override(thread_t       thread,
2289                                 user_addr_t    resource,
2290                                 int            resource_type,
2291                                 boolean_t      reset,
2292                                 struct thread_qos_override **free_override_list)
2293 {
2294         struct thread_qos_override *override, *override_prev;
2295
2296         override_prev = NULL;
2297         override = thread->overrides;
2298         while (override) {
2299                 struct thread_qos_override *override_next = override->override_next;
2300
2301                 if ((THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD == resource  || override->override_resource == resource) &&
2302                     (THREAD_QOS_OVERRIDE_TYPE_WILDCARD == resource_type || override->override_resource_type == resource_type)) {
2303
2304                         if (reset) {
2305                                 override->override_contended_resource_count = 0;
2306                         } else {
2307                                 override->override_contended_resource_count--;
2308                         }
2309
2310                         if (override->override_contended_resource_count == 0) {
2311                                 if (override_prev == NULL) {
2312                                         thread->overrides = override_next;
2313                                 } else {
2314                                         override_prev->override_next = override_next;
2315                                 }
2316
2317                                 /* Add to out-param for later zfree */
2318                                 override->override_next = *free_override_list;
2319                                 *free_override_list = override;
2320                         } else {
2321                                 override_prev = override;
2322                         }
2323
2324                         if (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD != resource) {
2325                                 return;
2326                         }
2327                 } else {
2328                         override_prev = override;
2329                 }
2330
2331                 override = override_next;
2332         }
2333 }
2334
2335 /* This helper recalculates the current requested override using the policy selected at boot */
2336 static int
2337 calculate_requested_qos_override(thread_t thread)
2338 {
2339         if (qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2340                 return THREAD_QOS_UNSPECIFIED;
2341         }
2342
2343         /* iterate over all overrides and calculate MAX */
2344         struct thread_qos_override *override;
2345         int qos_override = THREAD_QOS_UNSPECIFIED;
2346
2347         override = thread->overrides;
2348         while (override) {
2349                 qos_override = MAX(qos_override, override->override_qos);
2350                 override = override->override_next;
2351         }
2352
2353         return qos_override;
2354 }
2355
2356 /*
2357  * Returns:
2358  * - 0 on success
2359  * - EINVAL if some invalid input was passed
2360  */
2361 static int
2362 proc_thread_qos_add_override_internal(thread_t         thread,
2363                                       int              override_qos,
2364                                       boolean_t        first_override_for_resource,
2365                                       user_addr_t      resource,
2366                                       int              resource_type)
2367 {
2368         struct task_pend_token pend_token = {};
2369         int rc = 0;
2370
2371         thread_mtx_lock(thread);
2372
2373         KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_START,
2374                                                   thread_tid(thread), override_qos, first_override_for_resource ? 1 : 0, 0, 0);
2375
2376         DTRACE_BOOST5(qos_add_override_pre, uint64_t, thread_tid(thread),
2377                         uint64_t, thread->requested_policy.thrp_qos,
2378                         uint64_t, thread->effective_policy.thep_qos,
2379                         int, override_qos, boolean_t, first_override_for_resource);
2380
2381         struct thread_qos_override *override;
2382         struct thread_qos_override *override_new = NULL;
2383         int new_qos_override, prev_qos_override;
2384         int new_effective_qos;
2385
2386         canonicalize_resource_and_type(&resource, &resource_type);
2387
2388         override = find_qos_override(thread, resource, resource_type);
2389         if (first_override_for_resource && !override) {
2390                 /* We need to allocate a new object. Drop the thread lock and
2391                  * recheck afterwards in case someone else added the override
2392                  */
2393                 thread_mtx_unlock(thread);
2394                 override_new = zalloc(thread_qos_override_zone);
2395                 thread_mtx_lock(thread);
2396                 override = find_qos_override(thread, resource, resource_type);
2397         }
2398         if (first_override_for_resource && override) {
2399                 /* Someone else already allocated while the thread lock was dropped */
2400                 override->override_contended_resource_count++;
2401         } else if (!override && override_new) {
2402                 override = override_new;
2403                 override_new = NULL;
2404                 override->override_next = thread->overrides;
2405                 /* since first_override_for_resource was TRUE */
2406                 override->override_contended_resource_count = 1;
2407                 override->override_resource = resource;
2408                 override->override_resource_type = resource_type;
2409                 override->override_qos = THREAD_QOS_UNSPECIFIED;
2410                 thread->overrides = override;
2411         }
2412
2413         if (override) {
2414                 if (override->override_qos == THREAD_QOS_UNSPECIFIED)
2415                         override->override_qos = override_qos;
2416                 else
2417                         override->override_qos = MAX(override->override_qos, override_qos);
2418         }
2419
2420         /* Determine how to combine the various overrides into a single current
2421          * requested override
2422          */
2423         new_qos_override = calculate_requested_qos_override(thread);
2424
2425         prev_qos_override = proc_get_thread_policy_locked(thread,
2426                         TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);
2427
2428         if (new_qos_override != prev_qos_override) {
2429                 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
2430                                               TASK_POLICY_QOS_OVERRIDE,
2431                                               new_qos_override, 0, &pend_token);
2432         }
2433
2434         new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);
2435
2436         thread_mtx_unlock(thread);
2437
2438         thread_policy_update_complete_unlocked(thread, &pend_token);
2439
2440         if (override_new) {
2441                 zfree(thread_qos_override_zone, override_new);
2442         }
2443
2444         DTRACE_BOOST4(qos_add_override_post, int, prev_qos_override,
2445                       int, new_qos_override, int, new_effective_qos, int, rc);
2446
2447         KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_END,
2448                                                   new_qos_override, resource, resource_type, 0, 0);
2449
2450         return rc;
2451 }
2452
2453 int
2454 proc_thread_qos_add_override(task_t           task,
2455                              thread_t         thread,
2456                              uint64_t         tid,
2457                              int              override_qos,
2458                              boolean_t        first_override_for_resource,
2459                              user_addr_t      resource,
2460                              int              resource_type)
2461 {
2462         boolean_t has_thread_reference = FALSE;
2463         int rc = 0;
2464
2465         if (thread == THREAD_NULL) {
2466                 thread = task_findtid(task, tid);
2467                 /* returns referenced thread */
2468
2469                 if (thread == THREAD_NULL) {
2470                         KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_NONE,
2471                                                                   tid, 0, 0xdead, 0, 0);
2472                         return ESRCH;
2473                 }
2474                 has_thread_reference = TRUE;
2475         } else {
2476                 assert(thread->task == task);
2477         }
2478         rc = proc_thread_qos_add_override_internal(thread, override_qos,
2479                         first_override_for_resource, resource, resource_type);
2480         if (has_thread_reference) {
2481                 thread_deallocate(thread);
2482         }
2483
2484         return rc;
2485 }
2486
2487 static void
2488 proc_thread_qos_remove_override_internal(thread_t       thread,
2489                                          user_addr_t    resource,
2490                                          int            resource_type,
2491                                          boolean_t      reset)
2492 {
2493         struct task_pend_token pend_token = {};
2494
2495         struct thread_qos_override *deferred_free_override_list = NULL;
2496         int new_qos_override, prev_qos_override, new_effective_qos;
2497
2498         thread_mtx_lock(thread);
2499
2500         canonicalize_resource_and_type(&resource, &resource_type);
2501
2502         find_and_decrement_qos_override(thread, resource, resource_type, reset, &deferred_free_override_list);
2503
2504         KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_START,
2505                               thread_tid(thread), resource, reset, 0, 0);
2506
2507         DTRACE_BOOST3(qos_remove_override_pre, uint64_t, thread_tid(thread),
2508                         uint64_t, thread->requested_policy.thrp_qos,
2509                         uint64_t, thread->effective_policy.thep_qos);
2510
2511         /* Determine how to combine the various overrides into a single current requested override */
2512         new_qos_override = calculate_requested_qos_override(thread);
2513
2514         spl_t s = splsched();
2515         thread_lock(thread);
2516
2517         /*
2518          * The override chain and therefore the value of the current override is locked with thread mutex,
2519          * so we can do a get/set without races.  However, the rest of thread policy is locked under the spinlock.
2520          * This means you can't change the current override from a spinlock-only setter.
2521          */
2522         prev_qos_override = thread_get_requested_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);
2523
2524         if (new_qos_override != prev_qos_override)
2525                 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, new_qos_override, 0, &pend_token);
2526
2527         new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);
2528
2529         thread_unlock(thread);
2530         splx(s);
2531
2532         thread_mtx_unlock(thread);
2533
2534         thread_policy_update_complete_unlocked(thread, &pend_token);
2535
2536         while (deferred_free_override_list) {
2537                 struct thread_qos_override *override_next = deferred_free_override_list->override_next;
2538
2539                 zfree(thread_qos_override_zone, deferred_free_override_list);
2540                 deferred_free_override_list = override_next;
2541         }
2542
2543         DTRACE_BOOST3(qos_remove_override_post, int, prev_qos_override,
2544                       int, new_qos_override, int, new_effective_qos);
2545
2546         KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_END,
2547                               thread_tid(thread), 0, 0, 0, 0);
2548 }
2549
2550 int
2551 proc_thread_qos_remove_override(task_t      task,
2552                                 thread_t    thread,
2553                                 uint64_t    tid,
2554                                 user_addr_t resource,
2555                                 int         resource_type)
2556 {
2557         boolean_t has_thread_reference = FALSE;
2558
2559         if (thread == THREAD_NULL) {
2560                 thread = task_findtid(task, tid);
2561                 /* returns referenced thread */
2562
2563                 if (thread == THREAD_NULL) {
2564                         KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_NONE,
2565                                               tid, 0, 0xdead, 0, 0);
2566                         return ESRCH;
2567                 }
2568                 has_thread_reference = TRUE;
2569         } else {
2570                 assert(task == thread->task);
2571         }
2572
2573         proc_thread_qos_remove_override_internal(thread, resource, resource_type, FALSE);
2574
2575         if (has_thread_reference)
2576                 thread_deallocate(thread);
2577
2578         return 0;
2579 }
2580
2581 /* Deallocate before thread termination */
2582 void proc_thread_qos_deallocate(thread_t thread)
2583 {
2584         /* This thread must have no more IPC overrides. */
2585         assert(thread->ipc_overrides == 0);
2586         assert(thread->requested_policy.thrp_qos_ipc_override == THREAD_QOS_UNSPECIFIED);
2587         assert(thread->sync_ipc_overrides == 0);
2588         assert(thread->requested_policy.thrp_qos_sync_ipc_override == THREAD_QOS_UNSPECIFIED);
2589
2590         /*
2591          * Clear out any lingering override objects.
2592          */
2593         struct thread_qos_override *override;
2594
2595         thread_mtx_lock(thread);
2596         override = thread->overrides;
2597         thread->overrides = NULL;
2598         thread->requested_policy.thrp_qos_override = THREAD_QOS_UNSPECIFIED;
2599         /* We don't need to re-evaluate thread policy here because the thread has already exited */
2600         thread_mtx_unlock(thread);
2601
2602         while (override) {
2603                 struct thread_qos_override *override_next = override->override_next;
2604
2605                 zfree(thread_qos_override_zone, override);
2606                 override = override_next;
2607         }
2608 }
2609
2610 /*
2611  * Set up the primordial thread's QoS
2612  */
2613 void
2614 task_set_main_thread_qos(task_t task, thread_t thread) {
2615         struct task_pend_token pend_token = {};
2616
2617         assert(thread->task == task);
2618
2619         thread_mtx_lock(thread);
2620
2621         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2622                                   (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_START,
2623                                   thread_tid(thread), threquested_0(thread), threquested_1(thread),
2624                                   thread->requested_policy.thrp_qos, 0);
2625
2626         int primordial_qos = task_compute_main_thread_qos(task);
2627
2628         proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS,
2629                                       primordial_qos, 0, &pend_token);
2630
2631         thread_mtx_unlock(thread);
2632
2633         thread_policy_update_complete_unlocked(thread, &pend_token);
2634
2635         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2636                                   (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_END,
2637                                   thread_tid(thread), threquested_0(thread), threquested_1(thread),
2638                                   primordial_qos, 0);
2639 }
2640
2641 /*
2642  * KPI for pthread kext
2643  *
2644  * Return a good guess at what the initial manager QoS will be
2645  * Dispatch can override this in userspace if it so chooses
2646  */
2647 int
2648 task_get_default_manager_qos(task_t task)
2649 {
2650         int primordial_qos = task_compute_main_thread_qos(task);
2651
2652         if (primordial_qos == THREAD_QOS_LEGACY)
2653                 primordial_qos = THREAD_QOS_USER_INITIATED;
2654
2655         return primordial_qos;
2656 }
2657
2658 /*
2659  * Check if the user promotion on thread has changed
2660  * and apply it.
2661  *
2662  * thread locked on entry, might drop the thread lock
2663  * and reacquire it.
2664  */
2665 boolean_t
2666 thread_recompute_user_promotion_locked(thread_t thread)
2667 {
2668         boolean_t needs_update = FALSE;
2669         struct task_pend_token pend_token = {};
2670         int user_promotion_basepri = MIN(thread_get_inheritor_turnstile_priority(thread), MAXPRI_USER);
2671         int old_base_pri = thread->base_pri;
2672         thread_qos_t qos_promotion;
2673
2674         /* Check if user promotion has changed */
2675         if (thread->user_promotion_basepri == user_promotion_basepri) {
2676                 return needs_update;
2677         } else {
2678                 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2679                         (TURNSTILE_CODE(TURNSTILE_PRIORITY_OPERATIONS, (THREAD_USER_PROMOTION_CHANGE))) | DBG_FUNC_NONE,
2680                         thread_tid(thread),
2681                         user_promotion_basepri,
2682                         thread->user_promotion_basepri,
2683                         0, 0);
2684         }
2685
2686         /* Update the user promotion base pri */
2687         thread->user_promotion_basepri = user_promotion_basepri;
2688         pend_token.tpt_force_recompute_pri = 1;
2689
2690         if (user_promotion_basepri <= MAXPRI_THROTTLE) {
2691                 qos_promotion = THREAD_QOS_UNSPECIFIED;
2692         } else {
2693                 qos_promotion = thread_user_promotion_qos_for_pri(user_promotion_basepri);
2694         }
2695
2696         proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
2697                         TASK_POLICY_QOS_PROMOTE, qos_promotion, 0, &pend_token);
2698
2699         if (thread_get_waiting_turnstile(thread) &&
2700             thread->base_pri != old_base_pri) {
2701                 needs_update = TRUE;
2702         }
2703
2704         thread_unlock(thread);
2705
2706         thread_policy_update_complete_unlocked(thread, &pend_token);
2707
2708         thread_lock(thread);
2709
2710         return needs_update;
2711 }
2712
2713 /*
2714  * Convert the thread user promotion base pri to qos for threads in qos world.
2715  * For priority above UI qos, the qos would be set to UI.
2716  */
2717 thread_qos_t
2718 thread_user_promotion_qos_for_pri(int priority)
2719 {
2720         int qos;
2721         for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
2722                 if (thread_qos_policy_params.qos_pri[qos] <= priority) {
2723                         return qos;
2724                 }
2725         }
2726         return THREAD_QOS_MAINTENANCE;
2727 }
2728
2729 /*
2730  * Set the thread's QoS IPC override
2731  * Owned by the IPC subsystem
2732  *
2733  * May be called with spinlocks held, but not spinlocks
2734  * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
2735  *
2736  * One 'add' must be balanced by one 'drop'.
2737  * Between 'add' and 'drop', the overide QoS value may be updated with an 'update'.
2738  * Before the thread is deallocated, there must be 0 remaining overrides.
2739  */
2740 static void
2741 thread_ipc_override(thread_t    thread,
2742                     uint32_t    qos_override,
2743                     boolean_t   is_new_override)
2744 {
2745         struct task_pend_token pend_token = {};
2746         boolean_t needs_update;
2747
2748         spl_t s = splsched();
2749         thread_lock(thread);
2750
2751         uint32_t old_override = thread->requested_policy.thrp_qos_ipc_override;
2752
2753         assert(qos_override > THREAD_QOS_UNSPECIFIED);
2754         assert(qos_override < THREAD_QOS_LAST);
2755
2756         if (is_new_override) {
2757                 if (thread->ipc_overrides++ == 0) {
2758                         /* This add is the first override for this thread */
2759                         assert(old_override == THREAD_QOS_UNSPECIFIED);
2760                 } else {
2761                         /* There are already other overrides in effect for this thread */
2762                         assert(old_override > THREAD_QOS_UNSPECIFIED);
2763                 }
2764         } else {
2765                 /* There must be at least one override (the previous add call) in effect */
2766                 assert(thread->ipc_overrides > 0);
2767                 assert(old_override > THREAD_QOS_UNSPECIFIED);
2768         }
2769
2770         /*
2771          * We can't allow lowering if there are several IPC overrides because
2772          * the caller can't possibly know the whole truth
2773          */
2774         if (thread->ipc_overrides == 1) {
2775                 needs_update = qos_override != old_override;
2776         } else {
2777                 needs_update = qos_override > old_override;
2778         }
2779
2780         if (needs_update) {
2781                 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
2782                                                   TASK_POLICY_QOS_IPC_OVERRIDE,
2783                                                   qos_override, 0, &pend_token);
2784                 assert(pend_token.tpt_update_sockets == 0);
2785         }
2786
2787         thread_unlock(thread);
2788         splx(s);
2789
2790         thread_policy_update_complete_unlocked(thread, &pend_token);
2791 }
2792
2793 void
2794 thread_add_ipc_override(thread_t    thread,
2795                         uint32_t    qos_override)
2796 {
2797         thread_ipc_override(thread, qos_override, TRUE);
2798 }
2799
2800 void
2801 thread_update_ipc_override(thread_t     thread,
2802                            uint32_t     qos_override)
2803 {
2804         thread_ipc_override(thread, qos_override, FALSE);
2805 }
2806
2807 void
2808 thread_drop_ipc_override(thread_t thread)
2809 {
2810         struct task_pend_token pend_token = {};
2811
2812         spl_t s = splsched();
2813         thread_lock(thread);
2814
2815         assert(thread->ipc_overrides > 0);
2816
2817         if (--thread->ipc_overrides == 0) {
2818                 /*
2819                  * There are no more overrides for this thread, so we should
2820                  * clear out the saturated override value
2821                  */
2822
2823                 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
2824                                                   TASK_POLICY_QOS_IPC_OVERRIDE, THREAD_QOS_UNSPECIFIED,
2825                                                   0, &pend_token);
2826         }
2827
2828         thread_unlock(thread);
2829         splx(s);
2830
2831         thread_policy_update_complete_unlocked(thread, &pend_token);
2832 }
2833
2834 /* Get current requested qos / relpri, may be called from spinlock context */
2835 thread_qos_t
2836 thread_get_requested_qos(thread_t thread, int *relpri)
2837 {
2838         int relprio_value = 0;
2839         thread_qos_t qos;
2840
2841         qos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
2842                         TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
2843         if (relpri) *relpri = -relprio_value;
2844         return qos;
2845 }
2846
2847 /*
2848  * This function will promote the thread priority
2849  * since exec could block other threads calling
2850  * proc_find on the proc. This boost must be removed
2851  * via call to thread_clear_exec_promotion.
2852  *
2853  * This should be replaced with a generic 'priority inheriting gate' mechanism (24194397)
2854  */
2855 void
2856 thread_set_exec_promotion(thread_t thread)
2857 {
2858         spl_t s = splsched();
2859         thread_lock(thread);
2860
2861         sched_thread_promote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
2862
2863         thread_unlock(thread);
2864         splx(s);
2865 }
2866
2867 /*
2868  * This function will clear the exec thread
2869  * promotion set on the thread by thread_set_exec_promotion.
2870  */
2871 void
2872 thread_clear_exec_promotion(thread_t thread)
2873 {
2874         spl_t s = splsched();
2875         thread_lock(thread);
2876
2877         sched_thread_unpromote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
2878
2879         thread_unlock(thread);
2880         splx(s);
2881 }
2882