osfmk/kern/thread_policy.c (xnu-6153.141.1)
1 /*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <mach/mach_types.h>
30 #include <mach/thread_act_server.h>
31
32 #include <kern/kern_types.h>
33 #include <kern/processor.h>
34 #include <kern/thread.h>
35 #include <kern/affinity.h>
36 #include <mach/task_policy.h>
37 #include <kern/sfi.h>
38 #include <kern/policy_internal.h>
39 #include <sys/errno.h>
40 #include <sys/ulock.h>
41
42 #include <mach/machine/sdt.h>
43
44 #ifdef MACH_BSD
45 extern int proc_selfpid(void);
46 extern char * proc_name_address(void *p);
47 extern void rethrottle_thread(void * uthread);
48 #endif /* MACH_BSD */
49
50 #define QOS_EXTRACT(q) ((q) & 0xff)
51
52 uint32_t qos_override_mode;
53 #define QOS_OVERRIDE_MODE_OVERHANG_PEAK 0
54 #define QOS_OVERRIDE_MODE_IGNORE_OVERRIDE 1
55 #define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE 2
56 #define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE 3
57
58 extern zone_t thread_qos_override_zone;
59
60 static void
61 proc_thread_qos_remove_override_internal(thread_t thread, user_addr_t resource, int resource_type, boolean_t reset);
62
63 /*
64 * THREAD_QOS_UNSPECIFIED is assigned the highest tier available, so it does not provide a limit
65 * to threads that don't have a QoS class set.
66 */
67 const qos_policy_params_t thread_qos_policy_params = {
68 /*
69 * This table defines the starting base priority of the thread,
70 * which will be modified by the thread importance and the task max priority
71 * before being applied.
72 */
73 .qos_pri[THREAD_QOS_UNSPECIFIED] = 0, /* not consulted */
74 .qos_pri[THREAD_QOS_USER_INTERACTIVE] = BASEPRI_BACKGROUND, /* i.e. 46 */
75 .qos_pri[THREAD_QOS_USER_INITIATED] = BASEPRI_USER_INITIATED,
76 .qos_pri[THREAD_QOS_LEGACY] = BASEPRI_DEFAULT,
77 .qos_pri[THREAD_QOS_UTILITY] = BASEPRI_UTILITY,
78 .qos_pri[THREAD_QOS_BACKGROUND] = MAXPRI_THROTTLE,
79 .qos_pri[THREAD_QOS_MAINTENANCE] = MAXPRI_THROTTLE,
80
81 /*
82 * This table defines the highest IO priority that a thread marked with this
83 * QoS class can have.
84 */
85 .qos_iotier[THREAD_QOS_UNSPECIFIED] = THROTTLE_LEVEL_TIER0,
86 .qos_iotier[THREAD_QOS_USER_INTERACTIVE] = THROTTLE_LEVEL_TIER0,
87 .qos_iotier[THREAD_QOS_USER_INITIATED] = THROTTLE_LEVEL_TIER0,
88 .qos_iotier[THREAD_QOS_LEGACY] = THROTTLE_LEVEL_TIER0,
89 .qos_iotier[THREAD_QOS_UTILITY] = THROTTLE_LEVEL_TIER1,
90 .qos_iotier[THREAD_QOS_BACKGROUND] = THROTTLE_LEVEL_TIER2, /* possibly overridden by bg_iotier */
91 .qos_iotier[THREAD_QOS_MAINTENANCE] = THROTTLE_LEVEL_TIER3,
92
93 /*
94 * These tables define the throughput and latency QoS tiers
95 * that correspond to each thread QoS class.
96 */
97
98 .qos_through_qos[THREAD_QOS_UNSPECIFIED] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_UNSPECIFIED),
99 .qos_through_qos[THREAD_QOS_USER_INTERACTIVE] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_0),
100 .qos_through_qos[THREAD_QOS_USER_INITIATED] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
101 .qos_through_qos[THREAD_QOS_LEGACY] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
102 .qos_through_qos[THREAD_QOS_UTILITY] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_2),
103 .qos_through_qos[THREAD_QOS_BACKGROUND] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
104 .qos_through_qos[THREAD_QOS_MAINTENANCE] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
105
106 .qos_latency_qos[THREAD_QOS_UNSPECIFIED] = QOS_EXTRACT(LATENCY_QOS_TIER_UNSPECIFIED),
107 .qos_latency_qos[THREAD_QOS_USER_INTERACTIVE] = QOS_EXTRACT(LATENCY_QOS_TIER_0),
108 .qos_latency_qos[THREAD_QOS_USER_INITIATED] = QOS_EXTRACT(LATENCY_QOS_TIER_1),
109 .qos_latency_qos[THREAD_QOS_LEGACY] = QOS_EXTRACT(LATENCY_QOS_TIER_1),
110 .qos_latency_qos[THREAD_QOS_UTILITY] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
111 .qos_latency_qos[THREAD_QOS_BACKGROUND] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
112 .qos_latency_qos[THREAD_QOS_MAINTENANCE] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
113 };
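/*
 * Worked example (editor's illustration, not part of the build): for a
 * THREAD_QOS_UTILITY thread these tables yield a starting base priority of
 * BASEPRI_UTILITY (20 on current kernels), an IO ceiling of
 * THROTTLE_LEVEL_TIER1, throughput tier 2 and latency tier 3. A hypothetical
 * helper that gathers one row could look like the sketch below; the struct
 * name and helper are assumptions for illustration only.
 *
 *	struct qos_row {
 *		int      pri;      // starting base priority
 *		int      iotier;   // max IO throttle tier
 *		uint32_t through;  // throughput QoS tier
 *		uint32_t latency;  // latency QoS tier
 *	};
 *
 *	static struct qos_row
 *	qos_row_for_class(int qos)
 *	{
 *		assert(qos >= 0 && qos < THREAD_QOS_LAST);
 *		return (struct qos_row){
 *			.pri     = thread_qos_policy_params.qos_pri[qos],
 *			.iotier  = thread_qos_policy_params.qos_iotier[qos],
 *			.through = thread_qos_policy_params.qos_through_qos[qos],
 *			.latency = thread_qos_policy_params.qos_latency_qos[qos],
 *		};
 *	}
 */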
114
115 static void
116 thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode);
117
118 static int
119 thread_qos_scaled_relative_priority(int qos, int qos_relprio);
120
121 static void
122 proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info);
123
124 static void
125 proc_set_thread_policy_locked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
126
127 static void
128 proc_set_thread_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
129
130 static void
131 thread_set_requested_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
132
133 static int
134 thread_get_requested_policy_spinlocked(thread_t thread, int category, int flavor, int* value2);
135
136 static int
137 proc_get_thread_policy_locked(thread_t thread, int category, int flavor, int* value2);
138
139 static void
140 thread_policy_update_spinlocked(thread_t thread, boolean_t recompute_priority, task_pend_token_t pend_token);
141
142 static void
143 thread_policy_update_internal_spinlocked(thread_t thread, boolean_t recompute_priority, task_pend_token_t pend_token);
144
145 void
146 thread_policy_init(void)
147 {
148 if (PE_parse_boot_argn("qos_override_mode", &qos_override_mode, sizeof(qos_override_mode))) {
149 printf("QOS override mode: 0x%08x\n", qos_override_mode);
150 } else {
151 qos_override_mode = QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE;
152 }
153 }
154
155 boolean_t
156 thread_has_qos_policy(thread_t thread)
157 {
158 return (proc_get_thread_policy(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS) != THREAD_QOS_UNSPECIFIED) ? TRUE : FALSE;
159 }
160
161
162 static void
163 thread_remove_qos_policy_locked(thread_t thread,
164 task_pend_token_t pend_token)
165 {
166 __unused int prev_qos = thread->requested_policy.thrp_qos;
167
168 DTRACE_PROC2(qos__remove, thread_t, thread, int, prev_qos);
169
170 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
171 THREAD_QOS_UNSPECIFIED, 0, pend_token);
172 }
173
174 kern_return_t
175 thread_remove_qos_policy(thread_t thread)
176 {
177 struct task_pend_token pend_token = {};
178
179 thread_mtx_lock(thread);
180 if (!thread->active) {
181 thread_mtx_unlock(thread);
182 return KERN_TERMINATED;
183 }
184
185 thread_remove_qos_policy_locked(thread, &pend_token);
186
187 thread_mtx_unlock(thread);
188
189 thread_policy_update_complete_unlocked(thread, &pend_token);
190
191 return KERN_SUCCESS;
192 }
193
194
195 boolean_t
196 thread_is_static_param(thread_t thread)
197 {
198 if (thread->static_param) {
199 DTRACE_PROC1(qos__legacy__denied, thread_t, thread);
200 return TRUE;
201 }
202 return FALSE;
203 }
204
205 /*
206 * Relative priorities can range between 0REL and -15REL. These
207 * map to QoS-specific ranges, to create non-overlapping priority
208 * ranges.
209 */
210 static int
211 thread_qos_scaled_relative_priority(int qos, int qos_relprio)
212 {
213 int next_lower_qos;
214
215 /* Fast path, since no validation or scaling is needed */
216 if (qos_relprio == 0) {
217 return 0;
218 }
219
220 switch (qos) {
221 case THREAD_QOS_USER_INTERACTIVE:
222 next_lower_qos = THREAD_QOS_USER_INITIATED;
223 break;
224 case THREAD_QOS_USER_INITIATED:
225 next_lower_qos = THREAD_QOS_LEGACY;
226 break;
227 case THREAD_QOS_LEGACY:
228 next_lower_qos = THREAD_QOS_UTILITY;
229 break;
230 case THREAD_QOS_UTILITY:
231 next_lower_qos = THREAD_QOS_BACKGROUND;
232 break;
233 case THREAD_QOS_MAINTENANCE:
234 case THREAD_QOS_BACKGROUND:
235 next_lower_qos = 0;
236 break;
237 default:
238 panic("Unrecognized QoS %d", qos);
239 return 0;
240 }
241
242 int prio_range_max = thread_qos_policy_params.qos_pri[qos];
243 int prio_range_min = next_lower_qos ? thread_qos_policy_params.qos_pri[next_lower_qos] : 0;
244
245 /*
246 * We now have the valid range that the scaled relative priority can map to. Note
247 * that the lower bound is exclusive, but the upper bound is inclusive. If the
248 * range is (21,31], 0REL should map to 31 and -15REL should map to 22. We use the
249 * fact that the max relative priority is -15 and use ">>4" to divide by 16 and discard
250 * remainder.
251 */
252 int scaled_relprio = -(((prio_range_max - prio_range_min) * (-qos_relprio)) >> 4);
253
254 return scaled_relprio;
255 }
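/*
 * Worked example (editor's illustration): for THREAD_QOS_LEGACY the range is
 * (qos_pri[UTILITY], qos_pri[LEGACY]] = (20, 31], so the span is 11.
 *	-15REL -> -((11 * 15) >> 4) = -10, giving base 31 - 10 = 21
 *	 -8REL -> -((11 *  8) >> 4) =  -5, giving base 31 -  5 = 26
 *	 -1REL -> -((11 *  1) >> 4) =   0, i.e. small offsets can round to zero
 * Every result stays above the next lower band's base priority of 20.
 */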
256
257 /*
258 * flag set by -qos-policy-allow boot-arg to allow
259 * testing thread qos policy from userspace
260 */
261 boolean_t allow_qos_policy_set = FALSE;
262
263 kern_return_t
264 thread_policy_set(
265 thread_t thread,
266 thread_policy_flavor_t flavor,
267 thread_policy_t policy_info,
268 mach_msg_type_number_t count)
269 {
270 thread_qos_policy_data_t req_qos;
271 kern_return_t kr;
272
273 req_qos.qos_tier = THREAD_QOS_UNSPECIFIED;
274
275 if (thread == THREAD_NULL) {
276 return KERN_INVALID_ARGUMENT;
277 }
278
279 if (allow_qos_policy_set == FALSE) {
280 if (thread_is_static_param(thread)) {
281 return KERN_POLICY_STATIC;
282 }
283
284 if (flavor == THREAD_QOS_POLICY) {
285 return KERN_INVALID_ARGUMENT;
286 }
287 }
288
289 /* Threads without static_param set reset their QoS when other policies are applied. */
290 if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
291 /* Store the existing tier, if we fail this call it is used to reset back. */
292 req_qos.qos_tier = thread->requested_policy.thrp_qos;
293 req_qos.tier_importance = thread->requested_policy.thrp_qos_relprio;
294
295 kr = thread_remove_qos_policy(thread);
296 if (kr != KERN_SUCCESS) {
297 return kr;
298 }
299 }
300
301 kr = thread_policy_set_internal(thread, flavor, policy_info, count);
302
303 /* If we stripped an existing QoS policy above and the new set failed, restore the original tier. */
304 if (req_qos.qos_tier != THREAD_QOS_UNSPECIFIED) {
305 if (kr != KERN_SUCCESS) {
306 /* Reset back to our original tier as the set failed. */
307 (void)thread_policy_set_internal(thread, THREAD_QOS_POLICY, (thread_policy_t)&req_qos, THREAD_QOS_POLICY_COUNT);
308 }
309 }
310
311 return kr;
312 }
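/*
 * Usage sketch (editor's illustration, user-space caller): applying a legacy
 * policy through this entry point implicitly strips any existing QoS policy,
 * as described above. Assuming the standard Mach headers, and a thread whose
 * parameters are not marked static, a caller might do:
 *
 *	#include <mach/mach.h>
 *	#include <mach/thread_policy.h>
 *
 *	thread_precedence_policy_data_t prec = { .importance = 5 };
 *	kern_return_t kr = thread_policy_set(mach_thread_self(),
 *	    THREAD_PRECEDENCE_POLICY, (thread_policy_t)&prec,
 *	    THREAD_PRECEDENCE_POLICY_COUNT);
 *
 * If the thread previously had a QoS set, the QoS is removed first; if the
 * set then fails, the original QoS tier is restored.
 */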
313
314 kern_return_t
315 thread_policy_set_internal(
316 thread_t thread,
317 thread_policy_flavor_t flavor,
318 thread_policy_t policy_info,
319 mach_msg_type_number_t count)
320 {
321 kern_return_t result = KERN_SUCCESS;
322 struct task_pend_token pend_token = {};
323
324 thread_mtx_lock(thread);
325 if (!thread->active) {
326 thread_mtx_unlock(thread);
327
328 return KERN_TERMINATED;
329 }
330
331 switch (flavor) {
332 case THREAD_EXTENDED_POLICY:
333 {
334 boolean_t timeshare = TRUE;
335
336 if (count >= THREAD_EXTENDED_POLICY_COUNT) {
337 thread_extended_policy_t info;
338
339 info = (thread_extended_policy_t)policy_info;
340 timeshare = info->timeshare;
341 }
342
343 sched_mode_t mode = (timeshare == TRUE) ? TH_MODE_TIMESHARE : TH_MODE_FIXED;
344
345 spl_t s = splsched();
346 thread_lock(thread);
347
348 thread_set_user_sched_mode_and_recompute_pri(thread, mode);
349
350 thread_unlock(thread);
351 splx(s);
352
353 pend_token.tpt_update_thread_sfi = 1;
354
355 break;
356 }
357
358 case THREAD_TIME_CONSTRAINT_POLICY:
359 {
360 thread_time_constraint_policy_t info;
361
362 if (count < THREAD_TIME_CONSTRAINT_POLICY_COUNT) {
363 result = KERN_INVALID_ARGUMENT;
364 break;
365 }
366
367 info = (thread_time_constraint_policy_t)policy_info;
368 if (info->constraint < info->computation ||
369 info->computation > max_rt_quantum ||
370 info->computation < min_rt_quantum) {
371 result = KERN_INVALID_ARGUMENT;
372 break;
373 }
374
375 spl_t s = splsched();
376 thread_lock(thread);
377
378 thread->realtime.period = info->period;
379 thread->realtime.computation = info->computation;
380 thread->realtime.constraint = info->constraint;
381 thread->realtime.preemptible = info->preemptible;
382
383 thread_set_user_sched_mode_and_recompute_pri(thread, TH_MODE_REALTIME);
384
385 thread_unlock(thread);
386 splx(s);
387
388 pend_token.tpt_update_thread_sfi = 1;
389
390 break;
391 }
392
393 case THREAD_PRECEDENCE_POLICY:
394 {
395 thread_precedence_policy_t info;
396
397 if (count < THREAD_PRECEDENCE_POLICY_COUNT) {
398 result = KERN_INVALID_ARGUMENT;
399 break;
400 }
401 info = (thread_precedence_policy_t)policy_info;
402
403 spl_t s = splsched();
404 thread_lock(thread);
405
406 thread->importance = info->importance;
407
408 thread_recompute_priority(thread);
409
410 thread_unlock(thread);
411 splx(s);
412
413 break;
414 }
415
416 case THREAD_AFFINITY_POLICY:
417 {
418 thread_affinity_policy_t info;
419
420 if (!thread_affinity_is_supported()) {
421 result = KERN_NOT_SUPPORTED;
422 break;
423 }
424 if (count < THREAD_AFFINITY_POLICY_COUNT) {
425 result = KERN_INVALID_ARGUMENT;
426 break;
427 }
428
429 info = (thread_affinity_policy_t) policy_info;
430 /*
431 * Unlock the thread mutex here and
432 * return directly after calling thread_affinity_set().
433 * This is necessary for correct lock ordering because
434 * thread_affinity_set() takes the task lock.
435 */
436 thread_mtx_unlock(thread);
437 return thread_affinity_set(thread, info->affinity_tag);
438 }
439
440 #if CONFIG_EMBEDDED
441 case THREAD_BACKGROUND_POLICY:
442 {
443 thread_background_policy_t info;
444
445 if (count < THREAD_BACKGROUND_POLICY_COUNT) {
446 result = KERN_INVALID_ARGUMENT;
447 break;
448 }
449
450 if (thread->task != current_task()) {
451 result = KERN_PROTECTION_FAILURE;
452 break;
453 }
454
455 info = (thread_background_policy_t) policy_info;
456
457 int enable;
458
459 if (info->priority == THREAD_BACKGROUND_POLICY_DARWIN_BG) {
460 enable = TASK_POLICY_ENABLE;
461 } else {
462 enable = TASK_POLICY_DISABLE;
463 }
464
465 int category = (current_thread() == thread) ? TASK_POLICY_INTERNAL : TASK_POLICY_EXTERNAL;
466
467 proc_set_thread_policy_locked(thread, category, TASK_POLICY_DARWIN_BG, enable, 0, &pend_token);
468
469 break;
470 }
471 #endif /* CONFIG_EMBEDDED */
472
473 case THREAD_THROUGHPUT_QOS_POLICY:
474 {
475 thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
476 thread_throughput_qos_t tqos;
477
478 if (count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
479 result = KERN_INVALID_ARGUMENT;
480 break;
481 }
482
483 if ((result = qos_throughput_policy_validate(info->thread_throughput_qos_tier)) != KERN_SUCCESS) {
484 break;
485 }
486
487 tqos = qos_extract(info->thread_throughput_qos_tier);
488
489 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
490 TASK_POLICY_THROUGH_QOS, tqos, 0, &pend_token);
491
492 break;
493 }
494
495 case THREAD_LATENCY_QOS_POLICY:
496 {
497 thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
498 thread_latency_qos_t lqos;
499
500 if (count < THREAD_LATENCY_QOS_POLICY_COUNT) {
501 result = KERN_INVALID_ARGUMENT;
502 break;
503 }
504
505 if ((result = qos_latency_policy_validate(info->thread_latency_qos_tier)) != KERN_SUCCESS) {
506 break;
507 }
508
509 lqos = qos_extract(info->thread_latency_qos_tier);
510
511 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
512 TASK_POLICY_LATENCY_QOS, lqos, 0, &pend_token);
513
514 break;
515 }
516
517 case THREAD_QOS_POLICY:
518 {
519 thread_qos_policy_t info = (thread_qos_policy_t)policy_info;
520
521 if (count < THREAD_QOS_POLICY_COUNT) {
522 result = KERN_INVALID_ARGUMENT;
523 break;
524 }
525
526 if (info->qos_tier < 0 || info->qos_tier >= THREAD_QOS_LAST) {
527 result = KERN_INVALID_ARGUMENT;
528 break;
529 }
530
531 if (info->tier_importance > 0 || info->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
532 result = KERN_INVALID_ARGUMENT;
533 break;
534 }
535
536 if (info->qos_tier == THREAD_QOS_UNSPECIFIED && info->tier_importance != 0) {
537 result = KERN_INVALID_ARGUMENT;
538 break;
539 }
540
541 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
542 info->qos_tier, -info->tier_importance, &pend_token);
543
544 break;
545 }
546
547 default:
548 result = KERN_INVALID_ARGUMENT;
549 break;
550 }
551
552 thread_mtx_unlock(thread);
553
554 thread_policy_update_complete_unlocked(thread, &pend_token);
555
556 return result;
557 }
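/*
 * Usage sketch (editor's illustration, user-space caller): the realtime
 * parameters handled above are expressed in mach absolute time units, so
 * callers normally convert from nanoseconds first. Assuming
 * <mach/mach_time.h>, with illustrative values of a 10ms period, 5ms
 * computation and 10ms constraint:
 *
 *	mach_timebase_info_data_t tb;
 *	mach_timebase_info(&tb);
 *	uint64_t ms = (1000000ull * tb.denom) / tb.numer;  // 1ms in abs time
 *
 *	thread_time_constraint_policy_data_t rt = {
 *		.period      = (uint32_t)(10 * ms),
 *		.computation = (uint32_t)(5 * ms),
 *		.constraint  = (uint32_t)(10 * ms),
 *		.preemptible = TRUE,
 *	};
 *	kern_return_t kr = thread_policy_set(mach_thread_self(),
 *	    THREAD_TIME_CONSTRAINT_POLICY, (thread_policy_t)&rt,
 *	    THREAD_TIME_CONSTRAINT_POLICY_COUNT);
 *
 * The kernel rejects computations outside [min_rt_quantum, max_rt_quantum]
 * and constraints smaller than the computation, as checked above.
 */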
558
559 /*
560 * Note that there is no implemented difference between POLICY_RR and POLICY_FIFO.
561 * Both result in FIXED mode scheduling.
562 */
563 static sched_mode_t
564 convert_policy_to_sched_mode(integer_t policy)
565 {
566 switch (policy) {
567 case POLICY_TIMESHARE:
568 return TH_MODE_TIMESHARE;
569 case POLICY_RR:
570 case POLICY_FIFO:
571 return TH_MODE_FIXED;
572 default:
573 panic("unexpected sched policy: %d", policy);
574 return TH_MODE_NONE;
575 }
576 }
577
578 /*
579 * Called either with the thread mutex locked
580 * or from the pthread kext in a 'safe place'.
581 */
582 static kern_return_t
583 thread_set_mode_and_absolute_pri_internal(thread_t thread,
584 sched_mode_t mode,
585 integer_t priority,
586 task_pend_token_t pend_token)
587 {
588 kern_return_t kr = KERN_SUCCESS;
589
590 spl_t s = splsched();
591 thread_lock(thread);
592
593 /* This path isn't allowed to change a thread out of realtime. */
594 if ((thread->sched_mode == TH_MODE_REALTIME) ||
595 (thread->saved_mode == TH_MODE_REALTIME)) {
596 kr = KERN_FAILURE;
597 goto unlock;
598 }
599
600 if (thread->policy_reset) {
601 kr = KERN_SUCCESS;
602 goto unlock;
603 }
604
605 sched_mode_t old_mode = thread->sched_mode;
606
607 /*
608 * Reverse engineer and apply the correct importance value
609 * from the requested absolute priority value.
610 *
611 * TODO: Store the absolute priority value instead
612 */
613
614 if (priority >= thread->max_priority) {
615 priority = thread->max_priority - thread->task_priority;
616 } else if (priority >= MINPRI_KERNEL) {
617 priority -= MINPRI_KERNEL;
618 } else if (priority >= MINPRI_RESERVED) {
619 priority -= MINPRI_RESERVED;
620 } else {
621 priority -= BASEPRI_DEFAULT;
622 }
623
624 priority += thread->task_priority;
625
626 if (priority > thread->max_priority) {
627 priority = thread->max_priority;
628 } else if (priority < MINPRI) {
629 priority = MINPRI;
630 }
631
632 thread->importance = priority - thread->task_priority;
633
634 thread_set_user_sched_mode_and_recompute_pri(thread, mode);
635
636 if (mode != old_mode) {
637 pend_token->tpt_update_thread_sfi = 1;
638 }
639
640 unlock:
641 thread_unlock(thread);
642 splx(s);
643
644 return kr;
645 }
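/*
 * Worked example (editor's illustration): for a user thread with
 * task_priority = BASEPRI_DEFAULT (31) and max_priority = MAXPRI_USER (63),
 * a requested absolute priority of 37 is below MINPRI_RESERVED, so the code
 * computes importance = 37 - BASEPRI_DEFAULT = 6, re-adds the task priority
 * to arrive back at 37, and stores importance = 6. Requests at or above
 * max_priority simply clamp to max_priority.
 */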
646
647 void
648 thread_freeze_base_pri(thread_t thread)
649 {
650 assert(thread == current_thread());
651
652 spl_t s = splsched();
653 thread_lock(thread);
654
655 assert((thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN) == 0);
656 thread->sched_flags |= TH_SFLAG_BASE_PRI_FROZEN;
657
658 thread_unlock(thread);
659 splx(s);
660 }
661
662 bool
663 thread_unfreeze_base_pri(thread_t thread)
664 {
665 assert(thread == current_thread());
666 integer_t base_pri;
667 ast_t ast = 0;
668
669 spl_t s = splsched();
670 thread_lock(thread);
671
672 assert(thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN);
673 thread->sched_flags &= ~TH_SFLAG_BASE_PRI_FROZEN;
674
675 base_pri = thread->req_base_pri;
676 if (base_pri != thread->base_pri) {
677 /*
678 * This function returns "true" if the base pri change
679 * is the most likely cause for the preemption.
680 */
681 sched_set_thread_base_priority(thread, base_pri);
682 ast = ast_peek(AST_PREEMPT);
683 }
684
685 thread_unlock(thread);
686 splx(s);
687
688 return ast != 0;
689 }
690
691 uint8_t
692 thread_workq_pri_for_qos(thread_qos_t qos)
693 {
694 assert(qos < THREAD_QOS_LAST);
695 return (uint8_t)thread_qos_policy_params.qos_pri[qos];
696 }
697
698 thread_qos_t
699 thread_workq_qos_for_pri(int priority)
700 {
701 int qos;
702 if (priority > thread_qos_policy_params.qos_pri[THREAD_QOS_USER_INTERACTIVE]) {
703 // indicate that workq should map >UI threads to workq's
704 // internal notation for above-UI work.
705 return THREAD_QOS_UNSPECIFIED;
706 }
707 for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
708 // map a given priority up to the next nearest qos band.
709 if (thread_qos_policy_params.qos_pri[qos - 1] < priority) {
710 return qos;
711 }
712 }
713 return THREAD_QOS_MAINTENANCE;
714 }
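/*
 * Worked example (editor's illustration): a priority of 25 falls between
 * qos_pri[UTILITY] (20) and qos_pri[LEGACY] (31), so the loop returns
 * THREAD_QOS_LEGACY, i.e. priorities round up to the next QoS band.
 * Anything above qos_pri[USER_INTERACTIVE] returns THREAD_QOS_UNSPECIFIED
 * so the workqueue layer can treat it as above-UI work.
 */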
715
716 /*
717 * private interface for pthread workqueues
718 *
719 * Reset the thread's requested QoS to the given value and clear any workqueue QoS override
720 * May be called with spinlocks held
721 * Thread mutex lock is not held
722 */
723 void
724 thread_reset_workq_qos(thread_t thread, uint32_t qos)
725 {
726 struct task_pend_token pend_token = {};
727
728 assert(qos < THREAD_QOS_LAST);
729
730 spl_t s = splsched();
731 thread_lock(thread);
732
733 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
734 TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
735 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
736 TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED, 0,
737 &pend_token);
738
739 assert(pend_token.tpt_update_sockets == 0);
740
741 thread_unlock(thread);
742 splx(s);
743
744 thread_policy_update_complete_unlocked(thread, &pend_token);
745 }
746
747 /*
748 * private interface for pthread workqueues
749 *
750 * Apply a workqueue QoS override to the thread
751 * May be called with spinlocks held
752 * Thread mutex lock is held
753 */
754 void
755 thread_set_workq_override(thread_t thread, uint32_t qos)
756 {
757 struct task_pend_token pend_token = {};
758
759 assert(qos < THREAD_QOS_LAST);
760
761 spl_t s = splsched();
762 thread_lock(thread);
763
764 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
765 TASK_POLICY_QOS_WORKQ_OVERRIDE, qos, 0, &pend_token);
766
767 assert(pend_token.tpt_update_sockets == 0);
768
769 thread_unlock(thread);
770 splx(s);
771
772 thread_policy_update_complete_unlocked(thread, &pend_token);
773 }
774
775 /*
776 * private interface for pthread workqueues
777 *
778 * Set scheduling policy & absolute priority for thread
779 * May be called with spinlocks held
780 * Thread mutex lock is not held
781 */
782 void
783 thread_set_workq_pri(thread_t thread,
784 thread_qos_t qos,
785 integer_t priority,
786 integer_t policy)
787 {
788 struct task_pend_token pend_token = {};
789 sched_mode_t mode = convert_policy_to_sched_mode(policy);
790
791 assert(qos < THREAD_QOS_LAST);
792 assert(thread->static_param);
793
794 if (!thread->static_param || !thread->active) {
795 return;
796 }
797
798 spl_t s = splsched();
799 thread_lock(thread);
800
801 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
802 TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
803 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
804 TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED,
805 0, &pend_token);
806
807 thread_unlock(thread);
808 splx(s);
809
810 /* Concern: this doesn't hold the mutex... */
811
812 __assert_only kern_return_t kr;
813 kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority,
814 &pend_token);
815 assert(kr == KERN_SUCCESS);
816
817 if (pend_token.tpt_update_thread_sfi) {
818 sfi_reevaluate(thread);
819 }
820 }
821
822 /*
823 * thread_set_mode_and_absolute_pri:
824 *
825 * Set scheduling policy & absolute priority for thread, for deprecated
826 * thread_set_policy and thread_policy interfaces.
827 *
828 * Called with nothing locked.
829 */
830 kern_return_t
831 thread_set_mode_and_absolute_pri(thread_t thread,
832 integer_t policy,
833 integer_t priority)
834 {
835 kern_return_t kr = KERN_SUCCESS;
836 struct task_pend_token pend_token = {};
837
838 sched_mode_t mode = convert_policy_to_sched_mode(policy);
839
840 thread_mtx_lock(thread);
841
842 if (!thread->active) {
843 kr = KERN_TERMINATED;
844 goto unlock;
845 }
846
847 if (thread_is_static_param(thread)) {
848 kr = KERN_POLICY_STATIC;
849 goto unlock;
850 }
851
852 /* Setting legacy policies on threads kills the current QoS */
853 if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
854 thread_remove_qos_policy_locked(thread, &pend_token);
855 }
856
857 kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority, &pend_token);
858
859 unlock:
860 thread_mtx_unlock(thread);
861
862 thread_policy_update_complete_unlocked(thread, &pend_token);
863
864 return kr;
865 }
866
867 /*
868 * Set the thread's requested mode and recompute priority
869 * Called with thread mutex and thread locked
870 *
871 * TODO: Mitigate potential problems caused by moving thread to end of runq
872 * whenever its priority is recomputed
873 * Only remove when it actually changes? Attempt to re-insert at appropriate location?
874 */
875 static void
876 thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode)
877 {
878 if (thread->policy_reset) {
879 return;
880 }
881
882 boolean_t removed = thread_run_queue_remove(thread);
883
884 /*
885 * TODO: Instead of having saved mode, have 'user mode' and 'true mode'.
886 * That way there's zero confusion over which the user wants
887 * and which the kernel wants.
888 */
889 if (thread->sched_flags & TH_SFLAG_DEMOTED_MASK) {
890 thread->saved_mode = mode;
891 } else {
892 sched_set_thread_mode(thread, mode);
893 }
894
895 thread_recompute_priority(thread);
896
897 if (removed) {
898 thread_run_queue_reinsert(thread, SCHED_TAILQ);
899 }
900 }
901
902 /* called at splsched with thread lock locked */
903 static void
904 thread_update_qos_cpu_time_locked(thread_t thread)
905 {
906 task_t task = thread->task;
907 uint64_t timer_sum, timer_delta;
908
909 /*
910 * This is only as accurate as the distance between
911 * last context switch (embedded) or last user/kernel boundary transition (desktop)
912 * because user_timer and system_timer are only updated then.
913 *
914 * TODO: Consider running a timer_update operation here to update it first.
915 * Maybe doable with interrupts disabled from current thread.
916 * If the thread is on a different core, may not be easy to get right.
917 *
918 * TODO: There should be a function for this in timer.c
919 */
920
921 timer_sum = timer_grab(&thread->user_timer);
922 timer_sum += timer_grab(&thread->system_timer);
923 timer_delta = timer_sum - thread->vtimer_qos_save;
924
925 thread->vtimer_qos_save = timer_sum;
926
927 uint64_t* task_counter = NULL;
928
929 /* Update the task-level effective and requested qos stats atomically, because we don't have the task lock. */
930 switch (thread->effective_policy.thep_qos) {
931 case THREAD_QOS_UNSPECIFIED: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_default; break;
932 case THREAD_QOS_MAINTENANCE: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_maintenance; break;
933 case THREAD_QOS_BACKGROUND: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_background; break;
934 case THREAD_QOS_UTILITY: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_utility; break;
935 case THREAD_QOS_LEGACY: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_legacy; break;
936 case THREAD_QOS_USER_INITIATED: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_initiated; break;
937 case THREAD_QOS_USER_INTERACTIVE: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_interactive; break;
938 default:
939 panic("unknown effective QoS: %d", thread->effective_policy.thep_qos);
940 }
941
942 OSAddAtomic64(timer_delta, task_counter);
943
944 /* Update the task-level qos stats atomically, because we don't have the task lock. */
945 switch (thread->requested_policy.thrp_qos) {
946 case THREAD_QOS_UNSPECIFIED: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_default; break;
947 case THREAD_QOS_MAINTENANCE: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_maintenance; break;
948 case THREAD_QOS_BACKGROUND: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_background; break;
949 case THREAD_QOS_UTILITY: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_utility; break;
950 case THREAD_QOS_LEGACY: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_legacy; break;
951 case THREAD_QOS_USER_INITIATED: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_initiated; break;
952 case THREAD_QOS_USER_INTERACTIVE: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_interactive; break;
953 default:
954 panic("unknown requested QoS: %d", thread->requested_policy.thrp_qos);
955 }
956
957 OSAddAtomic64(timer_delta, task_counter);
958 }
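/*
 * Observation sketch (editor's illustration): the per-QoS counters updated
 * here surface to user space through the resource usage interface. Assuming
 * <libproc.h> and a recent rusage_info version, a monitoring tool could read
 * them roughly like this:
 *
 *	struct rusage_info_v4 ri;
 *	if (proc_pid_rusage(getpid(), RUSAGE_INFO_V4, (rusage_info_t *)&ri) == 0) {
 *		printf("utility cpu time: %llu\n", ri.ri_cpu_time_qos_utility);
 *	}
 */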
959
960 /*
961 * called with no thread locks held
962 * may hold task lock
963 */
964 void
965 thread_update_qos_cpu_time(thread_t thread)
966 {
967 thread_mtx_lock(thread);
968
969 spl_t s = splsched();
970 thread_lock(thread);
971
972 thread_update_qos_cpu_time_locked(thread);
973
974 thread_unlock(thread);
975 splx(s);
976
977 thread_mtx_unlock(thread);
978 }
979
980 /*
981 * Calculate base priority from thread attributes, and set it on the thread
982 *
983 * Called with thread_lock and thread mutex held.
984 */
985 extern thread_t vm_pageout_scan_thread;
986 extern boolean_t vps_dynamic_priority_enabled;
987
988 void
989 thread_recompute_priority(
990 thread_t thread)
991 {
992 integer_t priority;
993
994 if (thread->policy_reset) {
995 return;
996 }
997
998 if (thread->sched_mode == TH_MODE_REALTIME) {
999 sched_set_thread_base_priority(thread, BASEPRI_RTQUEUES);
1000 return;
1001 } else if (thread->effective_policy.thep_qos != THREAD_QOS_UNSPECIFIED) {
1002 int qos = thread->effective_policy.thep_qos;
1003 int qos_ui_is_urgent = thread->effective_policy.thep_qos_ui_is_urgent;
1004 int qos_relprio = -(thread->effective_policy.thep_qos_relprio); /* stored in task policy inverted */
1005 int qos_scaled_relprio;
1006
1007 assert(qos >= 0 && qos < THREAD_QOS_LAST);
1008 assert(qos_relprio <= 0 && qos_relprio >= THREAD_QOS_MIN_TIER_IMPORTANCE);
1009
1010 priority = thread_qos_policy_params.qos_pri[qos];
1011 qos_scaled_relprio = thread_qos_scaled_relative_priority(qos, qos_relprio);
1012
1013 if (qos == THREAD_QOS_USER_INTERACTIVE && qos_ui_is_urgent == 1) {
1014 /* Bump priority 46 to 47 when in a frontmost app */
1015 qos_scaled_relprio += 1;
1016 }
1017
1018 /* TODO: factor in renice priority here? */
1019
1020 priority += qos_scaled_relprio;
1021 } else {
1022 if (thread->importance > MAXPRI) {
1023 priority = MAXPRI;
1024 } else if (thread->importance < -MAXPRI) {
1025 priority = -MAXPRI;
1026 } else {
1027 priority = thread->importance;
1028 }
1029
1030 priority += thread->task_priority;
1031 }
1032
1033 priority = MAX(priority, thread->user_promotion_basepri);
1034
1035 /*
1036 * Clamp priority back into the allowed range for this task.
1037 * The initial priority value could be out of this range due to:
1038 * Task clamped to BG or Utility (max-pri is 4, or 20)
1039 * Task is user task (max-pri is 63)
1040 * Task is kernel task (max-pri is 95)
1041 * Note that thread->importance is user-settable to any integer
1042 * via THREAD_PRECEDENCE_POLICY.
1043 */
1044 if (priority > thread->max_priority) {
1045 priority = thread->max_priority;
1046 } else if (priority < MINPRI) {
1047 priority = MINPRI;
1048 }
1049
1050 if (thread->saved_mode == TH_MODE_REALTIME &&
1051 thread->sched_flags & TH_SFLAG_FAILSAFE) {
1052 priority = DEPRESSPRI;
1053 }
1054
1055 if (thread->effective_policy.thep_terminated == TRUE) {
1056 /*
1057 * We temporarily want to override the expected priority to
1058 * ensure that the thread exits in a timely manner.
1059 * Note that this is allowed to exceed thread->max_priority
1060 * so that the thread is no longer clamped to background
1061 * during the final exit phase.
1062 */
1063 if (priority < thread->task_priority) {
1064 priority = thread->task_priority;
1065 }
1066 if (priority < BASEPRI_DEFAULT) {
1067 priority = BASEPRI_DEFAULT;
1068 }
1069 }
1070
1071 #if CONFIG_EMBEDDED
1072 /* No one can have a base priority less than MAXPRI_THROTTLE */
1073 if (priority < MAXPRI_THROTTLE) {
1074 priority = MAXPRI_THROTTLE;
1075 }
1076 #endif /* CONFIG_EMBEDDED */
1077
1078 sched_set_thread_base_priority(thread, priority);
1079 }
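/*
 * Worked example (editor's illustration): a THREAD_QOS_UTILITY thread with
 * relative priority -2 starts from qos_pri[UTILITY] = 20; the scaled offset
 * is -(((20 - 4) * 2) >> 4) = -2, giving a base priority of 18. The result
 * is then raised to any user promotion base priority and clamped to
 * [MINPRI, max_priority] before being applied.
 */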
1080
1081 /* Called with the task lock held, but not the thread mutex or spinlock */
1082 void
1083 thread_policy_update_tasklocked(
1084 thread_t thread,
1085 integer_t priority,
1086 integer_t max_priority,
1087 task_pend_token_t pend_token)
1088 {
1089 thread_mtx_lock(thread);
1090
1091 if (!thread->active || thread->policy_reset) {
1092 thread_mtx_unlock(thread);
1093 return;
1094 }
1095
1096 spl_t s = splsched();
1097 thread_lock(thread);
1098
1099 __unused
1100 integer_t old_max_priority = thread->max_priority;
1101
1102 thread->task_priority = priority;
1103 thread->max_priority = max_priority;
1104
1105 #if CONFIG_EMBEDDED
1106 /*
1107 * When backgrounding a thread, iOS has the semantic that
1108 * realtime and fixed priority threads should be demoted
1109 * to timeshare background threads.
1110 *
1111 * On OSX, realtime and fixed priority threads don't lose their mode.
1112 *
1113 * TODO: Do this inside the thread policy update routine in order to avoid double
1114 * remove/reinsert for a runnable thread
1115 */
1116 if ((max_priority <= MAXPRI_THROTTLE) && (old_max_priority > MAXPRI_THROTTLE)) {
1117 sched_thread_mode_demote(thread, TH_SFLAG_THROTTLED);
1118 } else if ((max_priority > MAXPRI_THROTTLE) && (old_max_priority <= MAXPRI_THROTTLE)) {
1119 sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
1120 }
1121 #endif /* CONFIG_EMBEDDED */
1122
1123 thread_policy_update_spinlocked(thread, TRUE, pend_token);
1124
1125 thread_unlock(thread);
1126 splx(s);
1127
1128 thread_mtx_unlock(thread);
1129 }
1130
1131 /*
1132 * Reset thread to default state in preparation for termination
1133 * Called with thread mutex locked
1134 *
1135 * Always called on current thread, so we don't need a run queue remove
1136 */
1137 void
1138 thread_policy_reset(
1139 thread_t thread)
1140 {
1141 spl_t s;
1142
1143 assert(thread == current_thread());
1144
1145 s = splsched();
1146 thread_lock(thread);
1147
1148 if (thread->sched_flags & TH_SFLAG_FAILSAFE) {
1149 sched_thread_mode_undemote(thread, TH_SFLAG_FAILSAFE);
1150 }
1151
1152 if (thread->sched_flags & TH_SFLAG_THROTTLED) {
1153 sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
1154 }
1155
1156 /* At this point, the various demotions should be inactive */
1157 assert(!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK));
1158 assert(!(thread->sched_flags & TH_SFLAG_THROTTLED));
1159 assert(!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK));
1160
1161 /* Reset thread back to task-default basepri and mode */
1162 sched_mode_t newmode = SCHED(initial_thread_sched_mode)(thread->task);
1163
1164 sched_set_thread_mode(thread, newmode);
1165
1166 thread->importance = 0;
1167
1168 /* Prevent further changes to thread base priority or mode */
1169 thread->policy_reset = 1;
1170
1171 sched_set_thread_base_priority(thread, thread->task_priority);
1172
1173 thread_unlock(thread);
1174 splx(s);
1175 }
1176
1177 kern_return_t
1178 thread_policy_get(
1179 thread_t thread,
1180 thread_policy_flavor_t flavor,
1181 thread_policy_t policy_info,
1182 mach_msg_type_number_t *count,
1183 boolean_t *get_default)
1184 {
1185 kern_return_t result = KERN_SUCCESS;
1186
1187 if (thread == THREAD_NULL) {
1188 return KERN_INVALID_ARGUMENT;
1189 }
1190
1191 thread_mtx_lock(thread);
1192 if (!thread->active) {
1193 thread_mtx_unlock(thread);
1194
1195 return KERN_TERMINATED;
1196 }
1197
1198 switch (flavor) {
1199 case THREAD_EXTENDED_POLICY:
1200 {
1201 boolean_t timeshare = TRUE;
1202
1203 if (!(*get_default)) {
1204 spl_t s = splsched();
1205 thread_lock(thread);
1206
1207 if ((thread->sched_mode != TH_MODE_REALTIME) &&
1208 (thread->saved_mode != TH_MODE_REALTIME)) {
1209 if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK)) {
1210 timeshare = (thread->sched_mode == TH_MODE_TIMESHARE) != 0;
1211 } else {
1212 timeshare = (thread->saved_mode == TH_MODE_TIMESHARE) != 0;
1213 }
1214 } else {
1215 *get_default = TRUE;
1216 }
1217
1218 thread_unlock(thread);
1219 splx(s);
1220 }
1221
1222 if (*count >= THREAD_EXTENDED_POLICY_COUNT) {
1223 thread_extended_policy_t info;
1224
1225 info = (thread_extended_policy_t)policy_info;
1226 info->timeshare = timeshare;
1227 }
1228
1229 break;
1230 }
1231
1232 case THREAD_TIME_CONSTRAINT_POLICY:
1233 {
1234 thread_time_constraint_policy_t info;
1235
1236 if (*count < THREAD_TIME_CONSTRAINT_POLICY_COUNT) {
1237 result = KERN_INVALID_ARGUMENT;
1238 break;
1239 }
1240
1241 info = (thread_time_constraint_policy_t)policy_info;
1242
1243 if (!(*get_default)) {
1244 spl_t s = splsched();
1245 thread_lock(thread);
1246
1247 if ((thread->sched_mode == TH_MODE_REALTIME) ||
1248 (thread->saved_mode == TH_MODE_REALTIME)) {
1249 info->period = thread->realtime.period;
1250 info->computation = thread->realtime.computation;
1251 info->constraint = thread->realtime.constraint;
1252 info->preemptible = thread->realtime.preemptible;
1253 } else {
1254 *get_default = TRUE;
1255 }
1256
1257 thread_unlock(thread);
1258 splx(s);
1259 }
1260
1261 if (*get_default) {
1262 info->period = 0;
1263 info->computation = default_timeshare_computation;
1264 info->constraint = default_timeshare_constraint;
1265 info->preemptible = TRUE;
1266 }
1267
1268 break;
1269 }
1270
1271 case THREAD_PRECEDENCE_POLICY:
1272 {
1273 thread_precedence_policy_t info;
1274
1275 if (*count < THREAD_PRECEDENCE_POLICY_COUNT) {
1276 result = KERN_INVALID_ARGUMENT;
1277 break;
1278 }
1279
1280 info = (thread_precedence_policy_t)policy_info;
1281
1282 if (!(*get_default)) {
1283 spl_t s = splsched();
1284 thread_lock(thread);
1285
1286 info->importance = thread->importance;
1287
1288 thread_unlock(thread);
1289 splx(s);
1290 } else {
1291 info->importance = 0;
1292 }
1293
1294 break;
1295 }
1296
1297 case THREAD_AFFINITY_POLICY:
1298 {
1299 thread_affinity_policy_t info;
1300
1301 if (!thread_affinity_is_supported()) {
1302 result = KERN_NOT_SUPPORTED;
1303 break;
1304 }
1305 if (*count < THREAD_AFFINITY_POLICY_COUNT) {
1306 result = KERN_INVALID_ARGUMENT;
1307 break;
1308 }
1309
1310 info = (thread_affinity_policy_t)policy_info;
1311
1312 if (!(*get_default)) {
1313 info->affinity_tag = thread_affinity_get(thread);
1314 } else {
1315 info->affinity_tag = THREAD_AFFINITY_TAG_NULL;
1316 }
1317
1318 break;
1319 }
1320
1321 case THREAD_POLICY_STATE:
1322 {
1323 thread_policy_state_t info;
1324
1325 if (*count < THREAD_POLICY_STATE_COUNT) {
1326 result = KERN_INVALID_ARGUMENT;
1327 break;
1328 }
1329
1330 /* Only root can get this info */
1331 if (current_task()->sec_token.val[0] != 0) {
1332 result = KERN_PROTECTION_FAILURE;
1333 break;
1334 }
1335
1336 info = (thread_policy_state_t)(void*)policy_info;
1337
1338 if (!(*get_default)) {
1339 info->flags = 0;
1340
1341 spl_t s = splsched();
1342 thread_lock(thread);
1343
1344 info->flags |= (thread->static_param ? THREAD_POLICY_STATE_FLAG_STATIC_PARAM : 0);
1345
1346 info->thps_requested_policy = *(uint64_t*)(void*)(&thread->requested_policy);
1347 info->thps_effective_policy = *(uint64_t*)(void*)(&thread->effective_policy);
1348
1349 info->thps_user_promotions = 0;
1350 info->thps_user_promotion_basepri = thread->user_promotion_basepri;
1351 info->thps_ipc_overrides = thread->kevent_overrides;
1352
1353 proc_get_thread_policy_bitfield(thread, info);
1354
1355 thread_unlock(thread);
1356 splx(s);
1357 } else {
1358 info->requested = 0;
1359 info->effective = 0;
1360 info->pending = 0;
1361 }
1362
1363 break;
1364 }
1365
1366 case THREAD_LATENCY_QOS_POLICY:
1367 {
1368 thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
1369 thread_latency_qos_t plqos;
1370
1371 if (*count < THREAD_LATENCY_QOS_POLICY_COUNT) {
1372 result = KERN_INVALID_ARGUMENT;
1373 break;
1374 }
1375
1376 if (*get_default) {
1377 plqos = 0;
1378 } else {
1379 plqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_LATENCY_QOS, NULL);
1380 }
1381
1382 info->thread_latency_qos_tier = qos_latency_policy_package(plqos);
1383 }
1384 break;
1385
1386 case THREAD_THROUGHPUT_QOS_POLICY:
1387 {
1388 thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
1389 thread_throughput_qos_t ptqos;
1390
1391 if (*count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
1392 result = KERN_INVALID_ARGUMENT;
1393 break;
1394 }
1395
1396 if (*get_default) {
1397 ptqos = 0;
1398 } else {
1399 ptqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_THROUGH_QOS, NULL);
1400 }
1401
1402 info->thread_throughput_qos_tier = qos_throughput_policy_package(ptqos);
1403 }
1404 break;
1405
1406 case THREAD_QOS_POLICY:
1407 {
1408 thread_qos_policy_t info = (thread_qos_policy_t)policy_info;
1409
1410 if (*count < THREAD_QOS_POLICY_COUNT) {
1411 result = KERN_INVALID_ARGUMENT;
1412 break;
1413 }
1414
1415 if (!(*get_default)) {
1416 int relprio_value = 0;
1417 info->qos_tier = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
1418 TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
1419
1420 info->tier_importance = -relprio_value;
1421 } else {
1422 info->qos_tier = THREAD_QOS_UNSPECIFIED;
1423 info->tier_importance = 0;
1424 }
1425
1426 break;
1427 }
1428
1429 default:
1430 result = KERN_INVALID_ARGUMENT;
1431 break;
1432 }
1433
1434 thread_mtx_unlock(thread);
1435
1436 return result;
1437 }
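/*
 * Usage sketch (editor's illustration, user-space caller): reading a thread's
 * QoS policy, including whether the returned value is merely the default:
 *
 *	thread_qos_policy_data_t qosinfo;
 *	mach_msg_type_number_t cnt = THREAD_QOS_POLICY_COUNT;
 *	boolean_t get_default = FALSE;
 *	kern_return_t kr = thread_policy_get(mach_thread_self(),
 *	    THREAD_QOS_POLICY, (thread_policy_t)&qosinfo, &cnt, &get_default);
 *
 * On success, qosinfo.qos_tier and qosinfo.tier_importance reflect the
 * requested (not effective) values, per the attribute accessors below.
 */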
1438
1439 void
1440 thread_policy_create(thread_t thread)
1441 {
1442 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1443 (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_START,
1444 thread_tid(thread), theffective_0(thread),
1445 theffective_1(thread), thread->base_pri, 0);
1446
1447 /* We pass a pend token but ignore it */
1448 struct task_pend_token pend_token = {};
1449
1450 thread_policy_update_internal_spinlocked(thread, TRUE, &pend_token);
1451
1452 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1453 (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_END,
1454 thread_tid(thread), theffective_0(thread),
1455 theffective_1(thread), thread->base_pri, 0);
1456 }
1457
1458 static void
1459 thread_policy_update_spinlocked(thread_t thread, boolean_t recompute_priority, task_pend_token_t pend_token)
1460 {
1461 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1462 (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD) | DBG_FUNC_START),
1463 thread_tid(thread), theffective_0(thread),
1464 theffective_1(thread), thread->base_pri, 0);
1465
1466 thread_policy_update_internal_spinlocked(thread, recompute_priority, pend_token);
1467
1468 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1469 (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD)) | DBG_FUNC_END,
1470 thread_tid(thread), theffective_0(thread),
1471 theffective_1(thread), thread->base_pri, 0);
1472 }
1473
1474
1475
1476 /*
1477 * One thread state update function TO RULE THEM ALL
1478 *
1479 * This function updates the thread effective policy fields
1480 * and pushes the results to the relevant subsystems.
1481 *
1482 * Side effects that cannot run under the thread spinlock are flagged in pend_token for the caller.
1483 *
1484 * Called with thread spinlock locked, task may be locked, thread mutex may be locked
1485 */
1486 static void
1487 thread_policy_update_internal_spinlocked(thread_t thread, boolean_t recompute_priority,
1488 task_pend_token_t pend_token)
1489 {
1490 /*
1491 * Step 1:
1492 * Gather requested policy and effective task state
1493 */
1494
1495 struct thread_requested_policy requested = thread->requested_policy;
1496 struct task_effective_policy task_effective = thread->task->effective_policy;
1497
1498 /*
1499 * Step 2:
1500 * Calculate new effective policies from requested policy, task and thread state
1501 * Rules:
1502 * Don't change requested, it won't take effect
1503 */
1504
1505 struct thread_effective_policy next = {};
1506
1507 next.thep_qos_ui_is_urgent = task_effective.tep_qos_ui_is_urgent;
1508
1509 uint32_t next_qos = requested.thrp_qos;
1510
1511 if (requested.thrp_qos != THREAD_QOS_UNSPECIFIED) {
1512 next_qos = MAX(requested.thrp_qos_override, next_qos);
1513 next_qos = MAX(requested.thrp_qos_promote, next_qos);
1514 next_qos = MAX(requested.thrp_qos_kevent_override, next_qos);
1515 next_qos = MAX(requested.thrp_qos_wlsvc_override, next_qos);
1516 next_qos = MAX(requested.thrp_qos_workq_override, next_qos);
1517 }
1518
1519 next.thep_qos = next_qos;
1520
1521 /* A task clamp will result in an effective QoS even when requested is UNSPECIFIED */
1522 if (task_effective.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
1523 if (next.thep_qos != THREAD_QOS_UNSPECIFIED) {
1524 next.thep_qos = MIN(task_effective.tep_qos_clamp, next.thep_qos);
1525 } else {
1526 next.thep_qos = task_effective.tep_qos_clamp;
1527 }
1528 }
1529
1530 /*
1531 * Extract outbound-promotion QoS before applying task ceiling or BG clamp
1532 * This allows QoS promotions to work properly even after the process is unclamped.
1533 */
1534 next.thep_qos_promote = next.thep_qos;
1535
1536 /* The ceiling only applies to threads that are in the QoS world */
1537 if (task_effective.tep_qos_ceiling != THREAD_QOS_UNSPECIFIED &&
1538 next.thep_qos != THREAD_QOS_UNSPECIFIED) {
1539 next.thep_qos = MIN(task_effective.tep_qos_ceiling, next.thep_qos);
1540 }
1541
1542 /* Apply the sync ipc qos override */
1543 assert(requested.thrp_qos_sync_ipc_override == THREAD_QOS_UNSPECIFIED);
1544
1545 /*
1546 * The QoS relative priority is only applicable when the original programmer's
1547 * intended (requested) QoS is in effect. When the QoS is clamped (e.g.
1548 * USER_INITIATED-13REL clamped to UTILITY), the relative priority is not honored,
1549 * since otherwise it would be lower than unclamped threads. Similarly, in the
1550 * presence of boosting, the programmer doesn't know what other actors
1551 * are boosting the thread.
1552 */
1553 if ((requested.thrp_qos != THREAD_QOS_UNSPECIFIED) &&
1554 (requested.thrp_qos == next.thep_qos) &&
1555 (requested.thrp_qos_override == THREAD_QOS_UNSPECIFIED)) {
1556 next.thep_qos_relprio = requested.thrp_qos_relprio;
1557 } else {
1558 next.thep_qos_relprio = 0;
1559 }
1560
1561 /* Calculate DARWIN_BG */
1562 boolean_t wants_darwinbg = FALSE;
1563 boolean_t wants_all_sockets_bg = FALSE; /* Do I want my existing sockets to be bg */
1564
1565 /*
1566 * If DARWIN_BG has been requested at either level, it's engaged.
1567 * darwinbg threads always create bg sockets,
1568 * but only some types of darwinbg change the sockets
1569 * after they're created
1570 */
1571 if (requested.thrp_int_darwinbg || requested.thrp_ext_darwinbg) {
1572 wants_all_sockets_bg = wants_darwinbg = TRUE;
1573 }
1574
1575 if (requested.thrp_pidbind_bg) {
1576 wants_all_sockets_bg = wants_darwinbg = TRUE;
1577 }
1578
1579 if (task_effective.tep_darwinbg) {
1580 wants_darwinbg = TRUE;
1581 }
1582
1583 if (next.thep_qos == THREAD_QOS_BACKGROUND ||
1584 next.thep_qos == THREAD_QOS_MAINTENANCE) {
1585 wants_darwinbg = TRUE;
1586 }
1587
1588 /* Calculate side effects of DARWIN_BG */
1589
1590 if (wants_darwinbg) {
1591 next.thep_darwinbg = 1;
1592 }
1593
1594 if (next.thep_darwinbg || task_effective.tep_new_sockets_bg) {
1595 next.thep_new_sockets_bg = 1;
1596 }
1597
1598 /* Don't use task_effective.tep_all_sockets_bg here */
1599 if (wants_all_sockets_bg) {
1600 next.thep_all_sockets_bg = 1;
1601 }
1602
1603 /* darwinbg implies background QOS (or lower) */
1604 if (next.thep_darwinbg &&
1605 (next.thep_qos > THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_UNSPECIFIED)) {
1606 next.thep_qos = THREAD_QOS_BACKGROUND;
1607 next.thep_qos_relprio = 0;
1608 }
1609
1610 /* Calculate IO policy */
1611
1612 int iopol = THROTTLE_LEVEL_TIER0;
1613
1614 /* Factor in the task's IO policy */
1615 if (next.thep_darwinbg) {
1616 iopol = MAX(iopol, task_effective.tep_bg_iotier);
1617 }
1618
1619 iopol = MAX(iopol, task_effective.tep_io_tier);
1620
1621 /* Look up the associated IO tier value for the QoS class */
1622 iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.thep_qos]);
1623
1624 iopol = MAX(iopol, requested.thrp_int_iotier);
1625 iopol = MAX(iopol, requested.thrp_ext_iotier);
1626
1627 next.thep_io_tier = iopol;
1628
1629 /*
1630 * If a QoS override is causing IO to go into a lower tier, we also set
1631 * the passive bit so that a thread doesn't end up stuck in its own throttle
1632 * window when the override goes away.
1633 */
1634 boolean_t qos_io_override_active = FALSE;
1635 if (thread_qos_policy_params.qos_iotier[next.thep_qos] <
1636 thread_qos_policy_params.qos_iotier[requested.thrp_qos]) {
1637 qos_io_override_active = TRUE;
1638 }
1639
1640 /* Calculate Passive IO policy */
1641 if (requested.thrp_ext_iopassive ||
1642 requested.thrp_int_iopassive ||
1643 qos_io_override_active ||
1644 task_effective.tep_io_passive) {
1645 next.thep_io_passive = 1;
1646 }
1647
1648 /* Calculate timer QOS */
1649 uint32_t latency_qos = requested.thrp_latency_qos;
1650
1651 latency_qos = MAX(latency_qos, task_effective.tep_latency_qos);
1652 latency_qos = MAX(latency_qos, thread_qos_policy_params.qos_latency_qos[next.thep_qos]);
1653
1654 next.thep_latency_qos = latency_qos;
1655
1656 /* Calculate throughput QOS */
1657 uint32_t through_qos = requested.thrp_through_qos;
1658
1659 through_qos = MAX(through_qos, task_effective.tep_through_qos);
1660 through_qos = MAX(through_qos, thread_qos_policy_params.qos_through_qos[next.thep_qos]);
1661
1662 next.thep_through_qos = through_qos;
1663
1664 if (task_effective.tep_terminated || requested.thrp_terminated) {
1665 /* Shoot down the throttles that slow down exit or response to SIGTERM */
1666 next.thep_terminated = 1;
1667 next.thep_darwinbg = 0;
1668 next.thep_io_tier = THROTTLE_LEVEL_TIER0;
1669 next.thep_qos = THREAD_QOS_UNSPECIFIED;
1670 next.thep_latency_qos = LATENCY_QOS_TIER_UNSPECIFIED;
1671 next.thep_through_qos = THROUGHPUT_QOS_TIER_UNSPECIFIED;
1672 }
1673
1674 /*
1675 * Step 3:
1676 * Swap out old policy for new policy
1677 */
1678
1679 struct thread_effective_policy prev = thread->effective_policy;
1680
1681 thread_update_qos_cpu_time_locked(thread);
1682
1683 /* This is the point where the new values become visible to other threads */
1684 thread->effective_policy = next;
1685
1686 /*
1687 * Step 4:
1688 * Pend updates that can't be done while holding the thread lock
1689 */
1690
1691 if (prev.thep_all_sockets_bg != next.thep_all_sockets_bg) {
1692 pend_token->tpt_update_sockets = 1;
1693 }
1694
1695 /* TODO: Doesn't this only need to be done if the throttle went up? */
1696 if (prev.thep_io_tier != next.thep_io_tier) {
1697 pend_token->tpt_update_throttle = 1;
1698 }
1699
1700 /*
1701 * Check for the attributes that sfi_thread_classify() consults,
1702 * and trigger SFI re-evaluation.
1703 */
1704 if (prev.thep_qos != next.thep_qos ||
1705 prev.thep_darwinbg != next.thep_darwinbg) {
1706 pend_token->tpt_update_thread_sfi = 1;
1707 }
1708
1709 integer_t old_base_pri = thread->base_pri;
1710
1711 /*
1712 * Step 5:
1713 * Update other subsystems as necessary if something has changed
1714 */
1715
1716 /* Check for the attributes that thread_recompute_priority() consults */
1717 if (prev.thep_qos != next.thep_qos ||
1718 prev.thep_qos_relprio != next.thep_qos_relprio ||
1719 prev.thep_qos_ui_is_urgent != next.thep_qos_ui_is_urgent ||
1720 prev.thep_terminated != next.thep_terminated ||
1721 pend_token->tpt_force_recompute_pri == 1 ||
1722 recompute_priority) {
1723 thread_recompute_priority(thread);
1724 }
1725
1726 /*
1727 * Check if the thread is waiting on a turnstile and needs priority propagation.
1728 */
1729 if (pend_token->tpt_update_turnstile &&
1730 ((old_base_pri == thread->base_pri) ||
1731 !thread_get_waiting_turnstile(thread))) {
1732 /*
1733 * Reset update turnstile pend token since either
1734 * the thread priority did not change or thread is
1735 * not blocked on a turnstile.
1736 */
1737 pend_token->tpt_update_turnstile = 0;
1738 }
1739 }
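/*
 * Worked example (editor's illustration): a thread that requested
 * THREAD_QOS_UTILITY with -3REL but currently holds a kevent override of
 * USER_INITIATED resolves next_qos to USER_INITIATED. Because the effective
 * QoS no longer matches the requested QoS, the relative priority is dropped
 * (thep_qos_relprio = 0), and USER_INITIATED is exported via thep_qos_promote
 * for outbound promotions. When the override is later removed, another pass
 * through this function restores UTILITY-3REL.
 */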
1740
1741
1742 /*
1743 * Initiate a thread policy state transition on a thread with its TID
1744 * Useful if you cannot guarantee the thread won't get terminated
1745 * Precondition: No locks are held
1746 * Will take task lock - using the non-tid variant is faster
1747 * if you already have a thread ref.
1748 */
1749 void
1750 proc_set_thread_policy_with_tid(task_t task,
1751 uint64_t tid,
1752 int category,
1753 int flavor,
1754 int value)
1755 {
1756 /* takes task lock, returns ref'ed thread or NULL */
1757 thread_t thread = task_findtid(task, tid);
1758
1759 if (thread == THREAD_NULL) {
1760 return;
1761 }
1762
1763 proc_set_thread_policy(thread, category, flavor, value);
1764
1765 thread_deallocate(thread);
1766 }
1767
1768 /*
1769 * Initiate a thread policy transition on a thread
1770 * This path supports networking transitions (i.e. darwinbg transitions)
1771 * Precondition: No locks are held
1772 */
1773 void
1774 proc_set_thread_policy(thread_t thread,
1775 int category,
1776 int flavor,
1777 int value)
1778 {
1779 struct task_pend_token pend_token = {};
1780
1781 thread_mtx_lock(thread);
1782
1783 proc_set_thread_policy_locked(thread, category, flavor, value, 0, &pend_token);
1784
1785 thread_mtx_unlock(thread);
1786
1787 thread_policy_update_complete_unlocked(thread, &pend_token);
1788 }
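/*
 * Usage sketch (editor's illustration): an in-kernel caller that wants to
 * tag a thread as darwinbg on its own behalf could, hypothetically, use:
 *
 *	proc_set_thread_policy(thread, TASK_POLICY_INTERNAL,
 *	    TASK_POLICY_DARWIN_BG, TASK_POLICY_ENABLE);
 *
 * which funnels through the locked/spinlocked setters below and then runs
 * the pended side effects (socket, throttle, SFI, turnstile updates).
 */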
1789
1790 /*
1791 * Do the things that can't be done while holding a thread mutex.
1792 * These are set up to call back into thread policy to get the latest value,
1793 * so they don't have to be synchronized with the update.
1794 * The only required semantic is 'call this sometime after updating effective policy'
1795 *
1796 * Precondition: Thread mutex is not held
1797 *
1798 * This may be called with the task lock held, but in that case it won't be
1799 * called with tpt_update_sockets set.
1800 */
1801 void
1802 thread_policy_update_complete_unlocked(thread_t thread, task_pend_token_t pend_token)
1803 {
1804 #ifdef MACH_BSD
1805 if (pend_token->tpt_update_sockets) {
1806 proc_apply_task_networkbg(thread->task->bsd_info, thread);
1807 }
1808 #endif /* MACH_BSD */
1809
1810 if (pend_token->tpt_update_throttle) {
1811 rethrottle_thread(thread->uthread);
1812 }
1813
1814 if (pend_token->tpt_update_thread_sfi) {
1815 sfi_reevaluate(thread);
1816 }
1817
1818 if (pend_token->tpt_update_turnstile) {
1819 turnstile_update_thread_priority_chain(thread);
1820 }
1821 }
1822
1823 /*
1824 * Set and update thread policy
1825 * Thread mutex might be held
1826 */
1827 static void
1828 proc_set_thread_policy_locked(thread_t thread,
1829 int category,
1830 int flavor,
1831 int value,
1832 int value2,
1833 task_pend_token_t pend_token)
1834 {
1835 spl_t s = splsched();
1836 thread_lock(thread);
1837
1838 proc_set_thread_policy_spinlocked(thread, category, flavor, value, value2, pend_token);
1839
1840 thread_unlock(thread);
1841 splx(s);
1842 }
1843
1844 /*
1845 * Set and update thread policy
1846 * Thread spinlock is held
1847 */
1848 static void
1849 proc_set_thread_policy_spinlocked(thread_t thread,
1850 int category,
1851 int flavor,
1852 int value,
1853 int value2,
1854 task_pend_token_t pend_token)
1855 {
1856 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1857 (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_START,
1858 thread_tid(thread), threquested_0(thread),
1859 threquested_1(thread), value, 0);
1860
1861 thread_set_requested_policy_spinlocked(thread, category, flavor, value, value2, pend_token);
1862
1863 thread_policy_update_spinlocked(thread, FALSE, pend_token);
1864
1865 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1866 (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_END,
1867 thread_tid(thread), threquested_0(thread),
1868 threquested_1(thread), tpending(pend_token), 0);
1869 }
1870
1871 /*
1872 * Set the requested state for a specific flavor to a specific value.
1873 */
1874 static void
1875 thread_set_requested_policy_spinlocked(thread_t thread,
1876 int category,
1877 int flavor,
1878 int value,
1879 int value2,
1880 task_pend_token_t pend_token)
1881 {
1882 int tier, passive;
1883
1884 struct thread_requested_policy requested = thread->requested_policy;
1885
1886 switch (flavor) {
1887 /* Category: EXTERNAL and INTERNAL, thread and task */
1888
1889 case TASK_POLICY_DARWIN_BG:
1890 if (category == TASK_POLICY_EXTERNAL) {
1891 requested.thrp_ext_darwinbg = value;
1892 } else {
1893 requested.thrp_int_darwinbg = value;
1894 }
1895 break;
1896
1897 case TASK_POLICY_IOPOL:
1898 proc_iopol_to_tier(value, &tier, &passive);
1899 if (category == TASK_POLICY_EXTERNAL) {
1900 requested.thrp_ext_iotier = tier;
1901 requested.thrp_ext_iopassive = passive;
1902 } else {
1903 requested.thrp_int_iotier = tier;
1904 requested.thrp_int_iopassive = passive;
1905 }
1906 break;
1907
1908 case TASK_POLICY_IO:
1909 if (category == TASK_POLICY_EXTERNAL) {
1910 requested.thrp_ext_iotier = value;
1911 } else {
1912 requested.thrp_int_iotier = value;
1913 }
1914 break;
1915
1916 case TASK_POLICY_PASSIVE_IO:
1917 if (category == TASK_POLICY_EXTERNAL) {
1918 requested.thrp_ext_iopassive = value;
1919 } else {
1920 requested.thrp_int_iopassive = value;
1921 }
1922 break;
1923
1924 /* Category: ATTRIBUTE, thread only */
1925
1926 case TASK_POLICY_PIDBIND_BG:
1927 assert(category == TASK_POLICY_ATTRIBUTE);
1928 requested.thrp_pidbind_bg = value;
1929 break;
1930
1931 case TASK_POLICY_LATENCY_QOS:
1932 assert(category == TASK_POLICY_ATTRIBUTE);
1933 requested.thrp_latency_qos = value;
1934 break;
1935
1936 case TASK_POLICY_THROUGH_QOS:
1937 assert(category == TASK_POLICY_ATTRIBUTE);
1938 requested.thrp_through_qos = value;
1939 break;
1940
1941 case TASK_POLICY_QOS_OVERRIDE:
1942 assert(category == TASK_POLICY_ATTRIBUTE);
1943 requested.thrp_qos_override = value;
1944 pend_token->tpt_update_turnstile = 1;
1945 break;
1946
1947 case TASK_POLICY_QOS_AND_RELPRIO:
1948 assert(category == TASK_POLICY_ATTRIBUTE);
1949 requested.thrp_qos = value;
1950 requested.thrp_qos_relprio = value2;
1951 pend_token->tpt_update_turnstile = 1;
1952 DTRACE_BOOST3(qos_set, uint64_t, thread->thread_id, int, requested.thrp_qos, int, requested.thrp_qos_relprio);
1953 break;
1954
1955 case TASK_POLICY_QOS_WORKQ_OVERRIDE:
1956 assert(category == TASK_POLICY_ATTRIBUTE);
1957 requested.thrp_qos_workq_override = value;
1958 pend_token->tpt_update_turnstile = 1;
1959 break;
1960
1961 case TASK_POLICY_QOS_PROMOTE:
1962 assert(category == TASK_POLICY_ATTRIBUTE);
1963 requested.thrp_qos_promote = value;
1964 break;
1965
1966 case TASK_POLICY_QOS_KEVENT_OVERRIDE:
1967 assert(category == TASK_POLICY_ATTRIBUTE);
1968 requested.thrp_qos_kevent_override = value;
1969 pend_token->tpt_update_turnstile = 1;
1970 break;
1971
1972 case TASK_POLICY_QOS_SERVICER_OVERRIDE:
1973 assert(category == TASK_POLICY_ATTRIBUTE);
1974 requested.thrp_qos_wlsvc_override = value;
1975 pend_token->tpt_update_turnstile = 1;
1976 break;
1977
1978 case TASK_POLICY_TERMINATED:
1979 assert(category == TASK_POLICY_ATTRIBUTE);
1980 requested.thrp_terminated = value;
1981 break;
1982
1983 default:
1984 panic("unknown task policy: %d %d %d", category, flavor, value);
1985 break;
1986 }
1987
1988 thread->requested_policy = requested;
1989 }
1990
1991 /*
1992 * Gets what you set. Effective values may be different.
1993 * Precondition: No locks are held
1994 */
1995 int
1996 proc_get_thread_policy(thread_t thread,
1997 int category,
1998 int flavor)
1999 {
2000 int value = 0;
2001 thread_mtx_lock(thread);
2002 value = proc_get_thread_policy_locked(thread, category, flavor, NULL);
2003 thread_mtx_unlock(thread);
2004 return value;
2005 }
2006
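/*
 * Gets what you set. Effective values may be different.
 * Reads the requested value for 'flavor' under the thread spinlock; for
 * two-value flavors (e.g. TASK_POLICY_QOS_AND_RELPRIO) the second value is
 * returned through value2.
 */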
2007 static int
2008 proc_get_thread_policy_locked(thread_t thread,
2009 int category,
2010 int flavor,
2011 int* value2)
2012 {
2013 int value = 0;
2014
2015 spl_t s = splsched();
2016 thread_lock(thread);
2017
2018 value = thread_get_requested_policy_spinlocked(thread, category, flavor, value2);
2019
2020 thread_unlock(thread);
2021 splx(s);
2022
2023 return value;
2024 }
2025
2026 /*
2027 * Gets what you set. Effective values may be different.
2028 */
2029 static int
2030 thread_get_requested_policy_spinlocked(thread_t thread,
2031 int category,
2032 int flavor,
2033 int* value2)
2034 {
2035 int value = 0;
2036
2037 struct thread_requested_policy requested = thread->requested_policy;
2038
2039 switch (flavor) {
2040 case TASK_POLICY_DARWIN_BG:
2041 if (category == TASK_POLICY_EXTERNAL) {
2042 value = requested.thrp_ext_darwinbg;
2043 } else {
2044 value = requested.thrp_int_darwinbg;
2045 }
2046 break;
2047 case TASK_POLICY_IOPOL:
2048 if (category == TASK_POLICY_EXTERNAL) {
2049 value = proc_tier_to_iopol(requested.thrp_ext_iotier,
2050 requested.thrp_ext_iopassive);
2051 } else {
2052 value = proc_tier_to_iopol(requested.thrp_int_iotier,
2053 requested.thrp_int_iopassive);
2054 }
2055 break;
2056 case TASK_POLICY_IO:
2057 if (category == TASK_POLICY_EXTERNAL) {
2058 value = requested.thrp_ext_iotier;
2059 } else {
2060 value = requested.thrp_int_iotier;
2061 }
2062 break;
2063 case TASK_POLICY_PASSIVE_IO:
2064 if (category == TASK_POLICY_EXTERNAL) {
2065 value = requested.thrp_ext_iopassive;
2066 } else {
2067 value = requested.thrp_int_iopassive;
2068 }
2069 break;
2070 case TASK_POLICY_QOS:
2071 assert(category == TASK_POLICY_ATTRIBUTE);
2072 value = requested.thrp_qos;
2073 break;
2074 case TASK_POLICY_QOS_OVERRIDE:
2075 assert(category == TASK_POLICY_ATTRIBUTE);
2076 value = requested.thrp_qos_override;
2077 break;
2078 case TASK_POLICY_LATENCY_QOS:
2079 assert(category == TASK_POLICY_ATTRIBUTE);
2080 value = requested.thrp_latency_qos;
2081 break;
2082 case TASK_POLICY_THROUGH_QOS:
2083 assert(category == TASK_POLICY_ATTRIBUTE);
2084 value = requested.thrp_through_qos;
2085 break;
2086 case TASK_POLICY_QOS_WORKQ_OVERRIDE:
2087 assert(category == TASK_POLICY_ATTRIBUTE);
2088 value = requested.thrp_qos_workq_override;
2089 break;
2090 case TASK_POLICY_QOS_AND_RELPRIO:
2091 assert(category == TASK_POLICY_ATTRIBUTE);
2092 assert(value2 != NULL);
2093 value = requested.thrp_qos;
2094 *value2 = requested.thrp_qos_relprio;
2095 break;
2096 case TASK_POLICY_QOS_PROMOTE:
2097 assert(category == TASK_POLICY_ATTRIBUTE);
2098 value = requested.thrp_qos_promote;
2099 break;
2100 case TASK_POLICY_QOS_KEVENT_OVERRIDE:
2101 assert(category == TASK_POLICY_ATTRIBUTE);
2102 value = requested.thrp_qos_kevent_override;
2103 break;
2104 case TASK_POLICY_QOS_SERVICER_OVERRIDE:
2105 assert(category == TASK_POLICY_ATTRIBUTE);
2106 value = requested.thrp_qos_wlsvc_override;
2107 break;
2108 case TASK_POLICY_TERMINATED:
2109 assert(category == TASK_POLICY_ATTRIBUTE);
2110 value = requested.thrp_terminated;
2111 break;
2112
2113 default:
2114 panic("unknown policy_flavor %d", flavor);
2115 break;
2116 }
2117
2118 return value;
2119 }
2120
2121 /*
2122 * Gets what is actually in effect, for subsystems which pull policy instead of receive updates.
2123 *
2124 * NOTE: This accessor does not take the task or thread lock.
2125 * Notifications of state updates need to be externally synchronized with state queries.
2126 * This routine *MUST* remain interrupt safe, as it is potentially invoked
2127 * within the context of a timer interrupt.
2128 *
2129 * TODO: I think we can get away with architecting this such that we don't need to look at the task ever.
2130 * Is that a good idea? Maybe it's best to avoid evaluate-all-the-threads updates.
2131 * I don't think that cost is worth not having the right answer.
2132 */
2133 int
2134 proc_get_effective_thread_policy(thread_t thread,
2135 int flavor)
2136 {
2137 int value = 0;
2138
2139 switch (flavor) {
2140 case TASK_POLICY_DARWIN_BG:
2141 /*
2142 * This call is used within the timer layer, as well as for
2143 * prioritizing requests to the graphics system.
2144 * It also informs SFI and originator-bg-state.
2145 * Returns 1 for background mode, 0 for normal mode
2146 */
2147
2148 value = thread->effective_policy.thep_darwinbg ? 1 : 0;
2149 break;
2150 case TASK_POLICY_IO:
2151 /*
2152 * The I/O system calls here to find out what throttling tier to apply to an operation.
2153 * Returns THROTTLE_LEVEL_* values
2154 */
2155 value = thread->effective_policy.thep_io_tier;
2156 if (thread->iotier_override != THROTTLE_LEVEL_NONE) {
2157 value = MIN(value, thread->iotier_override);
2158 }
2159 break;
2160 case TASK_POLICY_PASSIVE_IO:
2161 /*
2162 * The I/O system calls here to find out whether an operation should be passive.
2163 * (i.e. not cause operations with lower throttle tiers to be throttled)
2164 * Returns 1 for passive mode, 0 for normal mode
2165 *
2166 * If an override is causing IO to go into a lower tier, we also set
2167 * the passive bit so that a thread doesn't end up stuck in its own throttle
2168 * window when the override goes away.
2169 */
2170 value = thread->effective_policy.thep_io_passive ? 1 : 0;
2171 if (thread->iotier_override != THROTTLE_LEVEL_NONE &&
2172 thread->iotier_override < thread->effective_policy.thep_io_tier) {
2173 value = 1;
2174 }
2175 break;
2176 case TASK_POLICY_ALL_SOCKETS_BG:
2177 /*
2178 * do_background_socket() calls this to determine whether
2179 * it should change the thread's sockets
2180 * Returns 1 for background mode, 0 for normal mode
2181 * This consults both thread and task so un-DBGing a thread while the task is BG
2182 * doesn't get you out of the network throttle.
2183 */
2184 value = (thread->effective_policy.thep_all_sockets_bg ||
2185 thread->task->effective_policy.tep_all_sockets_bg) ? 1 : 0;
2186 break;
2187 case TASK_POLICY_NEW_SOCKETS_BG:
2188 /*
2189 * socreate() calls this to determine if it should mark a new socket as background
2190 * Returns 1 for background mode, 0 for normal mode
2191 */
2192 value = thread->effective_policy.thep_new_sockets_bg ? 1 : 0;
2193 break;
2194 case TASK_POLICY_LATENCY_QOS:
2195 /*
2196 * timer arming calls into here to find out the timer coalescing level
2197 * Returns a latency QoS tier (0-6)
2198 */
2199 value = thread->effective_policy.thep_latency_qos;
2200 break;
2201 case TASK_POLICY_THROUGH_QOS:
2202 /*
2203 * This value is passed into the urgency callout from the scheduler
2204 * to the performance management subsystem.
2205 *
2206 * Returns a throughput QoS tier (0-6)
2207 */
2208 value = thread->effective_policy.thep_through_qos;
2209 break;
2210 case TASK_POLICY_QOS:
2211 /*
2212 * This is communicated to the performance management layer and SFI.
2213 *
2214 * Returns a QoS policy tier
2215 */
2216 value = thread->effective_policy.thep_qos;
2217 break;
2218 default:
2219 panic("unknown thread policy flavor %d", flavor);
2220 break;
2221 }
2222
2223 return value;
2224 }
2225
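/*
 * Illustrative sketch: how an I/O-issuing subsystem might pull the effective
 * throttle tier and passive bit for a thread. The helper name is hypothetical;
 * the flavors queried are the ones handled above, and, per the NOTE above,
 * no task or thread lock is taken.
 */
static inline void
example_query_io_policy(thread_t thread, int *tier, int *passive)
{
	/* THROTTLE_LEVEL_* value, already folded with any iotier override */
	*tier = proc_get_effective_thread_policy(thread, TASK_POLICY_IO);
	/* 1 if the I/O should not throttle lower tiers, 0 otherwise */
	*passive = proc_get_effective_thread_policy(thread, TASK_POLICY_PASSIVE_IO);
}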
2226
2227 /*
2228 * The (integer_t) casts limit the number of bits we can fit here.
2229 * This interface is deprecated; it appears to have been replaced by the _EXT struct.
2230 */
2231 static void
2232 proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info)
2233 {
2234 uint64_t bits = 0;
2235 struct thread_requested_policy requested = thread->requested_policy;
2236
2237 bits |= (requested.thrp_int_darwinbg ? POLICY_REQ_INT_DARWIN_BG : 0);
2238 bits |= (requested.thrp_ext_darwinbg ? POLICY_REQ_EXT_DARWIN_BG : 0);
2239 bits |= (requested.thrp_int_iotier ? (((uint64_t)requested.thrp_int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0);
2240 bits |= (requested.thrp_ext_iotier ? (((uint64_t)requested.thrp_ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0);
2241 bits |= (requested.thrp_int_iopassive ? POLICY_REQ_INT_PASSIVE_IO : 0);
2242 bits |= (requested.thrp_ext_iopassive ? POLICY_REQ_EXT_PASSIVE_IO : 0);
2243
2244 bits |= (requested.thrp_qos ? (((uint64_t)requested.thrp_qos) << POLICY_REQ_TH_QOS_SHIFT) : 0);
2245 bits |= (requested.thrp_qos_override ? (((uint64_t)requested.thrp_qos_override) << POLICY_REQ_TH_QOS_OVER_SHIFT) : 0);
2246
2247 bits |= (requested.thrp_pidbind_bg ? POLICY_REQ_PIDBIND_BG : 0);
2248
2249 bits |= (requested.thrp_latency_qos ? (((uint64_t)requested.thrp_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0);
2250 bits |= (requested.thrp_through_qos ? (((uint64_t)requested.thrp_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0);
2251
2252 info->requested = (integer_t) bits;
2253 bits = 0;
2254
2255 struct thread_effective_policy effective = thread->effective_policy;
2256
2257 bits |= (effective.thep_darwinbg ? POLICY_EFF_DARWIN_BG : 0);
2258
2259 bits |= (effective.thep_io_tier ? (((uint64_t)effective.thep_io_tier) << POLICY_EFF_IO_TIER_SHIFT) : 0);
2260 bits |= (effective.thep_io_passive ? POLICY_EFF_IO_PASSIVE : 0);
2261 bits |= (effective.thep_all_sockets_bg ? POLICY_EFF_ALL_SOCKETS_BG : 0);
2262 bits |= (effective.thep_new_sockets_bg ? POLICY_EFF_NEW_SOCKETS_BG : 0);
2263
2264 bits |= (effective.thep_qos ? (((uint64_t)effective.thep_qos) << POLICY_EFF_TH_QOS_SHIFT) : 0);
2265
2266 bits |= (effective.thep_latency_qos ? (((uint64_t)effective.thep_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0);
2267 bits |= (effective.thep_through_qos ? (((uint64_t)effective.thep_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0);
2268
2269 info->effective = (integer_t)bits;
2270 bits = 0;
2271
2272 info->pending = 0;
2273 }
2274
2275 /*
2276 * Sneakily trace either the task and thread requested
2277 * or just the thread requested, depending on whether we have enough room.
2278 * We do have room on LP64. On LP32, we have to split it between two uintptr_t's.
2279 *
2280 *                          LP32            LP64
2281 * threquested_0(thread)    thread[0]       thread[0]
2282 * threquested_1(thread)    thread[1]       task[0]
2283 *
2284 */
2285
2286 uintptr_t
2287 threquested_0(thread_t thread)
2288 {
2289 static_assert(sizeof(struct thread_requested_policy) == sizeof(uint64_t), "size invariant violated");
2290
2291 uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
2292
2293 return raw[0];
2294 }
2295
2296 uintptr_t
2297 threquested_1(thread_t thread)
2298 {
2299 #if defined __LP64__
2300 return *(uintptr_t*)&thread->task->requested_policy;
2301 #else
2302 uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
2303 return raw[1];
2304 #endif
2305 }
2306
2307 uintptr_t
2308 theffective_0(thread_t thread)
2309 {
2310 static_assert(sizeof(struct thread_effective_policy) == sizeof(uint64_t), "size invariant violated");
2311
2312 uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
2313 return raw[0];
2314 }
2315
2316 uintptr_t
2317 theffective_1(thread_t thread)
2318 {
2319 #if defined __LP64__
2320 return *(uintptr_t*)&thread->task->effective_policy;
2321 #else
2322 uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
2323 return raw[1];
2324 #endif
2325 }
2326
2327
2328 /*
2329 * Set an override on the thread which is consulted with a
2330 * higher priority than the task/thread policy. This should
2331 * only be set for temporary grants until the thread
2332 * returns to the userspace boundary
2333 *
2334 * We use atomic operations to swap in the override, with
2335 * the assumption that the thread itself can
2336 * read the override and clear it on return to userspace.
2337 *
2338 * No locking is performed, since it is acceptable to see
2339 * a stale override for one loop through throttle_lowpri_io().
2340 * However a thread reference must be held on the thread.
2341 */
2342
2343 void
2344 set_thread_iotier_override(thread_t thread, int policy)
2345 {
2346 int current_override;
2347
2348 /* Let most aggressive I/O policy win until user boundary */
2349 do {
2350 current_override = thread->iotier_override;
2351
2352 if (current_override != THROTTLE_LEVEL_NONE) {
2353 policy = MIN(current_override, policy);
2354 }
2355
2356 if (current_override == policy) {
2357 /* no effective change */
2358 return;
2359 }
2360 } while (!OSCompareAndSwap(current_override, policy, &thread->iotier_override));
2361
2362 /*
2363 * Since the thread may be currently throttled,
2364 * re-evaluate tiers and potentially break out
2365 * of an msleep
2366 */
2367 rethrottle_thread(thread->uthread);
2368 }
2369
2370 /*
2371 * Userspace synchronization routines (like pthread mutexes, pthread reader-writer locks,
2372 * semaphores, dispatch_sync) may result in priority inversions where a higher priority
2373 * (i.e. scheduler priority, I/O tier, QoS tier) is waiting on a resource owned by a lower
2374 * priority thread. In these cases, we attempt to propagate the priority token, as long
2375 * as the subsystem informs us of the relationships between the threads. The userspace
2376 * synchronization subsystem should maintain the information of owner->resource and
2377 * resource->waiters itself.
2378 */
2379
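/*
 * Illustrative sketch of the add/remove pairing described above, assuming the
 * proc_thread_qos_add_override/remove_override prototypes from
 * kern/policy_internal.h are in scope. The helper name and the choice of a
 * pthread-mutex resource type are illustrative only.
 */
static inline void
example_boost_owner_for_resource(task_t task, uint64_t owner_tid,
    user_addr_t resource)
{
	/* A waiter boosts the owner the first time this resource is contended... */
	proc_thread_qos_add_override(task, THREAD_NULL, owner_tid,
	    THREAD_QOS_USER_INTERACTIVE, TRUE /* first_override_for_resource */,
	    resource, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);

	/* ...and the boost is dropped once the owner releases the resource. */
	proc_thread_qos_remove_override(task, THREAD_NULL, owner_tid,
	    resource, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
}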
2380 /*
2381 * This helper canonicalizes the resource/resource_type given the current qos_override_mode
2382 * in effect. Note that wildcards (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD) may need
2383 * to be handled specially in the future, but for now it's fine to slam
2384 * *resource to USER_ADDR_NULL even if it was previously a wildcard.
2385 */
2386 static void
2387 canonicalize_resource_and_type(user_addr_t *resource, int *resource_type)
2388 {
2389 if (qos_override_mode == QOS_OVERRIDE_MODE_OVERHANG_PEAK || qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2390 /* Map all input resource/type to a single one */
2391 *resource = USER_ADDR_NULL;
2392 *resource_type = THREAD_QOS_OVERRIDE_TYPE_UNKNOWN;
2393 } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE) {
2394 /* no transform */
2395 } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE) {
2396 /* Map all mutex overrides to a single one, to avoid memory overhead */
2397 if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX) {
2398 *resource = USER_ADDR_NULL;
2399 }
2400 }
2401 }
2402
2403 /* This helper routine finds an existing override, if any. Locking must be done by the caller. */
2404 static struct thread_qos_override *
2405 find_qos_override(thread_t thread,
2406 user_addr_t resource,
2407 int resource_type)
2408 {
2409 struct thread_qos_override *override;
2410
2411 override = thread->overrides;
2412 while (override) {
2413 if (override->override_resource == resource &&
2414 override->override_resource_type == resource_type) {
2415 return override;
2416 }
2417
2418 override = override->override_next;
2419 }
2420
2421 return NULL;
2422 }
2423
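/*
 * Walk the override list, decrementing (or zeroing, when 'reset' is set) the
 * contended-resource count of every entry matching resource/resource_type
 * (wildcards match all). Entries whose count reaches zero are unlinked onto
 * *free_override_list so the caller can zfree() them once locks are dropped.
 * Locking must be done by the caller.
 */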
2424 static void
2425 find_and_decrement_qos_override(thread_t thread,
2426 user_addr_t resource,
2427 int resource_type,
2428 boolean_t reset,
2429 struct thread_qos_override **free_override_list)
2430 {
2431 struct thread_qos_override *override, *override_prev;
2432
2433 override_prev = NULL;
2434 override = thread->overrides;
2435 while (override) {
2436 struct thread_qos_override *override_next = override->override_next;
2437
2438 if ((THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD == resource || override->override_resource == resource) &&
2439 (THREAD_QOS_OVERRIDE_TYPE_WILDCARD == resource_type || override->override_resource_type == resource_type)) {
2440 if (reset) {
2441 override->override_contended_resource_count = 0;
2442 } else {
2443 override->override_contended_resource_count--;
2444 }
2445
2446 if (override->override_contended_resource_count == 0) {
2447 if (override_prev == NULL) {
2448 thread->overrides = override_next;
2449 } else {
2450 override_prev->override_next = override_next;
2451 }
2452
2453 /* Add to out-param for later zfree */
2454 override->override_next = *free_override_list;
2455 *free_override_list = override;
2456 } else {
2457 override_prev = override;
2458 }
2459
2460 if (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD != resource) {
2461 return;
2462 }
2463 } else {
2464 override_prev = override;
2465 }
2466
2467 override = override_next;
2468 }
2469 }
2470
2471 /* This helper recalculates the current requested override using the policy selected at boot */
2472 static int
2473 calculate_requested_qos_override(thread_t thread)
2474 {
2475 if (qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2476 return THREAD_QOS_UNSPECIFIED;
2477 }
2478
2479 /* iterate over all overrides and calculate MAX */
2480 struct thread_qos_override *override;
2481 int qos_override = THREAD_QOS_UNSPECIFIED;
2482
2483 override = thread->overrides;
2484 while (override) {
2485 qos_override = MAX(qos_override, override->override_qos);
2486 override = override->override_next;
2487 }
2488
2489 return qos_override;
2490 }
2491
2492 /*
2493 * Returns:
2494 * - 0 on success
2495 * - EINVAL if some invalid input was passed
2496 */
2497 static int
2498 proc_thread_qos_add_override_internal(thread_t thread,
2499 int override_qos,
2500 boolean_t first_override_for_resource,
2501 user_addr_t resource,
2502 int resource_type)
2503 {
2504 struct task_pend_token pend_token = {};
2505 int rc = 0;
2506
2507 thread_mtx_lock(thread);
2508
2509 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_START,
2510 thread_tid(thread), override_qos, first_override_for_resource ? 1 : 0, 0, 0);
2511
2512 DTRACE_BOOST5(qos_add_override_pre, uint64_t, thread_tid(thread),
2513 uint64_t, thread->requested_policy.thrp_qos,
2514 uint64_t, thread->effective_policy.thep_qos,
2515 int, override_qos, boolean_t, first_override_for_resource);
2516
2517 struct thread_qos_override *override;
2518 struct thread_qos_override *override_new = NULL;
2519 int new_qos_override, prev_qos_override;
2520 int new_effective_qos;
2521
2522 canonicalize_resource_and_type(&resource, &resource_type);
2523
2524 override = find_qos_override(thread, resource, resource_type);
2525 if (first_override_for_resource && !override) {
2526 /* We need to allocate a new object. Drop the thread mutex and
2527 * recheck afterwards in case someone else added the override
2528 */
2529 thread_mtx_unlock(thread);
2530 override_new = zalloc(thread_qos_override_zone);
2531 thread_mtx_lock(thread);
2532 override = find_qos_override(thread, resource, resource_type);
2533 }
2534 if (first_override_for_resource && override) {
2535 /* Someone else already allocated while the thread mutex was dropped */
2536 override->override_contended_resource_count++;
2537 } else if (!override && override_new) {
2538 override = override_new;
2539 override_new = NULL;
2540 override->override_next = thread->overrides;
2541 /* since first_override_for_resource was TRUE */
2542 override->override_contended_resource_count = 1;
2543 override->override_resource = resource;
2544 override->override_resource_type = resource_type;
2545 override->override_qos = THREAD_QOS_UNSPECIFIED;
2546 thread->overrides = override;
2547 }
2548
2549 if (override) {
2550 if (override->override_qos == THREAD_QOS_UNSPECIFIED) {
2551 override->override_qos = override_qos;
2552 } else {
2553 override->override_qos = MAX(override->override_qos, override_qos);
2554 }
2555 }
2556
2557 /* Determine how to combine the various overrides into a single current
2558 * requested override
2559 */
2560 new_qos_override = calculate_requested_qos_override(thread);
2561
2562 prev_qos_override = proc_get_thread_policy_locked(thread,
2563 TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);
2564
2565 if (new_qos_override != prev_qos_override) {
2566 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
2567 TASK_POLICY_QOS_OVERRIDE,
2568 new_qos_override, 0, &pend_token);
2569 }
2570
2571 new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);
2572
2573 thread_mtx_unlock(thread);
2574
2575 thread_policy_update_complete_unlocked(thread, &pend_token);
2576
2577 if (override_new) {
2578 zfree(thread_qos_override_zone, override_new);
2579 }
2580
2581 DTRACE_BOOST4(qos_add_override_post, int, prev_qos_override,
2582 int, new_qos_override, int, new_effective_qos, int, rc);
2583
2584 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_END,
2585 new_qos_override, resource, resource_type, 0, 0);
2586
2587 return rc;
2588 }
2589
2590 int
2591 proc_thread_qos_add_override(task_t task,
2592 thread_t thread,
2593 uint64_t tid,
2594 int override_qos,
2595 boolean_t first_override_for_resource,
2596 user_addr_t resource,
2597 int resource_type)
2598 {
2599 boolean_t has_thread_reference = FALSE;
2600 int rc = 0;
2601
2602 if (thread == THREAD_NULL) {
2603 thread = task_findtid(task, tid);
2604 /* returns referenced thread */
2605
2606 if (thread == THREAD_NULL) {
2607 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_NONE,
2608 tid, 0, 0xdead, 0, 0);
2609 return ESRCH;
2610 }
2611 has_thread_reference = TRUE;
2612 } else {
2613 assert(thread->task == task);
2614 }
2615 rc = proc_thread_qos_add_override_internal(thread, override_qos,
2616 first_override_for_resource, resource, resource_type);
2617 if (has_thread_reference) {
2618 thread_deallocate(thread);
2619 }
2620
2621 return rc;
2622 }
2623
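/*
 * Remove (or, with 'reset', clear) this thread's override contribution for
 * resource/resource_type, recompute the combined requested override, and
 * apply it if it changed. Override objects released by the removal are
 * zfree()d after all locks have been dropped.
 */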
2624 static void
2625 proc_thread_qos_remove_override_internal(thread_t thread,
2626 user_addr_t resource,
2627 int resource_type,
2628 boolean_t reset)
2629 {
2630 struct task_pend_token pend_token = {};
2631
2632 struct thread_qos_override *deferred_free_override_list = NULL;
2633 int new_qos_override, prev_qos_override, new_effective_qos;
2634
2635 thread_mtx_lock(thread);
2636
2637 canonicalize_resource_and_type(&resource, &resource_type);
2638
2639 find_and_decrement_qos_override(thread, resource, resource_type, reset, &deferred_free_override_list);
2640
2641 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_START,
2642 thread_tid(thread), resource, reset, 0, 0);
2643
2644 DTRACE_BOOST3(qos_remove_override_pre, uint64_t, thread_tid(thread),
2645 uint64_t, thread->requested_policy.thrp_qos,
2646 uint64_t, thread->effective_policy.thep_qos);
2647
2648 /* Determine how to combine the various overrides into a single current requested override */
2649 new_qos_override = calculate_requested_qos_override(thread);
2650
2651 spl_t s = splsched();
2652 thread_lock(thread);
2653
2654 /*
2655 * The override chain and therefore the value of the current override is locked with thread mutex,
2656 * so we can do a get/set without races. However, the rest of thread policy is locked under the spinlock.
2657 * This means you can't change the current override from a spinlock-only setter.
2658 */
2659 prev_qos_override = thread_get_requested_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);
2660
2661 if (new_qos_override != prev_qos_override) {
2662 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, new_qos_override, 0, &pend_token);
2663 }
2664
2665 new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);
2666
2667 thread_unlock(thread);
2668 splx(s);
2669
2670 thread_mtx_unlock(thread);
2671
2672 thread_policy_update_complete_unlocked(thread, &pend_token);
2673
2674 while (deferred_free_override_list) {
2675 struct thread_qos_override *override_next = deferred_free_override_list->override_next;
2676
2677 zfree(thread_qos_override_zone, deferred_free_override_list);
2678 deferred_free_override_list = override_next;
2679 }
2680
2681 DTRACE_BOOST3(qos_remove_override_post, int, prev_qos_override,
2682 int, new_qos_override, int, new_effective_qos);
2683
2684 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_END,
2685 thread_tid(thread), 0, 0, 0, 0);
2686 }
2687
2688 int
2689 proc_thread_qos_remove_override(task_t task,
2690 thread_t thread,
2691 uint64_t tid,
2692 user_addr_t resource,
2693 int resource_type)
2694 {
2695 boolean_t has_thread_reference = FALSE;
2696
2697 if (thread == THREAD_NULL) {
2698 thread = task_findtid(task, tid);
2699 /* returns referenced thread */
2700
2701 if (thread == THREAD_NULL) {
2702 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_NONE,
2703 tid, 0, 0xdead, 0, 0);
2704 return ESRCH;
2705 }
2706 has_thread_reference = TRUE;
2707 } else {
2708 assert(task == thread->task);
2709 }
2710
2711 proc_thread_qos_remove_override_internal(thread, resource, resource_type, FALSE);
2712
2713 if (has_thread_reference) {
2714 thread_deallocate(thread);
2715 }
2716
2717 return 0;
2718 }
2719
2720 /* Deallocate before thread termination */
2721 void
2722 proc_thread_qos_deallocate(thread_t thread)
2723 {
2724 /* This thread must have no more IPC overrides. */
2725 assert(thread->kevent_overrides == 0);
2726 assert(thread->requested_policy.thrp_qos_kevent_override == THREAD_QOS_UNSPECIFIED);
2727 assert(thread->requested_policy.thrp_qos_wlsvc_override == THREAD_QOS_UNSPECIFIED);
2728
2729 /*
2730 * Clear out any lingering override objects.
2731 */
2732 struct thread_qos_override *override;
2733
2734 thread_mtx_lock(thread);
2735 override = thread->overrides;
2736 thread->overrides = NULL;
2737 thread->requested_policy.thrp_qos_override = THREAD_QOS_UNSPECIFIED;
2738 /* We don't need to re-evaluate thread policy here because the thread has already exited */
2739 thread_mtx_unlock(thread);
2740
2741 while (override) {
2742 struct thread_qos_override *override_next = override->override_next;
2743
2744 zfree(thread_qos_override_zone, override);
2745 override = override_next;
2746 }
2747 }
2748
2749 /*
2750 * Set up the primordial thread's QoS
2751 */
2752 void
2753 task_set_main_thread_qos(task_t task, thread_t thread)
2754 {
2755 struct task_pend_token pend_token = {};
2756
2757 assert(thread->task == task);
2758
2759 thread_mtx_lock(thread);
2760
2761 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2762 (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_START,
2763 thread_tid(thread), threquested_0(thread), threquested_1(thread),
2764 thread->requested_policy.thrp_qos, 0);
2765
2766 int primordial_qos = task_compute_main_thread_qos(task);
2767
2768 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
2769 primordial_qos, 0, &pend_token);
2770
2771 thread_mtx_unlock(thread);
2772
2773 thread_policy_update_complete_unlocked(thread, &pend_token);
2774
2775 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2776 (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_END,
2777 thread_tid(thread), threquested_0(thread), threquested_1(thread),
2778 primordial_qos, 0);
2779 }
2780
2781 /*
2782 * KPI for pthread kext
2783 *
2784 * Return a good guess at what the initial manager QoS will be
2785 * Dispatch can override this in userspace if it so chooses
2786 */
2787 int
2788 task_get_default_manager_qos(task_t task)
2789 {
2790 int primordial_qos = task_compute_main_thread_qos(task);
2791
2792 if (primordial_qos == THREAD_QOS_LEGACY) {
2793 primordial_qos = THREAD_QOS_USER_INITIATED;
2794 }
2795
2796 return primordial_qos;
2797 }
2798
2799 /*
2800 * Check if the kernel promotion on thread has changed
2801 * and apply it.
2802 *
2803 * thread locked on entry and exit
2804 */
2805 boolean_t
2806 thread_recompute_kernel_promotion_locked(thread_t thread)
2807 {
2808 boolean_t needs_update = FALSE;
2809 int kern_promotion_schedpri = thread_get_inheritor_turnstile_sched_priority(thread);
2810
2811 /*
2812 * For now just assert that kern_promotion_schedpri <= MAXPRI_PROMOTE.
2813 * TURNSTILE_KERNEL_PROMOTE adds threads on the waitq already capped to MAXPRI_PROMOTE
2814 * and propagates the priority through the chain with the same cap, because as of now it does
2815 * not differentiate on the kernel primitive.
2816 *
2817 * If this assumption changes with the adoption of a kernel primitive that does not
2818 * cap the priority when adding/propagating,
2819 * then here is the place to put the generic cap for all kernel primitives
2820 * (convert the assert to kern_promotion_schedpri = MIN(priority, MAXPRI_PROMOTE)).
2821 */
2822 assert(kern_promotion_schedpri <= MAXPRI_PROMOTE);
2823
2824 if (kern_promotion_schedpri != thread->kern_promotion_schedpri) {
2825 KDBG(MACHDBG_CODE(
2826 DBG_MACH_SCHED, MACH_TURNSTILE_KERNEL_CHANGE) | DBG_FUNC_NONE,
2827 thread_tid(thread),
2828 kern_promotion_schedpri,
2829 thread->kern_promotion_schedpri);
2830
2831 needs_update = TRUE;
2832 thread->kern_promotion_schedpri = kern_promotion_schedpri;
2833 thread_recompute_sched_pri(thread, SETPRI_DEFAULT);
2834 }
2835
2836 return needs_update;
2837 }
2838
2839 /*
2840 * Check if the user promotion on thread has changed
2841 * and apply it.
2842 *
2843 * thread locked on entry, might drop the thread lock
2844 * and reacquire it.
2845 */
2846 boolean_t
2847 thread_recompute_user_promotion_locked(thread_t thread)
2848 {
2849 boolean_t needs_update = FALSE;
2850 struct task_pend_token pend_token = {};
2851 int user_promotion_basepri = MIN(thread_get_inheritor_turnstile_base_priority(thread), MAXPRI_USER);
2852 int old_base_pri = thread->base_pri;
2853 thread_qos_t qos_promotion;
2854
2855 /* Check if user promotion has changed */
2856 if (thread->user_promotion_basepri == user_promotion_basepri) {
2857 return needs_update;
2858 } else {
2859 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2860 (TURNSTILE_CODE(TURNSTILE_PRIORITY_OPERATIONS, (THREAD_USER_PROMOTION_CHANGE))) | DBG_FUNC_NONE,
2861 thread_tid(thread),
2862 user_promotion_basepri,
2863 thread->user_promotion_basepri,
2864 0, 0);
2865 KDBG(MACHDBG_CODE(
2866 DBG_MACH_SCHED, MACH_TURNSTILE_USER_CHANGE) | DBG_FUNC_NONE,
2867 thread_tid(thread),
2868 user_promotion_basepri,
2869 thread->user_promotion_basepri);
2870 }
2871
2872 /* Update the user promotion base pri */
2873 thread->user_promotion_basepri = user_promotion_basepri;
2874 pend_token.tpt_force_recompute_pri = 1;
2875
2876 if (user_promotion_basepri <= MAXPRI_THROTTLE) {
2877 qos_promotion = THREAD_QOS_UNSPECIFIED;
2878 } else {
2879 qos_promotion = thread_user_promotion_qos_for_pri(user_promotion_basepri);
2880 }
2881
2882 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
2883 TASK_POLICY_QOS_PROMOTE, qos_promotion, 0, &pend_token);
2884
2885 if (thread_get_waiting_turnstile(thread) &&
2886 thread->base_pri != old_base_pri) {
2887 needs_update = TRUE;
2888 }
2889
2890 thread_unlock(thread);
2891
2892 thread_policy_update_complete_unlocked(thread, &pend_token);
2893
2894 thread_lock(thread);
2895
2896 return needs_update;
2897 }
2898
2899 /*
2900 * Convert the thread's user promotion base priority to a QoS class, for threads in the QoS world.
2901 * Priorities above the UI QoS base priority map to UI.
2902 */
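/*
 * For example: a promotion base priority at or above the USER_INTERACTIVE
 * base priority maps to THREAD_QOS_USER_INTERACTIVE, while a priority below
 * the BACKGROUND base priority falls through to THREAD_QOS_MAINTENANCE.
 */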
2903 thread_qos_t
2904 thread_user_promotion_qos_for_pri(int priority)
2905 {
2906 int qos;
2907 for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
2908 if (thread_qos_policy_params.qos_pri[qos] <= priority) {
2909 return qos;
2910 }
2911 }
2912 return THREAD_QOS_MAINTENANCE;
2913 }
2914
2915 /*
2916 * Set the thread's QoS Kevent override
2917 * Owned by the Kevent subsystem
2918 *
2919 * May be called with spinlocks held, but not spinlocks
2920 * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
2921 *
2922 * One 'add' must be balanced by one 'drop'.
2923 * Between 'add' and 'drop', the override QoS value may be updated with an 'update'.
2924 * Before the thread is deallocated, there must be 0 remaining overrides.
2925 */
2926 static void
2927 thread_kevent_override(thread_t thread,
2928 uint32_t qos_override,
2929 boolean_t is_new_override)
2930 {
2931 struct task_pend_token pend_token = {};
2932 boolean_t needs_update;
2933
2934 spl_t s = splsched();
2935 thread_lock(thread);
2936
2937 uint32_t old_override = thread->requested_policy.thrp_qos_kevent_override;
2938
2939 assert(qos_override > THREAD_QOS_UNSPECIFIED);
2940 assert(qos_override < THREAD_QOS_LAST);
2941
2942 if (is_new_override) {
2943 if (thread->kevent_overrides++ == 0) {
2944 /* This add is the first override for this thread */
2945 assert(old_override == THREAD_QOS_UNSPECIFIED);
2946 } else {
2947 /* There are already other overrides in effect for this thread */
2948 assert(old_override > THREAD_QOS_UNSPECIFIED);
2949 }
2950 } else {
2951 /* There must be at least one override (the previous add call) in effect */
2952 assert(thread->kevent_overrides > 0);
2953 assert(old_override > THREAD_QOS_UNSPECIFIED);
2954 }
2955
2956 /*
2957 * We can't allow lowering if there are several IPC overrides because
2958 * the caller can't possibly know the whole truth.
2959 */
2960 if (thread->kevent_overrides == 1) {
2961 needs_update = qos_override != old_override;
2962 } else {
2963 needs_update = qos_override > old_override;
2964 }
2965
2966 if (needs_update) {
2967 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
2968 TASK_POLICY_QOS_KEVENT_OVERRIDE,
2969 qos_override, 0, &pend_token);
2970 assert(pend_token.tpt_update_sockets == 0);
2971 }
2972
2973 thread_unlock(thread);
2974 splx(s);
2975
2976 thread_policy_update_complete_unlocked(thread, &pend_token);
2977 }
2978
2979 void
2980 thread_add_kevent_override(thread_t thread, uint32_t qos_override)
2981 {
2982 thread_kevent_override(thread, qos_override, TRUE);
2983 }
2984
2985 void
2986 thread_update_kevent_override(thread_t thread, uint32_t qos_override)
2987 {
2988 thread_kevent_override(thread, qos_override, FALSE);
2989 }
2990
2991 void
2992 thread_drop_kevent_override(thread_t thread)
2993 {
2994 struct task_pend_token pend_token = {};
2995
2996 spl_t s = splsched();
2997 thread_lock(thread);
2998
2999 assert(thread->kevent_overrides > 0);
3000
3001 if (--thread->kevent_overrides == 0) {
3002 /*
3003 * There are no more overrides for this thread, so we should
3004 * clear out the saturated override value
3005 */
3006
3007 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3008 TASK_POLICY_QOS_KEVENT_OVERRIDE, THREAD_QOS_UNSPECIFIED,
3009 0, &pend_token);
3010 }
3011
3012 thread_unlock(thread);
3013 splx(s);
3014
3015 thread_policy_update_complete_unlocked(thread, &pend_token);
3016 }
3017
3018 /*
3019 * Set the thread's QoS Workloop Servicer override
3020 * Owned by the Kevent subsystem
3021 *
3022 * May be called with spinlocks held, but not spinlocks
3023 * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
3024 *
3025 * One 'add' must be balanced by one 'drop'.
3026 * Between 'add' and 'drop', the override QoS value may be updated with an 'update'.
3027 * Before the thread is deallocated, there must be 0 remaining overrides.
3028 */
3029 static void
3030 thread_servicer_override(thread_t thread,
3031 uint32_t qos_override,
3032 boolean_t is_new_override)
3033 {
3034 struct task_pend_token pend_token = {};
3035
3036 spl_t s = splsched();
3037 thread_lock(thread);
3038
3039 if (is_new_override) {
3040 assert(!thread->requested_policy.thrp_qos_wlsvc_override);
3041 } else {
3042 assert(thread->requested_policy.thrp_qos_wlsvc_override);
3043 }
3044
3045 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3046 TASK_POLICY_QOS_SERVICER_OVERRIDE,
3047 qos_override, 0, &pend_token);
3048
3049 thread_unlock(thread);
3050 splx(s);
3051
3052 assert(pend_token.tpt_update_sockets == 0);
3053 thread_policy_update_complete_unlocked(thread, &pend_token);
3054 }
3055
3056 void
3057 thread_add_servicer_override(thread_t thread, uint32_t qos_override)
3058 {
3059 assert(qos_override > THREAD_QOS_UNSPECIFIED);
3060 assert(qos_override < THREAD_QOS_LAST);
3061
3062 thread_servicer_override(thread, qos_override, TRUE);
3063 }
3064
3065 void
3066 thread_update_servicer_override(thread_t thread, uint32_t qos_override)
3067 {
3068 assert(qos_override > THREAD_QOS_UNSPECIFIED);
3069 assert(qos_override < THREAD_QOS_LAST);
3070
3071 thread_servicer_override(thread, qos_override, FALSE);
3072 }
3073
3074 void
3075 thread_drop_servicer_override(thread_t thread)
3076 {
3077 thread_servicer_override(thread, THREAD_QOS_UNSPECIFIED, FALSE);
3078 }
3079
3080
3081 /* Get current requested qos / relpri, may be called from spinlock context */
3082 thread_qos_t
3083 thread_get_requested_qos(thread_t thread, int *relpri)
3084 {
3085 int relprio_value = 0;
3086 thread_qos_t qos;
3087
3088 qos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
3089 TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
3090 if (relpri) {
3091 *relpri = -relprio_value;
3092 }
3093 return qos;
3094 }
3095
3096 /*
3097 * This function will promote the thread priority
3098 * since exec could block other threads calling
3099 * proc_find on the proc. This boost must be removed
3100 * via a call to thread_clear_exec_promotion.
3101 *
3102 * This should be replaced with a generic 'priority inheriting gate' mechanism (24194397)
3103 */
3104 void
3105 thread_set_exec_promotion(thread_t thread)
3106 {
3107 spl_t s = splsched();
3108 thread_lock(thread);
3109
3110 sched_thread_promote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
3111
3112 thread_unlock(thread);
3113 splx(s);
3114 }
3115
3116 /*
3117 * This function will clear the exec thread
3118 * promotion set on the thread by thread_set_exec_promotion.
3119 */
3120 void
3121 thread_clear_exec_promotion(thread_t thread)
3122 {
3123 spl_t s = splsched();
3124 thread_lock(thread);
3125
3126 sched_thread_unpromote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
3127
3128 thread_unlock(thread);
3129 splx(s);
3130 }