apple/xnu xnu-3248.40.184: osfmk/kern/task_policy.c
1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <mach/mach_types.h>
30 #include <mach/task_server.h>
31
32 #include <kern/sched.h>
33 #include <kern/task.h>
34 #include <mach/thread_policy.h>
35 #include <sys/errno.h>
36 #include <sys/resource.h>
37 #include <machine/limits.h>
38 #include <kern/ledger.h>
39 #include <kern/thread_call.h>
40 #include <kern/sfi.h>
41 #include <kern/coalition.h>
42 #if CONFIG_TELEMETRY
43 #include <kern/telemetry.h>
44 #endif
45
46 #if IMPORTANCE_INHERITANCE
47 #include <ipc/ipc_importance.h>
48 #if IMPORTANCE_DEBUG
49 #include <mach/machine/sdt.h>
50 #endif /* IMPORTANCE_DEBUG */
51 #endif /* IMPORTANCE_INHERITANCE */
52
53 #include <sys/kdebug.h>
54
55 /*
56 * Task Policy
57 *
58 * This subsystem manages task and thread IO priority and backgrounding,
59 * as well as importance inheritance, process suppression, task QoS, and apptype.
60 * These properties have a surprising number of complex interactions, so they are
61 * centralized here in one state machine to simplify the implementation of those interactions.
62 *
63 * Architecture:
64 * Threads and tasks have three policy fields: requested, effective, and pending.
65 * Requested represents the wishes of each interface that influences task policy.
66 * Effective represents the distillation of that policy into a set of behaviors.
67 * Pending represents updates that haven't been applied yet.
68 *
69 * Each interface that has an input into the task policy state machine controls a field in requested.
70 * If the interface has a getter, it returns what is in the field in requested, but that is
71 * not necessarily what is actually in effect.
72 *
73 * All kernel subsystems that behave differently based on task policy call into
74 * the get_effective_policy function, which returns the decision of the task policy state machine
75 * for that subsystem by querying only the 'effective' field.
76 *
77 * Policy change operations:
78 * Here are the steps to change a policy on a task or thread:
79 * 1) Lock task
80 * 2) Change requested field for the relevant policy
81 * 3) Run a task policy update, which recalculates effective based on requested,
83 * then takes a diff between the old and new versions of effective and calls the relevant
83 * other subsystems to apply these changes, and updates the pending field.
84 * 4) Unlock task
85 * 5) Run task policy update complete, which looks at the pending field to update
86 * subsystems which cannot be touched while holding the task lock.
87 *
88 * To add a new requested policy, add the field in the requested struct, the flavor in task.h,
89 * the setter and getter in proc_(set|get)_task_policy*, and dump the state in task_requested_bitfield,
90 * then set up the effects of that behavior in task_policy_update*. If the policy manifests
91 * itself as a distinct effective policy, add it to the effective struct and add it to the
92 * proc_get_effective_policy accessor.
93 *
94 * Most policies are set via proc_set_task_policy, but policies that don't fit that interface
95 * roll their own lock/set/update/unlock/complete code inside this file.
96 *
97 *
98 * Suppression policy
99 *
100 * These are a set of behaviors that can be requested for a task. They currently have specific
101 * implied actions when they're enabled, but they may be made customizable in the future.
102 *
103 * When the affected task is boosted, we temporarily disable the suppression behaviors
104 * so that the affected process has a chance to run so it can call the API to permanently
105 * disable the suppression behaviors.
106 *
107 * Locking
108 *
109 * Changing task policy on a task or thread takes the task lock, and not the thread lock.
110 * TODO: Should changing policy on a thread take the thread lock instead?
111 *
112 * Querying the effective policy does not take the task lock, to prevent deadlocks or slowdown in sensitive code.
113 * This means that any notification of state change needs to be externally synchronized.
114 *
115 */
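/*
 * Illustrative sketch (not an actual call site): most writers go through
 * proc_set_task_policy(), which performs the five-step sequence above on
 * the caller's behalf.  Marking a task DARWIN_BG on behalf of an external
 * client would look roughly like:
 *
 *	proc_set_task_policy(task, THREAD_NULL,
 *	                     TASK_POLICY_EXTERNAL, TASK_POLICY_DARWIN_BG, 1);
 *
 * A subsystem that can't use that interface rolls the same steps by hand:
 *
 *	struct task_pend_token pend_token = {};
 *	task_lock(task);
 *	... change the relevant task->requested_policy field (step 2) ...
 *	task_policy_update_locked(task, THREAD_NULL, &pend_token);
 *	task_unlock(task);
 *	task_policy_update_complete_unlocked(task, THREAD_NULL, &pend_token);
 *
 * Readers never take the task lock; they query only the effective policy,
 * e.g. the low-priority CPU behavior that DARWIN_BG implies:
 *
 *	if (proc_get_effective_task_policy(task, TASK_POLICY_LOWPRI_CPU))
 *		... treat the task as throttled ...
 */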
116
117 extern const qos_policy_params_t thread_qos_policy_params;
118
119 /* for task holds without dropping the lock */
120 extern void task_hold_locked(task_t task);
121 extern void task_release_locked(task_t task);
122 extern void task_wait_locked(task_t task, boolean_t until_not_runnable);
123
124 extern void thread_recompute_qos(thread_t thread);
125
126 /* Task policy related helper functions */
127 static void proc_set_task_policy_locked(task_t task, thread_t thread, int category, int flavor, int value);
128 static void proc_set_task_policy2_locked(task_t task, thread_t thread, int category, int flavor, int value1, int value2);
129
130 static void task_policy_update_locked(task_t task, thread_t thread, task_pend_token_t pend_token);
131 static void task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_create, task_pend_token_t pend_token);
132 static void task_policy_update_task_locked(task_t task, boolean_t update_throttle, boolean_t update_bg_throttle, boolean_t update_sfi);
133 static void task_policy_update_thread_locked(thread_t thread, int update_cpu, boolean_t update_throttle, boolean_t update_sfi, boolean_t update_qos);
134
135 #if CONFIG_SCHED_SFI
136 static boolean_t task_policy_update_coalition_focal_tasks(task_t task, int prev_role, int next_role);
137 #endif
138
139 static int proc_get_effective_policy(task_t task, thread_t thread, int policy);
140
141 static void proc_iopol_to_tier(int iopolicy, int *tier, int *passive);
142 static int proc_tier_to_iopol(int tier, int passive);
143
144 static uintptr_t trequested_0(task_t task, thread_t thread);
145 static uintptr_t trequested_1(task_t task, thread_t thread);
146 static uintptr_t teffective_0(task_t task, thread_t thread);
147 static uintptr_t teffective_1(task_t task, thread_t thread);
148 static uint32_t tpending(task_pend_token_t pend_token);
149 static uint64_t task_requested_bitfield(task_t task, thread_t thread);
150 static uint64_t task_effective_bitfield(task_t task, thread_t thread);
151
152 void proc_get_thread_policy(thread_t thread, thread_policy_state_t info);
153
154 /* CPU Limits related helper functions */
155 static int task_get_cpuusage(task_t task, uint8_t *percentagep, uint64_t *intervalp, uint64_t *deadlinep, int *scope);
156 int task_set_cpuusage(task_t task, uint8_t percentage, uint64_t interval, uint64_t deadline, int scope, int entitled);
157 static int task_clear_cpuusage_locked(task_t task, int cpumon_entitled);
158 int task_disable_cpumon(task_t task);
159 static int task_apply_resource_actions(task_t task, int type);
160 void task_action_cpuusage(thread_call_param_t param0, thread_call_param_t param1);
161 void proc_init_cpumon_params(void);
162
163 #ifdef MACH_BSD
164 int proc_pid(void *proc);
165 extern int proc_selfpid(void);
166 extern char * proc_name_address(void *p);
167 extern void rethrottle_thread(void * uthread);
168 extern void proc_apply_task_networkbg(void * bsd_info, thread_t thread);
169 #endif /* MACH_BSD */
170
171 extern zone_t thread_qos_override_zone;
172 static boolean_t _proc_thread_qos_remove_override_internal(task_t task, thread_t thread, uint64_t tid, user_addr_t resource, int resource_type, boolean_t reset);
173
174
175 /* Importance Inheritance related helper functions */
176
177 #if IMPORTANCE_INHERITANCE
178
179 static void task_add_importance_watchport(task_t task, mach_port_t port, int *boostp);
180 static void task_importance_update_live_donor(task_t target_task);
181
182 #endif /* IMPORTANCE_INHERITANCE */
183
184 #if IMPORTANCE_DEBUG
185 #define __impdebug_only
186 #else
187 #define __impdebug_only __unused
188 #endif
189
190 #if IMPORTANCE_INHERITANCE
191 #define __imp_only
192 #else
193 #define __imp_only __unused
194 #endif
195
196 #define TASK_LOCKED 1
197 #define TASK_UNLOCKED 0
198
199 #define DO_LOWPRI_CPU 1
200 #define UNDO_LOWPRI_CPU 2
201
202 /* Macros for making tracing simpler */
203
204 #define tpriority(task, thread) ((uintptr_t)(thread == THREAD_NULL ? (task->priority) : (thread->base_pri)))
205 #define tisthread(thread) (thread == THREAD_NULL ? TASK_POLICY_TASK : TASK_POLICY_THREAD)
206 #define targetid(task, thread) ((uintptr_t)(thread == THREAD_NULL ? (task_pid(task)) : (thread->thread_id)))
207
208 /*
209 * Default parameters for certain policies
210 */
211
212 int proc_standard_daemon_tier = THROTTLE_LEVEL_TIER1;
213 int proc_suppressed_disk_tier = THROTTLE_LEVEL_TIER1;
214 int proc_tal_disk_tier = THROTTLE_LEVEL_TIER1;
215
216 int proc_graphics_timer_qos = (LATENCY_QOS_TIER_0 & 0xFF);
217
218 const int proc_default_bg_iotier = THROTTLE_LEVEL_TIER2;
219
220 /* Latency/throughput QoS fields remain zeroed, i.e. TIER_UNSPECIFIED at creation */
221 const struct task_requested_policy default_task_requested_policy = {
222 .bg_iotier = proc_default_bg_iotier
223 };
224 const struct task_effective_policy default_task_effective_policy = {};
225 const struct task_pended_policy default_task_pended_policy = {};
226
227 /*
228 * Default parameters for CPU usage monitor.
229 *
230 * Default setting is 50% over 3 minutes.
231 */
232 #define DEFAULT_CPUMON_PERCENTAGE 50
233 #define DEFAULT_CPUMON_INTERVAL (3 * 60)
234
235 uint8_t proc_max_cpumon_percentage;
236 uint64_t proc_max_cpumon_interval;
237
238 kern_return_t
239 qos_latency_policy_validate(task_latency_qos_t ltier) {
240 if ((ltier != LATENCY_QOS_TIER_UNSPECIFIED) &&
241 ((ltier > LATENCY_QOS_TIER_5) || (ltier < LATENCY_QOS_TIER_0)))
242 return KERN_INVALID_ARGUMENT;
243
244 return KERN_SUCCESS;
245 }
246
247 kern_return_t
248 qos_throughput_policy_validate(task_throughput_qos_t ttier) {
249 if ((ttier != THROUGHPUT_QOS_TIER_UNSPECIFIED) &&
250 ((ttier > THROUGHPUT_QOS_TIER_5) || (ttier < THROUGHPUT_QOS_TIER_0)))
251 return KERN_INVALID_ARGUMENT;
252
253 return KERN_SUCCESS;
254 }
255
256 static kern_return_t
257 task_qos_policy_validate(task_qos_policy_t qosinfo, mach_msg_type_number_t count) {
258 if (count < TASK_QOS_POLICY_COUNT)
259 return KERN_INVALID_ARGUMENT;
260
261 task_latency_qos_t ltier = qosinfo->task_latency_qos_tier;
262 task_throughput_qos_t ttier = qosinfo->task_throughput_qos_tier;
263
264 kern_return_t kr = qos_latency_policy_validate(ltier);
265
266 if (kr != KERN_SUCCESS)
267 return kr;
268
269 kr = qos_throughput_policy_validate(ttier);
270
271 return kr;
272 }
273
274 uint32_t
275 qos_extract(uint32_t qv) {
276 return (qv & 0xFF);
277 }
278
279 uint32_t
280 qos_latency_policy_package(uint32_t qv) {
281 return (qv == LATENCY_QOS_TIER_UNSPECIFIED) ? LATENCY_QOS_TIER_UNSPECIFIED : ((0xFF << 16) | qv);
282 }
283
284 uint32_t
285 qos_throughput_policy_package(uint32_t qv) {
286 return (qv == THROUGHPUT_QOS_TIER_UNSPECIFIED) ? THROUGHPUT_QOS_TIER_UNSPECIFIED : ((0xFE << 16) | qv);
287 }
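/*
 * Worked example (assuming the tier encodings from mach/thread_policy.h):
 * the user-visible tier constants carry a type tag in the upper bytes
 * (0xFF for latency, 0xFE for throughput) plus a small tier index in the
 * low byte.  qos_extract() keeps only the index for compact storage in the
 * policy bitfields, and the package routines restore the tag, so for any
 * specified tier
 *
 *	qos_latency_policy_package(qos_extract(LATENCY_QOS_TIER_3)) == LATENCY_QOS_TIER_3
 *
 * while the UNSPECIFIED value (0) round-trips unchanged.
 */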
288
289 /* TEMPORARY boot-arg controlling task_policy suppression (App Nap) */
290 static boolean_t task_policy_suppression_disable = FALSE;
291
292 kern_return_t
293 task_policy_set(
294 task_t task,
295 task_policy_flavor_t flavor,
296 task_policy_t policy_info,
297 mach_msg_type_number_t count)
298 {
299 kern_return_t result = KERN_SUCCESS;
300
301 if (task == TASK_NULL || task == kernel_task)
302 return (KERN_INVALID_ARGUMENT);
303
304 switch (flavor) {
305
306 case TASK_CATEGORY_POLICY: {
307 task_category_policy_t info = (task_category_policy_t)policy_info;
308
309 if (count < TASK_CATEGORY_POLICY_COUNT)
310 return (KERN_INVALID_ARGUMENT);
311
312
313 switch(info->role) {
314 case TASK_FOREGROUND_APPLICATION:
315 case TASK_BACKGROUND_APPLICATION:
316 case TASK_DEFAULT_APPLICATION:
317 proc_set_task_policy(task, THREAD_NULL,
318 TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE,
319 info->role);
320 break;
321
322 case TASK_CONTROL_APPLICATION:
323 if (task != current_task() || task->sec_token.val[0] != 0)
324 result = KERN_INVALID_ARGUMENT;
325 else
326 proc_set_task_policy(task, THREAD_NULL,
327 TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE,
328 info->role);
329 break;
330
331 case TASK_GRAPHICS_SERVER:
332 /* TODO: Restrict this role to FCFS <rdar://problem/12552788> */
333 if (task != current_task() || task->sec_token.val[0] != 0)
334 result = KERN_INVALID_ARGUMENT;
335 else
336 proc_set_task_policy(task, THREAD_NULL,
337 TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE,
338 info->role);
339 break;
340 default:
341 result = KERN_INVALID_ARGUMENT;
342 break;
343 } /* switch (info->role) */
344
345 break;
346 }
347
348 /* Desired energy-efficiency/performance "quality-of-service" */
349 case TASK_BASE_QOS_POLICY:
350 case TASK_OVERRIDE_QOS_POLICY:
351 {
352 task_qos_policy_t qosinfo = (task_qos_policy_t)policy_info;
353 kern_return_t kr = task_qos_policy_validate(qosinfo, count);
354
355 if (kr != KERN_SUCCESS)
356 return kr;
357
358
359 uint32_t lqos = qos_extract(qosinfo->task_latency_qos_tier);
360 uint32_t tqos = qos_extract(qosinfo->task_throughput_qos_tier);
361
362 proc_set_task_policy2(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE,
363 flavor == TASK_BASE_QOS_POLICY ? TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS : TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS,
364 lqos, tqos);
365 }
366 break;
367
368 case TASK_BASE_LATENCY_QOS_POLICY:
369 {
370 task_qos_policy_t qosinfo = (task_qos_policy_t)policy_info;
371 kern_return_t kr = task_qos_policy_validate(qosinfo, count);
372
373 if (kr != KERN_SUCCESS)
374 return kr;
375
376 uint32_t lqos = qos_extract(qosinfo->task_latency_qos_tier);
377
378 proc_set_task_policy(task, NULL, TASK_POLICY_ATTRIBUTE, TASK_BASE_LATENCY_QOS_POLICY, lqos);
379 }
380 break;
381
382 case TASK_BASE_THROUGHPUT_QOS_POLICY:
383 {
384 task_qos_policy_t qosinfo = (task_qos_policy_t)policy_info;
385 kern_return_t kr = task_qos_policy_validate(qosinfo, count);
386
387 if (kr != KERN_SUCCESS)
388 return kr;
389
390 uint32_t tqos = qos_extract(qosinfo->task_throughput_qos_tier);
391
392 proc_set_task_policy(task, NULL, TASK_POLICY_ATTRIBUTE, TASK_BASE_THROUGHPUT_QOS_POLICY, tqos);
393 }
394 break;
395
396 case TASK_SUPPRESSION_POLICY:
397 {
398
399 task_suppression_policy_t info = (task_suppression_policy_t)policy_info;
400
401 if (count < TASK_SUPPRESSION_POLICY_COUNT)
402 return (KERN_INVALID_ARGUMENT);
403
404 struct task_qos_policy qosinfo;
405
406 qosinfo.task_latency_qos_tier = info->timer_throttle;
407 qosinfo.task_throughput_qos_tier = info->throughput_qos;
408
409 kern_return_t kr = task_qos_policy_validate(&qosinfo, TASK_QOS_POLICY_COUNT);
410
411 if (kr != KERN_SUCCESS)
412 return kr;
413
414 /* TEMPORARY disablement of task suppression */
415 if (task_policy_suppression_disable && info->active)
416 return KERN_SUCCESS;
417
418 struct task_pend_token pend_token = {};
419
420 task_lock(task);
421
422 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
423 (IMPORTANCE_CODE(IMP_TASK_SUPPRESSION, info->active)) | DBG_FUNC_START,
424 proc_selfpid(), task_pid(task), trequested_0(task, THREAD_NULL),
425 trequested_1(task, THREAD_NULL), 0);
426
427 task->requested_policy.t_sup_active = (info->active) ? 1 : 0;
428 task->requested_policy.t_sup_lowpri_cpu = (info->lowpri_cpu) ? 1 : 0;
429 task->requested_policy.t_sup_timer = qos_extract(info->timer_throttle);
430 task->requested_policy.t_sup_disk = (info->disk_throttle) ? 1 : 0;
431 task->requested_policy.t_sup_cpu_limit = (info->cpu_limit) ? 1 : 0;
432 task->requested_policy.t_sup_suspend = (info->suspend) ? 1 : 0;
433 task->requested_policy.t_sup_throughput = qos_extract(info->throughput_qos);
434 task->requested_policy.t_sup_cpu = (info->suppressed_cpu) ? 1 : 0;
435 task->requested_policy.t_sup_bg_sockets = (info->background_sockets) ? 1 : 0;
436
437 task_policy_update_locked(task, THREAD_NULL, &pend_token);
438
439 task_unlock(task);
440
441 task_policy_update_complete_unlocked(task, THREAD_NULL, &pend_token);
442
443 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
444 (IMPORTANCE_CODE(IMP_TASK_SUPPRESSION, info->active)) | DBG_FUNC_END,
445 proc_selfpid(), task_pid(task), trequested_0(task, THREAD_NULL),
446 trequested_1(task, THREAD_NULL), 0);
447
448 break;
449
450 }
451
452 default:
453 result = KERN_INVALID_ARGUMENT;
454 break;
455 }
456
457 return (result);
458 }
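/*
 * Sketch of a typical caller (the MIG-generated user-space interface, not
 * code from this file; names assume the definitions in mach/task_policy.h):
 * a runtime marking itself as the foreground application might issue
 *
 *	task_category_policy_data_t category = { .role = TASK_FOREGROUND_APPLICATION };
 *	kern_return_t kr = task_policy_set(mach_task_self(), TASK_CATEGORY_POLICY,
 *	                                   (task_policy_t)&category,
 *	                                   TASK_CATEGORY_POLICY_COUNT);
 *
 * which arrives at the TASK_CATEGORY_POLICY case above and is recorded as
 * the TASK_POLICY_ROLE attribute of the requested policy.
 */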
459
460 /* Sets BSD 'nice' value on the task */
461 kern_return_t
462 task_importance(
463 task_t task,
464 integer_t importance)
465 {
466 if (task == TASK_NULL || task == kernel_task)
467 return (KERN_INVALID_ARGUMENT);
468
469 task_lock(task);
470
471 if (!task->active) {
472 task_unlock(task);
473
474 return (KERN_TERMINATED);
475 }
476
477 if (proc_get_effective_task_policy(task, TASK_POLICY_ROLE) >= TASK_CONTROL_APPLICATION) {
478 task_unlock(task);
479
480 return (KERN_INVALID_ARGUMENT);
481 }
482
483 task->importance = importance;
484
485 /* TODO: tracepoint? */
486
487 /* Redrive only the task priority calculation */
488 task_policy_update_task_locked(task, FALSE, FALSE, FALSE);
489
490 task_unlock(task);
491
492 return (KERN_SUCCESS);
493 }
494
495 kern_return_t
496 task_policy_get(
497 task_t task,
498 task_policy_flavor_t flavor,
499 task_policy_t policy_info,
500 mach_msg_type_number_t *count,
501 boolean_t *get_default)
502 {
503 if (task == TASK_NULL || task == kernel_task)
504 return (KERN_INVALID_ARGUMENT);
505
506 switch (flavor) {
507
508 case TASK_CATEGORY_POLICY:
509 {
510 task_category_policy_t info = (task_category_policy_t)policy_info;
511
512 if (*count < TASK_CATEGORY_POLICY_COUNT)
513 return (KERN_INVALID_ARGUMENT);
514
515 if (*get_default)
516 info->role = TASK_UNSPECIFIED;
517 else
518 info->role = proc_get_task_policy(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE);
519 break;
520 }
521
522 case TASK_BASE_QOS_POLICY: /* FALLTHRU */
523 case TASK_OVERRIDE_QOS_POLICY:
524 {
525 task_qos_policy_t info = (task_qos_policy_t)policy_info;
526
527 if (*count < TASK_QOS_POLICY_COUNT)
528 return (KERN_INVALID_ARGUMENT);
529
530 if (*get_default) {
531 info->task_latency_qos_tier = LATENCY_QOS_TIER_UNSPECIFIED;
532 info->task_throughput_qos_tier = THROUGHPUT_QOS_TIER_UNSPECIFIED;
533 } else if (flavor == TASK_BASE_QOS_POLICY) {
534 int value1, value2;
535
536 proc_get_task_policy2(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE, TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS, &value1, &value2);
537
538 info->task_latency_qos_tier = qos_latency_policy_package(value1);
539 info->task_throughput_qos_tier = qos_throughput_policy_package(value2);
540
541 } else if (flavor == TASK_OVERRIDE_QOS_POLICY) {
542 int value1, value2;
543
544 proc_get_task_policy2(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE, TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS, &value1, &value2);
545
546 info->task_latency_qos_tier = qos_latency_policy_package(value1);
547 info->task_throughput_qos_tier = qos_throughput_policy_package(value2);
548 }
549
550 break;
551 }
552
553 case TASK_POLICY_STATE:
554 {
555 task_policy_state_t info = (task_policy_state_t)policy_info;
556
557 if (*count < TASK_POLICY_STATE_COUNT)
558 return (KERN_INVALID_ARGUMENT);
559
560 /* Only root can get this info */
561 if (current_task()->sec_token.val[0] != 0)
562 return KERN_PROTECTION_FAILURE;
563
564 if (*get_default) {
565 info->requested = 0;
566 info->effective = 0;
567 info->pending = 0;
568 info->imp_assertcnt = 0;
569 info->imp_externcnt = 0;
570 info->flags = 0;
571 info->imp_transitions = 0;
572 } else {
573 task_lock(task);
574
575 info->requested = task_requested_bitfield(task, THREAD_NULL);
576 info->effective = task_effective_bitfield(task, THREAD_NULL);
577 info->pending = 0;
578
579 info->flags = 0;
580 if (task->task_imp_base != NULL) {
581 info->imp_assertcnt = task->task_imp_base->iit_assertcnt;
582 info->imp_externcnt = IIT_EXTERN(task->task_imp_base);
583 info->flags |= (task_is_marked_importance_receiver(task) ? TASK_IMP_RECEIVER : 0);
584 info->flags |= (task_is_marked_importance_denap_receiver(task) ? TASK_DENAP_RECEIVER : 0);
585 info->flags |= (task_is_marked_importance_donor(task) ? TASK_IMP_DONOR : 0);
586 info->flags |= (task_is_marked_live_importance_donor(task) ? TASK_IMP_LIVE_DONOR : 0);
587 info->imp_transitions = task->task_imp_base->iit_transitions;
588 } else {
589 info->imp_assertcnt = 0;
590 info->imp_externcnt = 0;
591 info->imp_transitions = 0;
592 }
593 task_unlock(task);
594 }
595
596 info->reserved[0] = 0;
597 info->reserved[1] = 0;
598
599 break;
600 }
601
602 case TASK_SUPPRESSION_POLICY:
603 {
604 task_suppression_policy_t info = (task_suppression_policy_t)policy_info;
605
606 if (*count < TASK_SUPPRESSION_POLICY_COUNT)
607 return (KERN_INVALID_ARGUMENT);
608
609 task_lock(task);
610
611 if (*get_default) {
612 info->active = 0;
613 info->lowpri_cpu = 0;
614 info->timer_throttle = LATENCY_QOS_TIER_UNSPECIFIED;
615 info->disk_throttle = 0;
616 info->cpu_limit = 0;
617 info->suspend = 0;
618 info->throughput_qos = 0;
619 info->suppressed_cpu = 0;
620 } else {
621 info->active = task->requested_policy.t_sup_active;
622 info->lowpri_cpu = task->requested_policy.t_sup_lowpri_cpu;
623 info->timer_throttle = qos_latency_policy_package(task->requested_policy.t_sup_timer);
624 info->disk_throttle = task->requested_policy.t_sup_disk;
625 info->cpu_limit = task->requested_policy.t_sup_cpu_limit;
626 info->suspend = task->requested_policy.t_sup_suspend;
627 info->throughput_qos = qos_throughput_policy_package(task->requested_policy.t_sup_throughput);
628 info->suppressed_cpu = task->requested_policy.t_sup_cpu;
629 info->background_sockets = task->requested_policy.t_sup_bg_sockets;
630 }
631
632 task_unlock(task);
633 break;
634 }
635
636 default:
637 return (KERN_INVALID_ARGUMENT);
638 }
639
640 return (KERN_SUCCESS);
641 }
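/*
 * Sketch of the corresponding read path (user-space interface, not code
 * from this file): callers ask either for the current setting or, by
 * pre-setting get_default, for the flavor's default values:
 *
 *	task_category_policy_data_t category;
 *	mach_msg_type_number_t count = TASK_CATEGORY_POLICY_COUNT;
 *	boolean_t get_default = FALSE;
 *	kern_return_t kr = task_policy_get(mach_task_self(), TASK_CATEGORY_POLICY,
 *	                                   (task_policy_t)&category, &count, &get_default);
 *
 * Note that this reports the requested policy, which is not necessarily
 * what is currently in effect (see the architecture comment at the top of
 * this file).
 */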
642
643 /*
644 * Called at task creation
645 * We calculate the correct effective but don't apply it to anything yet.
646 * The threads, etc will inherit from the task as they get created.
647 */
648 void
649 task_policy_create(task_t task, int parent_boosted)
650 {
651 if (task->requested_policy.t_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE) {
652 if (parent_boosted) {
653 task->requested_policy.t_apptype = TASK_APPTYPE_DAEMON_INTERACTIVE;
654 task_importance_mark_donor(task, TRUE);
655 } else {
656 task->requested_policy.t_apptype = TASK_APPTYPE_DAEMON_BACKGROUND;
657 task_importance_mark_receiver(task, FALSE);
658 }
659 }
660
661 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
662 (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_TASK))) | DBG_FUNC_START,
663 task_pid(task), teffective_0(task, THREAD_NULL),
664 teffective_1(task, THREAD_NULL), tpriority(task, THREAD_NULL), 0);
665
666 task_policy_update_internal_locked(task, THREAD_NULL, TRUE, NULL);
667
668 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
669 (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_TASK))) | DBG_FUNC_END,
670 task_pid(task), teffective_0(task, THREAD_NULL),
671 teffective_1(task, THREAD_NULL), tpriority(task, THREAD_NULL), 0);
672
673 task_importance_update_live_donor(task);
674 task_policy_update_task_locked(task, FALSE, FALSE, FALSE);
675 }
676
677 void
678 thread_policy_create(thread_t thread)
679 {
680 task_t task = thread->task;
681
682 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
683 (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_START,
684 targetid(task, thread), teffective_0(task, thread),
685 teffective_1(task, thread), tpriority(task, thread), 0);
686
687 task_policy_update_internal_locked(task, thread, TRUE, NULL);
688
689 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
690 (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_END,
691 targetid(task, thread), teffective_0(task, thread),
692 teffective_1(task, thread), tpriority(task, thread), 0);
693 }
694
695 static void
696 task_policy_update_locked(task_t task, thread_t thread, task_pend_token_t pend_token)
697 {
698 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
699 (IMPORTANCE_CODE(IMP_UPDATE, tisthread(thread)) | DBG_FUNC_START),
700 targetid(task, thread), teffective_0(task, thread),
701 teffective_1(task, thread), tpriority(task, thread), 0);
702
703 task_policy_update_internal_locked(task, thread, FALSE, pend_token);
704
705 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
706 (IMPORTANCE_CODE(IMP_UPDATE, tisthread(thread))) | DBG_FUNC_END,
707 targetid(task, thread), teffective_0(task, thread),
708 teffective_1(task, thread), tpriority(task, thread), 0);
709 }
710
711 /*
712 * One state update function TO RULE THEM ALL
713 *
714 * This function updates the task or thread effective policy fields
715 * and pushes the results to the relevant subsystems.
716 *
717 * Must call update_complete after unlocking the task,
718 * as some subsystems cannot be updated while holding the task lock.
719 *
720 * Called with task locked, not thread
721 */
722
723 static void
724 task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_create, task_pend_token_t pend_token)
725 {
726 boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE;
727
728 /*
729 * Step 1:
730 * Gather requested policy
731 */
732
733 struct task_requested_policy requested =
734 (on_task) ? task->requested_policy : thread->requested_policy;
735
736
737 /*
738 * Step 2:
739 * Calculate new effective policies from requested policy and task state
740 * Rules:
741 * If in an 'on_task' block, must only look at and set fields starting with t_
742 * If operating on a task, don't touch anything starting with th_
743 * If operating on a thread, don't touch anything starting with t_
744 * Don't change requested, it won't take effect
745 */
746
747 struct task_effective_policy next = {};
748 struct task_effective_policy task_effective;
749
750 /* Calculate QoS policies */
751
752 if (on_task) {
753 /* Update task role */
754 next.t_role = requested.t_role;
755
756 /* Set task qos clamp and ceiling */
757 next.t_qos_clamp = requested.t_qos_clamp;
758
759 if (requested.t_apptype == TASK_APPTYPE_APP_DEFAULT ||
760 requested.t_apptype == TASK_APPTYPE_APP_TAL) {
761
762 switch (next.t_role) {
763 case TASK_FOREGROUND_APPLICATION:
764 /* Foreground apps get urgent scheduler priority */
765 next.qos_ui_is_urgent = 1;
766 next.t_qos_ceiling = THREAD_QOS_UNSPECIFIED;
767 break;
768
769 case TASK_BACKGROUND_APPLICATION:
770 /* This is really 'non-focal but on-screen' */
771 next.t_qos_ceiling = THREAD_QOS_UNSPECIFIED;
772 break;
773
774 case TASK_DEFAULT_APPLICATION:
775 /* This is 'may render UI but we don't know if it's focal/nonfocal' */
776 next.t_qos_ceiling = THREAD_QOS_UNSPECIFIED;
777 break;
778
779 case TASK_NONUI_APPLICATION:
780 /* i.e. 'off-screen' */
781 next.t_qos_ceiling = THREAD_QOS_LEGACY;
782 break;
783
784 case TASK_CONTROL_APPLICATION:
785 case TASK_GRAPHICS_SERVER:
786 next.qos_ui_is_urgent = 1;
787 next.t_qos_ceiling = THREAD_QOS_UNSPECIFIED;
788 break;
789
790 case TASK_THROTTLE_APPLICATION:
791 /* i.e. 'TAL launch' */
792 next.t_qos_ceiling = THREAD_QOS_UTILITY;
793 break;
794
795 case TASK_UNSPECIFIED:
796 default:
797 /* Apps that don't have an application role get
798 * USER_INTERACTIVE and USER_INITIATED squashed to LEGACY */
799 next.t_qos_ceiling = THREAD_QOS_LEGACY;
800 break;
801 }
802 } else {
803 /* Daemons get USER_INTERACTIVE squashed to USER_INITIATED */
804 next.t_qos_ceiling = THREAD_QOS_USER_INITIATED;
805 }
806 } else {
807 /*
808 * Set thread qos tier
809 * Note that an override only overrides the QoS field, not other policy settings.
810 * A thread must already be participating in QoS for override to take effect
811 */
812
813 /* Snapshot the task's effective policy */
814 task_effective = task->effective_policy;
815
816 next.qos_ui_is_urgent = task_effective.qos_ui_is_urgent;
817
818 if ((requested.thrp_qos_override != THREAD_QOS_UNSPECIFIED) && (requested.thrp_qos != THREAD_QOS_UNSPECIFIED))
819 next.thep_qos = MAX(requested.thrp_qos_override, requested.thrp_qos);
820 else
821 next.thep_qos = requested.thrp_qos;
822
823 /* A task clamp will result in an effective QoS even when requested is UNSPECIFIED */
824 if (task_effective.t_qos_clamp != THREAD_QOS_UNSPECIFIED) {
825 if (next.thep_qos != THREAD_QOS_UNSPECIFIED)
826 next.thep_qos = MIN(task_effective.t_qos_clamp, next.thep_qos);
827 else
828 next.thep_qos = task_effective.t_qos_clamp;
829 }
830
831 /* The ceiling only applies to threads that are in the QoS world */
832 if (task_effective.t_qos_ceiling != THREAD_QOS_UNSPECIFIED &&
833 next.thep_qos != THREAD_QOS_UNSPECIFIED) {
834 next.thep_qos = MIN(task_effective.t_qos_ceiling, next.thep_qos);
835 }
836
837 /*
838 * The QoS relative priority is only applicable when the original programmer's
839 * intended (requested) QoS is in effect. When the QoS is clamped (e.g.
840 * USER_INITIATED-13REL clamped to UTILITY), the relative priority is not honored,
841 * since otherwise it would be lower than unclamped threads. Similarly, in the
842 * presence of boosting, the programmer doesn't know what other actors
843 * are boosting the thread.
844 */
845 if ((requested.thrp_qos != THREAD_QOS_UNSPECIFIED) &&
846 (requested.thrp_qos == next.thep_qos) &&
847 (requested.thrp_qos_override == THREAD_QOS_UNSPECIFIED)) {
848 next.thep_qos_relprio = requested.thrp_qos_relprio;
849 } else {
850 next.thep_qos_relprio = 0;
851 }
852 }
853
854 /* Calculate DARWIN_BG */
855 boolean_t wants_darwinbg = FALSE;
856 boolean_t wants_all_sockets_bg = FALSE; /* Do I want my existing sockets to be bg */
857 boolean_t wants_watchersbg = FALSE; /* Do I want my pidbound threads to be bg */
858
859 /*
860 * If DARWIN_BG has been requested at either level, it's engaged.
861 * Only true DARWIN_BG changes cause watchers to transition.
862 *
863 * Backgrounding due to apptype engages DARWIN_BG, but does not transition watchers.
864 */
865 if (requested.int_darwinbg || requested.ext_darwinbg)
866 wants_watchersbg = wants_all_sockets_bg = wants_darwinbg = TRUE;
867
868 if (on_task) {
869 /* Background TAL apps are throttled when TAL is enabled */
870 if (requested.t_apptype == TASK_APPTYPE_APP_TAL &&
871 requested.t_role == TASK_BACKGROUND_APPLICATION &&
872 requested.t_tal_enabled == 1) {
873 next.t_tal_engaged = 1;
874 }
875
876 if ((requested.t_apptype == TASK_APPTYPE_APP_DEFAULT ||
877 requested.t_apptype == TASK_APPTYPE_APP_TAL) &&
878 requested.t_role == TASK_THROTTLE_APPLICATION) {
879 next.t_tal_engaged = 1;
880 }
881
882 /* Adaptive daemons are DARWIN_BG unless boosted, and don't get network throttled. */
883 if (requested.t_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE &&
884 requested.t_boosted == 0)
885 wants_darwinbg = TRUE;
886
887 /* Background daemons are always DARWIN_BG, no exceptions, and don't get network throttled. */
888 if (requested.t_apptype == TASK_APPTYPE_DAEMON_BACKGROUND)
889 wants_darwinbg = TRUE;
890
891 if (next.t_qos_clamp == THREAD_QOS_BACKGROUND || next.t_qos_clamp == THREAD_QOS_MAINTENANCE)
892 wants_darwinbg = TRUE;
893 } else {
894 if (requested.th_pidbind_bg)
895 wants_all_sockets_bg = wants_darwinbg = TRUE;
896
897 if (requested.th_workq_bg)
898 wants_darwinbg = TRUE;
899
900 if (next.thep_qos == THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_MAINTENANCE)
901 wants_darwinbg = TRUE;
902 }
903
904 /* Calculate side effects of DARWIN_BG */
905
906 if (wants_darwinbg) {
907 next.darwinbg = 1;
908 /* darwinbg threads/tasks always create bg sockets, but we don't always loop over all sockets */
909 next.new_sockets_bg = 1;
910 next.lowpri_cpu = 1;
911 }
912
913 if (wants_all_sockets_bg)
914 next.all_sockets_bg = 1;
915
916 if (on_task && wants_watchersbg)
917 next.t_watchers_bg = 1;
918
919 /* darwinbg on either task or thread implies background QOS (or lower) */
920 if (!on_task &&
921 (wants_darwinbg || task_effective.darwinbg) &&
922 (next.thep_qos > THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_UNSPECIFIED)){
923 next.thep_qos = THREAD_QOS_BACKGROUND;
924 next.thep_qos_relprio = 0;
925 }
926
927 /* Calculate low CPU priority */
928
929 boolean_t wants_lowpri_cpu = FALSE;
930
931 if (wants_darwinbg)
932 wants_lowpri_cpu = TRUE;
933
934 if (next.t_tal_engaged)
935 wants_lowpri_cpu = TRUE;
936
937 if (on_task && requested.t_sup_lowpri_cpu && requested.t_boosted == 0)
938 wants_lowpri_cpu = TRUE;
939
940 if (wants_lowpri_cpu)
941 next.lowpri_cpu = 1;
942
943 /* Calculate IO policy */
944
945 /* Update BG IO policy (so we can see if it has changed) */
946 next.bg_iotier = requested.bg_iotier;
947
948 int iopol = THROTTLE_LEVEL_TIER0;
949
950 if (wants_darwinbg)
951 iopol = MAX(iopol, requested.bg_iotier);
952
953 if (on_task) {
954 if (requested.t_apptype == TASK_APPTYPE_DAEMON_STANDARD)
955 iopol = MAX(iopol, proc_standard_daemon_tier);
956
957 if (requested.t_sup_disk && requested.t_boosted == 0)
958 iopol = MAX(iopol, proc_suppressed_disk_tier);
959
960 if (next.t_tal_engaged)
961 iopol = MAX(iopol, proc_tal_disk_tier);
962
963 if (next.t_qos_clamp != THREAD_QOS_UNSPECIFIED)
964 iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.t_qos_clamp]);
965
966 } else {
967 /* Look up the associated IO tier value for the QoS class */
968 iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.thep_qos]);
969 }
970
971 iopol = MAX(iopol, requested.int_iotier);
972 iopol = MAX(iopol, requested.ext_iotier);
973
974 next.io_tier = iopol;
975
976 /* Calculate Passive IO policy */
977
978 if (requested.ext_iopassive || requested.int_iopassive)
979 next.io_passive = 1;
980
981 /* Calculate miscellaneous policy */
982
983 if (on_task) {
984 /* Calculate suppression-active flag */
985 if (requested.t_sup_active && requested.t_boosted == 0)
986 next.t_sup_active = 1;
987
988 /* Calculate suspend policy */
989 if (requested.t_sup_suspend && requested.t_boosted == 0)
990 next.t_suspended = 1;
991
992 /* Calculate timer QOS */
993 int latency_qos = requested.t_base_latency_qos;
994
995 if (requested.t_sup_timer && requested.t_boosted == 0)
996 latency_qos = requested.t_sup_timer;
997
998 if (next.t_qos_clamp != THREAD_QOS_UNSPECIFIED)
999 latency_qos = MAX(latency_qos, (int)thread_qos_policy_params.qos_latency_qos[next.t_qos_clamp]);
1000
1001 if (requested.t_over_latency_qos != 0)
1002 latency_qos = requested.t_over_latency_qos;
1003
1004 /* Treat the windowserver specially */
1005 if (requested.t_role == TASK_GRAPHICS_SERVER)
1006 latency_qos = proc_graphics_timer_qos;
1007
1008 next.t_latency_qos = latency_qos;
1009
1010 /* Calculate throughput QOS */
1011 int through_qos = requested.t_base_through_qos;
1012
1013 if (requested.t_sup_throughput && requested.t_boosted == 0)
1014 through_qos = requested.t_sup_throughput;
1015
1016 if (next.t_qos_clamp != THREAD_QOS_UNSPECIFIED)
1017 through_qos = MAX(through_qos, (int)thread_qos_policy_params.qos_through_qos[next.t_qos_clamp]);
1018
1019 if (requested.t_over_through_qos != 0)
1020 through_qos = requested.t_over_through_qos;
1021
1022 next.t_through_qos = through_qos;
1023
1024 /* Calculate suppressed CPU priority */
1025 if (requested.t_sup_cpu && requested.t_boosted == 0)
1026 next.t_suppressed_cpu = 1;
1027
1028 /*
1029 * Calculate background sockets
1030 * Don't take into account boosting to limit transition frequency.
1031 */
1032 if (requested.t_sup_bg_sockets){
1033 next.all_sockets_bg = 1;
1034 next.new_sockets_bg = 1;
1035 }
1036
1037 /* Apply SFI Managed class bit */
1038 next.t_sfi_managed = requested.t_sfi_managed;
1039
1040 /* Calculate 'live donor' status for live importance */
1041 switch (requested.t_apptype) {
1042 case TASK_APPTYPE_APP_TAL:
1043 case TASK_APPTYPE_APP_DEFAULT:
1044 if (requested.ext_darwinbg == 0)
1045 next.t_live_donor = 1;
1046 else
1047 next.t_live_donor = 0;
1048 break;
1049
1050 case TASK_APPTYPE_DAEMON_INTERACTIVE:
1051 case TASK_APPTYPE_DAEMON_STANDARD:
1052 case TASK_APPTYPE_DAEMON_ADAPTIVE:
1053 case TASK_APPTYPE_DAEMON_BACKGROUND:
1054 default:
1055 next.t_live_donor = 0;
1056 break;
1057 }
1058 }
1059
1060 if (requested.terminated) {
1061 /*
1062 * Shoot down the throttles that slow down exit or response to SIGTERM
1063 * We don't need to shoot down:
1064 * passive (don't want to cause others to throttle)
1065 * all_sockets_bg (don't need to iterate FDs on every exit)
1066 * new_sockets_bg (doesn't matter for exiting process)
1067 * pidsuspend (jetsam-ed BG process shouldn't run again)
1068 * watchers_bg (watcher threads don't need to be unthrottled)
1069 * t_latency_qos (affects userspace timers only)
1070 */
1071
1072 next.terminated = 1;
1073 next.darwinbg = 0;
1074 next.lowpri_cpu = 0;
1075 next.io_tier = THROTTLE_LEVEL_TIER0;
1076 if (on_task) {
1077 next.t_tal_engaged = 0;
1078 next.t_role = TASK_UNSPECIFIED;
1079 next.t_suppressed_cpu = 0;
1080
1081 /* TODO: This should only be shot down on SIGTERM, not exit */
1082 next.t_suspended = 0;
1083 } else {
1084 next.thep_qos = THREAD_QOS_UNSPECIFIED;
1085 }
1086 }
1087
1088 /*
1089 * Step 3:
1090 * Swap out old policy for new policy
1091 */
1092
1093 if (!on_task) {
1094 /* Acquire thread mutex to synchronize against
1095 * thread_policy_set(). Consider reworking to separate qos
1096 * fields, or locking the task in thread_policy_set.
1097 * A more efficient model would be to make the thread bits
1098 * authoritative.
1099 */
1100 thread_mtx_lock(thread);
1101 }
1102
1103 struct task_effective_policy prev =
1104 (on_task) ? task->effective_policy : thread->effective_policy;
1105
1106 /*
1107 * Check for invalid transitions here for easier debugging
1108 * TODO: dump the structs as hex in the panic string
1109 */
1110 if (task == kernel_task && prev.all_sockets_bg != next.all_sockets_bg)
1111 panic("unexpected network change for kernel task");
1112
1113 /* This is the point where the new values become visible to other threads */
1114 if (on_task)
1115 task->effective_policy = next;
1116 else {
1117 /* Preserve thread specific latency/throughput QoS modified via
1118 * thread_policy_set(). Inelegant in the extreme, to be reworked.
1119 *
1120 * If thread QoS class is set, we don't need to preserve the previously set values.
1121 * We should ensure to not accidentally preserve previous thread QoS values if you set a thread
1122 * back to default QoS.
1123 */
1124 uint32_t lqos = thread->effective_policy.t_latency_qos, tqos = thread->effective_policy.t_through_qos;
1125
1126 if (prev.thep_qos == THREAD_QOS_UNSPECIFIED && next.thep_qos == THREAD_QOS_UNSPECIFIED) {
1127 next.t_latency_qos = lqos;
1128 next.t_through_qos = tqos;
1129 } else if (prev.thep_qos != THREAD_QOS_UNSPECIFIED && next.thep_qos == THREAD_QOS_UNSPECIFIED) {
1130 next.t_latency_qos = 0;
1131 next.t_through_qos = 0;
1132 } else {
1133 next.t_latency_qos = thread_qos_policy_params.qos_latency_qos[next.thep_qos];
1134 next.t_through_qos = thread_qos_policy_params.qos_through_qos[next.thep_qos];
1135 }
1136
1137 thread_update_qos_cpu_time(thread, TRUE);
1138 thread->effective_policy = next;
1139 thread_mtx_unlock(thread);
1140 }
1141
1142 /* Don't do anything further to a half-formed task or thread */
1143 if (in_create)
1144 return;
1145
1146 /*
1147 * Step 4:
1148 * Pend updates that can't be done while holding the task lock
1149 */
1150
1151 if (prev.all_sockets_bg != next.all_sockets_bg)
1152 pend_token->tpt_update_sockets = 1;
1153
1154 if (on_task) {
1155 /* Only re-scan the timer list if the qos level is getting less strong */
1156 if (prev.t_latency_qos > next.t_latency_qos)
1157 pend_token->tpt_update_timers = 1;
1158
1159
1160 if (prev.t_live_donor != next.t_live_donor)
1161 pend_token->tpt_update_live_donor = 1;
1162 }
1163
1164 /*
1165 * Step 5:
1166 * Update other subsystems as necessary if something has changed
1167 */
1168
1169 boolean_t update_throttle = (prev.io_tier != next.io_tier) ? TRUE : FALSE;
1170
1171 if (on_task) {
1172 if (prev.t_suspended == 0 && next.t_suspended == 1 && task->active) {
1173 task_hold_locked(task);
1174 task_wait_locked(task, FALSE);
1175 }
1176 if (prev.t_suspended == 1 && next.t_suspended == 0 && task->active) {
1177 task_release_locked(task);
1178 }
1179
1180 boolean_t update_threads = FALSE;
1181 boolean_t update_sfi = FALSE;
1182
1183 if (prev.bg_iotier != next.bg_iotier ||
1184 prev.terminated != next.terminated ||
1185 prev.t_qos_clamp != next.t_qos_clamp ||
1186 prev.t_qos_ceiling != next.t_qos_ceiling ||
1187 prev.qos_ui_is_urgent != next.qos_ui_is_urgent ||
1188 prev.darwinbg != next.darwinbg)
1189 update_threads = TRUE;
1190
1191 /*
1192 * A bit of a layering violation. We know what task policy attributes
1193 * sfi_thread_classify() consults, so if they change, trigger SFI
1194 * re-evaluation.
1195 */
1196 if ((prev.t_latency_qos != next.t_latency_qos) ||
1197 (prev.t_role != next.t_role) ||
1198 (prev.darwinbg != next.darwinbg) ||
1199 (prev.t_sfi_managed != next.t_sfi_managed))
1200 update_sfi = TRUE;
1201
1202 #if CONFIG_SCHED_SFI
1203 if (prev.t_role != next.t_role && task_policy_update_coalition_focal_tasks(task, prev.t_role, next.t_role)) {
1204 update_sfi = TRUE;
1205 pend_token->tpt_update_coal_sfi = 1;
1206 }
1207 #endif /* CONFIG_SCHED_SFI */
1208
1209 task_policy_update_task_locked(task, update_throttle, update_threads, update_sfi);
1210 } else {
1211 int update_cpu = 0;
1212 boolean_t update_sfi = FALSE;
1213 boolean_t update_qos = FALSE;
1214
1215 if (prev.lowpri_cpu != next.lowpri_cpu)
1216 update_cpu = (next.lowpri_cpu ? DO_LOWPRI_CPU : UNDO_LOWPRI_CPU);
1217
1218 if (prev.darwinbg != next.darwinbg ||
1219 prev.thep_qos != next.thep_qos)
1220 update_sfi = TRUE;
1221
1222 if (prev.thep_qos != next.thep_qos ||
1223 prev.thep_qos_relprio != next.thep_qos_relprio ||
1224 prev.qos_ui_is_urgent != next.qos_ui_is_urgent ||
1225 prev.terminated != next.terminated) {
1226 update_qos = TRUE;
1227 }
1228
1229 task_policy_update_thread_locked(thread, update_cpu, update_throttle, update_sfi, update_qos);
1230 }
1231 }
1232
1233
1234 #if CONFIG_SCHED_SFI
1235 /*
1236 * Yet another layering violation. We reach out and bang on the coalition directly.
1237 */
1238 static boolean_t
1239 task_policy_update_coalition_focal_tasks(task_t task,
1240 int prev_role,
1241 int next_role)
1242 {
1243 boolean_t sfi_transition = FALSE;
1244
1245 /* task moving into/out-of the foreground */
1246 if (prev_role != TASK_FOREGROUND_APPLICATION && next_role == TASK_FOREGROUND_APPLICATION) {
1247 if (task_coalition_adjust_focal_count(task, 1) == 1)
1248 sfi_transition = TRUE;
1249 } else if (prev_role == TASK_FOREGROUND_APPLICATION && next_role != TASK_FOREGROUND_APPLICATION) {
1250 if (task_coalition_adjust_focal_count(task, -1) == 0)
1251 sfi_transition = TRUE;
1252 }
1253
1254 /* task moving into/out-of background */
1255 if (prev_role != TASK_BACKGROUND_APPLICATION && next_role == TASK_BACKGROUND_APPLICATION) {
1256 if (task_coalition_adjust_nonfocal_count(task, 1) == 1)
1257 sfi_transition = TRUE;
1258 } else if (prev_role == TASK_BACKGROUND_APPLICATION && next_role != TASK_BACKGROUND_APPLICATION) {
1259 if (task_coalition_adjust_nonfocal_count(task, -1) == 0)
1260 sfi_transition = TRUE;
1261 }
1262
1263 return sfi_transition;
1264 }
1265 #endif /* CONFIG_SCHED_SFI */
1266
1267 /* Despite the name, the thread's task is locked, the thread is not */
1268 void
1269 task_policy_update_thread_locked(thread_t thread,
1270 int update_cpu,
1271 boolean_t update_throttle,
1272 boolean_t update_sfi,
1273 boolean_t update_qos)
1274 {
1275 thread_precedence_policy_data_t policy;
1276
1277 if (update_throttle) {
1278 rethrottle_thread(thread->uthread);
1279 }
1280
1281 if (update_sfi) {
1282 sfi_reevaluate(thread);
1283 }
1284
1285 /*
1286 * TODO: pidbind needs to stuff remembered importance into saved_importance
1287 * properly deal with bg'ed threads being pidbound and unbging while pidbound
1288 *
1289 * TODO: A BG thread's priority is 0 on desktop and 4 on embedded. Need to reconcile this.
1290 * */
1291 if (update_cpu == DO_LOWPRI_CPU) {
1292 thread->saved_importance = thread->importance;
1293 policy.importance = INT_MIN;
1294 } else if (update_cpu == UNDO_LOWPRI_CPU) {
1295 policy.importance = thread->saved_importance;
1296 thread->saved_importance = 0;
1297 }
1298
1299 /* Takes thread lock and thread mtx lock */
1300 if (update_cpu)
1301 thread_policy_set_internal(thread, THREAD_PRECEDENCE_POLICY,
1302 (thread_policy_t)&policy,
1303 THREAD_PRECEDENCE_POLICY_COUNT);
1304
1305 if (update_qos)
1306 thread_recompute_qos(thread);
1307 }
1308
1309 /*
1310 * Calculate priority on a task, loop through its threads, and tell them about
1311 * priority changes and throttle changes.
1312 */
1313 void
1314 task_policy_update_task_locked(task_t task,
1315 boolean_t update_throttle,
1316 boolean_t update_threads,
1317 boolean_t update_sfi)
1318 {
1319 boolean_t update_priority = FALSE;
1320
1321 if (task == kernel_task)
1322 panic("Attempting to set task policy on kernel_task");
1323
1324 int priority = BASEPRI_DEFAULT;
1325 int max_priority = MAXPRI_USER;
1326
1327 if (proc_get_effective_task_policy(task, TASK_POLICY_LOWPRI_CPU)) {
1328 priority = MAXPRI_THROTTLE;
1329 max_priority = MAXPRI_THROTTLE;
1330 } else if (proc_get_effective_task_policy(task, TASK_POLICY_SUPPRESSED_CPU)) {
1331 priority = MAXPRI_SUPPRESSED;
1332 max_priority = MAXPRI_SUPPRESSED;
1333 } else {
1334 switch (proc_get_effective_task_policy(task, TASK_POLICY_ROLE)) {
1335 case TASK_CONTROL_APPLICATION:
1336 priority = BASEPRI_CONTROL;
1337 break;
1338 case TASK_GRAPHICS_SERVER:
1339 priority = BASEPRI_GRAPHICS;
1340 max_priority = MAXPRI_RESERVED;
1341 break;
1342 default:
1343 break;
1344 }
1345
1346 /* factor in 'nice' value */
1347 priority += task->importance;
1348
1349 if (task->effective_policy.t_qos_clamp != THREAD_QOS_UNSPECIFIED) {
1350 int qos_clamp_priority = thread_qos_policy_params.qos_pri[task->effective_policy.t_qos_clamp];
1351
1352 priority = MIN(priority, qos_clamp_priority);
1353 max_priority = MIN(max_priority, qos_clamp_priority);
1354 }
1355 }
1356
1357 /* avoid extra work if priority isn't changing */
1358 if (task->priority != priority || task->max_priority != max_priority) {
1359 update_priority = TRUE;
1360
1361 /* update the scheduling priority for the task */
1362 task->max_priority = max_priority;
1363
1364 if (priority > task->max_priority)
1365 priority = task->max_priority;
1366 else if (priority < MINPRI)
1367 priority = MINPRI;
1368
1369 task->priority = priority;
1370 }
1371
1372 /* Loop over the threads in the task only once, and only if necessary */
1373 if (update_threads || update_throttle || update_priority || update_sfi ) {
1374 thread_t thread;
1375
1376 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1377 if (update_priority) {
1378 thread_mtx_lock(thread);
1379
1380 thread_task_priority(thread, priority, max_priority);
1381
1382 thread_mtx_unlock(thread);
1383 }
1384
1385 if (update_throttle) {
1386 rethrottle_thread(thread->uthread);
1387 }
1388
1389 if (update_sfi) {
1390 sfi_reevaluate(thread);
1391 }
1392
1393 if (update_threads) {
1394 thread->requested_policy.bg_iotier = task->effective_policy.bg_iotier;
1395 thread->requested_policy.terminated = task->effective_policy.terminated;
1396
1397 task_policy_update_internal_locked(task, thread, FALSE, NULL);
1398 /* The thread policy must not emit any completion actions due to this change. */
1399 }
1400 }
1401 }
1402 }
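/*
 * Worked example of the calculation above (illustrative, in terms of the
 * constants used in this function): an unclamped, unsuppressed task with a
 * default role and a 'nice'-derived importance of -5 ends up with
 * priority = BASEPRI_DEFAULT - 5 and max_priority = MAXPRI_USER, whereas
 * the same task with an effective lowpri_cpu (DARWIN_BG) policy is pinned
 * to priority = max_priority = MAXPRI_THROTTLE regardless of importance;
 * in either case each thread is told about the new values via
 * thread_task_priority() in the loop above.
 */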
1403
1404 #if CONFIG_SCHED_SFI
1405 /* coalition object is locked */
1406 static void
1407 task_sfi_reevaluate_cb(coalition_t coal, void *ctx, task_t task)
1408 {
1409 thread_t thread;
1410
1411 /* unused for now */
1412 (void)coal;
1413
1414 /* skip the task we're re-evaluating on behalf of: it's already updated */
1415 if (task == (task_t)ctx)
1416 return;
1417
1418 task_lock(task);
1419
1420 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1421 sfi_reevaluate(thread);
1422 }
1423
1424 task_unlock(task);
1425 }
1426 #endif /* CONFIG_SCHED_SFI */
1427
1428 /*
1429 * Called with task unlocked to do things that can't be done while holding the task lock
1430 */
1431 void
1432 task_policy_update_complete_unlocked(task_t task, thread_t thread, task_pend_token_t pend_token)
1433 {
1434 boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE;
1435
1436 #ifdef MACH_BSD
1437 if (pend_token->tpt_update_sockets)
1438 proc_apply_task_networkbg(task->bsd_info, thread);
1439 #endif /* MACH_BSD */
1440
1441 if (on_task) {
1442 /* The timer throttle has been removed or reduced, so we need to look for expired timers and fire them */
1443 if (pend_token->tpt_update_timers)
1444 ml_timer_evaluate();
1445
1446
1447 if (pend_token->tpt_update_live_donor)
1448 task_importance_update_live_donor(task);
1449
1450 #if CONFIG_SCHED_SFI
1451 /* use the resource coalition for SFI re-evaluation */
1452 if (pend_token->tpt_update_coal_sfi)
1453 coalition_for_each_task(task->coalition[COALITION_TYPE_RESOURCE],
1454 (void *)task, task_sfi_reevaluate_cb);
1455 #endif /* CONFIG_SCHED_SFI */
1456 }
1457 }
1458
1459 /*
1460 * Initiate a task policy state transition
1461 *
1462 * Everything that modifies requested except functions that need to hold the task lock
1463 * should use this function
1464 *
1465 * Argument validation should be performed before reaching this point.
1466 *
1467 * TODO: Do we need to check task->active or thread->active?
1468 */
1469 void
1470 proc_set_task_policy(task_t task,
1471 thread_t thread,
1472 int category,
1473 int flavor,
1474 int value)
1475 {
1476 struct task_pend_token pend_token = {};
1477
1478 task_lock(task);
1479
1480 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1481 (IMPORTANCE_CODE(flavor, (category | tisthread(thread)))) | DBG_FUNC_START,
1482 targetid(task, thread), trequested_0(task, thread), trequested_1(task, thread), value, 0);
1483
1484 proc_set_task_policy_locked(task, thread, category, flavor, value);
1485
1486 task_policy_update_locked(task, thread, &pend_token);
1487
1488 task_unlock(task);
1489
1490 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1491 (IMPORTANCE_CODE(flavor, (category | tisthread(thread)))) | DBG_FUNC_END,
1492 targetid(task, thread), trequested_0(task, thread), trequested_1(task, thread), tpending(&pend_token), 0);
1493
1494 task_policy_update_complete_unlocked(task, thread, &pend_token);
1495 }
1496
1497 /*
1498 * Initiate a task policy state transition on a thread with its TID
1499 * Useful if you cannot guarantee the thread won't get terminated
1500 */
1501 void
1502 proc_set_task_policy_thread(task_t task,
1503 uint64_t tid,
1504 int category,
1505 int flavor,
1506 int value)
1507 {
1508 thread_t thread;
1509 thread_t self = current_thread();
1510 struct task_pend_token pend_token = {};
1511
1512 task_lock(task);
1513
1514 if (tid == TID_NULL || tid == self->thread_id)
1515 thread = self;
1516 else
1517 thread = task_findtid(task, tid);
1518
1519 if (thread == THREAD_NULL) {
1520 task_unlock(task);
1521 return;
1522 }
1523
1524 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1525 (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_START,
1526 targetid(task, thread), trequested_0(task, thread), trequested_1(task, thread), value, 0);
1527
1528 proc_set_task_policy_locked(task, thread, category, flavor, value);
1529
1530 task_policy_update_locked(task, thread, &pend_token);
1531
1532 task_unlock(task);
1533
1534 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1535 (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_END,
1536 targetid(task, thread), trequested_0(task, thread), trequested_1(task, thread), tpending(&pend_token), 0);
1537
1538 task_policy_update_complete_unlocked(task, thread, &pend_token);
1539 }
1540
1541 /*
1542 * Variant of proc_set_task_policy() that sets two scalars in the requested policy structure.
1543 * Same locking rules apply.
1544 */
1545 void
1546 proc_set_task_policy2(task_t task, thread_t thread, int category, int flavor, int value1, int value2)
1547 {
1548 struct task_pend_token pend_token = {};
1549
1550 task_lock(task);
1551
1552 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1553 (IMPORTANCE_CODE(flavor, (category | tisthread(thread)))) | DBG_FUNC_START,
1554 targetid(task, thread), trequested_0(task, thread), trequested_1(task, thread), value1, 0);
1555
1556 proc_set_task_policy2_locked(task, thread, category, flavor, value1, value2);
1557
1558 task_policy_update_locked(task, thread, &pend_token);
1559
1560 task_unlock(task);
1561
1562 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1563 (IMPORTANCE_CODE(flavor, (category | tisthread(thread)))) | DBG_FUNC_END,
1564 targetid(task, thread), trequested_0(task, thread), trequested_1(task, thread), tpending(&pend_token), 0);
1565
1566 task_policy_update_complete_unlocked(task, thread, &pend_token);
1567 }
1568
1569 /*
1570 * Set the requested state for a specific flavor to a specific value.
1571 *
1572 * TODO:
1573 * Verify that arguments to non iopol things are 1 or 0
1574 */
1575 static void
1576 proc_set_task_policy_locked(task_t task,
1577 thread_t thread,
1578 int category,
1579 int flavor,
1580 int value)
1581 {
1582 boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE;
1583
1584 int tier, passive;
1585
1586 struct task_requested_policy requested =
1587 (on_task) ? task->requested_policy : thread->requested_policy;
1588
1589 switch (flavor) {
1590
1591 /* Category: EXTERNAL and INTERNAL, thread and task */
1592
1593 case TASK_POLICY_DARWIN_BG:
1594 if (category == TASK_POLICY_EXTERNAL)
1595 requested.ext_darwinbg = value;
1596 else
1597 requested.int_darwinbg = value;
1598 break;
1599
1600 case TASK_POLICY_IOPOL:
1601 proc_iopol_to_tier(value, &tier, &passive);
1602 if (category == TASK_POLICY_EXTERNAL) {
1603 requested.ext_iotier = tier;
1604 requested.ext_iopassive = passive;
1605 } else {
1606 requested.int_iotier = tier;
1607 requested.int_iopassive = passive;
1608 }
1609 break;
1610
1611 case TASK_POLICY_IO:
1612 if (category == TASK_POLICY_EXTERNAL)
1613 requested.ext_iotier = value;
1614 else
1615 requested.int_iotier = value;
1616 break;
1617
1618 case TASK_POLICY_PASSIVE_IO:
1619 if (category == TASK_POLICY_EXTERNAL)
1620 requested.ext_iopassive = value;
1621 else
1622 requested.int_iopassive = value;
1623 break;
1624
1625 /* Category: INTERNAL, task only */
1626
1627 case TASK_POLICY_DARWIN_BG_IOPOL:
1628 assert(on_task && category == TASK_POLICY_INTERNAL);
1629 proc_iopol_to_tier(value, &tier, &passive);
1630 requested.bg_iotier = tier;
1631 break;
1632
1633 /* Category: ATTRIBUTE, task only */
1634
1635 case TASK_POLICY_TAL:
1636 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1637 requested.t_tal_enabled = value;
1638 break;
1639
1640 case TASK_POLICY_BOOST:
1641 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1642 requested.t_boosted = value;
1643 break;
1644
1645 case TASK_POLICY_ROLE:
1646 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1647 requested.t_role = value;
1648 break;
1649
1650 case TASK_POLICY_TERMINATED:
1651 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1652 requested.terminated = value;
1653 break;
1654 case TASK_BASE_LATENCY_QOS_POLICY:
1655 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1656 requested.t_base_latency_qos = value;
1657 break;
1658 case TASK_BASE_THROUGHPUT_QOS_POLICY:
1659 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1660 requested.t_base_through_qos = value;
1661 break;
1662 case TASK_POLICY_SFI_MANAGED:
1663 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1664 requested.t_sfi_managed = value;
1665 break;
1666
1667 /* Category: ATTRIBUTE, thread only */
1668
1669 case TASK_POLICY_PIDBIND_BG:
1670 assert(!on_task && category == TASK_POLICY_ATTRIBUTE);
1671 requested.th_pidbind_bg = value;
1672 break;
1673
1674 case TASK_POLICY_WORKQ_BG:
1675 assert(!on_task && category == TASK_POLICY_ATTRIBUTE);
1676 requested.th_workq_bg = value;
1677 break;
1678
1679 case TASK_POLICY_QOS:
1680 assert(!on_task && category == TASK_POLICY_ATTRIBUTE);
1681 requested.thrp_qos = value;
1682 break;
1683
1684 case TASK_POLICY_QOS_OVERRIDE:
1685 assert(!on_task && category == TASK_POLICY_ATTRIBUTE);
1686 requested.thrp_qos_override = value;
1687 break;
1688
1689 default:
1690 panic("unknown task policy: %d %d %d", category, flavor, value);
1691 break;
1692 }
1693
1694 if (on_task)
1695 task->requested_policy = requested;
1696 else
1697 thread->requested_policy = requested;
1698 }
1699
1700 /*
1701 * Variant of proc_set_task_policy_locked() that sets two scalars in the requested policy structure.
1702 */
1703 static void
1704 proc_set_task_policy2_locked(task_t task,
1705 thread_t thread,
1706 int category,
1707 int flavor,
1708 int value1,
1709 int value2)
1710 {
1711 boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE;
1712
1713 struct task_requested_policy requested =
1714 (on_task) ? task->requested_policy : thread->requested_policy;
1715
1716 switch (flavor) {
1717
1718 /* Category: ATTRIBUTE, task only */
1719
1720 case TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS:
1721 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1722 requested.t_base_latency_qos = value1;
1723 requested.t_base_through_qos = value2;
1724 break;
1725
1726 case TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS:
1727 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1728 requested.t_over_latency_qos = value1;
1729 requested.t_over_through_qos = value2;
1730 break;
1731
1732 /* Category: ATTRIBUTE, thread only */
1733
1734 case TASK_POLICY_QOS_AND_RELPRIO:
1735
1736 assert(!on_task && category == TASK_POLICY_ATTRIBUTE);
1737 requested.thrp_qos = value1;
1738 requested.thrp_qos_relprio = value2;
1739 DTRACE_BOOST3(qos_set, uint64_t, thread->thread_id, int, requested.thrp_qos, int, requested.thrp_qos_relprio);
1740 break;
1741
1742 default:
1743 panic("unknown task policy: %d %d %d %d", category, flavor, value1, value2);
1744 break;
1745 }
1746
1747 if (on_task)
1748 task->requested_policy = requested;
1749 else
1750 thread->requested_policy = requested;
1751 }
1752
1753
1754 /*
1755 * Gets what you set. Effective values may be different.
1756 */
1757 int
1758 proc_get_task_policy(task_t task,
1759 thread_t thread,
1760 int category,
1761 int flavor)
1762 {
1763 boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE;
1764
1765 int value = 0;
1766
1767 task_lock(task);
1768
1769 struct task_requested_policy requested =
1770 (on_task) ? task->requested_policy : thread->requested_policy;
1771
1772 switch (flavor) {
1773 case TASK_POLICY_DARWIN_BG:
1774 if (category == TASK_POLICY_EXTERNAL)
1775 value = requested.ext_darwinbg;
1776 else
1777 value = requested.int_darwinbg;
1778 break;
1779 case TASK_POLICY_IOPOL:
1780 if (category == TASK_POLICY_EXTERNAL)
1781 value = proc_tier_to_iopol(requested.ext_iotier,
1782 requested.ext_iopassive);
1783 else
1784 value = proc_tier_to_iopol(requested.int_iotier,
1785 requested.int_iopassive);
1786 break;
1787 case TASK_POLICY_IO:
1788 if (category == TASK_POLICY_EXTERNAL)
1789 value = requested.ext_iotier;
1790 else
1791 value = requested.int_iotier;
1792 break;
1793 case TASK_POLICY_PASSIVE_IO:
1794 if (category == TASK_POLICY_EXTERNAL)
1795 value = requested.ext_iopassive;
1796 else
1797 value = requested.int_iopassive;
1798 break;
1799 case TASK_POLICY_DARWIN_BG_IOPOL:
1800 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1801 value = proc_tier_to_iopol(requested.bg_iotier, 0);
1802 break;
1803 case TASK_POLICY_ROLE:
1804 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1805 value = requested.t_role;
1806 break;
1807 case TASK_POLICY_SFI_MANAGED:
1808 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1809 value = requested.t_sfi_managed;
1810 break;
1811 case TASK_POLICY_QOS:
1812 assert(!on_task && category == TASK_POLICY_ATTRIBUTE);
1813 value = requested.thrp_qos;
1814 break;
1815 case TASK_POLICY_QOS_OVERRIDE:
1816 assert(!on_task && category == TASK_POLICY_ATTRIBUTE);
1817 value = requested.thrp_qos_override;
1818 break;
1819 default:
1820 panic("unknown policy_flavor %d", flavor);
1821 break;
1822 }
1823
1824 task_unlock(task);
1825
1826 return value;
1827 }
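
/*
 * Illustrative usage sketch (hypothetical caller, not part of the original
 * file): the getter above reports the *requested* value for a flavor, which
 * may differ from the distilled decision. A caller comparing the two might do:
 *
 *	int req = proc_get_task_policy(task, THREAD_NULL,
 *	                               TASK_POLICY_EXTERNAL, TASK_POLICY_DARWIN_BG);
 *	int eff = proc_get_effective_task_policy(task, TASK_POLICY_DARWIN_BG);
 *
 * 'req' reflects only the external DARWIN_BG request, while 'eff' also folds
 * in internal requests and the other inputs evaluated by the state machine.
 */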
1828
1829 /*
1830 * Variant of proc_get_task_policy() that returns two scalar outputs.
1831 */
1832 void
1833 proc_get_task_policy2(task_t task, thread_t thread, int category __unused, int flavor, int *value1, int *value2)
1834 {
1835 boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE;
1836
1837 task_lock(task);
1838
1839 struct task_requested_policy requested =
1840 (on_task) ? task->requested_policy : thread->requested_policy;
1841
1842 switch (flavor) {
1843 /* TASK attributes */
1844 case TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS:
1845 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1846 *value1 = requested.t_base_latency_qos;
1847 *value2 = requested.t_base_through_qos;
1848 break;
1849
1850 case TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS:
1851 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1852 *value1 = requested.t_over_latency_qos;
1853 *value2 = requested.t_over_through_qos;
1854 break;
1855
1856 /* THREAD attributes */
1857 case TASK_POLICY_QOS_AND_RELPRIO:
1858 assert(!on_task && category == TASK_POLICY_ATTRIBUTE);
1859 *value1 = requested.thrp_qos;
1860 *value2 = requested.thrp_qos_relprio;
1861 break;
1862
1863 default:
1864 panic("unknown policy_flavor %d", flavor);
1865 break;
1866 }
1867
1868 task_unlock(task);
1869 }
1870
1871
1872 /*
1873 * Functions for querying effective state for relevant subsystems
1874 * ONLY the relevant subsystem should query these.
1875 * NEVER take a value from one of the 'effective' functions and stuff it into a setter.
1876 */
1877
1878 int
1879 proc_get_effective_task_policy(task_t task, int flavor)
1880 {
1881 return proc_get_effective_policy(task, THREAD_NULL, flavor);
1882 }
1883
1884 int
1885 proc_get_effective_thread_policy(thread_t thread, int flavor)
1886 {
1887 return proc_get_effective_policy(thread->task, thread, flavor);
1888 }
1889
1890 /*
1891 * Gets what is actually in effect, for subsystems which pull policy instead of receive updates.
1892 *
1893 * NOTE: This accessor does not take the task lock.
1894 * Notifications of state updates need to be externally synchronized with state queries.
1895 * This routine *MUST* remain interrupt safe, as it is potentially invoked
1896 * within the context of a timer interrupt. It is also called in KDP context for stackshot.
1897 */
1898 static int
1899 proc_get_effective_policy(task_t task,
1900 thread_t thread,
1901 int flavor)
1902 {
1903 boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE;
1904 int value = 0;
1905
1906 switch (flavor) {
1907 case TASK_POLICY_DARWIN_BG:
1908 /*
1909 * This backs the KPI call proc_pidbackgrounded to find
1910 * out if a pid is backgrounded,
1911 * as well as proc_get_effective_thread_policy.
1912 * Its main use is within the timer layer, as well as
1913 * prioritizing requests to the graphics system.
1914 * Returns 1 for background mode, 0 for normal mode
1915 */
1916 if (on_task)
1917 value = task->effective_policy.darwinbg;
1918 else
1919 value = (task->effective_policy.darwinbg ||
1920 thread->effective_policy.darwinbg) ? 1 : 0;
1921 break;
1922 case TASK_POLICY_IO:
1923 /*
1924 * The I/O system calls here to find out what throttling tier to apply to an operation.
1925 * Returns THROTTLE_LEVEL_* values. Some userspace spinlock operations can apply
1926 * a temporary iotier override to make the I/O more aggressive to get the lock
1927 * owner to release the spinlock.
1928 */
1929 if (on_task)
1930 value = task->effective_policy.io_tier;
1931 else {
1932 value = MAX(task->effective_policy.io_tier,
1933 thread->effective_policy.io_tier);
1934 if (thread->iotier_override != THROTTLE_LEVEL_NONE)
1935 value = MIN(value, thread->iotier_override);
1936 }
1937 break;
1938 case TASK_POLICY_PASSIVE_IO:
1939 /*
1940 * The I/O system calls here to find out whether an operation should be passive.
1941 * (i.e. not cause operations with lower throttle tiers to be throttled)
1942 * Returns 1 for passive mode, 0 for normal mode.
1943 * If a userspace spinlock has applied an override, that I/O should always
1944 * be passive to avoid self-throttling when the override is removed and lower
1945 * iotier I/Os are issued.
1946 */
1947 if (on_task)
1948 value = task->effective_policy.io_passive;
1949 else {
1950 int io_tier = MAX(task->effective_policy.io_tier, thread->effective_policy.io_tier);
1951 boolean_t override_in_effect = (thread->iotier_override != THROTTLE_LEVEL_NONE) && (thread->iotier_override < io_tier);
1952
1953 value = (task->effective_policy.io_passive ||
1954 thread->effective_policy.io_passive || override_in_effect) ? 1 : 0;
1955 }
1956 break;
1957 case TASK_POLICY_ALL_SOCKETS_BG:
1958 /*
1959 * do_background_socket() calls this to determine what it should do to the proc's sockets
1960 * Returns 1 for background mode, 0 for normal mode
1961 *
1962 * This consults both thread and task so un-DBGing a thread while the task is BG
1963 * doesn't get you out of the network throttle.
1964 */
1965 if (on_task)
1966 value = task->effective_policy.all_sockets_bg;
1967 else
1968 value = (task->effective_policy.all_sockets_bg ||
1969 thread->effective_policy.all_sockets_bg) ? 1 : 0;
1970 break;
1971 case TASK_POLICY_NEW_SOCKETS_BG:
1972 /*
1973 * socreate() calls this to determine if it should mark a new socket as background
1974 * Returns 1 for background mode, 0 for normal mode
1975 */
1976 if (on_task)
1977 value = task->effective_policy.new_sockets_bg;
1978 else
1979 value = (task->effective_policy.new_sockets_bg ||
1980 thread->effective_policy.new_sockets_bg) ? 1 : 0;
1981 break;
1982 case TASK_POLICY_LOWPRI_CPU:
1983 /*
1984 * Returns 1 for low priority cpu mode, 0 for normal mode
1985 */
1986 if (on_task)
1987 value = task->effective_policy.lowpri_cpu;
1988 else
1989 value = (task->effective_policy.lowpri_cpu ||
1990 thread->effective_policy.lowpri_cpu) ? 1 : 0;
1991 break;
1992 case TASK_POLICY_SUPPRESSED_CPU:
1993 /*
1994 * Returns 1 for suppressed cpu mode, 0 for normal mode
1995 */
1996 assert(on_task);
1997 value = task->effective_policy.t_suppressed_cpu;
1998 break;
1999 case TASK_POLICY_LATENCY_QOS:
2000 /*
2001 * timer arming calls into here to find out the timer coalescing level
2002 * Returns a QoS tier (0-6)
2003 */
2004 if (on_task) {
2005 value = task->effective_policy.t_latency_qos;
2006 } else {
2007 value = MAX(task->effective_policy.t_latency_qos, thread->effective_policy.t_latency_qos);
2008 }
2009 break;
2010 case TASK_POLICY_THROUGH_QOS:
2011 /*
2012 * Returns a QoS tier (0-6)
2013 */
2014 assert(on_task);
2015 value = task->effective_policy.t_through_qos;
2016 break;
2017 case TASK_POLICY_ROLE:
2018 assert(on_task);
2019 value = task->effective_policy.t_role;
2020 break;
2021 case TASK_POLICY_WATCHERS_BG:
2022 assert(on_task);
2023 value = task->effective_policy.t_watchers_bg;
2024 break;
2025 case TASK_POLICY_SFI_MANAGED:
2026 assert(on_task);
2027 value = task->effective_policy.t_sfi_managed;
2028 break;
2029 case TASK_POLICY_QOS:
2030 assert(!on_task);
2031 value = thread->effective_policy.thep_qos;
2032 break;
2033 default:
2034 panic("unknown policy_flavor %d", flavor);
2035 break;
2036 }
2037
2038 return value;
2039 }
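
/*
 * Illustrative usage sketch (hypothetical caller, not part of the original
 * file): a subsystem that owns a flavor pulls the distilled decision from the
 * accessor above, e.g. an I/O path choosing a throttle tier for itself:
 *
 *	int tier    = proc_get_effective_thread_policy(current_thread(), TASK_POLICY_IO);
 *	int passive = proc_get_effective_thread_policy(current_thread(), TASK_POLICY_PASSIVE_IO);
 *
 * Per the warning above, these values are consumed only by the owning
 * subsystem and are never fed back into a setter.
 */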
2040
2041 /*
2042 * Convert from IOPOL_* values to throttle tiers.
2043 *
2044 * TODO: Can this be made more compact, like an array lookup
2045 * Note that it is possible to support e.g. IOPOL_PASSIVE_STANDARD in the future
2046 */
2047
2048 static void
2049 proc_iopol_to_tier(int iopolicy, int *tier, int *passive)
2050 {
2051 *passive = 0;
2052 *tier = 0;
2053 switch (iopolicy) {
2054 case IOPOL_IMPORTANT:
2055 *tier = THROTTLE_LEVEL_TIER0;
2056 break;
2057 case IOPOL_PASSIVE:
2058 *tier = THROTTLE_LEVEL_TIER0;
2059 *passive = 1;
2060 break;
2061 case IOPOL_STANDARD:
2062 *tier = THROTTLE_LEVEL_TIER1;
2063 break;
2064 case IOPOL_UTILITY:
2065 *tier = THROTTLE_LEVEL_TIER2;
2066 break;
2067 case IOPOL_THROTTLE:
2068 *tier = THROTTLE_LEVEL_TIER3;
2069 break;
2070 default:
2071 panic("unknown I/O policy %d", iopolicy);
2072 break;
2073 }
2074 }
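
/*
 * Sketch for the TODO above (illustrative only, not compiled as part of this
 * file; assumes the IOPOL_* constants remain small consecutive integers):
 *
 *	static const struct { int tier; int passive; } iopol_map[] = {
 *		[IOPOL_IMPORTANT] = { THROTTLE_LEVEL_TIER0, 0 },
 *		[IOPOL_PASSIVE]   = { THROTTLE_LEVEL_TIER0, 1 },
 *		[IOPOL_STANDARD]  = { THROTTLE_LEVEL_TIER1, 0 },
 *		[IOPOL_UTILITY]   = { THROTTLE_LEVEL_TIER2, 0 },
 *		[IOPOL_THROTTLE]  = { THROTTLE_LEVEL_TIER3, 0 },
 *	};
 *
 * proc_iopol_to_tier() would then reduce to a bounds check plus one indexed
 * read, at the cost of a table that must stay in sync with sys/resource.h.
 */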
2075
2076 static int
2077 proc_tier_to_iopol(int tier, int passive)
2078 {
2079 if (passive == 1) {
2080 switch (tier) {
2081 case THROTTLE_LEVEL_TIER0:
2082 return IOPOL_PASSIVE;
2083 break;
2084 default:
2085 panic("unknown passive tier %d", tier);
2086 return IOPOL_DEFAULT;
2087 break;
2088 }
2089 } else {
2090 switch (tier) {
2091 case THROTTLE_LEVEL_NONE:
2092 case THROTTLE_LEVEL_TIER0:
2093 return IOPOL_DEFAULT;
2094 break;
2095 case THROTTLE_LEVEL_TIER1:
2096 return IOPOL_STANDARD;
2097 break;
2098 case THROTTLE_LEVEL_TIER2:
2099 return IOPOL_UTILITY;
2100 break;
2101 case THROTTLE_LEVEL_TIER3:
2102 return IOPOL_THROTTLE;
2103 break;
2104 default:
2105 panic("unknown tier %d", tier);
2106 return IOPOL_DEFAULT;
2107 break;
2108 }
2109 }
2110 }
2111
2112 int
2113 proc_darwin_role_to_task_role(int darwin_role, int* task_role)
2114 {
2115 integer_t role = TASK_UNSPECIFIED;
2116
2117 switch (darwin_role) {
2118 case PRIO_DARWIN_ROLE_DEFAULT:
2119 role = TASK_UNSPECIFIED;
2120 break;
2121 case PRIO_DARWIN_ROLE_UI_FOCAL:
2122 role = TASK_FOREGROUND_APPLICATION;
2123 break;
2124 case PRIO_DARWIN_ROLE_UI:
2125 role = TASK_DEFAULT_APPLICATION;
2126 break;
2127 case PRIO_DARWIN_ROLE_NON_UI:
2128 role = TASK_NONUI_APPLICATION;
2129 break;
2130 case PRIO_DARWIN_ROLE_UI_NON_FOCAL:
2131 role = TASK_BACKGROUND_APPLICATION;
2132 break;
2133 case PRIO_DARWIN_ROLE_TAL_LAUNCH:
2134 role = TASK_THROTTLE_APPLICATION;
2135 break;
2136 default:
2137 return EINVAL;
2138 }
2139
2140 *task_role = role;
2141
2142 return 0;
2143 }
2144
2145 int
2146 proc_task_role_to_darwin_role(int task_role)
2147 {
2148 switch (task_role) {
2149 case TASK_FOREGROUND_APPLICATION:
2150 return PRIO_DARWIN_ROLE_UI_FOCAL;
2151 case TASK_BACKGROUND_APPLICATION:
2152 return PRIO_DARWIN_ROLE_UI;
2153 case TASK_NONUI_APPLICATION:
2154 return PRIO_DARWIN_ROLE_NON_UI;
2155 case TASK_DEFAULT_APPLICATION:
2156 return PRIO_DARWIN_ROLE_UI_NON_FOCAL;
2157 case TASK_THROTTLE_APPLICATION:
2158 return PRIO_DARWIN_ROLE_TAL_LAUNCH;
2159 case TASK_UNSPECIFIED:
2160 default:
2161 return PRIO_DARWIN_ROLE_DEFAULT;
2162 }
2163 }
2164
2165
2166 /* apply internal backgrounding for workqueue threads */
2167 int
2168 proc_apply_workq_bgthreadpolicy(thread_t thread)
2169 {
2170 if (thread == THREAD_NULL)
2171 return ESRCH;
2172
2173 proc_set_task_policy(thread->task, thread, TASK_POLICY_ATTRIBUTE,
2174 TASK_POLICY_WORKQ_BG, TASK_POLICY_ENABLE);
2175
2176 return(0);
2177 }
2178
2179 /*
2180 * remove internal backgrounding for workqueue threads
2181 * does NOT go find sockets created while BG and unbackground them
2182 */
2183 int
2184 proc_restore_workq_bgthreadpolicy(thread_t thread)
2185 {
2186 if (thread == THREAD_NULL)
2187 return ESRCH;
2188
2189 proc_set_task_policy(thread->task, thread, TASK_POLICY_ATTRIBUTE,
2190 TASK_POLICY_WORKQ_BG, TASK_POLICY_DISABLE);
2191
2192 return(0);
2193 }
2194
2195 /* here for temporary compatibility */
2196 int
2197 proc_setthread_saved_importance(__unused thread_t thread, __unused int importance)
2198 {
2199 return(0);
2200 }
2201
2202 /*
2203 * Set an override on the thread which is consulted with a
2204 * higher priority than the task/thread policy. This should
2205 * only be set for temporary grants until the thread
2206 * returns to the userspace boundary
2207 *
2208 * We use atomic operations to swap in the override, with
2209 * the assumption that the thread itself can
2210 * read the override and clear it on return to userspace.
2211 *
2212 * No locking is performed, since it is acceptable to see
2213 * a stale override for one loop through throttle_lowpri_io().
2214 * However a thread reference must be held on the thread.
2215 */
2216
2217 void set_thread_iotier_override(thread_t thread, int policy)
2218 {
2219 int current_override;
2220
2221 /* Let most aggressive I/O policy win until user boundary */
2222 do {
2223 current_override = thread->iotier_override;
2224
2225 if (current_override != THROTTLE_LEVEL_NONE)
2226 policy = MIN(current_override, policy);
2227
2228 if (current_override == policy) {
2229 /* no effective change */
2230 return;
2231 }
2232 } while (!OSCompareAndSwap(current_override, policy, &thread->iotier_override));
2233
2234 /*
2235 * Since the thread may be currently throttled,
2236 * re-evaluate tiers and potentially break out
2237 * of an msleep
2238 */
2239 rethrottle_thread(thread->uthread);
2240 }
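
/*
 * Illustrative usage sketch (hypothetical caller, not part of the original
 * file): the userspace-spinlock path described above could boost a lock owner
 * that is holding up a higher-tier thread, relying on the owner to clear the
 * override when it returns to userspace:
 *
 *	set_thread_iotier_override(owner_thread, THROTTLE_LEVEL_TIER0);
 *
 * 'owner_thread' is a placeholder name. Because only the most aggressive
 * (numerically lowest) tier wins, repeated calls from racing waiters are
 * harmless.
 */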
2241
2242 /*
2243 * Userspace synchronization routines (like pthread mutexes, pthread reader-writer locks,
2244 * semaphores, dispatch_sync) may result in priority inversions where a higher priority
2245 * (i.e. scheduler priority, I/O tier, QoS tier) is waiting on a resource owned by a lower
2246 * priority thread. In these cases, we attempt to propagate the priority token, as long
2247 * as the subsystem informs us of the relationships between the threads. The userspace
2248 * synchronization subsystem should maintain the information of owner->resource and
2249 * resource->waiters itself.
2250 */
2251
2252 /*
2253 * This helper canonicalizes the resource/resource_type given the current qos_override_mode
2254 * in effect. Note that wildcards (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD) may need
2255 * to be handled specially in the future, but for now it's fine to slam
2256 * *resource to USER_ADDR_NULL even if it was previously a wildcard.
2257 */
2258 static void _canonicalize_resource_and_type(user_addr_t *resource, int *resource_type) {
2259 if (qos_override_mode == QOS_OVERRIDE_MODE_OVERHANG_PEAK || qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2260 /* Map all input resource/type to a single one */
2261 *resource = USER_ADDR_NULL;
2262 *resource_type = THREAD_QOS_OVERRIDE_TYPE_UNKNOWN;
2263 } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE) {
2264 /* no transform */
2265 } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_IGNORE_DISPATCH) {
2266 /* Map all dispatch overrides to a single one, to avoid memory overhead */
2267 if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE) {
2268 *resource = USER_ADDR_NULL;
2269 }
2270 } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE) {
2271 /* Map all mutex overrides to a single one, to avoid memory overhead */
2272 if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX) {
2273 *resource = USER_ADDR_NULL;
2274 }
2275 }
2276 }
2277
2278 /* This helper routine finds an existing override if known. Locking should be done by caller */
2279 static struct thread_qos_override *_find_qos_override(thread_t thread, user_addr_t resource, int resource_type) {
2280 struct thread_qos_override *override;
2281
2282 override = thread->overrides;
2283 while (override) {
2284 if (override->override_resource == resource &&
2285 override->override_resource_type == resource_type) {
2286 return override;
2287 }
2288
2289 override = override->override_next;
2290 }
2291
2292 return NULL;
2293 }
2294
2295 static void _find_and_decrement_qos_override(thread_t thread, user_addr_t resource, int resource_type, boolean_t reset, struct thread_qos_override **free_override_list) {
2296 struct thread_qos_override *override, *override_prev;
2297
2298 override_prev = NULL;
2299 override = thread->overrides;
2300 while (override) {
2301 struct thread_qos_override *override_next = override->override_next;
2302
2303 if ((THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD == resource || override->override_resource == resource) &&
2304 override->override_resource_type == resource_type) {
2305 if (reset) {
2306 override->override_contended_resource_count = 0;
2307 } else {
2308 override->override_contended_resource_count--;
2309 }
2310
2311 if (override->override_contended_resource_count == 0) {
2312 if (override_prev == NULL) {
2313 thread->overrides = override_next;
2314 } else {
2315 override_prev->override_next = override_next;
2316 }
2317
2318 /* Add to out-param for later zfree */
2319 override->override_next = *free_override_list;
2320 *free_override_list = override;
2321 } else {
2322 override_prev = override;
2323 }
2324
2325 if (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD != resource) {
2326 return;
2327 }
2328 } else {
2329 override_prev = override;
2330 }
2331
2332 override = override_next;
2333 }
2334 }
2335
2336 /* This helper recalculates the current requested override using the policy selected at boot */
2337 static int _calculate_requested_qos_override(thread_t thread)
2338 {
2339 if (qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2340 return THREAD_QOS_UNSPECIFIED;
2341 }
2342
2343 /* iterate over all overrides and calculate MAX */
2344 struct thread_qos_override *override;
2345 int qos_override = THREAD_QOS_UNSPECIFIED;
2346
2347 override = thread->overrides;
2348 while (override) {
2349 if (qos_override_mode != QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_IGNORE_DISPATCH ||
2350 override->override_resource_type != THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE) {
2351 qos_override = MAX(qos_override, override->override_qos);
2352 }
2353
2354 override = override->override_next;
2355 }
2356
2357 return qos_override;
2358 }
2359
2360 boolean_t proc_thread_qos_add_override(task_t task, thread_t thread, uint64_t tid, int override_qos, boolean_t first_override_for_resource, user_addr_t resource, int resource_type)
2361 {
2362 thread_t self = current_thread();
2363 struct task_pend_token pend_token = {};
2364
2365 /* XXX move to thread mutex when thread policy does */
2366 task_lock(task);
2367
2368 /*
2369 * If thread is passed, it is assumed to be most accurate, since the caller must have an explicit (or implicit) reference
2370 * to the thread
2371 */
2372
2373 if (thread != THREAD_NULL) {
2374 assert(task == thread->task);
2375 } else {
2376 if (tid == self->thread_id) {
2377 thread = self;
2378 } else {
2379 thread = task_findtid(task, tid);
2380
2381 if (thread == THREAD_NULL) {
2382 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_NONE,
2383 tid, 0, 0xdead, 0, 0);
2384 task_unlock(task);
2385 return FALSE;
2386 }
2387 }
2388 }
2389
2390 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_START,
2391 thread_tid(thread), override_qos, first_override_for_resource ? 1 : 0, 0, 0);
2392
2393 DTRACE_BOOST5(qos_add_override_pre, uint64_t, tid, uint64_t, thread->requested_policy.thrp_qos,
2394 uint64_t, thread->effective_policy.thep_qos, int, override_qos, boolean_t, first_override_for_resource);
2395
2396 struct task_requested_policy requested = thread->requested_policy;
2397 struct thread_qos_override *override;
2398 struct thread_qos_override *deferred_free_override = NULL;
2399 int new_qos_override, prev_qos_override;
2400 int new_effective_qos;
2401 boolean_t has_thread_reference = FALSE;
2402
2403 _canonicalize_resource_and_type(&resource, &resource_type);
2404
2405 if (first_override_for_resource) {
2406 override = _find_qos_override(thread, resource, resource_type);
2407 if (override) {
2408 override->override_contended_resource_count++;
2409 } else {
2410 struct thread_qos_override *override_new;
2411
2412 /* We need to allocate a new object. Drop the task lock and recheck afterwards in case someone else added the override */
2413 thread_reference(thread);
2414 has_thread_reference = TRUE;
2415 task_unlock(task);
2416 override_new = zalloc(thread_qos_override_zone);
2417 task_lock(task);
2418
2419 override = _find_qos_override(thread, resource, resource_type);
2420 if (override) {
2421 /* Someone else already allocated while the task lock was dropped */
2422 deferred_free_override = override_new;
2423 override->override_contended_resource_count++;
2424 } else {
2425 override = override_new;
2426 override->override_next = thread->overrides;
2427 override->override_contended_resource_count = 1 /* since first_override_for_resource was TRUE */;
2428 override->override_resource = resource;
2429 override->override_resource_type = resource_type;
2430 override->override_qos = THREAD_QOS_UNSPECIFIED;
2431 thread->overrides = override;
2432 }
2433 }
2434 } else {
2435 override = _find_qos_override(thread, resource, resource_type);
2436 }
2437
2438 if (override) {
2439 if (override->override_qos == THREAD_QOS_UNSPECIFIED)
2440 override->override_qos = override_qos;
2441 else
2442 override->override_qos = MAX(override->override_qos, override_qos);
2443 }
2444
2445 /* Determine how to combine the various overrides into a single current requested override */
2446 prev_qos_override = requested.thrp_qos_override;
2447 new_qos_override = _calculate_requested_qos_override(thread);
2448
2449 if (new_qos_override != prev_qos_override) {
2450 requested.thrp_qos_override = new_qos_override;
2451
2452 thread->requested_policy = requested;
2453
2454 task_policy_update_locked(task, thread, &pend_token);
2455
2456 if (!has_thread_reference) {
2457 thread_reference(thread);
2458 }
2459
2460 task_unlock(task);
2461
2462 task_policy_update_complete_unlocked(task, thread, &pend_token);
2463
2464 new_effective_qos = thread->effective_policy.thep_qos;
2465
2466 thread_deallocate(thread);
2467 } else {
2468 new_effective_qos = thread->effective_policy.thep_qos;
2469
2470 task_unlock(task);
2471
2472 if (has_thread_reference) {
2473 thread_deallocate(thread);
2474 }
2475 }
2476
2477 if (deferred_free_override) {
2478 zfree(thread_qos_override_zone, deferred_free_override);
2479 }
2480
2481 DTRACE_BOOST3(qos_add_override_post, int, prev_qos_override, int, new_qos_override,
2482 int, new_effective_qos);
2483
2484 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_END,
2485 new_qos_override, resource, resource_type, 0, 0);
2486
2487 return TRUE;
2488 }
2489
2490
2491 static boolean_t _proc_thread_qos_remove_override_internal(task_t task, thread_t thread, uint64_t tid, user_addr_t resource, int resource_type, boolean_t reset)
2492 {
2493 thread_t self = current_thread();
2494 struct task_pend_token pend_token = {};
2495
2496 /* XXX move to thread mutex when thread policy does */
2497 task_lock(task);
2498
2499 /*
2500 * If thread is passed, it is assumed to be most accurate, since the caller must have an explicit (or implicit) reference
2501 * to the thread
2502 */
2503 if (thread != THREAD_NULL) {
2504 assert(task == thread->task);
2505 } else {
2506 if (tid == self->thread_id) {
2507 thread = self;
2508 } else {
2509 thread = task_findtid(task, tid);
2510
2511 if (thread == THREAD_NULL) {
2512 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_NONE,
2513 tid, 0, 0xdead, 0, 0);
2514 task_unlock(task);
2515 return FALSE;
2516 }
2517 }
2518 }
2519
2520 struct task_requested_policy requested = thread->requested_policy;
2521 struct thread_qos_override *deferred_free_override_list = NULL;
2522 int new_qos_override, prev_qos_override;
2523
2524 _canonicalize_resource_and_type(&resource, &resource_type);
2525
2526 _find_and_decrement_qos_override(thread, resource, resource_type, reset, &deferred_free_override_list);
2527
2528 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_START,
2529 thread_tid(thread), resource, reset, 0, 0);
2530
2531 /* Determine how to combine the various overrides into a single current requested override */
2532 prev_qos_override = requested.thrp_qos_override;
2533 new_qos_override = _calculate_requested_qos_override(thread);
2534
2535 if (new_qos_override != prev_qos_override) {
2536 requested.thrp_qos_override = new_qos_override;
2537
2538 thread->requested_policy = requested;
2539
2540 task_policy_update_locked(task, thread, &pend_token);
2541
2542 thread_reference(thread);
2543
2544 task_unlock(task);
2545
2546 task_policy_update_complete_unlocked(task, thread, &pend_token);
2547
2548 thread_deallocate(thread);
2549 } else {
2550 task_unlock(task);
2551 }
2552
2553 while (deferred_free_override_list) {
2554 struct thread_qos_override *override_next = deferred_free_override_list->override_next;
2555
2556 zfree(thread_qos_override_zone, deferred_free_override_list);
2557 deferred_free_override_list = override_next;
2558 }
2559
2560 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_END,
2561 0, 0, 0, 0, 0);
2562
2563 return TRUE;
2564 }
2565
2566 boolean_t proc_thread_qos_remove_override(task_t task, thread_t thread, uint64_t tid, user_addr_t resource, int resource_type)
2567 {
2568 return _proc_thread_qos_remove_override_internal(task, thread, tid, resource, resource_type, FALSE);
2569
2570 }
2571
2572 boolean_t proc_thread_qos_reset_override(task_t task, thread_t thread, uint64_t tid, user_addr_t resource, int resource_type)
2573 {
2574 return _proc_thread_qos_remove_override_internal(task, thread, tid, resource, resource_type, TRUE);
2575 }
2576
2577 /* Deallocate before thread termination */
2578 void proc_thread_qos_deallocate(thread_t thread)
2579 {
2580 task_t task = thread->task;
2581 struct thread_qos_override *override;
2582
2583 /* XXX move to thread mutex when thread policy does */
2584 task_lock(task);
2585 override = thread->overrides;
2586 thread->overrides = NULL; /* task policy re-evaluation needed? */
2587 thread->requested_policy.thrp_qos_override = THREAD_QOS_UNSPECIFIED;
2588 task_unlock(task);
2589
2590 while (override) {
2591 struct thread_qos_override *override_next = override->override_next;
2592
2593 zfree(thread_qos_override_zone, override);
2594 override = override_next;
2595 }
2596 }
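
/*
 * Illustrative usage sketch (hypothetical caller, not part of the original
 * file): a userspace synchronization subsystem that tracks owner->resource
 * and resource->waiters relationships would bracket a contended wait roughly
 * like this:
 *
 *	// waiter at 'waiter_qos' blocks on 'mutex_addr' owned by 'owner_tid'
 *	proc_thread_qos_add_override(task, THREAD_NULL, owner_tid, waiter_qos,
 *	        first_waiter, mutex_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
 *	...
 *	// owner unlocks; drop one contention count for that resource
 *	proc_thread_qos_remove_override(task, THREAD_NULL, owner_tid,
 *	        mutex_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
 *
 * 'owner_tid', 'waiter_qos', 'first_waiter', and 'mutex_addr' are placeholder
 * names; the real bookkeeping is maintained by the userspace synchronization
 * subsystem, as noted in the comment block above these routines.
 */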
2597
2598 /* TODO: remove this variable when interactive daemon audit period is over */
2599 extern boolean_t ipc_importance_interactive_receiver;
2600
2601 /*
2602 * Called at process exec to initialize the apptype, qos clamp, and qos seed of a process
2603 *
2604 * TODO: Make this function more table-driven instead of ad-hoc
2605 */
2606 void
2607 proc_set_task_spawnpolicy(task_t task, int apptype, int qos_clamp, int role,
2608 ipc_port_t * portwatch_ports, int portwatch_count)
2609 {
2610 struct task_pend_token pend_token = {};
2611
2612 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2613 (IMPORTANCE_CODE(IMP_TASK_APPTYPE, apptype)) | DBG_FUNC_START,
2614 task_pid(task), trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL),
2615 apptype, 0);
2616
2617 switch (apptype) {
2618 case TASK_APPTYPE_APP_TAL:
2619 case TASK_APPTYPE_APP_DEFAULT:
2620 /* Apps become donors via the 'live-donor' flag instead of the static donor flag */
2621 task_importance_mark_donor(task, FALSE);
2622 task_importance_mark_live_donor(task, TRUE);
2623 task_importance_mark_receiver(task, FALSE);
2624 /* Apps are de-nap receivers on desktop for suppression behaviors */
2625 task_importance_mark_denap_receiver(task, TRUE);
2626 break;
2627
2628 case TASK_APPTYPE_DAEMON_INTERACTIVE:
2629 task_importance_mark_donor(task, TRUE);
2630 task_importance_mark_live_donor(task, FALSE);
2631
2632 /*
2633 * A boot arg controls whether interactive daemons are importance receivers.
2634 * Normally, they are not. But for testing their behavior as an adaptive
2635 * daemon, the boot-arg can be set.
2636 *
2637 * TODO: remove this when the interactive daemon audit period is over.
2638 */
2639 task_importance_mark_receiver(task, /* FALSE */ ipc_importance_interactive_receiver);
2640 task_importance_mark_denap_receiver(task, FALSE);
2641 break;
2642
2643 case TASK_APPTYPE_DAEMON_STANDARD:
2644 task_importance_mark_donor(task, TRUE);
2645 task_importance_mark_live_donor(task, FALSE);
2646 task_importance_mark_receiver(task, FALSE);
2647 task_importance_mark_denap_receiver(task, FALSE);
2648 break;
2649
2650 case TASK_APPTYPE_DAEMON_ADAPTIVE:
2651 task_importance_mark_donor(task, FALSE);
2652 task_importance_mark_live_donor(task, FALSE);
2653 task_importance_mark_receiver(task, TRUE);
2654 task_importance_mark_denap_receiver(task, FALSE);
2655 break;
2656
2657 case TASK_APPTYPE_DAEMON_BACKGROUND:
2658 task_importance_mark_donor(task, FALSE);
2659 task_importance_mark_live_donor(task, FALSE);
2660 task_importance_mark_receiver(task, FALSE);
2661 task_importance_mark_denap_receiver(task, FALSE);
2662 break;
2663
2664 case TASK_APPTYPE_NONE:
2665 break;
2666 }
2667
2668 if (portwatch_ports != NULL && apptype == TASK_APPTYPE_DAEMON_ADAPTIVE) {
2669 int portwatch_boosts = 0;
2670
2671 for (int i = 0; i < portwatch_count; i++) {
2672 ipc_port_t port = NULL;
2673
2674 if ((port = portwatch_ports[i]) != NULL) {
2675 int boost = 0;
2676 task_add_importance_watchport(task, port, &boost);
2677 portwatch_boosts += boost;
2678 }
2679 }
2680
2681 if (portwatch_boosts > 0) {
2682 task_importance_hold_internal_assertion(task, portwatch_boosts);
2683 }
2684 }
2685
2686 task_lock(task);
2687
2688 if (apptype == TASK_APPTYPE_APP_TAL) {
2689 /* TAL starts off enabled by default */
2690 task->requested_policy.t_tal_enabled = 1;
2691 }
2692
2693 if (apptype != TASK_APPTYPE_NONE) {
2694 task->requested_policy.t_apptype = apptype;
2695 }
2696
2697 if (role != TASK_UNSPECIFIED) {
2698 task->requested_policy.t_role = role;
2699 }
2700
2701 if (qos_clamp != THREAD_QOS_UNSPECIFIED) {
2702 task->requested_policy.t_qos_clamp = qos_clamp;
2703 }
2704
2705 task_policy_update_locked(task, THREAD_NULL, &pend_token);
2706
2707 task_unlock(task);
2708
2709 /* Ensure the donor bit is updated to be in sync with the new live donor status */
2710 pend_token.tpt_update_live_donor = 1;
2711
2712 task_policy_update_complete_unlocked(task, THREAD_NULL, &pend_token);
2713
2714 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2715 (IMPORTANCE_CODE(IMP_TASK_APPTYPE, apptype)) | DBG_FUNC_END,
2716 task_pid(task), trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL),
2717 task_is_importance_receiver(task), 0);
2718 }
2719
2720 extern task_t bsd_init_task;
2721
2722 /* Set up the primordial thread's QoS */
2723 void
2724 task_set_main_thread_qos(task_t task, thread_t main_thread) {
2725 struct task_pend_token pend_token = {};
2726
2727 assert(main_thread->task == task);
2728
2729 task_lock(task);
2730
2731 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2732 (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_START,
2733 task_pid(task), trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL),
2734 main_thread->requested_policy.thrp_qos, 0);
2735
2736 int primordial_qos = THREAD_QOS_UNSPECIFIED;
2737
2738 int qos_clamp = task->requested_policy.t_qos_clamp;
2739
2740 if (task == bsd_init_task) {
2741 /* PID 1 gets a special case */
2742 primordial_qos = THREAD_QOS_USER_INITIATED;
2743 }
2744
2745 switch (task->requested_policy.t_apptype) {
2746 case TASK_APPTYPE_APP_TAL:
2747 case TASK_APPTYPE_APP_DEFAULT:
2748 primordial_qos = THREAD_QOS_USER_INTERACTIVE;
2749 break;
2750
2751 case TASK_APPTYPE_DAEMON_INTERACTIVE:
2752 case TASK_APPTYPE_DAEMON_STANDARD:
2753 case TASK_APPTYPE_DAEMON_ADAPTIVE:
2754 primordial_qos = THREAD_QOS_LEGACY;
2755 break;
2756
2757 case TASK_APPTYPE_DAEMON_BACKGROUND:
2758 primordial_qos = THREAD_QOS_BACKGROUND;
2759 break;
2760 }
2761
2762 if (qos_clamp != THREAD_QOS_UNSPECIFIED) {
2763 if (primordial_qos != THREAD_QOS_UNSPECIFIED) {
2764 primordial_qos = MIN(qos_clamp, primordial_qos);
2765 } else {
2766 primordial_qos = qos_clamp;
2767 }
2768 }
2769
2770 main_thread->requested_policy.thrp_qos = primordial_qos;
2771
2772 task_policy_update_locked(task, main_thread, &pend_token);
2773
2774 task_unlock(task);
2775
2776 task_policy_update_complete_unlocked(task, main_thread, &pend_token);
2777
2778 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2779 (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_END,
2780 task_pid(task), trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL),
2781 primordial_qos, 0);
2782 }
2783
2784 /* for process_policy to check before attempting to set */
2785 boolean_t
2786 proc_task_is_tal(task_t task)
2787 {
2788 return (task->requested_policy.t_apptype == TASK_APPTYPE_APP_TAL) ? TRUE : FALSE;
2789 }
2790
2791 int
2792 task_get_apptype(task_t task)
2793 {
2794 return task->requested_policy.t_apptype;
2795 }
2796
2797 /* for telemetry */
2798 integer_t
2799 task_grab_latency_qos(task_t task)
2800 {
2801 return qos_latency_policy_package(proc_get_effective_task_policy(task, TASK_POLICY_LATENCY_QOS));
2802 }
2803
2804 /* update the darwin background action state in the flags field for libproc */
2805 int
2806 proc_get_darwinbgstate(task_t task, uint32_t * flagsp)
2807 {
2808 if (task->requested_policy.ext_darwinbg)
2809 *flagsp |= PROC_FLAG_EXT_DARWINBG;
2810
2811 if (task->requested_policy.int_darwinbg)
2812 *flagsp |= PROC_FLAG_DARWINBG;
2813
2814
2815 if (task->requested_policy.t_apptype == TASK_APPTYPE_APP_DEFAULT ||
2816 task->requested_policy.t_apptype == TASK_APPTYPE_APP_TAL)
2817 *flagsp |= PROC_FLAG_APPLICATION;
2818
2819 if (task->requested_policy.t_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE)
2820 *flagsp |= PROC_FLAG_ADAPTIVE;
2821
2822 if (task->requested_policy.t_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE && task->requested_policy.t_boosted == 1)
2823 *flagsp |= PROC_FLAG_ADAPTIVE_IMPORTANT;
2824
2825 if (task_is_importance_donor(task))
2826 *flagsp |= PROC_FLAG_IMPORTANCE_DONOR;
2827
2828 if (task->effective_policy.t_sup_active)
2829 *flagsp |= PROC_FLAG_SUPPRESSED;
2830
2831 return(0);
2832 }
2833
2834 /* All per-thread state is in the first 32-bits of the bitfield */
2835 void
2836 proc_get_thread_policy(thread_t thread, thread_policy_state_t info)
2837 {
2838 task_t task = thread->task;
2839 task_lock(task);
2840 info->requested = (integer_t)task_requested_bitfield(task, thread);
2841 info->effective = (integer_t)task_effective_bitfield(task, thread);
2842 info->pending = 0;
2843 task_unlock(task);
2844 }
2845
2846 /*
2847 * Tracepoint data... Reading the tracepoint data can be somewhat complicated.
2848 * The current scheme packs as much data into a single tracepoint as it can.
2849 *
2850 * Each task/thread requested/effective structure is 64 bits in size. Any
2851 * given tracepoint will emit either requested or effective data, but not both.
2852 *
2853 * A tracepoint may emit any of task, thread, or task & thread data.
2854 *
2855 * The type of data emitted varies with pointer size. Where possible, both
2856 * task and thread data are emitted. In LP32 systems, the first and second
2857 * halves of either the task or thread data are emitted.
2858 *
2859 * The code uses uintptr_t array indexes instead of high/low to avoid
2860 * confusion WRT big vs little endian.
2861 *
2862 * The truth table for the tracepoint data functions is below, and has the
2863 * following invariants:
2864 *
2865 * 1) task and thread are uintptr_t*
2866 * 2) task may never be NULL
2867 *
2868 *
2869 * LP32 LP64
2870 * trequested_0(task, NULL) task[0] task[0]
2871 * trequested_1(task, NULL) task[1] NULL
2872 * trequested_0(task, thread) thread[0] task[0]
2873 * trequested_1(task, thread) thread[1] thread[0]
2874 *
2875 * Basically, you get a full task or thread on LP32, and both on LP64.
2876 *
2877 * The uintptr_t munging here is squicky enough to deserve a comment.
2878 *
2879 * The variables we are accessing are laid out in memory like this:
2880 *
2881 * [ LP64 uintptr_t 0 ]
2882 * [ LP32 uintptr_t 0 ] [ LP32 uintptr_t 1 ]
2883 *
2884 * 1 2 3 4 5 6 7 8
2885 *
2886 */
2887
2888 static uintptr_t
2889 trequested_0(task_t task, thread_t thread)
2890 {
2891 assert(task);
2892 _Static_assert(sizeof(struct task_requested_policy) == sizeof(uint64_t), "size invariant violated");
2893 _Static_assert(sizeof(task->requested_policy) == sizeof(thread->requested_policy), "size invariant violated");
2894
2895 uintptr_t* raw = (uintptr_t*)((thread == THREAD_NULL) ? &task->requested_policy : &thread->requested_policy);
2896 return raw[0];
2897 }
2898
2899 static uintptr_t
2900 trequested_1(task_t task, thread_t thread)
2901 {
2902 assert(task);
2903 _Static_assert(sizeof(struct task_requested_policy) == sizeof(uint64_t), "size invariant violated");
2904 _Static_assert(sizeof(task->requested_policy) == sizeof(thread->requested_policy), "size invariant violated");
2905
2906 #if defined __LP64__
2907 return (thread == NULL) ? 0 : *(uintptr_t*)&thread->requested_policy;
2908 #else
2909 uintptr_t* raw = (uintptr_t*)((thread == THREAD_NULL) ? &task->requested_policy : &thread->requested_policy);
2910 return raw[1];
2911 #endif
2912 }
2913
2914 static uintptr_t
2915 teffective_0(task_t task, thread_t thread)
2916 {
2917 assert(task);
2918 _Static_assert(sizeof(struct task_effective_policy) == sizeof(uint64_t), "size invariant violated");
2919 _Static_assert(sizeof(task->effective_policy) == sizeof(thread->effective_policy), "size invariant violated");
2920
2921 uintptr_t* raw = (uintptr_t*)((thread == THREAD_NULL) ? &task->effective_policy : &thread->effective_policy);
2922 return raw[0];
2923 }
2924
2925 static uintptr_t
2926 teffective_1(task_t task, thread_t thread)
2927 {
2928 assert(task);
2929 _Static_assert(sizeof(struct task_effective_policy) == sizeof(uint64_t), "size invariant violated");
2930 _Static_assert(sizeof(task->effective_policy) == sizeof(thread->effective_policy), "size invariant violated");
2931
2932 #if defined __LP64__
2933 return (thread == NULL) ? 0 : *(uintptr_t*)&thread->effective_policy;
2934 #else
2935 uintptr_t* raw = (uintptr_t*)((thread == THREAD_NULL) ? &task->effective_policy : &thread->effective_policy);
2936 return raw[1];
2937 #endif
2938 }
2939
2940 /* dump pending for tracepoint */
2941 static uint32_t tpending(task_pend_token_t pend_token) { return *(uint32_t*)(void*)(pend_token); }
2942
2943 uint64_t
2944 task_requested_bitfield(task_t task, thread_t thread)
2945 {
2946 uint64_t bits = 0;
2947 struct task_requested_policy requested =
2948 (thread == THREAD_NULL) ? task->requested_policy : thread->requested_policy;
2949
2950 bits |= (requested.int_darwinbg ? POLICY_REQ_INT_DARWIN_BG : 0);
2951 bits |= (requested.ext_darwinbg ? POLICY_REQ_EXT_DARWIN_BG : 0);
2952 bits |= (requested.int_iotier ? (((uint64_t)requested.int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0);
2953 bits |= (requested.ext_iotier ? (((uint64_t)requested.ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0);
2954 bits |= (requested.int_iopassive ? POLICY_REQ_INT_PASSIVE_IO : 0);
2955 bits |= (requested.ext_iopassive ? POLICY_REQ_EXT_PASSIVE_IO : 0);
2956 bits |= (requested.bg_iotier ? (((uint64_t)requested.bg_iotier) << POLICY_REQ_BG_IOTIER_SHIFT) : 0);
2957 bits |= (requested.terminated ? POLICY_REQ_TERMINATED : 0);
2958
2959 bits |= (requested.th_pidbind_bg ? POLICY_REQ_PIDBIND_BG : 0);
2960 bits |= (requested.th_workq_bg ? POLICY_REQ_WORKQ_BG : 0);
2961
2962 if (thread != THREAD_NULL) {
2963 bits |= (requested.thrp_qos ? (((uint64_t)requested.thrp_qos) << POLICY_REQ_TH_QOS_SHIFT) : 0);
2964 bits |= (requested.thrp_qos_override ? (((uint64_t)requested.thrp_qos_override) << POLICY_REQ_TH_QOS_OVER_SHIFT) : 0);
2965 }
2966
2967 bits |= (requested.t_boosted ? POLICY_REQ_BOOSTED : 0);
2968 bits |= (requested.t_tal_enabled ? POLICY_REQ_TAL_ENABLED : 0);
2969 bits |= (requested.t_apptype ? (((uint64_t)requested.t_apptype) << POLICY_REQ_APPTYPE_SHIFT) : 0);
2970 bits |= (requested.t_role ? (((uint64_t)requested.t_role) << POLICY_REQ_ROLE_SHIFT) : 0);
2971
2972 bits |= (requested.t_sup_active ? POLICY_REQ_SUP_ACTIVE : 0);
2973 bits |= (requested.t_sup_lowpri_cpu ? POLICY_REQ_SUP_LOWPRI_CPU : 0);
2974 bits |= (requested.t_sup_cpu ? POLICY_REQ_SUP_CPU : 0);
2975 bits |= (requested.t_sup_timer ? (((uint64_t)requested.t_sup_timer) << POLICY_REQ_SUP_TIMER_THROTTLE_SHIFT) : 0);
2976 bits |= (requested.t_sup_throughput ? (((uint64_t)requested.t_sup_throughput) << POLICY_REQ_SUP_THROUGHPUT_SHIFT) : 0);
2977 bits |= (requested.t_sup_disk ? POLICY_REQ_SUP_DISK_THROTTLE : 0);
2978 bits |= (requested.t_sup_cpu_limit ? POLICY_REQ_SUP_CPU_LIMIT : 0);
2979 bits |= (requested.t_sup_suspend ? POLICY_REQ_SUP_SUSPEND : 0);
2980 bits |= (requested.t_sup_bg_sockets ? POLICY_REQ_SUP_BG_SOCKETS : 0);
2981 bits |= (requested.t_base_latency_qos ? (((uint64_t)requested.t_base_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0);
2982 bits |= (requested.t_over_latency_qos ? (((uint64_t)requested.t_over_latency_qos) << POLICY_REQ_OVER_LATENCY_QOS_SHIFT) : 0);
2983 bits |= (requested.t_base_through_qos ? (((uint64_t)requested.t_base_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0);
2984 bits |= (requested.t_over_through_qos ? (((uint64_t)requested.t_over_through_qos) << POLICY_REQ_OVER_THROUGH_QOS_SHIFT) : 0);
2985 bits |= (requested.t_sfi_managed ? POLICY_REQ_SFI_MANAGED : 0);
2986 bits |= (requested.t_qos_clamp ? (((uint64_t)requested.t_qos_clamp) << POLICY_REQ_QOS_CLAMP_SHIFT) : 0);
2987
2988 return bits;
2989 }
2990
2991 uint64_t
2992 task_effective_bitfield(task_t task, thread_t thread)
2993 {
2994 uint64_t bits = 0;
2995 struct task_effective_policy effective =
2996 (thread == THREAD_NULL) ? task->effective_policy : thread->effective_policy;
2997
2998 bits |= (effective.io_tier ? (((uint64_t)effective.io_tier) << POLICY_EFF_IO_TIER_SHIFT) : 0);
2999 bits |= (effective.io_passive ? POLICY_EFF_IO_PASSIVE : 0);
3000 bits |= (effective.darwinbg ? POLICY_EFF_DARWIN_BG : 0);
3001 bits |= (effective.lowpri_cpu ? POLICY_EFF_LOWPRI_CPU : 0);
3002 bits |= (effective.terminated ? POLICY_EFF_TERMINATED : 0);
3003 bits |= (effective.all_sockets_bg ? POLICY_EFF_ALL_SOCKETS_BG : 0);
3004 bits |= (effective.new_sockets_bg ? POLICY_EFF_NEW_SOCKETS_BG : 0);
3005 bits |= (effective.bg_iotier ? (((uint64_t)effective.bg_iotier) << POLICY_EFF_BG_IOTIER_SHIFT) : 0);
3006 bits |= (effective.qos_ui_is_urgent ? POLICY_EFF_QOS_UI_IS_URGENT : 0);
3007
3008 if (thread != THREAD_NULL)
3009 bits |= (effective.thep_qos ? (((uint64_t)effective.thep_qos) << POLICY_EFF_TH_QOS_SHIFT) : 0);
3010
3011 bits |= (effective.t_tal_engaged ? POLICY_EFF_TAL_ENGAGED : 0);
3012 bits |= (effective.t_suspended ? POLICY_EFF_SUSPENDED : 0);
3013 bits |= (effective.t_watchers_bg ? POLICY_EFF_WATCHERS_BG : 0);
3014 bits |= (effective.t_sup_active ? POLICY_EFF_SUP_ACTIVE : 0);
3015 bits |= (effective.t_suppressed_cpu ? POLICY_EFF_SUP_CPU : 0);
3016 bits |= (effective.t_role ? (((uint64_t)effective.t_role) << POLICY_EFF_ROLE_SHIFT) : 0);
3017 bits |= (effective.t_latency_qos ? (((uint64_t)effective.t_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0);
3018 bits |= (effective.t_through_qos ? (((uint64_t)effective.t_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0);
3019 bits |= (effective.t_sfi_managed ? POLICY_EFF_SFI_MANAGED : 0);
3020 bits |= (effective.t_qos_ceiling ? (((uint64_t)effective.t_qos_ceiling) << POLICY_EFF_QOS_CEILING_SHIFT) : 0);
3021
3022 return bits;
3023 }
3024
3025
3026 /*
3027 * Resource usage and CPU related routines
3028 */
3029
3030 int
3031 proc_get_task_ruse_cpu(task_t task, uint32_t *policyp, uint8_t *percentagep, uint64_t *intervalp, uint64_t *deadlinep)
3032 {
3033
3034 int error = 0;
3035 int scope;
3036
3037 task_lock(task);
3038
3039
3040 error = task_get_cpuusage(task, percentagep, intervalp, deadlinep, &scope);
3041 task_unlock(task);
3042
3043 /*
3044 * Reverse-map from CPU resource limit scopes back to policies (see comment below).
3045 */
3046 if (scope == TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
3047 *policyp = TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC;
3048 } else if (scope == TASK_RUSECPU_FLAGS_PROC_LIMIT) {
3049 *policyp = TASK_POLICY_RESOURCE_ATTRIBUTE_THROTTLE;
3050 } else if (scope == TASK_RUSECPU_FLAGS_DEADLINE) {
3051 *policyp = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE;
3052 }
3053
3054 return(error);
3055 }
3056
3057 /*
3058 * Configure the default CPU usage monitor parameters.
3059 *
3060 * For tasks which have this mechanism activated: if any thread in the
3061 * process consumes more CPU than this, an EXC_RESOURCE exception will be generated.
3062 */
3063 void
3064 proc_init_cpumon_params(void)
3065 {
3066 /*
3067 * The max CPU percentage can be configured via the boot-args and
3068 * a key in the device tree. The boot-args are honored first, then the
3069 * device tree.
3070 */
3071 if (!PE_parse_boot_argn("max_cpumon_percentage", &proc_max_cpumon_percentage,
3072 sizeof (proc_max_cpumon_percentage)))
3073 {
3074 uint64_t max_percentage = 0ULL;
3075
3076 if (!PE_get_default("kern.max_cpumon_percentage", &max_percentage,
3077 sizeof(max_percentage)))
3078 {
3079 max_percentage = DEFAULT_CPUMON_PERCENTAGE;
3080 }
3081
3082 assert(max_percentage <= UINT8_MAX);
3083 proc_max_cpumon_percentage = (uint8_t) max_percentage;
3084 }
3085
3086 if (proc_max_cpumon_percentage > 100) {
3087 proc_max_cpumon_percentage = 100;
3088 }
3089
3090 /*
3091 * The interval should be specified in seconds.
3092 *
3093 * Like the max CPU percentage, the max CPU interval can be configured
3094 * via boot-args and the device tree.
3095 */
3096 if (!PE_parse_boot_argn("max_cpumon_interval", &proc_max_cpumon_interval,
3097 sizeof (proc_max_cpumon_interval)))
3098 {
3099 if (!PE_get_default("kern.max_cpumon_interval", &proc_max_cpumon_interval,
3100 sizeof(proc_max_cpumon_interval)))
3101 {
3102 proc_max_cpumon_interval = DEFAULT_CPUMON_INTERVAL;
3103 }
3104 }
3105
3106 proc_max_cpumon_interval *= NSEC_PER_SEC;
3107
3108 /* TEMPORARY boot arg to control App suppression */
3109 PE_parse_boot_argn("task_policy_suppression_disable",
3110 &task_policy_suppression_disable,
3111 sizeof(task_policy_suppression_disable));
3112 }
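
/*
 * Illustrative configuration sketch (values are hypothetical): the defaults
 * established above can be overridden at boot, e.g. with boot-args such as
 *
 *	max_cpumon_percentage=50 max_cpumon_interval=180
 *
 * which would cap the per-thread CPU monitor at 50% of a CPU over a
 * 180-second interval for tasks that request TASK_POLICY_CPUMON_DEFAULTS.
 */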
3113
3114 /*
3115 * Currently supported configurations for CPU limits.
3116 *
3117 * Policy | Deadline-based CPU limit | Percentage-based CPU limit
3118 * -------------------------------------+--------------------------+------------------------------
3119 * PROC_POLICY_RSRCACT_THROTTLE | ENOTSUP | Task-wide scope only
3120 * PROC_POLICY_RSRCACT_SUSPEND | Task-wide scope only | ENOTSUP
3121 * PROC_POLICY_RSRCACT_TERMINATE | Task-wide scope only | ENOTSUP
3122 * PROC_POLICY_RSRCACT_NOTIFY_KQ | Task-wide scope only | ENOTSUP
3123 * PROC_POLICY_RSRCACT_NOTIFY_EXC | ENOTSUP | Per-thread scope only
3124 *
3125 * A deadline-based CPU limit is actually a simple wallclock timer - the requested action is performed
3126 * after the specified amount of wallclock time has elapsed.
3127 *
3128 * A percentage-based CPU limit performs the requested action after the specified amount of actual CPU time
3129 * has been consumed -- regardless of how much wallclock time has elapsed -- by either the task as an
3130 * aggregate entity (so-called "Task-wide" or "Proc-wide" scope, whereby the CPU time consumed by all threads
3131 * in the task are added together), or by any one thread in the task (so-called "per-thread" scope).
3132 *
3133 * We support either deadline != 0 OR percentage != 0, but not both. The original intention in having them
3134 * share an API was to use actual CPU time as the basis of the deadline-based limit (as in: perform an action
3135 * after I have used some amount of CPU time; this is different than the recurring percentage/interval model)
3136 * but the potential consumer of the API at the time was insisting on wallclock time instead.
3137 *
3138 * Currently, requesting notification via an exception is the only way to get per-thread scope for a
3139 * CPU limit. All other types of notifications force task-wide scope for the limit.
3140 */
3141 int
3142 proc_set_task_ruse_cpu(task_t task, uint32_t policy, uint8_t percentage, uint64_t interval, uint64_t deadline,
3143 int cpumon_entitled)
3144 {
3145 int error = 0;
3146 int scope;
3147
3148 /*
3149 * Enforce the matrix of supported configurations for policy, percentage, and deadline.
3150 */
3151 switch (policy) {
3152 // If no policy is explicitly given, the default is to throttle.
3153 case TASK_POLICY_RESOURCE_ATTRIBUTE_NONE:
3154 case TASK_POLICY_RESOURCE_ATTRIBUTE_THROTTLE:
3155 if (deadline != 0)
3156 return (ENOTSUP);
3157 scope = TASK_RUSECPU_FLAGS_PROC_LIMIT;
3158 break;
3159 case TASK_POLICY_RESOURCE_ATTRIBUTE_SUSPEND:
3160 case TASK_POLICY_RESOURCE_ATTRIBUTE_TERMINATE:
3161 case TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_KQ:
3162 if (percentage != 0)
3163 return (ENOTSUP);
3164 scope = TASK_RUSECPU_FLAGS_DEADLINE;
3165 break;
3166 case TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC:
3167 if (deadline != 0)
3168 return (ENOTSUP);
3169 scope = TASK_RUSECPU_FLAGS_PERTHR_LIMIT;
3170 #ifdef CONFIG_NOMONITORS
3171 return (error);
3172 #endif /* CONFIG_NOMONITORS */
3173 break;
3174 default:
3175 return (EINVAL);
3176 }
3177
3178 task_lock(task);
3179 if (task != current_task()) {
3180 task->policy_ru_cpu_ext = policy;
3181 } else {
3182 task->policy_ru_cpu = policy;
3183 }
3184 error = task_set_cpuusage(task, percentage, interval, deadline, scope, cpumon_entitled);
3185 task_unlock(task);
3186 return(error);
3187 }
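
/*
 * Illustrative usage sketch (hypothetical caller, not part of the original
 * file): following the configuration matrix above, a percentage-based
 * proc-wide throttle and an exception-based per-thread limit would look like:
 *
 *	// throttle the whole task once it uses 50% CPU per 2-second refill
 *	proc_set_task_ruse_cpu(task, TASK_POLICY_RESOURCE_ATTRIBUTE_THROTTLE,
 *	        50, 2 * NSEC_PER_SEC, 0, 0);
 *
 *	// raise EXC_RESOURCE if any one thread exceeds the system default limit
 *	proc_set_task_ruse_cpu(task, TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC,
 *	        TASK_POLICY_CPUMON_DEFAULTS, 0, 0, 0);
 *
 * Passing a non-zero deadline with either policy returns ENOTSUP, as the
 * matrix indicates.
 */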
3188
3189 int
3190 proc_clear_task_ruse_cpu(task_t task, int cpumon_entitled)
3191 {
3192 int error = 0;
3193 int action;
3194 void * bsdinfo = NULL;
3195
3196 task_lock(task);
3197 if (task != current_task()) {
3198 task->policy_ru_cpu_ext = TASK_POLICY_RESOURCE_ATTRIBUTE_DEFAULT;
3199 } else {
3200 task->policy_ru_cpu = TASK_POLICY_RESOURCE_ATTRIBUTE_DEFAULT;
3201 }
3202
3203 error = task_clear_cpuusage_locked(task, cpumon_entitled);
3204 if (error != 0)
3205 goto out;
3206
3207 action = task->applied_ru_cpu;
3208 if (task->applied_ru_cpu_ext != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
3209 /* reset action */
3210 task->applied_ru_cpu_ext = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE;
3211 }
3212 if (action != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
3213 bsdinfo = task->bsd_info;
3214 task_unlock(task);
3215 proc_restore_resource_actions(bsdinfo, TASK_POLICY_CPU_RESOURCE_USAGE, action);
3216 goto out1;
3217 }
3218
3219 out:
3220 task_unlock(task);
3221 out1:
3222 return(error);
3223
3224 }
3225
3226 /* used to apply resource limit related actions */
3227 static int
3228 task_apply_resource_actions(task_t task, int type)
3229 {
3230 int action = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE;
3231 void * bsdinfo = NULL;
3232
3233 switch (type) {
3234 case TASK_POLICY_CPU_RESOURCE_USAGE:
3235 break;
3236 case TASK_POLICY_WIREDMEM_RESOURCE_USAGE:
3237 case TASK_POLICY_VIRTUALMEM_RESOURCE_USAGE:
3238 case TASK_POLICY_DISK_RESOURCE_USAGE:
3239 case TASK_POLICY_NETWORK_RESOURCE_USAGE:
3240 case TASK_POLICY_POWER_RESOURCE_USAGE:
3241 return(0);
3242
3243 default:
3244 return(1);
3245 };
3246
3247 /* only cpu actions for now */
3248 task_lock(task);
3249
3250 if (task->applied_ru_cpu_ext == TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
3251 /* apply action */
3252 task->applied_ru_cpu_ext = task->policy_ru_cpu_ext;
3253 action = task->applied_ru_cpu_ext;
3254 } else {
3255 action = task->applied_ru_cpu_ext;
3256 }
3257
3258 if (action != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
3259 bsdinfo = task->bsd_info;
3260 task_unlock(task);
3261 proc_apply_resource_actions(bsdinfo, TASK_POLICY_CPU_RESOURCE_USAGE, action);
3262 } else
3263 task_unlock(task);
3264
3265 return(0);
3266 }
3267
3268 /*
3269 * XXX This API is somewhat broken; we support multiple simultaneous CPU limits, but the get/set API
3270 * only allows for one at a time. This means that if there is a per-thread limit active, the other
3271 * "scopes" will not be accessible via this API. We could change it to pass in the scope of interest
3272 * to the caller, and prefer that, but there's no need for that at the moment.
3273 */
3274 int
3275 task_get_cpuusage(task_t task, uint8_t *percentagep, uint64_t *intervalp, uint64_t *deadlinep, int *scope)
3276 {
3277 *percentagep = 0;
3278 *intervalp = 0;
3279 *deadlinep = 0;
3280
3281 if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) != 0) {
3282 *scope = TASK_RUSECPU_FLAGS_PERTHR_LIMIT;
3283 *percentagep = task->rusage_cpu_perthr_percentage;
3284 *intervalp = task->rusage_cpu_perthr_interval;
3285 } else if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PROC_LIMIT) != 0) {
3286 *scope = TASK_RUSECPU_FLAGS_PROC_LIMIT;
3287 *percentagep = task->rusage_cpu_percentage;
3288 *intervalp = task->rusage_cpu_interval;
3289 } else if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_DEADLINE) != 0) {
3290 *scope = TASK_RUSECPU_FLAGS_DEADLINE;
3291 *deadlinep = task->rusage_cpu_deadline;
3292 } else {
3293 *scope = 0;
3294 }
3295
3296 return(0);
3297 }
3298
3299 /*
3300 * Disable the CPU usage monitor for the task. Return value indicates
3301 * if the mechanism was actually enabled.
3302 */
3303 int
3304 task_disable_cpumon(task_t task) {
3305 thread_t thread;
3306
3307 task_lock_assert_owned(task);
3308
3309 if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) == 0) {
3310 return (KERN_INVALID_ARGUMENT);
3311 }
3312
3313 #if CONFIG_TELEMETRY
3314 /*
3315 * Disable task-wide telemetry if it was ever enabled by the CPU usage
3316 * monitor's warning zone.
3317 */
3318 telemetry_task_ctl_locked(task, TF_CPUMON_WARNING, 0);
3319 #endif
3320
3321 /*
3322 * Disable the monitor for the task, and propagate that change to each thread.
3323 */
3324 task->rusage_cpu_flags &= ~(TASK_RUSECPU_FLAGS_PERTHR_LIMIT | TASK_RUSECPU_FLAGS_FATAL_CPUMON);
3325 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3326 set_astledger(thread);
3327 }
3328 task->rusage_cpu_perthr_percentage = 0;
3329 task->rusage_cpu_perthr_interval = 0;
3330
3331 return (KERN_SUCCESS);
3332 }
3333
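/*
 * Editor's sketch (hypothetical): task_disable_cpumon() must be called with the
 * task lock held, so an unlocked convenience wrapper would look like this.
 */
static int
example_disable_cpumon(task_t task)
{
	int ret;

	task_lock(task);
	ret = task_disable_cpumon(task);	/* KERN_INVALID_ARGUMENT if it was never armed */
	task_unlock(task);

	return ret;
}
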
3334 int
3335 task_set_cpuusage(task_t task, uint8_t percentage, uint64_t interval, uint64_t deadline, int scope, int cpumon_entitled)
3336 {
3337 thread_t thread;
3338 uint64_t abstime = 0;
3339 uint64_t limittime = 0;
3340
3341 lck_mtx_assert(&task->lock, LCK_MTX_ASSERT_OWNED);
3342
3343 /* By default, refill once per second */
3344 if (interval == 0)
3345 interval = NSEC_PER_SEC;
3346
3347 if (percentage != 0) {
3348 if (scope == TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
3349 boolean_t warn = FALSE;
3350
3351 /*
3352 * A per-thread CPU limit on a task generates an exception
3353 * (LEDGER_ACTION_EXCEPTION) if any one thread in the task
3354 * exceeds the limit.
3355 */
3356
3357 if (percentage == TASK_POLICY_CPUMON_DISABLE) {
3358 if (cpumon_entitled) {
3359 task_disable_cpumon(task);
3360 return (0);
3361 }
3362
3363 /*
3364 * This task wishes to disable the CPU usage monitor, but it's
3365 * missing the required entitlement:
3366 * com.apple.private.kernel.override-cpumon
3367 *
3368 * Instead, treat this as a request to reset its params
3369 * back to the defaults.
3370 */
3371 warn = TRUE;
3372 percentage = TASK_POLICY_CPUMON_DEFAULTS;
3373 }
3374
3375 if (percentage == TASK_POLICY_CPUMON_DEFAULTS) {
3376 percentage = proc_max_cpumon_percentage;
3377 interval = proc_max_cpumon_interval;
3378 }
3379
3380 if (percentage > 100) {
3381 percentage = 100;
3382 }
3383
3384 /*
3385 * Passing in an interval of -1 means either:
3386 * - Leave the interval as-is, if there's already a per-thread
3387 * limit configured
3388 * - Use the system default.
3389 */
3390 if (interval == -1ULL) {
3391 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
3392 interval = task->rusage_cpu_perthr_interval;
3393 } else {
3394 interval = proc_max_cpumon_interval;
3395 }
3396 }
3397
3398 /*
3399 * Enforce global caps on CPU usage monitor here if the process is not
3400 * entitled to escape the global caps.
3401 */
3402 if ((percentage > proc_max_cpumon_percentage) && (cpumon_entitled == 0)) {
3403 warn = TRUE;
3404 percentage = proc_max_cpumon_percentage;
3405 }
3406
3407 if ((interval > proc_max_cpumon_interval) && (cpumon_entitled == 0)) {
3408 warn = TRUE;
3409 interval = proc_max_cpumon_interval;
3410 }
3411
3412 if (warn) {
3413 int pid = 0;
3414 const char *procname = "unknown";
3415
3416 #ifdef MACH_BSD
3417 pid = proc_selfpid();
3418 if (current_task()->bsd_info != NULL) {
3419 procname = proc_name_address(current_task()->bsd_info);
3420 }
3421 #endif
3422
3423 printf("process %s[%d] denied attempt to escape CPU monitor"
3424 " (missing required entitlement).\n", procname, pid);
3425 }
3426
3427 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_PERTHR_LIMIT;
3428 task->rusage_cpu_perthr_percentage = percentage;
3429 task->rusage_cpu_perthr_interval = interval;
3430 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3431 set_astledger(thread);
3432 }
3433 } else if (scope == TASK_RUSECPU_FLAGS_PROC_LIMIT) {
3434 /*
3435 * Currently, a proc-wide CPU limit always blocks if the limit is
3436 * exceeded (LEDGER_ACTION_BLOCK).
3437 */
3438 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_PROC_LIMIT;
3439 task->rusage_cpu_percentage = percentage;
3440 task->rusage_cpu_interval = interval;
3441
3442 limittime = (interval * percentage) / 100;
3443 nanoseconds_to_absolutetime(limittime, &abstime);
3444
3445 ledger_set_limit(task->ledger, task_ledgers.cpu_time, abstime, 0);
3446 ledger_set_period(task->ledger, task_ledgers.cpu_time, interval);
3447 ledger_set_action(task->ledger, task_ledgers.cpu_time, LEDGER_ACTION_BLOCK);
3448 }
3449 }
3450
3451 if (deadline != 0) {
3452 assert(scope == TASK_RUSECPU_FLAGS_DEADLINE);
3453
3454 /* if already in use, cancel and wait for it to clean out */
3455 if (task->rusage_cpu_callt != NULL) {
3456 task_unlock(task);
3457 thread_call_cancel_wait(task->rusage_cpu_callt);
3458 task_lock(task);
3459 }
3460 if (task->rusage_cpu_callt == NULL) {
3461 task->rusage_cpu_callt = thread_call_allocate_with_priority(task_action_cpuusage, (thread_call_param_t)task, THREAD_CALL_PRIORITY_KERNEL);
3462 }
3463 /* setup callout */
3464 if (task->rusage_cpu_callt != 0) {
3465 uint64_t save_abstime = 0;
3466
3467 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_DEADLINE;
3468 task->rusage_cpu_deadline = deadline;
3469
3470 nanoseconds_to_absolutetime(deadline, &abstime);
3471 save_abstime = abstime;
3472 clock_absolutetime_interval_to_deadline(save_abstime, &abstime);
3473 thread_call_enter_delayed(task->rusage_cpu_callt, abstime);
3474 }
3475 }
3476
3477 return(0);
3478 }
3479
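/*
 * Editor's sketch (hypothetical): arming the per-thread CPU usage monitor with
 * the system defaults.  TASK_POLICY_CPUMON_DEFAULTS makes the routine substitute
 * proc_max_cpumon_percentage/proc_max_cpumon_interval, so the interval argument
 * is irrelevant here.  'entitled' is nonzero when the caller holds
 * com.apple.private.kernel.override-cpumon.
 */
static int
example_arm_perthread_cpumon(task_t task, int entitled)
{
	int err;

	task_lock(task);
	err = task_set_cpuusage(task, TASK_POLICY_CPUMON_DEFAULTS, 0, 0,
	        TASK_RUSECPU_FLAGS_PERTHR_LIMIT, entitled);
	task_unlock(task);

	return err;
}
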
3480 int
3481 task_clear_cpuusage(task_t task, int cpumon_entitled)
3482 {
3483 int retval = 0;
3484
3485 task_lock(task);
3486 retval = task_clear_cpuusage_locked(task, cpumon_entitled);
3487 task_unlock(task);
3488
3489 return(retval);
3490 }
3491
3492 int
3493 task_clear_cpuusage_locked(task_t task, int cpumon_entitled)
3494 {
3495 thread_call_t savecallt;
3496
3497 /* cancel percentage handling if set */
3498 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PROC_LIMIT) {
3499 task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_PROC_LIMIT;
3500 ledger_set_limit(task->ledger, task_ledgers.cpu_time, LEDGER_LIMIT_INFINITY, 0);
3501 task->rusage_cpu_percentage = 0;
3502 task->rusage_cpu_interval = 0;
3503 }
3504
3505 /*
3506 * Disable the CPU usage monitor.
3507 */
3508 if (cpumon_entitled) {
3509 task_disable_cpumon(task);
3510 }
3511
3512 /* cancel deadline handling if set */
3513 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_DEADLINE) {
3514 task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_DEADLINE;
3515 if (task->rusage_cpu_callt != 0) {
3516 savecallt = task->rusage_cpu_callt;
3517 task->rusage_cpu_callt = NULL;
3518 task->rusage_cpu_deadline = 0;
3519 task_unlock(task);
3520 thread_call_cancel_wait(savecallt);
3521 thread_call_free(savecallt);
3522 task_lock(task);
3523 }
3524 }
3525 return(0);
3526 }
3527
3528 /* called by ledger unit to enforce action due to resource usage criteria being met */
3529 void
3530 task_action_cpuusage(thread_call_param_t param0, __unused thread_call_param_t param1)
3531 {
3532 task_t task = (task_t)param0;
3533 (void)task_apply_resource_actions(task, TASK_POLICY_CPU_RESOURCE_USAGE);
3534 return;
3535 }
3536
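/*
 * Editor's sketch (hypothetical): the deadline flow end to end.  Arming a
 * deadline in task_set_cpuusage() schedules task->rusage_cpu_callt; when the
 * callout fires, task_action_cpuusage() above runs and task_apply_resource_actions()
 * hands the previously configured CPU action to the BSD side via
 * proc_apply_resource_actions().
 */
static int
example_arm_cpu_deadline(task_t task, uint64_t deadline_ns)
{
	int err;

	task_lock(task);
	err = task_set_cpuusage(task, 0, 0, deadline_ns,
	        TASK_RUSECPU_FLAGS_DEADLINE, 0);
	task_unlock(task);

	return err;
}
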
3537
3538 /*
3539 * Routines for taskwatch and pidbind
3540 */
3541
3542
3543 /*
3544 * Routines for importance donation/inheritance/boosting
3545 */
3546
3547 static void
3548 task_importance_update_live_donor(task_t target_task)
3549 {
3550 #if IMPORTANCE_INHERITANCE
3551
3552 ipc_importance_task_t task_imp;
3553
3554 task_imp = ipc_importance_for_task(target_task, FALSE);
3555 if (IIT_NULL != task_imp) {
3556 ipc_importance_task_update_live_donor(task_imp);
3557 ipc_importance_task_release(task_imp);
3558 }
3559 #endif /* IMPORTANCE_INHERITANCE */
3560 }
3561
3562 void
3563 task_importance_mark_donor(task_t task, boolean_t donating)
3564 {
3565 #if IMPORTANCE_INHERITANCE
3566 ipc_importance_task_t task_imp;
3567
3568 task_imp = ipc_importance_for_task(task, FALSE);
3569 if (IIT_NULL != task_imp) {
3570 ipc_importance_task_mark_donor(task_imp, donating);
3571 ipc_importance_task_release(task_imp);
3572 }
3573 #endif /* IMPORTANCE_INHERITANCE */
3574 }
3575
3576 void
3577 task_importance_mark_live_donor(task_t task, boolean_t live_donating)
3578 {
3579 #if IMPORTANCE_INHERITANCE
3580 ipc_importance_task_t task_imp;
3581
3582 task_imp = ipc_importance_for_task(task, FALSE);
3583 if (IIT_NULL != task_imp) {
3584 ipc_importance_task_mark_live_donor(task_imp, live_donating);
3585 ipc_importance_task_release(task_imp);
3586 }
3587 #endif /* IMPORTANCE_INHERITANCE */
3588 }
3589
3590 void
3591 task_importance_mark_receiver(task_t task, boolean_t receiving)
3592 {
3593 #if IMPORTANCE_INHERITANCE
3594 ipc_importance_task_t task_imp;
3595
3596 task_imp = ipc_importance_for_task(task, FALSE);
3597 if (IIT_NULL != task_imp) {
3598 ipc_importance_task_mark_receiver(task_imp, receiving);
3599 ipc_importance_task_release(task_imp);
3600 }
3601 #endif /* IMPORTANCE_INHERITANCE */
3602 }
3603
3604 void
3605 task_importance_mark_denap_receiver(task_t task, boolean_t denap)
3606 {
3607 #if IMPORTANCE_INHERITANCE
3608 ipc_importance_task_t task_imp;
3609
3610 task_imp = ipc_importance_for_task(task, FALSE);
3611 if (IIT_NULL != task_imp) {
3612 ipc_importance_task_mark_denap_receiver(task_imp, denap);
3613 ipc_importance_task_release(task_imp);
3614 }
3615 #endif /* IMPORTANCE_INHERITANCE */
3616 }
3617
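/*
 * Editor's sketch (hypothetical, illustrative only): one plausible pairing of
 * the mark routines above at task configuration time -- a task that adopts
 * boosts is marked as a receiver, while a task that originates them is marked
 * as a donor.
 */
static void
example_mark_importance_roles(task_t receiver_task, task_t donor_task)
{
	/* the receiver will accept watchport/message boosts */
	task_importance_mark_receiver(receiver_task, TRUE);

	/* the donor's messages will carry importance to receivers */
	task_importance_mark_donor(donor_task, TRUE);
	task_importance_mark_live_donor(donor_task, TRUE);
}
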
3618 void
3619 task_importance_reset(__imp_only task_t task)
3620 {
3621 #if IMPORTANCE_INHERITANCE
3622 ipc_importance_task_t task_imp;
3623
3624 /* TODO: Lower importance downstream before disconnect */
3625 task_imp = task->task_imp_base;
3626 ipc_importance_reset(task_imp, FALSE);
3627 task_importance_update_live_donor(task);
3628 #endif /* IMPORTANCE_INHERITANCE */
3629 }
3630
3631 #if IMPORTANCE_INHERITANCE
3632
3633 /*
3634 * Sets the task boost bit to the provided value. Does NOT run the update function.
3635 *
3636 * Task lock must be held.
3637 */
3638 void
3639 task_set_boost_locked(task_t task, boolean_t boost_active)
3640 {
3641 #if IMPORTANCE_DEBUG
3642 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_BOOST, (boost_active ? IMP_BOOSTED : IMP_UNBOOSTED)) | DBG_FUNC_START),
3643 proc_selfpid(), task_pid(task), trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL), 0);
3644 #endif
3645
3646 task->requested_policy.t_boosted = boost_active;
3647
3648 #if IMPORTANCE_DEBUG
3649 if (boost_active == TRUE){
3650 DTRACE_BOOST2(boost, task_t, task, int, task_pid(task));
3651 } else {
3652 DTRACE_BOOST2(unboost, task_t, task, int, task_pid(task));
3653 }
3654 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_BOOST, (boost_active ? IMP_BOOSTED : IMP_UNBOOSTED)) | DBG_FUNC_END),
3655 proc_selfpid(), task_pid(task),
3656 trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL), 0);
3657 #endif
3658 }
3659
3660 /*
3661 * Sets the task boost bit to the provided value and applies the update.
3662 *
3663 * Task lock must be held. Must call update complete after unlocking the task.
3664 */
3665 void
3666 task_update_boost_locked(task_t task, boolean_t boost_active, task_pend_token_t pend_token)
3667 {
3668 task_set_boost_locked(task, boost_active);
3669
3670 task_policy_update_locked(task, THREAD_NULL, pend_token);
3671 }
3672
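/*
 * Editor's sketch (hypothetical caller): the lock/update/complete protocol
 * described above, assuming the task_pend_token and
 * task_policy_update_complete_unlocked() pattern used by the importance code
 * that drives this routine.
 */
static void
example_boost_task(task_t task, boolean_t boost_active)
{
	struct task_pend_token pend_token = {};

	task_lock(task);
	task_update_boost_locked(task, boost_active, &pend_token);
	task_unlock(task);

	/* deferred policy side effects must run outside the task lock */
	task_policy_update_complete_unlocked(task, THREAD_NULL, &pend_token);
}
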
3673 /*
3674 * Check if this task should donate importance.
3675 *
3676 * May be called without taking the task lock. In that case, donor status can change
3677 * so you must check only once for each donation event.
3678 */
3679 boolean_t
3680 task_is_importance_donor(task_t task)
3681 {
3682 if (task->task_imp_base == IIT_NULL)
3683 return FALSE;
3684 return ipc_importance_task_is_donor(task->task_imp_base);
3685 }
3686
3687 /*
3688 * Query the status of the task's donor mark.
3689 */
3690 boolean_t
3691 task_is_marked_importance_donor(task_t task)
3692 {
3693 if (task->task_imp_base == IIT_NULL)
3694 return FALSE;
3695 return ipc_importance_task_is_marked_donor(task->task_imp_base);
3696 }
3697
3698 /*
3699 * Query the status of the task's live donor and donor mark.
3700 */
3701 boolean_t
3702 task_is_marked_live_importance_donor(task_t task)
3703 {
3704 if (task->task_imp_base == IIT_NULL)
3705 return FALSE;
3706 return ipc_importance_task_is_marked_live_donor(task->task_imp_base);
3707 }
3708
3709
3710 /*
3711 * This routine may be called without holding task lock
3712 * since the value of imp_receiver can never be unset.
3713 */
3714 boolean_t
3715 task_is_importance_receiver(task_t task)
3716 {
3717 if (task->task_imp_base == IIT_NULL)
3718 return FALSE;
3719 return ipc_importance_task_is_marked_receiver(task->task_imp_base);
3720 }
3721
3722 /*
3723 * Query the task's receiver mark.
3724 */
3725 boolean_t
3726 task_is_marked_importance_receiver(task_t task)
3727 {
3728 if (task->task_imp_base == IIT_NULL)
3729 return FALSE;
3730 return ipc_importance_task_is_marked_receiver(task->task_imp_base);
3731 }
3732
3733 /*
3734 * This routine may be called without holding task lock
3735 * since the value of de-nap receiver can never be unset.
3736 */
3737 boolean_t
3738 task_is_importance_denap_receiver(task_t task)
3739 {
3740 if (task->task_imp_base == IIT_NULL)
3741 return FALSE;
3742 return ipc_importance_task_is_denap_receiver(task->task_imp_base);
3743 }
3744
3745 /*
3746 * Query the task's de-nap receiver mark.
3747 */
3748 boolean_t
3749 task_is_marked_importance_denap_receiver(task_t task)
3750 {
3751 if (task->task_imp_base == IIT_NULL)
3752 return FALSE;
3753 return ipc_importance_task_is_marked_denap_receiver(task->task_imp_base);
3754 }
3755
3756 /*
3757 * This routine may be called without holding task lock
3758 * since the value of imp_receiver can never be unset.
3759 */
3760 boolean_t
3761 task_is_importance_receiver_type(task_t task)
3762 {
3763 if (task->task_imp_base == IIT_NULL)
3764 return FALSE;
3765 return (task_is_importance_receiver(task) ||
3766 task_is_importance_denap_receiver(task));
3767 }
3768
3769 /*
3770 * External importance assertions are managed by the process in userspace
3771 * Internal importance assertions are the responsibility of the kernel
3772 * Assertions are changed from internal to external via task_importance_externalize_assertion
3773 */
3774
3775 int
3776 task_importance_hold_watchport_assertion(task_t target_task, uint32_t count)
3777 {
3778 ipc_importance_task_t task_imp;
3779 kern_return_t ret;
3780
3781 /* must already have set up an importance */
3782 task_imp = target_task->task_imp_base;
3783 assert(IIT_NULL != task_imp);
3784
3785 ret = ipc_importance_task_hold_internal_assertion(task_imp, count);
3786 return (KERN_SUCCESS != ret) ? ENOTSUP : 0;
3787 }
3788
3789 int
3790 task_importance_hold_internal_assertion(task_t target_task, uint32_t count)
3791 {
3792 ipc_importance_task_t task_imp;
3793 kern_return_t ret;
3794
3795 /* may be first time, so allow for possible importance setup */
3796 task_imp = ipc_importance_for_task(target_task, FALSE);
3797 if (IIT_NULL == task_imp) {
3798 return EOVERFLOW;
3799 }
3800 ret = ipc_importance_task_hold_internal_assertion(task_imp, count);
3801 ipc_importance_task_release(task_imp);
3802
3803 return (KERN_SUCCESS != ret) ? ENOTSUP : 0;
3804 }
3805
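/*
 * Editor's sketch (hypothetical): holds and drops must balance.  The wrappers
 * in this group translate KERN_* failures into BSD errno values such as
 * EOVERFLOW (no importance structure) and ENOTSUP (the underlying IPC
 * importance call failed).
 */
static int
example_boost_briefly(task_t target)
{
	int err;

	err = task_importance_hold_internal_assertion(target, 1);
	if (err != 0) {
		return err;
	}

	/* ... perform work on behalf of the boosted task ... */

	return task_importance_drop_internal_assertion(target, 1);
}
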
3806 int
3807 task_importance_hold_file_lock_assertion(task_t target_task, uint32_t count)
3808 {
3809 ipc_importance_task_t task_imp;
3810 kern_return_t ret;
3811
3812 /* may be first time, so allow for possible importance setup */
3813 task_imp = ipc_importance_for_task(target_task, FALSE);
3814 if (IIT_NULL == task_imp) {
3815 return EOVERFLOW;
3816 }
3817 ret = ipc_importance_task_hold_file_lock_assertion(task_imp, count);
3818 ipc_importance_task_release(task_imp);
3819
3820 return (KERN_SUCCESS != ret) ? ENOTSUP : 0;
3821 }
3822
3823 int
3824 task_importance_hold_legacy_external_assertion(task_t target_task, uint32_t count)
3825 {
3826 ipc_importance_task_t task_imp;
3827 kern_return_t ret;
3828
3829 /* must already have set up an importance */
3830 task_imp = target_task->task_imp_base;
3831 if (IIT_NULL == task_imp) {
3832 return EOVERFLOW;
3833 }
3834 ret = ipc_importance_task_hold_legacy_external_assertion(task_imp, count);
3835 return (KERN_SUCCESS != ret) ? ENOTSUP : 0;
3836 }
3837
3838 int
3839 task_importance_drop_internal_assertion(task_t target_task, uint32_t count)
3840 {
3841 ipc_importance_task_t task_imp;
3842 kern_return_t ret;
3843
3844 /* must already have set up an importance */
3845 task_imp = target_task->task_imp_base;
3846 if (IIT_NULL == task_imp) {
3847 return EOVERFLOW;
3848 }
3849 ret = ipc_importance_task_drop_internal_assertion(target_task->task_imp_base, count);
3850 return (KERN_SUCCESS != ret) ? ENOTSUP : 0;
3851 }
3852
3853 int
3854 task_importance_drop_file_lock_assertion(task_t target_task, uint32_t count)
3855 {
3856 ipc_importance_task_t task_imp;
3857 kern_return_t ret;
3858
3859 /* must already have set up an importance */
3860 task_imp = target_task->task_imp_base;
3861 if (IIT_NULL == task_imp) {
3862 return EOVERFLOW;
3863 }
3864 ret = ipc_importance_task_drop_file_lock_assertion(target_task->task_imp_base, count);
3865 return (KERN_SUCCESS != ret) ? EOVERFLOW : 0;
3866 }
3867
3868 int
3869 task_importance_drop_legacy_external_assertion(task_t target_task, uint32_t count)
3870 {
3871 ipc_importance_task_t task_imp;
3872 kern_return_t ret;
3873
3874 /* must already have set up an importance */
3875 task_imp = target_task->task_imp_base;
3876 if (IIT_NULL == task_imp) {
3877 return EOVERFLOW;
3878 }
3879 ret = ipc_importance_task_drop_legacy_external_assertion(task_imp, count);
3880 return (KERN_SUCCESS != ret) ? EOVERFLOW : 0;
3881 }
3882
3883 static void
3884 task_add_importance_watchport(task_t task, mach_port_t port, int *boostp)
3885 {
3886 int boost = 0;
3887
3888 __impdebug_only int released_pid = 0;
3889 __impdebug_only int pid = task_pid(task);
3890
3891 ipc_importance_task_t release_imp_task = IIT_NULL;
3892
3893 if (IP_VALID(port) != 0) {
3894 ipc_importance_task_t new_imp_task = ipc_importance_for_task(task, FALSE);
3895
3896 ip_lock(port);
3897
3898 /*
3899 * The port must have been marked tempowner already.
3900 * This also filters out ports whose receive rights
3901 * are already enqueued in a message, as you can't
3902 * change the right's destination once it's already
3903 * on its way.
3904 */
3905 if (port->ip_tempowner != 0) {
3906 assert(port->ip_impdonation != 0);
3907
3908 boost = port->ip_impcount;
3909 if (IIT_NULL != port->ip_imp_task) {
3910 /*
3911 * if this port is already bound to a task,
3912 * release the task reference and drop any
3913 * watchport-forwarded boosts
3914 */
3915 release_imp_task = port->ip_imp_task;
3916 port->ip_imp_task = IIT_NULL;
3917 }
3918
3919 /* mark that the port is watching another task (reference held in port->ip_imp_task) */
3920 if (ipc_importance_task_is_marked_receiver(new_imp_task)) {
3921 port->ip_imp_task = new_imp_task;
3922 new_imp_task = IIT_NULL;
3923 }
3924 }
3925 ip_unlock(port);
3926
3927 if (IIT_NULL != new_imp_task) {
3928 ipc_importance_task_release(new_imp_task);
3929 }
3930
3931 if (IIT_NULL != release_imp_task) {
3932 if (boost > 0)
3933 ipc_importance_task_drop_internal_assertion(release_imp_task, boost);
3934
3935 // released_pid = task_pid(release_imp_task); /* TODO: Need ref-safe way to get pid */
3936 ipc_importance_task_release(release_imp_task);
3937 }
3938 #if IMPORTANCE_DEBUG
3939 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_WATCHPORT, 0)) | DBG_FUNC_NONE,
3940 proc_selfpid(), pid, boost, released_pid, 0);
3941 #endif /* IMPORTANCE_DEBUG */
3942 }
3943
3944 *boostp = boost;
3945 return;
3946 }
3947
3948 #endif /* IMPORTANCE_INHERITANCE */
3949
3950 /*
3951 * Routines for VM to query task importance
3952 */
3953
3954
3955 /*
3956 * Order to be considered while estimating importance
3957 * for low memory notification and purging purgeable memory.
3958 */
3959 #define TASK_IMPORTANCE_FOREGROUND 4
3960 #define TASK_IMPORTANCE_NOTDARWINBG 1
3961
3962
3963 /*
3964 * (Un)Mark the task as a privileged listener for memory notifications.
3965  * If marked, this task will be notified ahead of the bulk of all other
3966  * tasks when the system enters a pressure level of interest to this
3967  * task.
3968 */
3969 int
3970 task_low_mem_privileged_listener(task_t task, boolean_t new_value, boolean_t *old_value)
3971 {
3972 if (old_value != NULL) {
3973 *old_value = (boolean_t)task->low_mem_privileged_listener;
3974 } else {
3975 task_lock(task);
3976 task->low_mem_privileged_listener = (uint32_t)new_value;
3977 task_unlock(task);
3978 }
3979
3980 return 0;
3981 }
3982
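/*
 * Editor's sketch (hypothetical): the routine above is a combined get/set --
 * passing a non-NULL old_value makes it a pure query (new_value is ignored),
 * while passing NULL applies new_value under the task lock.
 */
static void
example_privileged_listener(task_t task)
{
	boolean_t was_listener = FALSE;

	/* query only: new_value is ignored when old_value is supplied */
	task_low_mem_privileged_listener(task, FALSE, &was_listener);

	if (!was_listener) {
		/* opt in to early memory-pressure notification */
		task_low_mem_privileged_listener(task, TRUE, NULL);
	}
}
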
3983 /*
3984  * Checks whether the task has already been notified.
3985 *
3986 * Condition: task lock should be held while calling this function.
3987 */
3988 boolean_t
3989 task_has_been_notified(task_t task, int pressurelevel)
3990 {
3991 if (task == NULL) {
3992 return FALSE;
3993 }
3994
3995 if (pressurelevel == kVMPressureWarning)
3996 return (task->low_mem_notified_warn ? TRUE : FALSE);
3997 else if (pressurelevel == kVMPressureCritical)
3998 return (task->low_mem_notified_critical ? TRUE : FALSE);
3999 else
4000 return TRUE;
4001 }
4002
4003
4004 /*
4005  * Checks whether the task has already been used for purging.
4006 *
4007 * Condition: task lock should be held while calling this function.
4008 */
4009 boolean_t
4010 task_used_for_purging(task_t task, int pressurelevel)
4011 {
4012 if (task == NULL) {
4013 return FALSE;
4014 }
4015
4016 if (pressurelevel == kVMPressureWarning)
4017 return (task->purged_memory_warn ? TRUE : FALSE);
4018 else if (pressurelevel == kVMPressureCritical)
4019 return (task->purged_memory_critical ? TRUE : FALSE);
4020 else
4021 return TRUE;
4022 }
4023
4024
4025 /*
4026 * Mark the task as notified with memory notification.
4027 *
4028 * Condition: task lock should be held while calling this function.
4029 */
4030 void
4031 task_mark_has_been_notified(task_t task, int pressurelevel)
4032 {
4033 if (task == NULL) {
4034 return;
4035 }
4036
4037 if (pressurelevel == kVMPressureWarning)
4038 task->low_mem_notified_warn = 1;
4039 else if (pressurelevel == kVMPressureCritical)
4040 task->low_mem_notified_critical = 1;
4041 }
4042
4043
4044 /*
4045 * Mark the task as purged.
4046 *
4047 * Condition: task lock should be held while calling this function.
4048 */
4049 void
4050 task_mark_used_for_purging(task_t task, int pressurelevel)
4051 {
4052 if (task == NULL) {
4053 return;
4054 }
4055
4056 if (pressurelevel == kVMPressureWarning)
4057 task->purged_memory_warn = 1;
4058 else if (pressurelevel == kVMPressureCritical)
4059 task->purged_memory_critical = 1;
4060 }
4061
4062
4063 /*
4064 * Mark the task eligible for low memory notification.
4065 *
4066 * Condition: task lock should be held while calling this function.
4067 */
4068 void
4069 task_clear_has_been_notified(task_t task, int pressurelevel)
4070 {
4071 if (task == NULL) {
4072 return;
4073 }
4074
4075 if (pressurelevel == kVMPressureWarning)
4076 task->low_mem_notified_warn = 0;
4077 else if (pressurelevel == kVMPressureCritical)
4078 task->low_mem_notified_critical = 0;
4079 }
4080
4081
4082 /*
4083 * Mark the task eligible for purging its purgeable memory.
4084 *
4085 * Condition: task lock should be held while calling this function.
4086 */
4087 void
4088 task_clear_used_for_purging(task_t task)
4089 {
4090 if (task == NULL) {
4091 return;
4092 }
4093
4094 task->purged_memory_warn = 0;
4095 task->purged_memory_critical = 0;
4096 }
4097
4098
4099 /*
4100 * Estimate task importance for purging its purgeable memory
4101 * and low memory notification.
4102 *
4103 * Importance is calculated in the following order of criteria:
4104 * -Task role : Background vs Foreground
4105 * -Boost status: Not boosted vs Boosted
4106 * -Darwin BG status.
4107 *
4108  * Returns: Estimated task importance. A less important task has a lower
4109  * estimated importance.
4110 */
4111 int
4112 task_importance_estimate(task_t task)
4113 {
4114 int task_importance = 0;
4115
4116 if (task == NULL) {
4117 return 0;
4118 }
4119
4120 if (proc_get_effective_task_policy(task, TASK_POLICY_ROLE) == TASK_FOREGROUND_APPLICATION)
4121 task_importance += TASK_IMPORTANCE_FOREGROUND;
4122
4123 if (proc_get_effective_task_policy(task, TASK_POLICY_DARWIN_BG) == 0)
4124 task_importance += TASK_IMPORTANCE_NOTDARWINBG;
4125
4126 return task_importance;
4127 }
4128
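/*
 * Editor's sketch (hypothetical): how the estimate above orders tasks.  A
 * foreground task that is not darwin-bg scores 4 + 1 = 5, a foreground
 * darwin-bg task scores 4, a background non-darwin-bg task scores 1, and a
 * background darwin-bg task scores 0, making it the preferred purge candidate.
 */
static boolean_t
example_prefer_for_purging(task_t a, task_t b)
{
	/* a lower estimate means less important, i.e. a better candidate for purging */
	return (task_importance_estimate(a) < task_importance_estimate(b)) ? TRUE : FALSE;
}
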