osfmk/kern/task_policy.c

   1 /*
   2  * Copyright (c) 2000-2016 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28
  29 #include <kern/policy_internal.h>
  30 #include <mach/task_policy.h>
  31
  32 #include <mach/mach_types.h>
  33 #include <mach/task_server.h>
  34
  35 #include <kern/host.h>                  /* host_priv_self()        */
  36 #include <mach/host_priv.h>             /* host_get_special_port() */
  37 #include <mach/host_special_ports.h>    /* RESOURCE_NOTIFY_PORT    */
  38 #include <kern/sched.h>
  39 #include <kern/task.h>
  40 #include <mach/thread_policy.h>
  41 #include <sys/errno.h>
  42 #include <sys/resource.h>
  43 #include <machine/limits.h>
  44 #include <kern/ledger.h>
  45 #include <kern/thread_call.h>
  46 #include <kern/sfi.h>
  47 #include <kern/coalition.h>
  48 #if CONFIG_TELEMETRY
  49 #include <kern/telemetry.h>
  50 #endif
  51 #if CONFIG_EMBEDDED
  52 #include <kern/kalloc.h>
  53 #include <sys/errno.h>
  54 #endif /* CONFIG_EMBEDDED */
  55
  56 #if IMPORTANCE_INHERITANCE
  57 #include <ipc/ipc_importance.h>
  58 #if IMPORTANCE_TRACE
  59 #include <mach/machine/sdt.h>
  60 #endif /* IMPORTANCE_TRACE */
  61 #endif /* IMPORTANCE_INHERITANCE */
  62
  63 #include <sys/kdebug.h>
  64
  65 /*
  66  *  Task Policy
  67  *
  68  *  This subsystem manages task and thread IO priority and backgrounding,
  69  *  as well as importance inheritance, process suppression, task QoS, and apptype.
  70  *  These properties have a surprising number of complex interactions, so they are
  71  *  centralized here in one state machine to simplify the implementation of those interactions.
  72  *
  73  *  Architecture:
  74  *  Threads and tasks have two policy fields: requested, effective.
  75  *  Requested represents the wishes of each interface that influences task policy.
  76  *  Effective represents the distillation of that policy into a set of behaviors.
  77  *
  78  *  Each thread making a modification in the policy system passes a 'pending' struct,
  79  *  which tracks updates that will be applied after dropping the policy engine lock.
  80  *
  81  *  Each interface that has an input into the task policy state machine controls a field in requested.
  82  *  If the interface has a getter, it returns what is in the field in requested, but that is
  83  *  not necessarily what is actually in effect.
  84  *
  85  *  All kernel subsystems that behave differently based on task policy call into
  86  *  the proc_get_effective_(task|thread)_policy functions, which return the decision of the task policy state machine
  87  *  for that subsystem by querying only the 'effective' field.
  88  *
  89  *  Policy change operations:
  90  *  Here are the steps to change a policy on a task or thread:
  91  *  1) Lock task
  92  *  2) Change requested field for the relevant policy
  93  *  3) Run a task policy update, which recalculates effective based on requested,
  94  *     then takes a diff between the old and new versions of requested and calls the relevant
  95  *     other subsystems to apply these changes, and updates the pending field.
  96  *  4) Unlock task
  97  *  5) Run task policy update complete, which looks at the pending field to update
  98  *     subsystems which cannot be touched while holding the task lock.
  99  *
 100  *  To add a new requested policy, add the field in the requested struct, the flavor in task.h,
 101  *  the setter and getter in proc_(set|get)_task_policy*,
 102  *  then set up the effects of that behavior in task_policy_update*. If the policy manifests
 103  *  itself as a distinct effective policy, add it to the effective struct and add it to the
 104  *  proc_get_effective_task_policy accessor.
 105  *
 106  *  Most policies are set via proc_set_task_policy, but policies that don't fit that interface
 107  *  roll their own lock/set/update/unlock/complete code inside this file.
 108  *
 109  *
 110  *  Suppression policy
 111  *
 112  *  These are a set of behaviors that can be requested for a task.  They currently have specific
 113  *  implied actions when they're enabled, but they may be made customizable in the future.
 114  *
 115  *  When the affected task is boosted, we temporarily disable the suppression behaviors
 116  *  so that the affected process has a chance to run so it can call the API to permanently
 117  *  disable the suppression behaviors.
 118  *
 119  *  Locking
 120  *
 121  *  Changing task policy on a task takes the task lock.
 122  *  Changing task policy on a thread takes the thread mutex.
 123  *  Task policy changes that affect threads will take each thread's mutex to update it if necessary.
 124  *
 125  *  Querying the effective policy does not take a lock, because callers
 126  *  may run in interrupt context or other places where locks are not OK.
 127  *
 128  *  This means that any notification of state change needs to be externally synchronized.
 129  *  We do this by idempotent callouts after the state has changed to ask
 130  *  other subsystems to update their view of the world.
 131  *
 132  * TODO: Move all cpu/wakes/io monitor code into a separate file
 133  * TODO: Move all importance code over to importance subsystem
 134  * TODO: Move all taskwatch code into a separate file
 135  * TODO: Move all VM importance code into a separate file
 136  */
 137
 138 /* Task policy related helper functions */
 139 static void proc_set_task_policy_locked(task_t task, int category, int flavor, int value, int value2);
 140
 141 static void task_policy_update_locked(task_t task, task_pend_token_t pend_token);
 142 static void task_policy_update_internal_locked(task_t task, boolean_t in_create, task_pend_token_t pend_token);
 143
 144 /* For attributes that have two scalars as input/output */
 145 static void proc_set_task_policy2(task_t task, int category, int flavor, int value1, int value2);
 146 static void proc_get_task_policy2(task_t task, int category, int flavor, int *value1, int *value2);
 147
 148 static boolean_t task_policy_update_coalition_focal_tasks(task_t task, int prev_role, int next_role, task_pend_token_t pend_token);
 149
 150 static uint64_t task_requested_bitfield(task_t task);
 151 static uint64_t task_effective_bitfield(task_t task);
 152
 153 /* Convenience functions for munging a policy bitfield into a tracepoint */
 154 static uintptr_t trequested_0(task_t task);
 155 static uintptr_t trequested_1(task_t task);
 156 static uintptr_t teffective_0(task_t task);
 157 static uintptr_t teffective_1(task_t task);
 158
 159 /* CPU limits helper functions */
 160 static int task_set_cpuusage(task_t task, uint8_t percentage, uint64_t interval, uint64_t deadline, int scope, int entitled);
 161 static int task_get_cpuusage(task_t task, uint8_t *percentagep, uint64_t *intervalp, uint64_t *deadlinep, int *scope);
 162 static int task_enable_cpumon_locked(task_t task);
 163 static int task_disable_cpumon(task_t task);
 164 static int task_clear_cpuusage_locked(task_t task, int cpumon_entitled);
 165 static int task_apply_resource_actions(task_t task, int type);
 166 static void task_action_cpuusage(thread_call_param_t param0, thread_call_param_t param1);
 167
 168 #ifdef MACH_BSD
 169 typedef struct proc *   proc_t;
 170 int                     proc_pid(void *proc);
 171 extern int              proc_selfpid(void);
 172 extern char *           proc_name_address(void *p);
 173 extern char *           proc_best_name(proc_t proc);
 174
 175 extern int proc_pidpathinfo_internal(proc_t p, uint64_t arg,
 176     char *buffer, uint32_t buffersize,
 177     int32_t *retval);
 178 #endif /* MACH_BSD */
 179
 180
 181 #if CONFIG_EMBEDDED
 182 /* TODO: make CONFIG_TASKWATCH */
 183 /* Taskwatch related helper functions */
 184 static void set_thread_appbg(thread_t thread, int setbg, int importance);
 185 static void add_taskwatch_locked(task_t task, task_watch_t * twp);
 186 static void remove_taskwatch_locked(task_t task, task_watch_t * twp);
 187 static void task_watch_lock(void);
 188 static void task_watch_unlock(void);
 189 static void apply_appstate_watchers(task_t task);
 190
 191 typedef struct task_watcher {
 192         queue_chain_t   tw_links;       /* queueing of threads */
 193         task_t          tw_task;        /* task that is being watched */
 194         thread_t        tw_thread;      /* thread that is watching the watch_task */
 195         int             tw_state;       /* the current app state of the thread */
 196         int             tw_importance;  /* importance prior to backgrounding */
 197 } task_watch_t;
 198
 199 typedef struct thread_watchlist {
 200         thread_t        thread;         /* thread being worked on for taskwatch action */
 201         int             importance;     /* importance to be restored if thread is being made active */
 202 } thread_watchlist_t;
 203
 204 #endif /* CONFIG_EMBEDDED */
 205
 206 extern int memorystatus_update_priority_for_appnap(proc_t p, boolean_t is_appnap);
 207
 208 /* Importance Inheritance related helper functions */
 209
 210 #if IMPORTANCE_INHERITANCE
 211
 212 static void task_importance_mark_live_donor(task_t task, boolean_t donating);
 213 static void task_importance_mark_receiver(task_t task, boolean_t receiving);
 214 static void task_importance_mark_denap_receiver(task_t task, boolean_t denap);
 215
 216 static boolean_t task_is_marked_live_importance_donor(task_t task);
 217 static boolean_t task_is_importance_receiver(task_t task);
 218 static boolean_t task_is_importance_denap_receiver(task_t task);
 219
 220 static int task_importance_hold_internal_assertion(task_t target_task, uint32_t count);
 221
 222 static void task_add_importance_watchport(task_t task, mach_port_t port, int *boostp);
 223 static void task_importance_update_live_donor(task_t target_task);
 224
 225 static void task_set_boost_locked(task_t task, boolean_t boost_active);
 226
 227 #endif /* IMPORTANCE_INHERITANCE */
 228
 229 #if IMPORTANCE_TRACE
 230 #define __imptrace_only
 231 #else /* IMPORTANCE_TRACE */
 232 #define __imptrace_only __unused
 233 #endif /* !IMPORTANCE_TRACE */
 234
 235 #if IMPORTANCE_INHERITANCE
 236 #define __imp_only
 237 #else
 238 #define __imp_only __unused
 239 #endif
 240
 241 /*
 242  * Default parameters for certain policies
 243  */
 244
 245 int proc_standard_daemon_tier = THROTTLE_LEVEL_TIER1;
 246 int proc_suppressed_disk_tier = THROTTLE_LEVEL_TIER1;
 247 int proc_tal_disk_tier        = THROTTLE_LEVEL_TIER1;
 248
 249 int proc_graphics_timer_qos   = (LATENCY_QOS_TIER_0 & 0xFF);
 250
 251 const int proc_default_bg_iotier  = THROTTLE_LEVEL_TIER2;
 252
 253 /* Latency/throughput QoS fields remain zeroed, i.e. TIER_UNSPECIFIED at creation */
 254 const struct task_requested_policy default_task_requested_policy = {
 255         .trp_bg_iotier = proc_default_bg_iotier
 256 };
 257 const struct task_effective_policy default_task_effective_policy = {};
 258
 259 /*
 260  * Default parameters for CPU usage monitor.
 261  *
 262  * Default setting is 50% over 3 minutes.
 263  */
 264 #define         DEFAULT_CPUMON_PERCENTAGE 50
 265 #define         DEFAULT_CPUMON_INTERVAL   (3 * 60)
 266
 267 uint8_t         proc_max_cpumon_percentage;
 268 uint64_t        proc_max_cpumon_interval;
 269
 270
 271 kern_return_t
 272 qos_latency_policy_validate(task_latency_qos_t ltier)
 273 {
 274         if ((ltier != LATENCY_QOS_TIER_UNSPECIFIED) &&
 275             ((ltier > LATENCY_QOS_TIER_5) || (ltier < LATENCY_QOS_TIER_0))) {
 276                 return KERN_INVALID_ARGUMENT;
 277         }
 278
 279         return KERN_SUCCESS;
 280 }
 281
 282 kern_return_t
 283 qos_throughput_policy_validate(task_throughput_qos_t ttier)
 284 {
 285         if ((ttier != THROUGHPUT_QOS_TIER_UNSPECIFIED) &&
 286             ((ttier > THROUGHPUT_QOS_TIER_5) || (ttier < THROUGHPUT_QOS_TIER_0))) {
 287                 return KERN_INVALID_ARGUMENT;
 288         }
 289
 290         return KERN_SUCCESS;
 291 }
 292
 293 static kern_return_t
 294 task_qos_policy_validate(task_qos_policy_t qosinfo, mach_msg_type_number_t count)
 295 {
 296         if (count < TASK_QOS_POLICY_COUNT) {
 297                 return KERN_INVALID_ARGUMENT;
 298         }
 299
 300         task_latency_qos_t ltier = qosinfo->task_latency_qos_tier;
 301         task_throughput_qos_t ttier = qosinfo->task_throughput_qos_tier;
 302
 303         kern_return_t kr = qos_latency_policy_validate(ltier);
 304
 305         if (kr != KERN_SUCCESS) {
 306                 return kr;
 307         }
 308
 309         kr = qos_throughput_policy_validate(ttier);
 310
 311         return kr;
 312 }
 313
 314 uint32_t
 315 qos_extract(uint32_t qv)
 316 {
 317         return qv & 0xFF;
 318 }
 319
 320 uint32_t
 321 qos_latency_policy_package(uint32_t qv)
 322 {
 323         return (qv == LATENCY_QOS_TIER_UNSPECIFIED) ? LATENCY_QOS_TIER_UNSPECIFIED : ((0xFF << 16) | qv);
 324 }
 325
 326 uint32_t
 327 qos_throughput_policy_package(uint32_t qv)
 328 {
 329         return (qv == THROUGHPUT_QOS_TIER_UNSPECIFIED) ? THROUGHPUT_QOS_TIER_UNSPECIFIED : ((0xFE << 16) | qv);
 330 }
 331
 332 #define TASK_POLICY_SUPPRESSION_DISABLE  0x1
 333 #define TASK_POLICY_SUPPRESSION_IOTIER2  0x2
 334 #define TASK_POLICY_SUPPRESSION_NONDONOR 0x4
 335 /* TEMPORARY boot-arg controlling task_policy suppression (App Nap) */
 336 static boolean_t task_policy_suppression_flags = TASK_POLICY_SUPPRESSION_IOTIER2 |
 337     TASK_POLICY_SUPPRESSION_NONDONOR;
 338
 339 kern_return_t
 340 task_policy_set(
 341         task_t                                  task,
 342         task_policy_flavor_t    flavor,
 343         task_policy_t                   policy_info,
 344         mach_msg_type_number_t  count)
 345 {
 346         kern_return_t           result = KERN_SUCCESS;
 347
 348         if (task == TASK_NULL || task == kernel_task) {
 349                 return KERN_INVALID_ARGUMENT;
 350         }
 351
 352         switch (flavor) {
 353         case TASK_CATEGORY_POLICY: {
 354                 task_category_policy_t info = (task_category_policy_t)policy_info;
 355
 356                 if (count < TASK_CATEGORY_POLICY_COUNT) {
 357                         return KERN_INVALID_ARGUMENT;
 358                 }
 359
 360 #if CONFIG_EMBEDDED
 361                 /* On embedded, you can't modify your own role. */
 362                 if (current_task() == task) {
 363                         return KERN_INVALID_ARGUMENT;
 364                 }
 365 #endif
 366
 367                 switch (info->role) {
 368                 case TASK_FOREGROUND_APPLICATION:
 369                 case TASK_BACKGROUND_APPLICATION:
 370                 case TASK_DEFAULT_APPLICATION:
 371                         proc_set_task_policy(task,
 372                             TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE,
 373                             info->role);
 374                         break;
 375
 376                 case TASK_CONTROL_APPLICATION:
 377                         if (task != current_task() || task->sec_token.val[0] != 0) {
 378                                 result = KERN_INVALID_ARGUMENT;
 379                         } else {
 380                                 proc_set_task_policy(task,
 381                                     TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE,
 382                                     info->role);
 383                         }
 384                         break;
 385
 386                 case TASK_GRAPHICS_SERVER:
 387                         /* TODO: Restrict this role to FCFS <rdar://problem/12552788> */
 388                         if (task != current_task() || task->sec_token.val[0] != 0) {
 389                                 result = KERN_INVALID_ARGUMENT;
 390                         } else {
 391                                 proc_set_task_policy(task,
 392                                     TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE,
 393                                     info->role);
 394                         }
 395                         break;
 396                 default:
 397                         result = KERN_INVALID_ARGUMENT;
 398                         break;
 399                 } /* switch (info->role) */
 400
 401                 break;
 402         }
 403
 404 /* Desired energy-efficiency/performance "quality-of-service" */
 405         case TASK_BASE_QOS_POLICY:
 406         case TASK_OVERRIDE_QOS_POLICY:
 407         {
 408                 task_qos_policy_t qosinfo = (task_qos_policy_t)policy_info;
 409                 kern_return_t kr = task_qos_policy_validate(qosinfo, count);
 410
 411                 if (kr != KERN_SUCCESS) {
 412                         return kr;
 413                 }
 414
 415
 416                 uint32_t lqos = qos_extract(qosinfo->task_latency_qos_tier);
 417                 uint32_t tqos = qos_extract(qosinfo->task_throughput_qos_tier);
 418
 419                 proc_set_task_policy2(task, TASK_POLICY_ATTRIBUTE,
 420                     flavor == TASK_BASE_QOS_POLICY ? TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS : TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS,
 421                     lqos, tqos);
 422         }
 423         break;
 424
 425         case TASK_BASE_LATENCY_QOS_POLICY:
 426         {
 427                 task_qos_policy_t qosinfo = (task_qos_policy_t)policy_info;
 428                 kern_return_t kr = task_qos_policy_validate(qosinfo, count);
 429
 430                 if (kr != KERN_SUCCESS) {
 431                         return kr;
 432                 }
 433
 434                 uint32_t lqos = qos_extract(qosinfo->task_latency_qos_tier);
 435
 436                 proc_set_task_policy(task, TASK_POLICY_ATTRIBUTE, TASK_BASE_LATENCY_QOS_POLICY, lqos);
 437         }
 438         break;
 439
 440         case TASK_BASE_THROUGHPUT_QOS_POLICY:
 441         {
 442                 task_qos_policy_t qosinfo = (task_qos_policy_t)policy_info;
 443                 kern_return_t kr = task_qos_policy_validate(qosinfo, count);
 444
 445                 if (kr != KERN_SUCCESS) {
 446                         return kr;
 447                 }
 448
 449                 uint32_t tqos = qos_extract(qosinfo->task_throughput_qos_tier);
 450
 451                 proc_set_task_policy(task, TASK_POLICY_ATTRIBUTE, TASK_BASE_THROUGHPUT_QOS_POLICY, tqos);
 452         }
 453         break;
 454
 455         case TASK_SUPPRESSION_POLICY:
 456         {
 457 #if CONFIG_EMBEDDED
 458                 /*
 459                  * Suppression policy is not enabled for embedded
 460                  * because apps aren't marked as denap receivers
 461                  */
 462                 result = KERN_INVALID_ARGUMENT;
 463                 break;
 464 #else /* CONFIG_EMBEDDED */
 465
 466                 task_suppression_policy_t info = (task_suppression_policy_t)policy_info;
 467
 468                 if (count < TASK_SUPPRESSION_POLICY_COUNT) {
 469                         return KERN_INVALID_ARGUMENT;
 470                 }
 471
 472                 struct task_qos_policy qosinfo;
 473
 474                 qosinfo.task_latency_qos_tier = info->timer_throttle;
 475                 qosinfo.task_throughput_qos_tier = info->throughput_qos;
 476
 477                 kern_return_t kr = task_qos_policy_validate(&qosinfo, TASK_QOS_POLICY_COUNT);
 478
 479                 if (kr != KERN_SUCCESS) {
 480                         return kr;
 481                 }
 482
 483                 /* TEMPORARY disablement of task suppression */
 484                 if (info->active &&
 485                     (task_policy_suppression_flags & TASK_POLICY_SUPPRESSION_DISABLE)) {
 486                         return KERN_SUCCESS;
 487                 }
 488
 489                 struct task_pend_token pend_token = {};
 490
 491                 task_lock(task);
 492
 493                 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
 494                     (IMPORTANCE_CODE(IMP_TASK_SUPPRESSION, info->active)) | DBG_FUNC_START,
 495                     proc_selfpid(), task_pid(task), trequested_0(task),
 496                     trequested_1(task), 0);
 497
 498                 task->requested_policy.trp_sup_active      = (info->active)         ? 1 : 0;
 499                 task->requested_policy.trp_sup_lowpri_cpu  = (info->lowpri_cpu)     ? 1 : 0;
 500                 task->requested_policy.trp_sup_timer       = qos_extract(info->timer_throttle);
 501                 task->requested_policy.trp_sup_disk        = (info->disk_throttle)  ? 1 : 0;
 502                 task->requested_policy.trp_sup_throughput  = qos_extract(info->throughput_qos);
 503                 task->requested_policy.trp_sup_cpu         = (info->suppressed_cpu) ? 1 : 0;
 504                 task->requested_policy.trp_sup_bg_sockets  = (info->background_sockets) ? 1 : 0;
 505
 506                 task_policy_update_locked(task, &pend_token);
 507
 508                 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
 509                     (IMPORTANCE_CODE(IMP_TASK_SUPPRESSION, info->active)) | DBG_FUNC_END,
 510                     proc_selfpid(), task_pid(task), trequested_0(task),
 511                     trequested_1(task), 0);
 512
 513                 task_unlock(task);
 514
 515                 task_policy_update_complete_unlocked(task, &pend_token);
 516
 517                 break;
 518
 519 #endif /* CONFIG_EMBEDDED */
 520         }
 521
 522         default:
 523                 result = KERN_INVALID_ARGUMENT;
 524                 break;
 525         }
 526
 527         return result;
 528 }
 529
 530 /* Sets BSD 'nice' value on the task */
 531 kern_return_t
 532 task_importance(
 533         task_t                          task,
 534         integer_t                       importance)
 535 {
 536         if (task == TASK_NULL || task == kernel_task) {
 537                 return KERN_INVALID_ARGUMENT;
 538         }
 539
 540         task_lock(task);
 541
 542         if (!task->active) {
 543                 task_unlock(task);
 544
 545                 return KERN_TERMINATED;
 546         }
 547
 548         if (proc_get_effective_task_policy(task, TASK_POLICY_ROLE) >= TASK_CONTROL_APPLICATION) {
 549                 task_unlock(task);
 550
 551                 return KERN_INVALID_ARGUMENT;
 552         }
 553
 554         task->importance = importance;
 555
 556         struct task_pend_token pend_token = {};
 557
 558         task_policy_update_locked(task, &pend_token);
 559
 560         task_unlock(task);
 561
 562         task_policy_update_complete_unlocked(task, &pend_token);
 563
 564         return KERN_SUCCESS;
 565 }
 566
 567 kern_return_t
 568 task_policy_get(
 569         task_t                                  task,
 570         task_policy_flavor_t    flavor,
 571         task_policy_t                   policy_info,
 572         mach_msg_type_number_t  *count,
 573         boolean_t                               *get_default)
 574 {
 575         if (task == TASK_NULL || task == kernel_task) {
 576                 return KERN_INVALID_ARGUMENT;
 577         }
 578
 579         switch (flavor) {
 580         case TASK_CATEGORY_POLICY:
 581         {
 582                 task_category_policy_t          info = (task_category_policy_t)policy_info;
 583
 584                 if (*count < TASK_CATEGORY_POLICY_COUNT) {
 585                         return KERN_INVALID_ARGUMENT;
 586                 }
 587
 588                 if (*get_default) {
 589                         info->role = TASK_UNSPECIFIED;
 590                 } else {
 591                         info->role = proc_get_task_policy(task, TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE);
 592                 }
 593                 break;
 594         }
 595
 596         case TASK_BASE_QOS_POLICY: /* FALLTHRU */
 597         case TASK_OVERRIDE_QOS_POLICY:
 598         {
 599                 task_qos_policy_t info = (task_qos_policy_t)policy_info;
 600
 601                 if (*count < TASK_QOS_POLICY_COUNT) {
 602                         return KERN_INVALID_ARGUMENT;
 603                 }
 604
 605                 if (*get_default) {
 606                         info->task_latency_qos_tier = LATENCY_QOS_TIER_UNSPECIFIED;
 607                         info->task_throughput_qos_tier = THROUGHPUT_QOS_TIER_UNSPECIFIED;
 608                 } else if (flavor == TASK_BASE_QOS_POLICY) {
 609                         int value1, value2;
 610
 611                         proc_get_task_policy2(task, TASK_POLICY_ATTRIBUTE, TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS, &value1, &value2);
 612
 613                         info->task_latency_qos_tier = qos_latency_policy_package(value1);
 614                         info->task_throughput_qos_tier = qos_throughput_policy_package(value2);
 615                 } else if (flavor == TASK_OVERRIDE_QOS_POLICY) {
 616                         int value1, value2;
 617
 618                         proc_get_task_policy2(task, TASK_POLICY_ATTRIBUTE, TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS, &value1, &value2);
 619
 620                         info->task_latency_qos_tier = qos_latency_policy_package(value1);
 621                         info->task_throughput_qos_tier = qos_throughput_policy_package(value2);
 622                 }
 623
 624                 break;
 625         }
 626
 627         case TASK_POLICY_STATE:
 628         {
 629                 task_policy_state_t info = (task_policy_state_t)policy_info;
 630
 631                 if (*count < TASK_POLICY_STATE_COUNT) {
 632                         return KERN_INVALID_ARGUMENT;
 633                 }
 634
 635                 /* Only root can get this info */
 636                 if (current_task()->sec_token.val[0] != 0) {
 637                         return KERN_PROTECTION_FAILURE;
 638                 }
 639
 640                 if (*get_default) {
 641                         info->requested = 0;
 642                         info->effective = 0;
 643                         info->pending = 0;
 644                         info->imp_assertcnt = 0;
 645                         info->imp_externcnt = 0;
 646                         info->flags = 0;
 647                         info->imp_transitions = 0;
 648                 } else {
 649                         task_lock(task);
 650
 651                         info->requested = task_requested_bitfield(task);
 652                         info->effective = task_effective_bitfield(task);
 653                         info->pending   = 0;
 654
 655                         info->tps_requested_policy = *(uint64_t*)(&task->requested_policy);
 656                         info->tps_effective_policy = *(uint64_t*)(&task->effective_policy);
 657
 658                         info->flags = 0;
 659                         if (task->task_imp_base != NULL) {
 660                                 info->imp_assertcnt = task->task_imp_base->iit_assertcnt;
 661                                 info->imp_externcnt = IIT_EXTERN(task->task_imp_base);
 662                                 info->flags |= (task_is_marked_importance_receiver(task) ? TASK_IMP_RECEIVER : 0);
 663                                 info->flags |= (task_is_marked_importance_denap_receiver(task) ? TASK_DENAP_RECEIVER : 0);
 664                                 info->flags |= (task_is_marked_importance_donor(task) ? TASK_IMP_DONOR : 0);
 665                                 info->flags |= (task_is_marked_live_importance_donor(task) ? TASK_IMP_LIVE_DONOR : 0);
 666                                 info->imp_transitions = task->task_imp_base->iit_transitions;
 667                         } else {
 668                                 info->imp_assertcnt = 0;
 669                                 info->imp_externcnt = 0;
 670                                 info->imp_transitions = 0;
 671                         }
 672                         task_unlock(task);
 673                 }
 674
 675                 break;
 676         }
 677
 678         case TASK_SUPPRESSION_POLICY:
 679         {
 680                 task_suppression_policy_t info = (task_suppression_policy_t)policy_info;
 681
 682                 if (*count < TASK_SUPPRESSION_POLICY_COUNT) {
 683                         return KERN_INVALID_ARGUMENT;
 684                 }
 685
 686                 task_lock(task);
 687
 688                 if (*get_default) {
 689                         info->active            = 0;
 690                         info->lowpri_cpu        = 0;
 691                         info->timer_throttle    = LATENCY_QOS_TIER_UNSPECIFIED;
 692                         info->disk_throttle     = 0;
 693                         info->cpu_limit         = 0;
 694                         info->suspend           = 0;
 695                         info->throughput_qos    = 0;
 696                         info->suppressed_cpu    = 0;
 697                 } else {
 698                         info->active            = task->requested_policy.trp_sup_active;
 699                         info->lowpri_cpu        = task->requested_policy.trp_sup_lowpri_cpu;
 700                         info->timer_throttle    = qos_latency_policy_package(task->requested_policy.trp_sup_timer);
 701                         info->disk_throttle     = task->requested_policy.trp_sup_disk;
 702                         info->cpu_limit         = 0;
 703                         info->suspend           = 0;
 704                         info->throughput_qos    = qos_throughput_policy_package(task->requested_policy.trp_sup_throughput);
 705                         info->suppressed_cpu    = task->requested_policy.trp_sup_cpu;
 706                         info->background_sockets = task->requested_policy.trp_sup_bg_sockets;
 707                 }
 708
 709                 task_unlock(task);
 710                 break;
 711         }
 712
 713         default:
 714                 return KERN_INVALID_ARGUMENT;
 715         }
 716
 717         return KERN_SUCCESS;
 718 }
 719
 720 /*
 721  * Called at task creation
 722  * We calculate the correct effective but don't apply it to anything yet.
 723  * The threads, etc will inherit from the task as they get created.
 724  */
 725 void
 726 task_policy_create(task_t task, task_t parent_task)
 727 {
 728         task->requested_policy.trp_apptype          = parent_task->requested_policy.trp_apptype;
 729
 730         task->requested_policy.trp_int_darwinbg     = parent_task->requested_policy.trp_int_darwinbg;
 731         task->requested_policy.trp_ext_darwinbg     = parent_task->requested_policy.trp_ext_darwinbg;
 732         task->requested_policy.trp_int_iotier       = parent_task->requested_policy.trp_int_iotier;
 733         task->requested_policy.trp_ext_iotier       = parent_task->requested_policy.trp_ext_iotier;
 734         task->requested_policy.trp_int_iopassive    = parent_task->requested_policy.trp_int_iopassive;
 735         task->requested_policy.trp_ext_iopassive    = parent_task->requested_policy.trp_ext_iopassive;
 736         task->requested_policy.trp_bg_iotier        = parent_task->requested_policy.trp_bg_iotier;
 737         task->requested_policy.trp_terminated       = parent_task->requested_policy.trp_terminated;
 738         task->requested_policy.trp_qos_clamp        = parent_task->requested_policy.trp_qos_clamp;
 739
 740         if (task->requested_policy.trp_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE && !task_is_exec_copy(task)) {
 741                 /* Do not update the apptype for exec copy task */
 742                 if (parent_task->requested_policy.trp_boosted) {
 743                         task->requested_policy.trp_apptype = TASK_APPTYPE_DAEMON_INTERACTIVE;
 744                         task_importance_mark_donor(task, TRUE);
 745                 } else {
 746                         task->requested_policy.trp_apptype = TASK_APPTYPE_DAEMON_BACKGROUND;
 747                         task_importance_mark_receiver(task, FALSE);
 748                 }
 749         }
 750
 751         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
 752             (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_TASK))) | DBG_FUNC_START,
 753             task_pid(task), teffective_0(task),
 754             teffective_1(task), task->priority, 0);
 755
 756         task_policy_update_internal_locked(task, TRUE, NULL);
 757
 758         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
 759             (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_TASK))) | DBG_FUNC_END,
 760             task_pid(task), teffective_0(task),
 761             teffective_1(task), task->priority, 0);
 762
 763         task_importance_update_live_donor(task);
 764 }
 765
 766
     /*
      * Recompute the task's effective policy from its requested policy,
      * bracketed by IMP_UPDATE start/end trace events that record the task's
      * pid, effective policy words and priority before and after the update.
      *
      * Calls task_policy_update_internal_locked() with in_create == FALSE, so
      * work that must be deferred is recorded in pend_token.
      *
      * NOTE(review): the callers visible in this file invoke this between
      * task_lock() and task_unlock(); confirm the same holds for other callers.
      */
 767 static void
 768 task_policy_update_locked(task_t task, task_pend_token_t pend_token)
 769 {
 770         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
 771             (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_TASK) | DBG_FUNC_START),
 772             task_pid(task), teffective_0(task),
 773             teffective_1(task), task->priority, 0);
 774
 775         task_policy_update_internal_locked(task, FALSE, pend_token);
 776
 777         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
 778             (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_TASK)) | DBG_FUNC_END,
 779             task_pid(task), teffective_0(task),
 780             teffective_1(task), task->priority, 0);
 781 }
 782
 783 /*
 784  * One state update function TO RULE THEM ALL
 785  *
 786  * This function updates the task or thread effective policy fields
 787  * and pushes the results to the relevant subsystems.
 788  *
 789  * Must call update_complete after unlocking the task,
 790  * as some subsystems cannot be updated while holding the task lock.
 791  *
 792  * Called with task locked, not thread
      *
      * in_create:  TRUE when called from task_policy_create().  The new effective
      *             policy is stored on the task and the function returns before
      *             pend_token or any other subsystem is touched.
      * pend_token: receives the updates that must be applied after the task is
      *             unlocked.  Only dereferenced when in_create is FALSE
      *             (task_policy_create() passes NULL).
 793  */
 794
 795 static void
 796 task_policy_update_internal_locked(task_t task, boolean_t in_create, task_pend_token_t pend_token)
 797 {
 798         /*
 799          * Step 1:
 800          *  Gather requested policy
 801          */
 802
             /* Work from a snapshot; the task's requested policy is never written here */
 803         struct task_requested_policy requested = task->requested_policy;
 804
 805         /*
 806          * Step 2:
 807          *  Calculate new effective policies from requested policy and task state
 808          *  Rules:
 809          *      Don't change requested, it won't take effect
 810          */
 811
             /* Start from all-zero: every effective field not set below ends up 0 */
 812         struct task_effective_policy next = {};
 813
 814         /* Update task role */
 815         next.tep_role = requested.trp_role;
 816
 817         /* Set task qos clamp and ceiling */
 818         next.tep_qos_clamp = requested.trp_qos_clamp;
 819
             /* For app apptypes the QoS ceiling and UI urgency follow the role */
 820         if (requested.trp_apptype == TASK_APPTYPE_APP_DEFAULT ||
 821             requested.trp_apptype == TASK_APPTYPE_APP_TAL) {
 822                 switch (next.tep_role) {
 823                 case TASK_FOREGROUND_APPLICATION:
 824                         /* Foreground apps get urgent scheduler priority */
 825                         next.tep_qos_ui_is_urgent = 1;
 826                         next.tep_qos_ceiling = THREAD_QOS_UNSPECIFIED;
 827                         break;
 828
 829                 case TASK_BACKGROUND_APPLICATION:
 830                         /* This is really 'non-focal but on-screen' */
 831                         next.tep_qos_ceiling = THREAD_QOS_UNSPECIFIED;
 832                         break;
 833
 834                 case TASK_DEFAULT_APPLICATION:
 835                         /* This is 'may render UI but we don't know if it's focal/nonfocal' */
 836                         next.tep_qos_ceiling = THREAD_QOS_UNSPECIFIED;
 837                         break;
 838
 839                 case TASK_NONUI_APPLICATION:
 840                         /* i.e. 'off-screen' */
 841                         next.tep_qos_ceiling = THREAD_QOS_LEGACY;
 842                         break;
 843
 844                 case TASK_CONTROL_APPLICATION:
 845                 case TASK_GRAPHICS_SERVER:
 846                         next.tep_qos_ui_is_urgent = 1;
 847                         next.tep_qos_ceiling = THREAD_QOS_UNSPECIFIED;
 848                         break;
 849
 850                 case TASK_THROTTLE_APPLICATION:
 851                         /* i.e. 'TAL launch' */
 852                         next.tep_qos_ceiling = THREAD_QOS_UTILITY;
 853                         break;
 854
 855                 case TASK_DARWINBG_APPLICATION:
 856                         /* i.e. 'DARWIN_BG throttled background application' */
 857                         next.tep_qos_ceiling = THREAD_QOS_BACKGROUND;
 858                         break;
 859
 860                 case TASK_UNSPECIFIED:
 861                 default:
 862                         /* Apps that don't have an application role get
 863                          * USER_INTERACTIVE and USER_INITIATED squashed to LEGACY */
 864                         next.tep_qos_ceiling = THREAD_QOS_LEGACY;
 865                         break;
 866                 }
 867         } else {
 868                 /* Daemons and dext get USER_INTERACTIVE squashed to USER_INITIATED */
 869                 next.tep_qos_ceiling = THREAD_QOS_USER_INITIATED;
 870         }
 871
 872         /* Calculate DARWIN_BG */
 873         boolean_t wants_darwinbg        = FALSE;
 874         boolean_t wants_all_sockets_bg  = FALSE; /* Do I want my existing sockets to be bg */
 875         boolean_t wants_watchersbg      = FALSE; /* Do I want my pidbound threads to be bg */
 876
 877         /*
 878          * If DARWIN_BG has been requested at either level, it's engaged.
 879          * Only true DARWIN_BG changes cause watchers to transition.
 880          *
 881          * Backgrounding due to apptype does not: the daemon-apptype and
              * qos-clamp checks further down only set wants_darwinbg.
 882          */
 883         if (requested.trp_int_darwinbg || requested.trp_ext_darwinbg ||
 884             next.tep_role == TASK_DARWINBG_APPLICATION) {
 885                 wants_watchersbg = wants_all_sockets_bg = wants_darwinbg = TRUE;
 886         }
 887
 888         /*
 889          * Deprecated TAL implementation for TAL apptype
 890          * Background TAL apps are throttled when TAL is enabled
 891          */
 892         if (requested.trp_apptype == TASK_APPTYPE_APP_TAL &&
 893             requested.trp_role == TASK_BACKGROUND_APPLICATION &&
 894             requested.trp_tal_enabled == 1) {
 895                 next.tep_tal_engaged = 1;
 896         }
 897
 898         /* New TAL implementation based on TAL role alone, works for all apps */
 899         if ((requested.trp_apptype == TASK_APPTYPE_APP_DEFAULT ||
 900             requested.trp_apptype == TASK_APPTYPE_APP_TAL) &&
 901             requested.trp_role == TASK_THROTTLE_APPLICATION) {
 902                 next.tep_tal_engaged = 1;
 903         }
 904
 905         /* Adaptive daemons are DARWIN_BG unless boosted, and don't get network throttled. */
 906         if (requested.trp_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE &&
 907             requested.trp_boosted == 0) {
 908                 wants_darwinbg = TRUE;
 909         }
 910
 911         /* Background daemons are always DARWIN_BG, no exceptions, and don't get network throttled. */
 912         if (requested.trp_apptype == TASK_APPTYPE_DAEMON_BACKGROUND) {
 913                 wants_darwinbg = TRUE;
 914         }
 915
             /* A BACKGROUND or MAINTENANCE qos clamp also engages DARWIN_BG */
 916         if (next.tep_qos_clamp == THREAD_QOS_BACKGROUND || next.tep_qos_clamp == THREAD_QOS_MAINTENANCE) {
 917                 wants_darwinbg = TRUE;
 918         }
 919
 920         /* Calculate side effects of DARWIN_BG */
 921
 922         if (wants_darwinbg) {
 923                 next.tep_darwinbg = 1;
 924                 /* darwinbg tasks always create bg sockets, but we don't always loop over all sockets */
 925                 next.tep_new_sockets_bg = 1;
 926                 next.tep_lowpri_cpu = 1;
 927         }
 928
 929         if (wants_all_sockets_bg) {
 930                 next.tep_all_sockets_bg = 1;
 931         }
 932
 933         if (wants_watchersbg) {
 934                 next.tep_watchers_bg = 1;
 935         }
 936
 937         /* Calculate low CPU priority */
             /*
              * tep_lowpri_cpu has already been set above for the DARWIN_BG case;
              * this pass additionally covers TAL and unboosted suppression.
              */
 938
 939         boolean_t wants_lowpri_cpu = FALSE;
 940
 941         if (wants_darwinbg) {
 942                 wants_lowpri_cpu = TRUE;
 943         }
 944
 945         if (next.tep_tal_engaged) {
 946                 wants_lowpri_cpu = TRUE;
 947         }
 948
 949         if (requested.trp_sup_lowpri_cpu && requested.trp_boosted == 0) {
 950                 wants_lowpri_cpu = TRUE;
 951         }
 952
 953         if (wants_lowpri_cpu) {
 954                 next.tep_lowpri_cpu = 1;
 955         }
 956
 957         /* Calculate IO policy */
 958
 959         /* Update BG IO policy (so we can see if it has changed) */
 960         next.tep_bg_iotier = requested.trp_bg_iotier;
 961
             /* The effective tier is the numeric maximum of every contributing tier */
 962         int iopol = THROTTLE_LEVEL_TIER0;
 963
 964         if (wants_darwinbg) {
 965                 iopol = MAX(iopol, requested.trp_bg_iotier);
 966         }
 967
 968         if (requested.trp_apptype == TASK_APPTYPE_DAEMON_STANDARD) {
 969                 iopol = MAX(iopol, proc_standard_daemon_tier);
 970         }
 971
 972         if (requested.trp_sup_disk && requested.trp_boosted == 0) {
 973                 iopol = MAX(iopol, proc_suppressed_disk_tier);
 974         }
 975
 976         if (next.tep_tal_engaged) {
 977                 iopol = MAX(iopol, proc_tal_disk_tier);
 978         }
 979
 980         if (next.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
 981                 iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.tep_qos_clamp]);
 982         }
 983
 984         iopol = MAX(iopol, requested.trp_int_iotier);
 985         iopol = MAX(iopol, requested.trp_ext_iotier);
 986
 987         next.tep_io_tier = iopol;
 988
 989         /* Calculate Passive IO policy */
 990
 991         if (requested.trp_ext_iopassive || requested.trp_int_iopassive) {
 992                 next.tep_io_passive = 1;
 993         }
 994
 995         /* Calculate suppression-active flag */
 996         boolean_t appnap_transition = FALSE;
 997
 998         if (requested.trp_sup_active && requested.trp_boosted == 0) {
 999                 next.tep_sup_active = 1;
1000         }
1001
             /* task->effective_policy still holds the previous policy here (it is swapped in step 3) */
1002         if (task->effective_policy.tep_sup_active != next.tep_sup_active) {
1003                 appnap_transition = TRUE;
1004         }
1005
1006         /* Calculate timer QOS */
             /* Order matters: base, then suppression, then clamp, then override, then graphics server */
1007         int latency_qos = requested.trp_base_latency_qos;
1008
1009         if (requested.trp_sup_timer && requested.trp_boosted == 0) {
1010                 latency_qos = requested.trp_sup_timer;
1011         }
1012
1013         if (next.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
1014                 latency_qos = MAX(latency_qos, (int)thread_qos_policy_params.qos_latency_qos[next.tep_qos_clamp]);
1015         }
1016
             /* A non-zero override replaces everything computed so far */
1017         if (requested.trp_over_latency_qos != 0) {
1018                 latency_qos = requested.trp_over_latency_qos;
1019         }
1020
1021         /* Treat the windowserver special */
1022         if (requested.trp_role == TASK_GRAPHICS_SERVER) {
1023                 latency_qos = proc_graphics_timer_qos;
1024         }
1025
1026         next.tep_latency_qos = latency_qos;
1027
1028         /* Calculate throughput QOS */
             /* Same layering as the timer QOS: base, suppression, clamp, then override */
1029         int through_qos = requested.trp_base_through_qos;
1030
1031         if (requested.trp_sup_throughput && requested.trp_boosted == 0) {
1032                 through_qos = requested.trp_sup_throughput;
1033         }
1034
1035         if (next.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
1036                 through_qos = MAX(through_qos, (int)thread_qos_policy_params.qos_through_qos[next.tep_qos_clamp]);
1037         }
1038
1039         if (requested.trp_over_through_qos != 0) {
1040                 through_qos = requested.trp_over_through_qos;
1041         }
1042
1043         next.tep_through_qos = through_qos;
1044
1045         /* Calculate suppressed CPU priority */
1046         if (requested.trp_sup_cpu && requested.trp_boosted == 0) {
1047                 next.tep_suppressed_cpu = 1;
1048         }
1049
1050         /*
1051          * Calculate background sockets
1052          * Don't take into account boosting to limit transition frequency.
1053          */
1054         if (requested.trp_sup_bg_sockets) {
1055                 next.tep_all_sockets_bg = 1;
1056                 next.tep_new_sockets_bg = 1;
1057         }
1058
1059         /* Apply SFI Managed class bit */
1060         next.tep_sfi_managed = requested.trp_sfi_managed;
1061
1062         /* Calculate 'live donor' status for live importance */
             /* Only app apptypes can be live donors; every other apptype gets 0 */
1063         switch (requested.trp_apptype) {
1064         case TASK_APPTYPE_APP_TAL:
1065         case TASK_APPTYPE_APP_DEFAULT:
1066                 if (requested.trp_ext_darwinbg == 1 ||
1067                     (next.tep_sup_active == 1 &&
1068                     (task_policy_suppression_flags & TASK_POLICY_SUPPRESSION_NONDONOR)) ||
1069                     next.tep_role == TASK_DARWINBG_APPLICATION) {
1070                         next.tep_live_donor = 0;
1071                 } else {
1072                         next.tep_live_donor = 1;
1073                 }
1074                 break;
1075
1076         case TASK_APPTYPE_DAEMON_INTERACTIVE:
1077         case TASK_APPTYPE_DAEMON_STANDARD:
1078         case TASK_APPTYPE_DAEMON_ADAPTIVE:
1079         case TASK_APPTYPE_DAEMON_BACKGROUND:
1080         case TASK_APPTYPE_DRIVER:
1081         default:
1082                 next.tep_live_donor = 0;
1083                 break;
1084         }
1085
1086         if (requested.trp_terminated) {
1087                 /*
1088                  * Shoot down the throttles that slow down exit or response to SIGTERM
1089                  * We don't need to shoot down:
1090                  * passive        (don't want to cause others to throttle)
1091                  * all_sockets_bg (don't need to iterate FDs on every exit)
1092                  * new_sockets_bg (doesn't matter for exiting process)
1093                  * pidsuspend     (jetsam-ed BG process shouldn't run again)
1094                  * watchers_bg    (watcher threads don't need to be unthrottled)
1095                  * latency_qos    (affects userspace timers only)
1096                  */
1097
1098                 next.tep_terminated     = 1;
1099                 next.tep_darwinbg       = 0;
1100                 next.tep_lowpri_cpu     = 0;
1101                 next.tep_io_tier        = THROTTLE_LEVEL_TIER0;
1102                 next.tep_tal_engaged    = 0;
1103                 next.tep_role           = TASK_UNSPECIFIED;
1104                 next.tep_suppressed_cpu = 0;
1105         }
1106
1107         /*
1108          * Step 3:
1109          *  Swap out old policy for new policy
1110          */
1111
             /* Keep the old policy so steps 4 and 5 can detect what changed */
1112         struct task_effective_policy prev = task->effective_policy;
1113
1114         /* This is the point where the new values become visible to other threads */
1115         task->effective_policy = next;
1116
1117         /* Don't do anything further to a half-formed task */
             /* (pend_token is not dereferenced before this point; task_policy_create() passes NULL) */
1118         if (in_create) {
1119                 return;
1120         }
1121
1122         if (task == kernel_task) {
1123                 panic("Attempting to set task policy on kernel_task");
1124         }
1125
1126         /*
1127          * Step 4:
1128          *  Pend updates that can't be done while holding the task lock
1129          */
1130
1131         if (prev.tep_all_sockets_bg != next.tep_all_sockets_bg) {
1132                 pend_token->tpt_update_sockets = 1;
1133         }
1134
1135         /* Only re-scan the timer list if the qos level is getting less strong */
             /* i.e. when the new latency QoS value is numerically lower than the previous one */
1136         if (prev.tep_latency_qos > next.tep_latency_qos) {
1137                 pend_token->tpt_update_timers = 1;
1138         }
1139
1140 #if CONFIG_EMBEDDED
1141         if (prev.tep_watchers_bg != next.tep_watchers_bg) {
1142                 pend_token->tpt_update_watchers = 1;
1143         }
1144 #endif /* CONFIG_EMBEDDED */
1145
1146         if (prev.tep_live_donor != next.tep_live_donor) {
1147                 pend_token->tpt_update_live_donor = 1;
1148         }
1149
1150         /*
1151          * Step 5:
1152          *  Update other subsystems as necessary if something has changed
1153          */
1154
1155         boolean_t update_threads = FALSE, update_sfi = FALSE;
1156
1157         /*
1158          * Check for the attributes that thread_policy_update_internal_locked() consults,
1159          *  and trigger thread policy re-evaluation.
1160          */
1161         if (prev.tep_io_tier != next.tep_io_tier ||
1162             prev.tep_bg_iotier != next.tep_bg_iotier ||
1163             prev.tep_io_passive != next.tep_io_passive ||
1164             prev.tep_darwinbg != next.tep_darwinbg ||
1165             prev.tep_qos_clamp != next.tep_qos_clamp ||
1166             prev.tep_qos_ceiling != next.tep_qos_ceiling ||
1167             prev.tep_qos_ui_is_urgent != next.tep_qos_ui_is_urgent ||
1168             prev.tep_latency_qos != next.tep_latency_qos ||
1169             prev.tep_through_qos != next.tep_through_qos ||
1170             prev.tep_lowpri_cpu != next.tep_lowpri_cpu ||
1171             prev.tep_new_sockets_bg != next.tep_new_sockets_bg ||
1172             prev.tep_terminated != next.tep_terminated) {
1173                 update_threads = TRUE;
1174         }
1175
1176         /*
1177          * Check for the attributes that sfi_thread_classify() consults,
1178          *  and trigger SFI re-evaluation.
1179          */
1180         if (prev.tep_latency_qos != next.tep_latency_qos ||
1181             prev.tep_role != next.tep_role ||
1182             prev.tep_sfi_managed != next.tep_sfi_managed) {
1183                 update_sfi = TRUE;
1184         }
1185
1186         /* Reflect task role transitions into the coalition role counters */
1187         if (prev.tep_role != next.tep_role) {
1188                 if (task_policy_update_coalition_focal_tasks(task, prev.tep_role, next.tep_role, pend_token)) {
1189                         update_sfi = TRUE;
1190                 }
1191         }
1192
1193         boolean_t update_priority = FALSE;
1194
1195         int priority     = BASEPRI_DEFAULT;
1196         int max_priority = MAXPRI_USER;
1197
             /*
              * Throttling (lowpri_cpu) takes precedence over suppression; only when
              * neither applies do role, 'nice' value and qos clamp decide the priority.
              */
1198         if (next.tep_lowpri_cpu) {
1199                 priority = MAXPRI_THROTTLE;
1200                 max_priority = MAXPRI_THROTTLE;
1201         } else if (next.tep_suppressed_cpu) {
1202                 priority = MAXPRI_SUPPRESSED;
1203                 max_priority = MAXPRI_SUPPRESSED;
1204         } else {
1205                 switch (next.tep_role) {
1206                 case TASK_CONTROL_APPLICATION:
1207                         priority = BASEPRI_CONTROL;
1208                         break;
1209                 case TASK_GRAPHICS_SERVER:
1210                         priority = BASEPRI_GRAPHICS;
1211                         max_priority = MAXPRI_RESERVED;
1212                         break;
1213                 default:
1214                         break;
1215                 }
1216
1217                 /* factor in 'nice' value */
1218                 priority += task->importance;
1219
                     /* task->effective_policy was set to 'next' in step 3, so this reads the new clamp */
1220                 if (task->effective_policy.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
1221                         int qos_clamp_priority = thread_qos_policy_params.qos_pri[task->effective_policy.tep_qos_clamp];
1222
1223                         priority        = MIN(priority, qos_clamp_priority);
1224                         max_priority    = MIN(max_priority, qos_clamp_priority);
1225                 }
1226
                     /* Keep the base priority within [MINPRI, max_priority] */
1227                 if (priority > max_priority) {
1228                         priority = max_priority;
1229                 } else if (priority < MINPRI) {
1230                         priority = MINPRI;
1231                 }
1232         }
1233
1234         assert(priority <= max_priority);
1235
1236         /* avoid extra work if priority isn't changing */
1237         if (priority != task->priority ||
1238             max_priority != task->max_priority) {
1239                 /* update the scheduling priority for the task */
1240                 task->max_priority  = max_priority;
1241                 task->priority      = priority;
1242                 update_priority     = TRUE;
1243         }
1244
1245         /* Loop over the threads in the task:
1246          * only once
1247          * only if necessary
1248          * with one thread mutex hold per thread
1249          */
1250         if (update_threads || update_priority || update_sfi) {
1251                 thread_t thread;
1252
1253                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
                             /* Each thread gets its own token; it is completed inside this loop */
1254                         struct task_pend_token thread_pend_token = {};
1255
1256                         if (update_sfi) {
1257                                 thread_pend_token.tpt_update_thread_sfi = 1;
1258                         }
1259
1260                         if (update_priority || update_threads) {
1261                                 thread_policy_update_tasklocked(thread,
1262                                     task->priority, task->max_priority,
1263                                     &thread_pend_token);
1264                         }
1265
1266                         assert(!thread_pend_token.tpt_update_sockets);
1267
1268                         // Slightly risky, as we still hold the task lock...
1269                         thread_policy_update_complete_unlocked(thread, &thread_pend_token);
1270                 }
1271         }
1272
1273         /*
1274          * Use the app-nap transitions to influence the
1275          * transition of the process within the jetsam band
1276          * [and optionally its live-donor status]
1277          * On macOS only.
1278          */
1279         if (appnap_transition == TRUE) {
                     /* task->effective_policy is the new policy at this point */
1280                 if (task->effective_policy.tep_sup_active == 1) {
1281                         memorystatus_update_priority_for_appnap(((proc_t) task->bsd_info), TRUE);
1282                 } else {
1283                         memorystatus_update_priority_for_appnap(((proc_t) task->bsd_info), FALSE);
1284                 }
1285         }
1286 }
1287
1288
1289 /*
1290  * Yet another layering violation. We reach out and bang on the coalition directly.
1291  */
1292 static boolean_t
1293 task_policy_update_coalition_focal_tasks(task_t            task,
1294     int               prev_role,
1295     int               next_role,
1296     task_pend_token_t pend_token)
1297 {
1298         boolean_t sfi_transition = FALSE;
1299         uint32_t new_count = 0;
1300
1301         /* task moving into/out-of the foreground */
1302         if (prev_role != TASK_FOREGROUND_APPLICATION && next_role == TASK_FOREGROUND_APPLICATION) {
1303                 if (task_coalition_adjust_focal_count(task, 1, &new_count) && (new_count == 1)) {
1304                         sfi_transition = TRUE;
1305                         pend_token->tpt_update_tg_ui_flag = TRUE;
1306                 }
1307         } else if (prev_role == TASK_FOREGROUND_APPLICATION && next_role != TASK_FOREGROUND_APPLICATION) {
1308                 if (task_coalition_adjust_focal_count(task, -1, &new_count) && (new_count == 0)) {
1309                         sfi_transition = TRUE;
1310                         pend_token->tpt_update_tg_ui_flag = TRUE;
1311                 }
1312         }
1313
1314         /* task moving into/out-of background */
1315         if (prev_role != TASK_BACKGROUND_APPLICATION && next_role == TASK_BACKGROUND_APPLICATION) {
1316                 if (task_coalition_adjust_nonfocal_count(task, 1, &new_count) && (new_count == 1)) {
1317                         sfi_transition = TRUE;
1318                 }
1319         } else if (prev_role == TASK_BACKGROUND_APPLICATION && next_role != TASK_BACKGROUND_APPLICATION) {
1320                 if (task_coalition_adjust_nonfocal_count(task, -1, &new_count) && (new_count == 0)) {
1321                         sfi_transition = TRUE;
1322                 }
1323         }
1324
1325         if (sfi_transition) {
1326                 pend_token->tpt_update_coal_sfi = 1;
1327         }
1328         return sfi_transition;
1329 }
1330
1331 #if CONFIG_SCHED_SFI
1332
1333 /* coalition object is locked */
1334 static void
1335 task_sfi_reevaluate_cb(coalition_t coal, void *ctx, task_t task)
1336 {
1337         thread_t thread;
1338
1339         /* unused for now */
1340         (void)coal;
1341
1342         /* skip the task we're re-evaluating on behalf of: it's already updated */
1343         if (task == (task_t)ctx) {
1344                 return;
1345         }
1346
1347         task_lock(task);
1348
1349         queue_iterate(&task->threads, thread, thread_t, task_threads) {
1350                 sfi_reevaluate(thread);
1351         }
1352
1353         task_unlock(task);
1354 }
1355 #endif /* CONFIG_SCHED_SFI */
1356
1357 /*
1358  * Called with task unlocked to do things that can't be done while holding the task lock
      *
      * Runs the deferred work recorded in pend_token, in this order:
      *   tpt_update_sockets    -> proc_apply_task_networkbg()        (MACH_BSD only)
      *   tpt_update_timers     -> ml_timer_evaluate()
      *   tpt_update_watchers   -> apply_appstate_watchers()          (CONFIG_EMBEDDED only)
      *   tpt_update_live_donor -> task_importance_update_live_donor()
      *   tpt_update_coal_sfi   -> task_sfi_reevaluate_cb() on each task of the
      *                            resource coalition               (CONFIG_SCHED_SFI only)
      *
      * pend_token is only read here; its bits are not cleared.
1359  */
1360 void
1361 task_policy_update_complete_unlocked(task_t task, task_pend_token_t pend_token)
1362 {
1363 #ifdef MACH_BSD
             /* Re-apply the network background state to the task's BSD process */
1364         if (pend_token->tpt_update_sockets) {
1365                 proc_apply_task_networkbg(task->bsd_info, THREAD_NULL);
1366         }
1367 #endif /* MACH_BSD */
1368
1369         /* The timer throttle has been removed or reduced, we need to look for expired timers and fire them */
1370         if (pend_token->tpt_update_timers) {
1371                 ml_timer_evaluate();
1372         }
1373
1374 #if CONFIG_EMBEDDED
1375         if (pend_token->tpt_update_watchers) {
1376                 apply_appstate_watchers(task);
1377         }
1378 #endif /* CONFIG_EMBEDDED */
1379
1380         if (pend_token->tpt_update_live_donor) {
1381                 task_importance_update_live_donor(task);
1382         }
1383
1384 #if CONFIG_SCHED_SFI
1385         /* use the resource coalition for SFI re-evaluation */
             /* task is passed as the callback context so the callback can skip it */
1386         if (pend_token->tpt_update_coal_sfi) {
1387                 coalition_for_each_task(task->coalition[COALITION_TYPE_RESOURCE],
1388                     (void *)task, task_sfi_reevaluate_cb);
1389         }
1390 #endif /* CONFIG_SCHED_SFI */
1391
1392 }
1393
1394 /*
1395  * Initiate a task policy state transition
1396  *
1397  * Everything that modifies requested except functions that need to hold the task lock
1398  * should use this function
1399  *
1400  * Argument validation should be performed before reaching this point.
1401  *
1402  * TODO: Do we need to check task->active?
1403  */
1404 void
1405 proc_set_task_policy(task_t     task,
1406     int        category,
1407     int        flavor,
1408     int        value)
1409 {
1410         struct task_pend_token pend_token = {};
1411
1412         task_lock(task);
1413
1414         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1415             (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_TASK))) | DBG_FUNC_START,
1416             task_pid(task), trequested_0(task),
1417             trequested_1(task), value, 0);
1418
1419         proc_set_task_policy_locked(task, category, flavor, value, 0);
1420
1421         task_policy_update_locked(task, &pend_token);
1422
1423
1424         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1425             (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_TASK))) | DBG_FUNC_END,
1426             task_pid(task), trequested_0(task),
1427             trequested_1(task), tpending(&pend_token), 0);
1428
1429         task_unlock(task);
1430
1431         task_policy_update_complete_unlocked(task, &pend_token);
1432 }
1433
1434 /*
1435  * Variant of proc_set_task_policy() that sets two scalars in the requested policy structure.
1436  * Same locking rules apply.
1437  */
1438 void
1439 proc_set_task_policy2(task_t    task,
1440     int       category,
1441     int       flavor,
1442     int       value,
1443     int       value2)
1444 {
1445         struct task_pend_token pend_token = {};
1446
1447         task_lock(task);
1448
1449         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1450             (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_TASK))) | DBG_FUNC_START,
1451             task_pid(task), trequested_0(task),
1452             trequested_1(task), value, 0);
1453
1454         proc_set_task_policy_locked(task, category, flavor, value, value2);
1455
1456         task_policy_update_locked(task, &pend_token);
1457
1458         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1459             (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_TASK))) | DBG_FUNC_END,
1460             task_pid(task), trequested_0(task),
1461             trequested_1(task), tpending(&pend_token), 0);
1462
1463         task_unlock(task);
1464
1465         task_policy_update_complete_unlocked(task, &pend_token);
1466 }
1467
1468 /*
1469  * Set the requested state for a specific flavor to a specific value.
1470  *
1471  *  TODO:
1472  *  Verify that arguments to non iopol things are 1 or 0
1473  */
1474 static void
1475 proc_set_task_policy_locked(task_t      task,
1476     int         category,
1477     int         flavor,
1478     int         value,
1479     int         value2)
1480 {
1481         int tier, passive;
1482
1483         struct task_requested_policy requested = task->requested_policy;
1484
1485         switch (flavor) {
1486         /* Category: EXTERNAL and INTERNAL */
1487
1488         case TASK_POLICY_DARWIN_BG:
1489                 if (category == TASK_POLICY_EXTERNAL) {
1490                         requested.trp_ext_darwinbg = value;
1491                 } else {
1492                         requested.trp_int_darwinbg = value;
1493                 }
1494                 break;
1495
1496         case TASK_POLICY_IOPOL:
1497                 proc_iopol_to_tier(value, &tier, &passive);
1498                 if (category == TASK_POLICY_EXTERNAL) {
1499                         requested.trp_ext_iotier  = tier;
1500                         requested.trp_ext_iopassive = passive;
1501                 } else {
1502                         requested.trp_int_iotier  = tier;
1503                         requested.trp_int_iopassive = passive;
1504                 }
1505                 break;
1506
1507         case TASK_POLICY_IO:
1508                 if (category == TASK_POLICY_EXTERNAL) {
1509                         requested.trp_ext_iotier = value;
1510                 } else {
1511                         requested.trp_int_iotier = value;
1512                 }
1513                 break;
1514
1515         case TASK_POLICY_PASSIVE_IO:
1516                 if (category == TASK_POLICY_EXTERNAL) {
1517                         requested.trp_ext_iopassive = value;
1518                 } else {
1519                         requested.trp_int_iopassive = value;
1520                 }
1521                 break;
1522
1523         /* Category: INTERNAL */
1524
1525         case TASK_POLICY_DARWIN_BG_IOPOL:
1526                 assert(category == TASK_POLICY_INTERNAL);
1527                 proc_iopol_to_tier(value, &tier, &passive);
1528                 requested.trp_bg_iotier = tier;
1529                 break;
1530
1531         /* Category: ATTRIBUTE */
1532
1533         case TASK_POLICY_TAL:
1534                 assert(category == TASK_POLICY_ATTRIBUTE);
1535                 requested.trp_tal_enabled = value;
1536                 break;
1537
1538         case TASK_POLICY_BOOST:
1539                 assert(category == TASK_POLICY_ATTRIBUTE);
1540                 requested.trp_boosted = value;
1541                 break;
1542
1543         case TASK_POLICY_ROLE:
1544                 assert(category == TASK_POLICY_ATTRIBUTE);
1545                 requested.trp_role = value;
1546                 break;
1547
1548         case TASK_POLICY_TERMINATED:
1549                 assert(category == TASK_POLICY_ATTRIBUTE);
1550                 requested.trp_terminated = value;
1551                 break;
1552
1553         case TASK_BASE_LATENCY_QOS_POLICY:
1554                 assert(category == TASK_POLICY_ATTRIBUTE);
1555                 requested.trp_base_latency_qos = value;
1556                 break;
1557
1558         case TASK_BASE_THROUGHPUT_QOS_POLICY:
1559                 assert(category == TASK_POLICY_ATTRIBUTE);
1560                 requested.trp_base_through_qos = value;
1561                 break;
1562
1563         case TASK_POLICY_SFI_MANAGED:
1564                 assert(category == TASK_POLICY_ATTRIBUTE);
1565                 requested.trp_sfi_managed = value;
1566                 break;
1567
1568         case TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS:
1569                 assert(category == TASK_POLICY_ATTRIBUTE);
1570                 requested.trp_base_latency_qos = value;
1571                 requested.trp_base_through_qos = value2;
1572                 break;
1573
1574         case TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS:
1575                 assert(category == TASK_POLICY_ATTRIBUTE);
1576                 requested.trp_over_latency_qos = value;
1577                 requested.trp_over_through_qos = value2;
1578                 break;
1579
1580         default:
1581                 panic("unknown task policy: %d %d %d %d", category, flavor, value, value2);
1582                 break;
1583         }
1584
1585         task->requested_policy = requested;
1586 }
1587
1588 /*
1589  * Gets what you set. Effective values may be different.
1590  */
1591 int
1592 proc_get_task_policy(task_t     task,
1593     int        category,
1594     int        flavor)
1595 {
1596         int value = 0;
1597
1598         task_lock(task);
1599
1600         struct task_requested_policy requested = task->requested_policy;
1601
1602         switch (flavor) {
1603         case TASK_POLICY_DARWIN_BG:
1604                 if (category == TASK_POLICY_EXTERNAL) {
1605                         value = requested.trp_ext_darwinbg;
1606                 } else {
1607                         value = requested.trp_int_darwinbg;
1608                 }
1609                 break;
1610         case TASK_POLICY_IOPOL:
1611                 if (category == TASK_POLICY_EXTERNAL) {
1612                         value = proc_tier_to_iopol(requested.trp_ext_iotier,
1613                             requested.trp_ext_iopassive);
1614                 } else {
1615                         value = proc_tier_to_iopol(requested.trp_int_iotier,
1616                             requested.trp_int_iopassive);
1617                 }
1618                 break;
1619         case TASK_POLICY_IO:
1620                 if (category == TASK_POLICY_EXTERNAL) {
1621                         value = requested.trp_ext_iotier;
1622                 } else {
1623                         value = requested.trp_int_iotier;
1624                 }
1625                 break;
1626         case TASK_POLICY_PASSIVE_IO:
1627                 if (category == TASK_POLICY_EXTERNAL) {
1628                         value = requested.trp_ext_iopassive;
1629                 } else {
1630                         value = requested.trp_int_iopassive;
1631                 }
1632                 break;
1633         case TASK_POLICY_DARWIN_BG_IOPOL:
1634                 assert(category == TASK_POLICY_ATTRIBUTE);
1635                 value = proc_tier_to_iopol(requested.trp_bg_iotier, 0);
1636                 break;
1637         case TASK_POLICY_ROLE:
1638                 assert(category == TASK_POLICY_ATTRIBUTE);
1639                 value = requested.trp_role;
1640                 break;
1641         case TASK_POLICY_SFI_MANAGED:
1642                 assert(category == TASK_POLICY_ATTRIBUTE);
1643                 value = requested.trp_sfi_managed;
1644                 break;
1645         default:
1646                 panic("unknown policy_flavor %d", flavor);
1647                 break;
1648         }
1649
1650         task_unlock(task);
1651
1652         return value;
1653 }
1654
1655 /*
1656  * Variant of proc_get_task_policy() that returns two scalar outputs.
1657  */
1658 void
1659 proc_get_task_policy2(task_t task,
1660     __assert_only int category,
1661     int flavor,
1662     int *value1,
1663     int *value2)
1664 {
1665         task_lock(task);
1666
1667         struct task_requested_policy requested = task->requested_policy;
1668
1669         switch (flavor) {
1670         case TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS:
1671                 assert(category == TASK_POLICY_ATTRIBUTE);
1672                 *value1 = requested.trp_base_latency_qos;
1673                 *value2 = requested.trp_base_through_qos;
1674                 break;
1675
1676         case TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS:
1677                 assert(category == TASK_POLICY_ATTRIBUTE);
1678                 *value1 = requested.trp_over_latency_qos;
1679                 *value2 = requested.trp_over_through_qos;
1680                 break;
1681
1682         default:
1683                 panic("unknown policy_flavor %d", flavor);
1684                 break;
1685         }
1686
1687         task_unlock(task);
1688 }
1689
1690 /*
1691  * Function for querying effective state for relevant subsystems
1692  * Gets what is actually in effect, for subsystems which pull policy instead of receive updates.
1693  *
1694  * ONLY the relevant subsystem should query this.
1695  * NEVER take a value from the 'effective' function and stuff it into a setter.
1696  *
1697  * NOTE: This accessor does not take the task lock.
1698  * Notifications of state updates need to be externally synchronized with state queries.
1699  * This routine *MUST* remain interrupt safe, as it is potentially invoked
1700  * within the context of a timer interrupt.  It is also called in KDP context for stackshot.
1701  */
1702 int
1703 proc_get_effective_task_policy(task_t   task,
1704     int      flavor)
1705 {
1706         int value = 0;
1707
1708         switch (flavor) {
1709         case TASK_POLICY_DARWIN_BG:
1710                 /*
1711                  * This backs the KPI call proc_pidbackgrounded to find
1712                  * out if a pid is backgrounded.
1713                  * It is used to communicate state to the VM system, as well as
1714                  * prioritizing requests to the graphics system.
1715                  * Returns 1 for background mode, 0 for normal mode
1716                  */
1717                 value = task->effective_policy.tep_darwinbg;
1718                 break;
1719         case TASK_POLICY_ALL_SOCKETS_BG:
1720                 /*
1721                  * do_background_socket() calls this to determine what it should do to the proc's sockets
1722                  * Returns 1 for background mode, 0 for normal mode
1723                  *
1724                  * This consults both thread and task so un-DBGing a thread while the task is BG
1725                  * doesn't get you out of the network throttle.
1726                  */
1727                 value = task->effective_policy.tep_all_sockets_bg;
1728                 break;
1729         case TASK_POLICY_SUP_ACTIVE:
1730                 /*
1731                  * Is the task in AppNap? This is used to determine the urgency
1732                  * that's passed to the performance management subsystem for threads
1733                  * that are running at a priority <= MAXPRI_THROTTLE.
1734                  */
1735                 value = task->effective_policy.tep_sup_active;
1736                 break;
1737         case TASK_POLICY_LATENCY_QOS:
1738                 /*
1739                  * timer arming calls into here to find out the timer coalescing level
1740                  * Returns a QoS tier (0-6)
1741                  */
1742                 value = task->effective_policy.tep_latency_qos;
1743                 break;
1744         case TASK_POLICY_THROUGH_QOS:
1745                 /*
1746                  * This value is passed into the urgency callout from the scheduler
1747                  * to the performance management subsystem.
1748                  * Returns a QoS tier (0-6)
1749                  */
1750                 value = task->effective_policy.tep_through_qos;
1751                 break;
1752         case TASK_POLICY_ROLE:
1753                 /*
1754                  * This controls various things that ask whether a process is foreground,
1755                  * like SFI, VM, access to GPU, etc
1756                  */
1757                 value = task->effective_policy.tep_role;
1758                 break;
1759         case TASK_POLICY_WATCHERS_BG:
1760                 /*
1761                  * This controls whether or not a thread watching this process should be BG.
1762                  */
1763                 value = task->effective_policy.tep_watchers_bg;
1764                 break;
1765         case TASK_POLICY_SFI_MANAGED:
1766                 /*
1767                  * This controls whether or not a process is targeted for specific control by thermald.
1768                  */
1769                 value = task->effective_policy.tep_sfi_managed;
1770                 break;
1771         default:
1772                 panic("unknown policy_flavor %d", flavor);
1773                 break;
1774         }
1775
1776         return value;
1777 }
1778
1779 /*
1780  * Convert from IOPOL_* values to throttle tiers.
1781  *
1782  * TODO: Can this be made more compact, like an array lookup
1783  * Note that it is possible to support e.g. IOPOL_PASSIVE_STANDARD in the future
1784  */
1785
1786 void
1787 proc_iopol_to_tier(int iopolicy, int *tier, int *passive)
1788 {
1789         *passive = 0;
1790         *tier = 0;
1791         switch (iopolicy) {
1792         case IOPOL_IMPORTANT:
1793                 *tier = THROTTLE_LEVEL_TIER0;
1794                 break;
1795         case IOPOL_PASSIVE:
1796                 *tier = THROTTLE_LEVEL_TIER0;
1797                 *passive = 1;
1798                 break;
1799         case IOPOL_STANDARD:
1800                 *tier = THROTTLE_LEVEL_TIER1;
1801                 break;
1802         case IOPOL_UTILITY:
1803                 *tier = THROTTLE_LEVEL_TIER2;
1804                 break;
1805         case IOPOL_THROTTLE:
1806                 *tier = THROTTLE_LEVEL_TIER3;
1807                 break;
1808         default:
1809                 panic("unknown I/O policy %d", iopolicy);
1810                 break;
1811         }
1812 }
1813
1814 int
1815 proc_tier_to_iopol(int tier, int passive)
1816 {
1817         if (passive == 1) {
1818                 switch (tier) {
1819                 case THROTTLE_LEVEL_TIER0:
1820                         return IOPOL_PASSIVE;
1821                 default:
1822                         panic("unknown passive tier %d", tier);
1823                         return IOPOL_DEFAULT;
1824                 }
1825         } else {
1826                 switch (tier) {
1827                 case THROTTLE_LEVEL_NONE:
1828                 case THROTTLE_LEVEL_TIER0:
1829                         return IOPOL_DEFAULT;
1830                 case THROTTLE_LEVEL_TIER1:
1831                         return IOPOL_STANDARD;
1832                 case THROTTLE_LEVEL_TIER2:
1833                         return IOPOL_UTILITY;
1834                 case THROTTLE_LEVEL_TIER3:
1835                         return IOPOL_THROTTLE;
1836                 default:
1837                         panic("unknown tier %d", tier);
1838                         return IOPOL_DEFAULT;
1839                 }
1840         }
1841 }
1842
1843 int
1844 proc_darwin_role_to_task_role(int darwin_role, int* task_role)
1845 {
1846         integer_t role = TASK_UNSPECIFIED;
1847
1848         switch (darwin_role) {
1849         case PRIO_DARWIN_ROLE_DEFAULT:
1850                 role = TASK_UNSPECIFIED;
1851                 break;
1852         case PRIO_DARWIN_ROLE_UI_FOCAL:
1853                 role = TASK_FOREGROUND_APPLICATION;
1854                 break;
1855         case PRIO_DARWIN_ROLE_UI:
1856                 role = TASK_DEFAULT_APPLICATION;
1857                 break;
1858         case PRIO_DARWIN_ROLE_NON_UI:
1859                 role = TASK_NONUI_APPLICATION;
1860                 break;
1861         case PRIO_DARWIN_ROLE_UI_NON_FOCAL:
1862                 role = TASK_BACKGROUND_APPLICATION;
1863                 break;
1864         case PRIO_DARWIN_ROLE_TAL_LAUNCH:
1865                 role = TASK_THROTTLE_APPLICATION;
1866                 break;
1867         case PRIO_DARWIN_ROLE_DARWIN_BG:
1868                 role = TASK_DARWINBG_APPLICATION;
1869                 break;
1870         default:
1871                 return EINVAL;
1872         }
1873
1874         *task_role = role;
1875
1876         return 0;
1877 }
1878
1879 int
1880 proc_task_role_to_darwin_role(int task_role)
1881 {
1882         switch (task_role) {
1883         case TASK_FOREGROUND_APPLICATION:
1884                 return PRIO_DARWIN_ROLE_UI_FOCAL;
1885         case TASK_BACKGROUND_APPLICATION:
1886                 return PRIO_DARWIN_ROLE_UI_NON_FOCAL;
1887         case TASK_NONUI_APPLICATION:
1888                 return PRIO_DARWIN_ROLE_NON_UI;
1889         case TASK_DEFAULT_APPLICATION:
1890                 return PRIO_DARWIN_ROLE_UI;
1891         case TASK_THROTTLE_APPLICATION:
1892                 return PRIO_DARWIN_ROLE_TAL_LAUNCH;
1893         case TASK_DARWINBG_APPLICATION:
1894                 return PRIO_DARWIN_ROLE_DARWIN_BG;
1895         case TASK_UNSPECIFIED:
1896         default:
1897                 return PRIO_DARWIN_ROLE_DEFAULT;
1898         }
1899 }
1900
1901
1902 /* TODO: remove this variable when interactive daemon audit period is over */
1903 extern boolean_t ipc_importance_interactive_receiver;
1904
1905 /*
1906  * Called at process exec to initialize the apptype, qos clamp, and qos seed of a process
1907  *
1908  * TODO: Make this function more table-driven instead of ad-hoc
1909  */
1910 void
1911 proc_set_task_spawnpolicy(task_t task, thread_t thread, int apptype, int qos_clamp, int role,
1912     ipc_port_t * portwatch_ports, uint32_t portwatch_count)
1913 {
1914         struct task_pend_token pend_token = {};
1915
1916         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1917             (IMPORTANCE_CODE(IMP_TASK_APPTYPE, apptype)) | DBG_FUNC_START,
1918             task_pid(task), trequested_0(task), trequested_1(task),
1919             apptype, 0);
1920
1921         switch (apptype) {
1922         case TASK_APPTYPE_APP_TAL:
1923         case TASK_APPTYPE_APP_DEFAULT:
1924                 /* Apps become donors via the 'live-donor' flag instead of the static donor flag */
1925                 task_importance_mark_donor(task, FALSE);
1926                 task_importance_mark_live_donor(task, TRUE);
1927                 task_importance_mark_receiver(task, FALSE);
1928 #if CONFIG_EMBEDDED
1929                 task_importance_mark_denap_receiver(task, FALSE);
1930 #else
1931                 /* Apps are de-nap recievers on desktop for suppression behaviors */
1932                 task_importance_mark_denap_receiver(task, TRUE);
1933 #endif /* CONFIG_EMBEDDED */
1934                 break;
1935
1936         case TASK_APPTYPE_DAEMON_INTERACTIVE:
1937                 task_importance_mark_donor(task, TRUE);
1938                 task_importance_mark_live_donor(task, FALSE);
1939
1940                 /*
1941                  * A boot arg controls whether interactive daemons are importance receivers.
1942                  * Normally, they are not.  But for testing their behavior as an adaptive
1943                  * daemon, the boot-arg can be set.
1944                  *
1945                  * TODO: remove this when the interactive daemon audit period is over.
1946                  */
1947                 task_importance_mark_receiver(task, /* FALSE */ ipc_importance_interactive_receiver);
1948                 task_importance_mark_denap_receiver(task, FALSE);
1949                 break;
1950
1951         case TASK_APPTYPE_DAEMON_STANDARD:
1952                 task_importance_mark_donor(task, TRUE);
1953                 task_importance_mark_live_donor(task, FALSE);
1954                 task_importance_mark_receiver(task, FALSE);
1955                 task_importance_mark_denap_receiver(task, FALSE);
1956                 break;
1957
1958         case TASK_APPTYPE_DAEMON_ADAPTIVE:
1959                 task_importance_mark_donor(task, FALSE);
1960                 task_importance_mark_live_donor(task, FALSE);
1961                 task_importance_mark_receiver(task, TRUE);
1962                 task_importance_mark_denap_receiver(task, FALSE);
1963                 break;
1964
1965         case TASK_APPTYPE_DAEMON_BACKGROUND:
1966                 task_importance_mark_donor(task, FALSE);
1967                 task_importance_mark_live_donor(task, FALSE);
1968                 task_importance_mark_receiver(task, FALSE);
1969                 task_importance_mark_denap_receiver(task, FALSE);
1970                 break;
1971
1972         case TASK_APPTYPE_DRIVER:
1973                 task_importance_mark_donor(task, FALSE);
1974                 task_importance_mark_live_donor(task, FALSE);
1975                 task_importance_mark_receiver(task, FALSE);
1976                 task_importance_mark_denap_receiver(task, FALSE);
1977                 break;
1978
1979         case TASK_APPTYPE_NONE:
1980                 break;
1981         }
1982
1983         if (portwatch_ports != NULL && apptype == TASK_APPTYPE_DAEMON_ADAPTIVE) {
1984                 int portwatch_boosts = 0;
1985
1986                 for (uint32_t i = 0; i < portwatch_count; i++) {
1987                         ipc_port_t port = NULL;
1988
1989                         if (IP_VALID(port = portwatch_ports[i])) {
1990                                 int boost = 0;
1991                                 task_add_importance_watchport(task, port, &boost);
1992                                 portwatch_boosts += boost;
1993                         }
1994                 }
1995
1996                 if (portwatch_boosts > 0) {
1997                         task_importance_hold_internal_assertion(task, portwatch_boosts);
1998                 }
1999         }
2000
2001         /* Redirect the turnstile push of watchports to task */
2002         if (portwatch_count && portwatch_ports != NULL) {
2003                 task_add_turnstile_watchports(task, thread, portwatch_ports, portwatch_count);
2004         }
2005
2006         task_lock(task);
2007
2008         if (apptype == TASK_APPTYPE_APP_TAL) {
2009                 /* TAL starts off enabled by default */
2010                 task->requested_policy.trp_tal_enabled = 1;
2011         }
2012
2013         if (apptype != TASK_APPTYPE_NONE) {
2014                 task->requested_policy.trp_apptype = apptype;
2015         }
2016
2017 #if CONFIG_EMBEDDED
2018         /* Remove this after launchd starts setting it properly */
2019         if (apptype == TASK_APPTYPE_APP_DEFAULT && role == TASK_UNSPECIFIED) {
2020                 task->requested_policy.trp_role = TASK_FOREGROUND_APPLICATION;
2021         } else
2022 #endif
2023         if (role != TASK_UNSPECIFIED) {
2024                 task->requested_policy.trp_role = role;
2025         }
2026
2027         if (qos_clamp != THREAD_QOS_UNSPECIFIED) {
2028                 task->requested_policy.trp_qos_clamp = qos_clamp;
2029         }
2030
2031         task_policy_update_locked(task, &pend_token);
2032
2033         task_unlock(task);
2034
2035         /* Ensure the donor bit is updated to be in sync with the new live donor status */
2036         pend_token.tpt_update_live_donor = 1;
2037
2038         task_policy_update_complete_unlocked(task, &pend_token);
2039
2040         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2041             (IMPORTANCE_CODE(IMP_TASK_APPTYPE, apptype)) | DBG_FUNC_END,
2042             task_pid(task), trequested_0(task), trequested_1(task),
2043             task_is_importance_receiver(task), 0);
2044 }
2045
2046 /*
2047  * Inherit task role across exec
2048  */
2049 void
2050 proc_inherit_task_role(task_t new_task,
2051     task_t old_task)
2052 {
2053         int role;
2054
2055         /* inherit the role from old task to new task */
2056         role = proc_get_task_policy(old_task, TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE);
2057         proc_set_task_policy(new_task, TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE, role);
2058 }
2059
2060 extern void *initproc;
2061
2062 /*
2063  * Compute the default main thread qos for a task
2064  */
2065 int
2066 task_compute_main_thread_qos(task_t task)
2067 {
2068         int primordial_qos = THREAD_QOS_UNSPECIFIED;
2069
2070         int qos_clamp = task->requested_policy.trp_qos_clamp;
2071
2072         switch (task->requested_policy.trp_apptype) {
2073         case TASK_APPTYPE_APP_TAL:
2074         case TASK_APPTYPE_APP_DEFAULT:
2075                 primordial_qos = THREAD_QOS_USER_INTERACTIVE;
2076                 break;
2077
2078         case TASK_APPTYPE_DAEMON_INTERACTIVE:
2079         case TASK_APPTYPE_DAEMON_STANDARD:
2080         case TASK_APPTYPE_DAEMON_ADAPTIVE:
2081         case TASK_APPTYPE_DRIVER:
2082                 primordial_qos = THREAD_QOS_LEGACY;
2083                 break;
2084
2085         case TASK_APPTYPE_DAEMON_BACKGROUND:
2086                 primordial_qos = THREAD_QOS_BACKGROUND;
2087                 break;
2088         }
2089
2090         if (task->bsd_info == initproc) {
2091                 /* PID 1 gets a special case */
2092                 primordial_qos = MAX(primordial_qos, THREAD_QOS_USER_INITIATED);
2093         }
2094
2095         if (qos_clamp != THREAD_QOS_UNSPECIFIED) {
2096                 if (primordial_qos != THREAD_QOS_UNSPECIFIED) {
2097                         primordial_qos = MIN(qos_clamp, primordial_qos);
2098                 } else {
2099                         primordial_qos = qos_clamp;
2100                 }
2101         }
2102
2103         return primordial_qos;
2104 }
2105
2106
2107 /* for process_policy to check before attempting to set */
2108 boolean_t
2109 proc_task_is_tal(task_t task)
2110 {
2111         return (task->requested_policy.trp_apptype == TASK_APPTYPE_APP_TAL) ? TRUE : FALSE;
2112 }
2113
2114 int
2115 task_get_apptype(task_t task)
2116 {
2117         return task->requested_policy.trp_apptype;
2118 }
2119
2120 boolean_t
2121 task_is_daemon(task_t task)
2122 {
2123         switch (task->requested_policy.trp_apptype) {
2124         case TASK_APPTYPE_DAEMON_INTERACTIVE:
2125         case TASK_APPTYPE_DAEMON_STANDARD:
2126         case TASK_APPTYPE_DAEMON_ADAPTIVE:
2127         case TASK_APPTYPE_DAEMON_BACKGROUND:
2128                 return TRUE;
2129         default:
2130                 return FALSE;
2131         }
2132 }
2133
2134 bool
2135 task_is_driver(task_t task)
2136 {
2137         if (!task) {
2138                 return FALSE;
2139         }
2140         return task->requested_policy.trp_apptype == TASK_APPTYPE_DRIVER;
2141 }
2142
2143 boolean_t
2144 task_is_app(task_t task)
2145 {
2146         switch (task->requested_policy.trp_apptype) {
2147         case TASK_APPTYPE_APP_DEFAULT:
2148         case TASK_APPTYPE_APP_TAL:
2149                 return TRUE;
2150         default:
2151                 return FALSE;
2152         }
2153 }
2154
2155 /* for telemetry */
2156 integer_t
2157 task_grab_latency_qos(task_t task)
2158 {
2159         return qos_latency_policy_package(proc_get_effective_task_policy(task, TASK_POLICY_LATENCY_QOS));
2160 }
2161
2162 /* update the darwin background action state in the flags field for libproc */
2163 int
2164 proc_get_darwinbgstate(task_t task, uint32_t * flagsp)
2165 {
2166         if (task->requested_policy.trp_ext_darwinbg) {
2167                 *flagsp |= PROC_FLAG_EXT_DARWINBG;
2168         }
2169
2170         if (task->requested_policy.trp_int_darwinbg) {
2171                 *flagsp |= PROC_FLAG_DARWINBG;
2172         }
2173
2174 #if CONFIG_EMBEDDED
2175         if (task->requested_policy.trp_apptype == TASK_APPTYPE_DAEMON_BACKGROUND) {
2176                 *flagsp |= PROC_FLAG_IOS_APPLEDAEMON;
2177         }
2178
2179         if (task->requested_policy.trp_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE) {
2180                 *flagsp |= PROC_FLAG_IOS_IMPPROMOTION;
2181         }
2182 #endif /* CONFIG_EMBEDDED */
2183
2184         if (task->requested_policy.trp_apptype == TASK_APPTYPE_APP_DEFAULT ||
2185             task->requested_policy.trp_apptype == TASK_APPTYPE_APP_TAL) {
2186                 *flagsp |= PROC_FLAG_APPLICATION;
2187         }
2188
2189         if (task->requested_policy.trp_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE) {
2190                 *flagsp |= PROC_FLAG_ADAPTIVE;
2191         }
2192
2193         if (task->requested_policy.trp_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE &&
2194             task->requested_policy.trp_boosted == 1) {
2195                 *flagsp |= PROC_FLAG_ADAPTIVE_IMPORTANT;
2196         }
2197
2198         if (task_is_importance_donor(task)) {
2199                 *flagsp |= PROC_FLAG_IMPORTANCE_DONOR;
2200         }
2201
2202         if (task->effective_policy.tep_sup_active) {
2203                 *flagsp |= PROC_FLAG_SUPPRESSED;
2204         }
2205
2206         return 0;
2207 }
2208
2209 /*
2210  * Tracepoint data... Reading the tracepoint data can be somewhat complicated.
2211  * The current scheme packs as much data into a single tracepoint as it can.
2212  *
2213  * Each task/thread requested/effective structure is 64 bits in size. Any
2214  * given tracepoint will emit either requested or effective data, but not both.
2215  *
2216  * A tracepoint may emit any of task, thread, or task & thread data.
2217  *
2218  * The type of data emitted varies with pointer size. Where possible, both
2219  * task and thread data are emitted. In LP32 systems, the first and second
2220  * halves of either the task or thread data is emitted.
2221  *
2222  * The code uses uintptr_t array indexes instead of high/low to avoid
2223  * confusion WRT big vs little endian.
2224  *
2225  * The truth table for the tracepoint data functions is below, and has the
2226  * following invariants:
2227  *
2228  * 1) task and thread are uintptr_t*
2229  * 2) task may never be NULL
2230  *
2231  *
2232  *                                     LP32            LP64
2233  * trequested_0(task, NULL)            task[0]         task[0]
2234  * trequested_1(task, NULL)            task[1]         NULL
2235  * trequested_0(task, thread)          thread[0]       task[0]
2236  * trequested_1(task, thread)          thread[1]       thread[0]
2237  *
2238  * Basically, you get a full task or thread on LP32, and both on LP64.
2239  *
2240  * The uintptr_t munging here is squicky enough to deserve a comment.
2241  *
2242  * The variables we are accessing are laid out in memory like this:
2243  *
2244  * [            LP64 uintptr_t  0          ]
2245  * [ LP32 uintptr_t 0 ] [ LP32 uintptr_t 1 ]
2246  *
2247  *      1   2   3   4     5   6   7   8
2248  *
2249  */
2250
2251 static uintptr_t
2252 trequested_0(task_t task)
2253 {
2254         static_assert(sizeof(struct task_requested_policy) == sizeof(uint64_t), "size invariant violated");
2255
2256         uintptr_t* raw = (uintptr_t*)&task->requested_policy;
2257
2258         return raw[0];
2259 }
2260
2261 static uintptr_t
2262 trequested_1(task_t task)
2263 {
2264 #if defined __LP64__
2265         (void)task;
2266         return 0;
2267 #else
2268         uintptr_t* raw = (uintptr_t*)(&task->requested_policy);
2269         return raw[1];
2270 #endif
2271 }
2272
2273 static uintptr_t
2274 teffective_0(task_t task)
2275 {
2276         uintptr_t* raw = (uintptr_t*)&task->effective_policy;
2277
2278         return raw[0];
2279 }
2280
2281 static uintptr_t
2282 teffective_1(task_t task)
2283 {
2284 #if defined __LP64__
2285         (void)task;
2286         return 0;
2287 #else
2288         uintptr_t* raw = (uintptr_t*)(&task->effective_policy);
2289         return raw[1];
2290 #endif
2291 }
2292
2293 /* dump pending for tracepoint */
2294 uint32_t
2295 tpending(task_pend_token_t pend_token)
2296 {
2297         return *(uint32_t*)(void*)(pend_token);
2298 }
2299
2300 uint64_t
2301 task_requested_bitfield(task_t task)
2302 {
2303         uint64_t bits = 0;
2304         struct task_requested_policy requested = task->requested_policy;
2305
2306         bits |= (requested.trp_int_darwinbg     ? POLICY_REQ_INT_DARWIN_BG  : 0);
2307         bits |= (requested.trp_ext_darwinbg     ? POLICY_REQ_EXT_DARWIN_BG  : 0);
2308         bits |= (requested.trp_int_iotier       ? (((uint64_t)requested.trp_int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0);
2309         bits |= (requested.trp_ext_iotier       ? (((uint64_t)requested.trp_ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0);
2310         bits |= (requested.trp_int_iopassive    ? POLICY_REQ_INT_PASSIVE_IO : 0);
2311         bits |= (requested.trp_ext_iopassive    ? POLICY_REQ_EXT_PASSIVE_IO : 0);
2312         bits |= (requested.trp_bg_iotier        ? (((uint64_t)requested.trp_bg_iotier) << POLICY_REQ_BG_IOTIER_SHIFT)   : 0);
2313         bits |= (requested.trp_terminated       ? POLICY_REQ_TERMINATED     : 0);
2314
2315         bits |= (requested.trp_boosted          ? POLICY_REQ_BOOSTED        : 0);
2316         bits |= (requested.trp_tal_enabled      ? POLICY_REQ_TAL_ENABLED    : 0);
2317         bits |= (requested.trp_apptype          ? (((uint64_t)requested.trp_apptype) << POLICY_REQ_APPTYPE_SHIFT)  : 0);
2318         bits |= (requested.trp_role             ? (((uint64_t)requested.trp_role) << POLICY_REQ_ROLE_SHIFT)     : 0);
2319
2320         bits |= (requested.trp_sup_active       ? POLICY_REQ_SUP_ACTIVE         : 0);
2321         bits |= (requested.trp_sup_lowpri_cpu   ? POLICY_REQ_SUP_LOWPRI_CPU     : 0);
2322         bits |= (requested.trp_sup_cpu          ? POLICY_REQ_SUP_CPU            : 0);
2323         bits |= (requested.trp_sup_timer        ? (((uint64_t)requested.trp_sup_timer) << POLICY_REQ_SUP_TIMER_THROTTLE_SHIFT) : 0);
2324         bits |= (requested.trp_sup_throughput   ? (((uint64_t)requested.trp_sup_throughput) << POLICY_REQ_SUP_THROUGHPUT_SHIFT)     : 0);
2325         bits |= (requested.trp_sup_disk         ? POLICY_REQ_SUP_DISK_THROTTLE  : 0);
2326         bits |= (requested.trp_sup_bg_sockets   ? POLICY_REQ_SUP_BG_SOCKETS     : 0);
2327
2328         bits |= (requested.trp_base_latency_qos ? (((uint64_t)requested.trp_base_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0);
2329         bits |= (requested.trp_over_latency_qos ? (((uint64_t)requested.trp_over_latency_qos) << POLICY_REQ_OVER_LATENCY_QOS_SHIFT) : 0);
2330         bits |= (requested.trp_base_through_qos ? (((uint64_t)requested.trp_base_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0);
2331         bits |= (requested.trp_over_through_qos ? (((uint64_t)requested.trp_over_through_qos) << POLICY_REQ_OVER_THROUGH_QOS_SHIFT) : 0);
2332         bits |= (requested.trp_sfi_managed      ? POLICY_REQ_SFI_MANAGED        : 0);
2333         bits |= (requested.trp_qos_clamp        ? (((uint64_t)requested.trp_qos_clamp) << POLICY_REQ_QOS_CLAMP_SHIFT)        : 0);
2334
2335         return bits;
2336 }
2337
2338 uint64_t
2339 task_effective_bitfield(task_t task)
2340 {
2341         uint64_t bits = 0;
2342         struct task_effective_policy effective = task->effective_policy;
2343
2344         bits |= (effective.tep_io_tier          ? (((uint64_t)effective.tep_io_tier) << POLICY_EFF_IO_TIER_SHIFT) : 0);
2345         bits |= (effective.tep_io_passive       ? POLICY_EFF_IO_PASSIVE     : 0);
2346         bits |= (effective.tep_darwinbg         ? POLICY_EFF_DARWIN_BG      : 0);
2347         bits |= (effective.tep_lowpri_cpu       ? POLICY_EFF_LOWPRI_CPU     : 0);
2348         bits |= (effective.tep_terminated       ? POLICY_EFF_TERMINATED     : 0);
2349         bits |= (effective.tep_all_sockets_bg   ? POLICY_EFF_ALL_SOCKETS_BG : 0);
2350         bits |= (effective.tep_new_sockets_bg   ? POLICY_EFF_NEW_SOCKETS_BG : 0);
2351         bits |= (effective.tep_bg_iotier        ? (((uint64_t)effective.tep_bg_iotier) << POLICY_EFF_BG_IOTIER_SHIFT) : 0);
2352         bits |= (effective.tep_qos_ui_is_urgent ? POLICY_EFF_QOS_UI_IS_URGENT : 0);
2353
2354         bits |= (effective.tep_tal_engaged      ? POLICY_EFF_TAL_ENGAGED    : 0);
2355         bits |= (effective.tep_watchers_bg      ? POLICY_EFF_WATCHERS_BG    : 0);
2356         bits |= (effective.tep_sup_active       ? POLICY_EFF_SUP_ACTIVE     : 0);
2357         bits |= (effective.tep_suppressed_cpu   ? POLICY_EFF_SUP_CPU        : 0);
2358         bits |= (effective.tep_role             ? (((uint64_t)effective.tep_role) << POLICY_EFF_ROLE_SHIFT)        : 0);
2359         bits |= (effective.tep_latency_qos      ? (((uint64_t)effective.tep_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0);
2360         bits |= (effective.tep_through_qos      ? (((uint64_t)effective.tep_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0);
2361         bits |= (effective.tep_sfi_managed      ? POLICY_EFF_SFI_MANAGED    : 0);
2362         bits |= (effective.tep_qos_ceiling      ? (((uint64_t)effective.tep_qos_ceiling) << POLICY_EFF_QOS_CEILING_SHIFT) : 0);
2363
2364         return bits;
2365 }
2366
2367
2368 /*
2369  * Resource usage and CPU related routines
2370  */
2371
2372 int
2373 proc_get_task_ruse_cpu(task_t task, uint32_t *policyp, uint8_t *percentagep, uint64_t *intervalp, uint64_t *deadlinep)
2374 {
2375         int error = 0;
2376         int scope;
2377
2378         task_lock(task);
2379
2380
2381         error = task_get_cpuusage(task, percentagep, intervalp, deadlinep, &scope);
2382         task_unlock(task);
2383
2384         /*
2385          * Reverse-map from CPU resource limit scopes back to policies (see comment below).
2386          */
2387         if (scope == TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
2388                 *policyp = TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC;
2389         } else if (scope == TASK_RUSECPU_FLAGS_PROC_LIMIT) {
2390                 *policyp = TASK_POLICY_RESOURCE_ATTRIBUTE_THROTTLE;
2391         } else if (scope == TASK_RUSECPU_FLAGS_DEADLINE) {
2392                 *policyp = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE;
2393         }
2394
2395         return error;
2396 }
2397
2398 /*
2399  * Configure the default CPU usage monitor parameters.
2400  *
2401  * For tasks which have this mechanism activated: if any thread in the
2402  * process consumes more CPU than this, an EXC_RESOURCE exception will be generated.
2403  */
2404 void
2405 proc_init_cpumon_params(void)
2406 {
2407         /*
2408          * The max CPU percentage can be configured via the boot-args and
2409          * a key in the device tree. The boot-args are honored first, then the
2410          * device tree.
2411          */
2412         if (!PE_parse_boot_argn("max_cpumon_percentage", &proc_max_cpumon_percentage,
2413             sizeof(proc_max_cpumon_percentage))) {
2414                 uint64_t max_percentage = 0ULL;
2415
2416                 if (!PE_get_default("kern.max_cpumon_percentage", &max_percentage,
2417                     sizeof(max_percentage))) {
2418                         max_percentage = DEFAULT_CPUMON_PERCENTAGE;
2419                 }
2420
2421                 assert(max_percentage <= UINT8_MAX);
2422                 proc_max_cpumon_percentage = (uint8_t) max_percentage;
2423         }
2424
2425         if (proc_max_cpumon_percentage > 100) {
2426                 proc_max_cpumon_percentage = 100;
2427         }
2428
2429         /*
2430          * The interval should be specified in seconds.
2431          *
2432          * Like the max CPU percentage, the max CPU interval can be configured
2433          * via boot-args and the device tree.
2434          */
2435         if (!PE_parse_boot_argn("max_cpumon_interval", &proc_max_cpumon_interval,
2436             sizeof(proc_max_cpumon_interval))) {
2437                 if (!PE_get_default("kern.max_cpumon_interval", &proc_max_cpumon_interval,
2438                     sizeof(proc_max_cpumon_interval))) {
2439                         proc_max_cpumon_interval = DEFAULT_CPUMON_INTERVAL;
2440                 }
2441         }
2442
2443         proc_max_cpumon_interval *= NSEC_PER_SEC;
2444
2445         /* TEMPORARY boot arg to control App suppression */
2446         PE_parse_boot_argn("task_policy_suppression_flags",
2447             &task_policy_suppression_flags,
2448             sizeof(task_policy_suppression_flags));
2449
2450         /* adjust suppression disk policy if called for in boot arg */
2451         if (task_policy_suppression_flags & TASK_POLICY_SUPPRESSION_IOTIER2) {
2452                 proc_suppressed_disk_tier = THROTTLE_LEVEL_TIER2;
2453         }
2454 }
2455
2456 /*
2457  * Currently supported configurations for CPU limits.
2458  *
2459  * Policy                               | Deadline-based CPU limit | Percentage-based CPU limit
2460  * -------------------------------------+--------------------------+------------------------------
2461  * PROC_POLICY_RSRCACT_THROTTLE         | ENOTSUP                  | Task-wide scope only
2462  * PROC_POLICY_RSRCACT_SUSPEND          | Task-wide scope only     | ENOTSUP
2463  * PROC_POLICY_RSRCACT_TERMINATE        | Task-wide scope only     | ENOTSUP
2464  * PROC_POLICY_RSRCACT_NOTIFY_KQ        | Task-wide scope only     | ENOTSUP
2465  * PROC_POLICY_RSRCACT_NOTIFY_EXC       | ENOTSUP                  | Per-thread scope only
2466  *
2467  * A deadline-based CPU limit is actually a simple wallclock timer - the requested action is performed
2468  * after the specified amount of wallclock time has elapsed.
2469  *
2470  * A percentage-based CPU limit performs the requested action after the specified amount of actual CPU time
2471  * has been consumed -- regardless of how much wallclock time has elapsed -- by either the task as an
2472  * aggregate entity (so-called "Task-wide" or "Proc-wide" scope, whereby the CPU time consumed by all threads
2473  * in the task are added together), or by any one thread in the task (so-called "per-thread" scope).
2474  *
2475  * We support either deadline != 0 OR percentage != 0, but not both. The original intention in having them
2476  * share an API was to use actual CPU time as the basis of the deadline-based limit (as in: perform an action
2477  * after I have used some amount of CPU time; this is different than the recurring percentage/interval model)
2478  * but the potential consumer of the API at the time was insisting on wallclock time instead.
2479  *
2480  * Currently, requesting notification via an exception is the only way to get per-thread scope for a
2481  * CPU limit. All other types of notifications force task-wide scope for the limit.
2482  */
2483 int
2484 proc_set_task_ruse_cpu(task_t task, uint32_t policy, uint8_t percentage, uint64_t interval, uint64_t deadline,
2485     int cpumon_entitled)
2486 {
2487         int error = 0;
2488         int scope;
2489
2490         /*
2491          * Enforce the matrix of supported configurations for policy, percentage, and deadline.
2492          */
2493         switch (policy) {
2494         // If no policy is explicitly given, the default is to throttle.
2495         case TASK_POLICY_RESOURCE_ATTRIBUTE_NONE:
2496         case TASK_POLICY_RESOURCE_ATTRIBUTE_THROTTLE:
2497                 if (deadline != 0) {
2498                         return ENOTSUP;
2499                 }
2500                 scope = TASK_RUSECPU_FLAGS_PROC_LIMIT;
2501                 break;
2502         case TASK_POLICY_RESOURCE_ATTRIBUTE_SUSPEND:
2503         case TASK_POLICY_RESOURCE_ATTRIBUTE_TERMINATE:
2504         case TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_KQ:
2505                 if (percentage != 0) {
2506                         return ENOTSUP;
2507                 }
2508                 scope = TASK_RUSECPU_FLAGS_DEADLINE;
2509                 break;
2510         case TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC:
2511                 if (deadline != 0) {
2512                         return ENOTSUP;
2513                 }
2514                 scope = TASK_RUSECPU_FLAGS_PERTHR_LIMIT;
2515 #ifdef CONFIG_NOMONITORS
2516                 return error;
2517 #endif /* CONFIG_NOMONITORS */
2518                 break;
2519         default:
2520                 return EINVAL;
2521         }
2522
2523         task_lock(task);
2524         if (task != current_task()) {
2525                 task->policy_ru_cpu_ext = policy;
2526         } else {
2527                 task->policy_ru_cpu = policy;
2528         }
2529         error = task_set_cpuusage(task, percentage, interval, deadline, scope, cpumon_entitled);
2530         task_unlock(task);
2531         return error;
2532 }
2533
2534 /* TODO: get rid of these */
2535 #define TASK_POLICY_CPU_RESOURCE_USAGE          0
2536 #define TASK_POLICY_WIREDMEM_RESOURCE_USAGE     1
2537 #define TASK_POLICY_VIRTUALMEM_RESOURCE_USAGE   2
2538 #define TASK_POLICY_DISK_RESOURCE_USAGE         3
2539 #define TASK_POLICY_NETWORK_RESOURCE_USAGE      4
2540 #define TASK_POLICY_POWER_RESOURCE_USAGE        5
2541
2542 #define TASK_POLICY_RESOURCE_USAGE_COUNT        6
2543
2544 int
2545 proc_clear_task_ruse_cpu(task_t task, int cpumon_entitled)
2546 {
2547         int error = 0;
2548         int action;
2549         void * bsdinfo = NULL;
2550
2551         task_lock(task);
2552         if (task != current_task()) {
2553                 task->policy_ru_cpu_ext = TASK_POLICY_RESOURCE_ATTRIBUTE_DEFAULT;
2554         } else {
2555                 task->policy_ru_cpu = TASK_POLICY_RESOURCE_ATTRIBUTE_DEFAULT;
2556         }
2557
2558         error = task_clear_cpuusage_locked(task, cpumon_entitled);
2559         if (error != 0) {
2560                 goto out;
2561         }
2562
2563         action = task->applied_ru_cpu;
2564         if (task->applied_ru_cpu_ext != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
2565                 /* reset action */
2566                 task->applied_ru_cpu_ext = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE;
2567         }
2568         if (action != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
2569                 bsdinfo = task->bsd_info;
2570                 task_unlock(task);
2571                 proc_restore_resource_actions(bsdinfo, TASK_POLICY_CPU_RESOURCE_USAGE, action);
2572                 goto out1;
2573         }
2574
2575 out:
2576         task_unlock(task);
2577 out1:
2578         return error;
2579 }
2580
2581 /* used to apply resource limit related actions */
2582 static int
2583 task_apply_resource_actions(task_t task, int type)
2584 {
2585         int action = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE;
2586         void * bsdinfo = NULL;
2587
2588         switch (type) {
2589         case TASK_POLICY_CPU_RESOURCE_USAGE:
2590                 break;
2591         case TASK_POLICY_WIREDMEM_RESOURCE_USAGE:
2592         case TASK_POLICY_VIRTUALMEM_RESOURCE_USAGE:
2593         case TASK_POLICY_DISK_RESOURCE_USAGE:
2594         case TASK_POLICY_NETWORK_RESOURCE_USAGE:
2595         case TASK_POLICY_POWER_RESOURCE_USAGE:
2596                 return 0;
2597
2598         default:
2599                 return 1;
2600         }
2601         ;
2602
2603         /* only cpu actions for now */
2604         task_lock(task);
2605
2606         if (task->applied_ru_cpu_ext == TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
2607                 /* apply action */
2608                 task->applied_ru_cpu_ext = task->policy_ru_cpu_ext;
2609                 action = task->applied_ru_cpu_ext;
2610         } else {
2611                 action = task->applied_ru_cpu_ext;
2612         }
2613
2614         if (action != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
2615                 bsdinfo = task->bsd_info;
2616                 task_unlock(task);
2617                 proc_apply_resource_actions(bsdinfo, TASK_POLICY_CPU_RESOURCE_USAGE, action);
2618         } else {
2619                 task_unlock(task);
2620         }
2621
2622         return 0;
2623 }
2624
2625 /*
2626  * XXX This API is somewhat broken; we support multiple simultaneous CPU limits, but the get/set API
2627  * only allows for one at a time. This means that if there is a per-thread limit active, the other
2628  * "scopes" will not be accessible via this API. We could change it to pass in the scope of interest
2629  * to the caller, and prefer that, but there's no need for that at the moment.
2630  */
2631 static int
2632 task_get_cpuusage(task_t task, uint8_t *percentagep, uint64_t *intervalp, uint64_t *deadlinep, int *scope)
2633 {
2634         *percentagep = 0;
2635         *intervalp = 0;
2636         *deadlinep = 0;
2637
2638         if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) != 0) {
2639                 *scope = TASK_RUSECPU_FLAGS_PERTHR_LIMIT;
2640                 *percentagep = task->rusage_cpu_perthr_percentage;
2641                 *intervalp = task->rusage_cpu_perthr_interval;
2642         } else if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PROC_LIMIT) != 0) {
2643                 *scope = TASK_RUSECPU_FLAGS_PROC_LIMIT;
2644                 *percentagep = task->rusage_cpu_percentage;
2645                 *intervalp = task->rusage_cpu_interval;
2646         } else if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_DEADLINE) != 0) {
2647                 *scope = TASK_RUSECPU_FLAGS_DEADLINE;
2648                 *deadlinep = task->rusage_cpu_deadline;
2649         } else {
2650                 *scope = 0;
2651         }
2652
2653         return 0;
2654 }
2655
2656 /*
2657  * Suspend the CPU usage monitor for the task.  Return value indicates
2658  * if the mechanism was actually enabled.
2659  */
2660 int
2661 task_suspend_cpumon(task_t task)
2662 {
2663         thread_t thread;
2664
2665         task_lock_assert_owned(task);
2666
2667         if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) == 0) {
2668                 return KERN_INVALID_ARGUMENT;
2669         }
2670
2671 #if CONFIG_TELEMETRY
2672         /*
2673          * Disable task-wide telemetry if it was ever enabled by the CPU usage
2674          * monitor's warning zone.
2675          */
2676         telemetry_task_ctl_locked(task, TF_CPUMON_WARNING, 0);
2677 #endif
2678
2679         /*
2680          * Suspend monitoring for the task, and propagate that change to each thread.
2681          */
2682         task->rusage_cpu_flags &= ~(TASK_RUSECPU_FLAGS_PERTHR_LIMIT | TASK_RUSECPU_FLAGS_FATAL_CPUMON);
2683         queue_iterate(&task->threads, thread, thread_t, task_threads) {
2684                 act_set_astledger(thread);
2685         }
2686
2687         return KERN_SUCCESS;
2688 }
2689
2690 /*
2691  * Remove all traces of the CPU monitor.
2692  */
2693 int
2694 task_disable_cpumon(task_t task)
2695 {
2696         int kret;
2697
2698         task_lock_assert_owned(task);
2699
2700         kret = task_suspend_cpumon(task);
2701         if (kret) {
2702                 return kret;
2703         }
2704
2705         /* Once we clear these values, the monitor can't be resumed */
2706         task->rusage_cpu_perthr_percentage = 0;
2707         task->rusage_cpu_perthr_interval = 0;
2708
2709         return KERN_SUCCESS;
2710 }
2711
2712
2713 static int
2714 task_enable_cpumon_locked(task_t task)
2715 {
2716         thread_t thread;
2717         task_lock_assert_owned(task);
2718
2719         if (task->rusage_cpu_perthr_percentage == 0 ||
2720             task->rusage_cpu_perthr_interval == 0) {
2721                 return KERN_INVALID_ARGUMENT;
2722         }
2723
2724         task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_PERTHR_LIMIT;
2725         queue_iterate(&task->threads, thread, thread_t, task_threads) {
2726                 act_set_astledger(thread);
2727         }
2728
2729         return KERN_SUCCESS;
2730 }
2731
2732 int
2733 task_resume_cpumon(task_t task)
2734 {
2735         kern_return_t kret;
2736
2737         if (!task) {
2738                 return EINVAL;
2739         }
2740
2741         task_lock(task);
2742         kret = task_enable_cpumon_locked(task);
2743         task_unlock(task);
2744
2745         return kret;
2746 }
2747
2748
2749 /* duplicate values from bsd/sys/process_policy.h */
2750 #define PROC_POLICY_CPUMON_DISABLE      0xFF
2751 #define PROC_POLICY_CPUMON_DEFAULTS     0xFE
2752
2753 static int
2754 task_set_cpuusage(task_t task, uint8_t percentage, uint64_t interval, uint64_t deadline, int scope, int cpumon_entitled)
2755 {
2756         uint64_t abstime = 0;
2757         uint64_t limittime = 0;
2758
2759         lck_mtx_assert(&task->lock, LCK_MTX_ASSERT_OWNED);
2760
2761         /* By default, refill once per second */
2762         if (interval == 0) {
2763                 interval = NSEC_PER_SEC;
2764         }
2765
2766         if (percentage != 0) {
2767                 if (scope == TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
2768                         boolean_t warn = FALSE;
2769
2770                         /*
2771                          * A per-thread CPU limit on a task generates an exception
2772                          * (LEDGER_ACTION_EXCEPTION) if any one thread in the task
2773                          * exceeds the limit.
2774                          */
2775
2776                         if (percentage == PROC_POLICY_CPUMON_DISABLE) {
2777                                 if (cpumon_entitled) {
2778                                         /* 25095698 - task_disable_cpumon() should be reliable */
2779                                         task_disable_cpumon(task);
2780                                         return 0;
2781                                 }
2782
2783                                 /*
2784                                  * This task wishes to disable the CPU usage monitor, but it's
2785                                  * missing the required entitlement:
2786                                  *     com.apple.private.kernel.override-cpumon
2787                                  *
2788                                  * Instead, treat this as a request to reset its params
2789                                  * back to the defaults.
2790                                  */
2791                                 warn = TRUE;
2792                                 percentage = PROC_POLICY_CPUMON_DEFAULTS;
2793                         }
2794
2795                         if (percentage == PROC_POLICY_CPUMON_DEFAULTS) {
2796                                 percentage = proc_max_cpumon_percentage;
2797                                 interval   = proc_max_cpumon_interval;
2798                         }
2799
2800                         if (percentage > 100) {
2801                                 percentage = 100;
2802                         }
2803
2804                         /*
2805                          * Passing in an interval of -1 means either:
2806                          * - Leave the interval as-is, if there's already a per-thread
2807                          *   limit configured
2808                          * - Use the system default.
2809                          */
2810                         if (interval == -1ULL) {
2811                                 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
2812                                         interval = task->rusage_cpu_perthr_interval;
2813                                 } else {
2814                                         interval = proc_max_cpumon_interval;
2815                                 }
2816                         }
2817
2818                         /*
2819                          * Enforce global caps on CPU usage monitor here if the process is not
2820                          * entitled to escape the global caps.
2821                          */
2822                         if ((percentage > proc_max_cpumon_percentage) && (cpumon_entitled == 0)) {
2823                                 warn = TRUE;
2824                                 percentage = proc_max_cpumon_percentage;
2825                         }
2826
2827                         if ((interval > proc_max_cpumon_interval) && (cpumon_entitled == 0)) {
2828                                 warn = TRUE;
2829                                 interval = proc_max_cpumon_interval;
2830                         }
2831
2832                         if (warn) {
2833                                 int       pid = 0;
2834                                 const char *procname = "unknown";
2835
2836 #ifdef MACH_BSD
2837                                 pid = proc_selfpid();
2838                                 if (current_task()->bsd_info != NULL) {
2839                                         procname = proc_name_address(current_task()->bsd_info);
2840                                 }
2841 #endif
2842
2843                                 printf("process %s[%d] denied attempt to escape CPU monitor"
2844                                     " (missing required entitlement).\n", procname, pid);
2845                         }
2846
2847                         /* configure the limit values */
2848                         task->rusage_cpu_perthr_percentage = percentage;
2849                         task->rusage_cpu_perthr_interval = interval;
2850
2851                         /* and enable the CPU monitor */
2852                         (void)task_enable_cpumon_locked(task);
2853                 } else if (scope == TASK_RUSECPU_FLAGS_PROC_LIMIT) {
2854                         /*
2855                          * Currently, a proc-wide CPU limit always blocks if the limit is
2856                          * exceeded (LEDGER_ACTION_BLOCK).
2857                          */
2858                         task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_PROC_LIMIT;
2859                         task->rusage_cpu_percentage = percentage;
2860                         task->rusage_cpu_interval = interval;
2861
2862                         limittime = (interval * percentage) / 100;
2863                         nanoseconds_to_absolutetime(limittime, &abstime);
2864
2865                         ledger_set_limit(task->ledger, task_ledgers.cpu_time, abstime, 0);
2866                         ledger_set_period(task->ledger, task_ledgers.cpu_time, interval);
2867                         ledger_set_action(task->ledger, task_ledgers.cpu_time, LEDGER_ACTION_BLOCK);
2868                 }
2869         }
2870
2871         if (deadline != 0) {
2872                 assert(scope == TASK_RUSECPU_FLAGS_DEADLINE);
2873
2874                 /* if already in use, cancel and wait for it to cleanout */
2875                 if (task->rusage_cpu_callt != NULL) {
2876                         task_unlock(task);
2877                         thread_call_cancel_wait(task->rusage_cpu_callt);
2878                         task_lock(task);
2879                 }
2880                 if (task->rusage_cpu_callt == NULL) {
2881                         task->rusage_cpu_callt = thread_call_allocate_with_priority(task_action_cpuusage, (thread_call_param_t)task, THREAD_CALL_PRIORITY_KERNEL);
2882                 }
2883                 /* setup callout */
2884                 if (task->rusage_cpu_callt != 0) {
2885                         uint64_t save_abstime = 0;
2886
2887                         task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_DEADLINE;
2888                         task->rusage_cpu_deadline = deadline;
2889
2890                         nanoseconds_to_absolutetime(deadline, &abstime);
2891                         save_abstime = abstime;
2892                         clock_absolutetime_interval_to_deadline(save_abstime, &abstime);
2893                         thread_call_enter_delayed(task->rusage_cpu_callt, abstime);
2894                 }
2895         }
2896
2897         return 0;
2898 }
2899
2900 int
2901 task_clear_cpuusage(task_t task, int cpumon_entitled)
2902 {
2903         int retval = 0;
2904
2905         task_lock(task);
2906         retval = task_clear_cpuusage_locked(task, cpumon_entitled);
2907         task_unlock(task);
2908
2909         return retval;
2910 }
2911
2912 static int
2913 task_clear_cpuusage_locked(task_t task, int cpumon_entitled)
2914 {
2915         thread_call_t savecallt;
2916
2917         /* cancel percentage handling if set */
2918         if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PROC_LIMIT) {
2919                 task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_PROC_LIMIT;
2920                 ledger_set_limit(task->ledger, task_ledgers.cpu_time, LEDGER_LIMIT_INFINITY, 0);
2921                 task->rusage_cpu_percentage = 0;
2922                 task->rusage_cpu_interval = 0;
2923         }
2924
2925         /*
2926          * Disable the CPU usage monitor.
2927          */
2928         if (cpumon_entitled) {
2929                 task_disable_cpumon(task);
2930         }
2931
2932         /* cancel deadline handling if set */
2933         if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_DEADLINE) {
2934                 task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_DEADLINE;
2935                 if (task->rusage_cpu_callt != 0) {
2936                         savecallt = task->rusage_cpu_callt;
2937                         task->rusage_cpu_callt = NULL;
2938                         task->rusage_cpu_deadline = 0;
2939                         task_unlock(task);
2940                         thread_call_cancel_wait(savecallt);
2941                         thread_call_free(savecallt);
2942                         task_lock(task);
2943                 }
2944         }
2945         return 0;
2946 }
2947
2948 /* called by ledger unit to enforce action due to resource usage criteria being met */
2949 static void
2950 task_action_cpuusage(thread_call_param_t param0, __unused thread_call_param_t param1)
2951 {
2952         task_t task = (task_t)param0;
2953         (void)task_apply_resource_actions(task, TASK_POLICY_CPU_RESOURCE_USAGE);
2954         return;
2955 }
2956
2957
2958 /*
2959  * Routines for taskwatch and pidbind
2960  */
2961
2962 #if CONFIG_EMBEDDED
2963
2964 lck_mtx_t       task_watch_mtx;
2965
2966 void
2967 task_watch_init(void)
2968 {
2969         lck_mtx_init(&task_watch_mtx, &task_lck_grp, &task_lck_attr);
2970 }
2971
2972 static void
2973 task_watch_lock(void)
2974 {
2975         lck_mtx_lock(&task_watch_mtx);
2976 }
2977
2978 static void
2979 task_watch_unlock(void)
2980 {
2981         lck_mtx_unlock(&task_watch_mtx);
2982 }
2983
2984 static void
2985 add_taskwatch_locked(task_t task, task_watch_t * twp)
2986 {
2987         queue_enter(&task->task_watchers, twp, task_watch_t *, tw_links);
2988         task->num_taskwatchers++;
2989 }
2990
2991 static void
2992 remove_taskwatch_locked(task_t task, task_watch_t * twp)
2993 {
2994         queue_remove(&task->task_watchers, twp, task_watch_t *, tw_links);
2995         task->num_taskwatchers--;
2996 }
2997
2998
2999 int
3000 proc_lf_pidbind(task_t curtask, uint64_t tid, task_t target_task, int bind)
3001 {
3002         thread_t target_thread = NULL;
3003         int ret = 0, setbg = 0;
3004         task_watch_t *twp = NULL;
3005         task_t task = TASK_NULL;
3006
3007         target_thread = task_findtid(curtask, tid);
3008         if (target_thread == NULL) {
3009                 return ESRCH;
3010         }
3011         /* holds thread reference */
3012
3013         if (bind != 0) {
3014                 /* task is still active ? */
3015                 task_lock(target_task);
3016                 if (target_task->active == 0) {
3017                         task_unlock(target_task);
3018                         ret = ESRCH;
3019                         goto out;
3020                 }
3021                 task_unlock(target_task);
3022
3023                 twp = (task_watch_t *)kalloc(sizeof(task_watch_t));
3024                 if (twp == NULL) {
3025                         task_watch_unlock();
3026                         ret = ENOMEM;
3027                         goto out;
3028                 }
3029
3030                 bzero(twp, sizeof(task_watch_t));
3031
3032                 task_watch_lock();
3033
3034                 if (target_thread->taskwatch != NULL) {
3035                         /* already bound to another task */
3036                         task_watch_unlock();
3037
3038                         kfree(twp, sizeof(task_watch_t));
3039                         ret = EBUSY;
3040                         goto out;
3041                 }
3042
3043                 task_reference(target_task);
3044
3045                 setbg = proc_get_effective_task_policy(target_task, TASK_POLICY_WATCHERS_BG);
3046
3047                 twp->tw_task = target_task;             /* holds the task reference */
3048                 twp->tw_thread = target_thread;         /* holds the thread reference */
3049                 twp->tw_state = setbg;
3050                 twp->tw_importance = target_thread->importance;
3051
3052                 add_taskwatch_locked(target_task, twp);
3053
3054                 target_thread->taskwatch = twp;
3055
3056                 task_watch_unlock();
3057
3058                 if (setbg) {
3059                         set_thread_appbg(target_thread, setbg, INT_MIN);
3060                 }
3061
3062                 /* retain the thread reference as it is in twp */
3063                 target_thread = NULL;
3064         } else {
3065                 /* unbind */
3066                 task_watch_lock();
3067                 if ((twp = target_thread->taskwatch) != NULL) {
3068                         task = twp->tw_task;
3069                         target_thread->taskwatch = NULL;
3070                         remove_taskwatch_locked(task, twp);
3071
3072                         task_watch_unlock();
3073
3074                         task_deallocate(task);                  /* drop task ref in twp */
3075                         set_thread_appbg(target_thread, 0, twp->tw_importance);
3076                         thread_deallocate(target_thread);       /* drop thread ref in twp */
3077                         kfree(twp, sizeof(task_watch_t));
3078                 } else {
3079                         task_watch_unlock();
3080                         ret = 0;                /* return success if it not alredy bound */
3081                         goto out;
3082                 }
3083         }
3084 out:
3085         thread_deallocate(target_thread);       /* drop thread ref acquired in this routine */
3086         return ret;
3087 }
3088
3089 static void
3090 set_thread_appbg(thread_t thread, int setbg, __unused int importance)
3091 {
3092         int enable = (setbg ? TASK_POLICY_ENABLE : TASK_POLICY_DISABLE);
3093
3094         proc_set_thread_policy(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_PIDBIND_BG, enable);
3095 }
3096
3097 static void
3098 apply_appstate_watchers(task_t task)
3099 {
3100         int numwatchers = 0, i, j, setbg;
3101         thread_watchlist_t * threadlist;
3102         task_watch_t * twp;
3103
3104 retry:
3105         /* if no watchers on the list return */
3106         if ((numwatchers = task->num_taskwatchers) == 0) {
3107                 return;
3108         }
3109
3110         threadlist = (thread_watchlist_t *)kalloc(numwatchers * sizeof(thread_watchlist_t));
3111         if (threadlist == NULL) {
3112                 return;
3113         }
3114
3115         bzero(threadlist, numwatchers * sizeof(thread_watchlist_t));
3116
3117         task_watch_lock();
3118         /*serialize application of app state changes */
3119
3120         if (task->watchapplying != 0) {
3121                 lck_mtx_sleep(&task_watch_mtx, LCK_SLEEP_DEFAULT, &task->watchapplying, THREAD_UNINT);
3122                 task_watch_unlock();
3123                 kfree(threadlist, numwatchers * sizeof(thread_watchlist_t));
3124                 goto retry;
3125         }
3126
3127         if (numwatchers != task->num_taskwatchers) {
3128                 task_watch_unlock();
3129                 kfree(threadlist, numwatchers * sizeof(thread_watchlist_t));
3130                 goto retry;
3131         }
3132
3133         setbg = proc_get_effective_task_policy(task, TASK_POLICY_WATCHERS_BG);
3134
3135         task->watchapplying = 1;
3136         i = 0;
3137         queue_iterate(&task->task_watchers, twp, task_watch_t *, tw_links) {
3138                 threadlist[i].thread = twp->tw_thread;
3139                 thread_reference(threadlist[i].thread);
3140                 if (setbg != 0) {
3141                         twp->tw_importance = twp->tw_thread->importance;
3142                         threadlist[i].importance = INT_MIN;
3143                 } else {
3144                         threadlist[i].importance = twp->tw_importance;
3145                 }
3146                 i++;
3147                 if (i > numwatchers) {
3148                         break;
3149                 }
3150         }
3151
3152         task_watch_unlock();
3153
3154         for (j = 0; j < i; j++) {
3155                 set_thread_appbg(threadlist[j].thread, setbg, threadlist[j].importance);
3156                 thread_deallocate(threadlist[j].thread);
3157         }
3158         kfree(threadlist, numwatchers * sizeof(thread_watchlist_t));
3159
3160
3161         task_watch_lock();
3162         task->watchapplying = 0;
3163         thread_wakeup_one(&task->watchapplying);
3164         task_watch_unlock();
3165 }
3166
3167 void
3168 thead_remove_taskwatch(thread_t thread)
3169 {
3170         task_watch_t * twp;
3171         int importance = 0;
3172
3173         task_watch_lock();
3174         if ((twp = thread->taskwatch) != NULL) {
3175                 thread->taskwatch = NULL;
3176                 remove_taskwatch_locked(twp->tw_task, twp);
3177         }
3178         task_watch_unlock();
3179         if (twp != NULL) {
3180                 thread_deallocate(twp->tw_thread);
3181                 task_deallocate(twp->tw_task);
3182                 importance = twp->tw_importance;
3183                 kfree(twp, sizeof(task_watch_t));
3184                 /* remove the thread and networkbg */
3185                 set_thread_appbg(thread, 0, importance);
3186         }
3187 }
3188
3189 void
3190 task_removewatchers(task_t task)
3191 {
3192         int numwatchers = 0, i, j;
3193         task_watch_t ** twplist = NULL;
3194         task_watch_t * twp = NULL;
3195
3196 retry:
3197         if ((numwatchers = task->num_taskwatchers) == 0) {
3198                 return;
3199         }
3200
3201         twplist = (task_watch_t **)kalloc(numwatchers * sizeof(task_watch_t *));
3202         if (twplist == NULL) {
3203                 return;
3204         }
3205
3206         bzero(twplist, numwatchers * sizeof(task_watch_t *));
3207
3208         task_watch_lock();
3209         if (task->num_taskwatchers == 0) {
3210                 task_watch_unlock();
3211                 goto out;
3212         }
3213
3214         if (numwatchers != task->num_taskwatchers) {
3215                 task_watch_unlock();
3216                 kfree(twplist, numwatchers * sizeof(task_watch_t *));
3217                 numwatchers = 0;
3218                 goto retry;
3219         }
3220
3221         i = 0;
3222         while ((twp = (task_watch_t *)dequeue_head(&task->task_watchers)) != NULL) {
3223                 twplist[i] = twp;
3224                 task->num_taskwatchers--;
3225
3226                 /*
3227                  * Since the linkage is removed and thead state cleanup is already set up,
3228                  * remove the refernce from the thread.
3229                  */
3230                 twp->tw_thread->taskwatch = NULL;       /* removed linkage, clear thread holding ref */
3231                 i++;
3232                 if ((task->num_taskwatchers == 0) || (i > numwatchers)) {
3233                         break;
3234                 }
3235         }
3236
3237         task_watch_unlock();
3238
3239         for (j = 0; j < i; j++) {
3240                 twp = twplist[j];
3241                 /* remove thread and network bg */
3242                 set_thread_appbg(twp->tw_thread, 0, twp->tw_importance);
3243                 thread_deallocate(twp->tw_thread);
3244                 task_deallocate(twp->tw_task);
3245                 kfree(twp, sizeof(task_watch_t));
3246         }
3247
3248 out:
3249         kfree(twplist, numwatchers * sizeof(task_watch_t *));
3250 }
3251 #endif /* CONFIG_EMBEDDED */
3252
3253 /*
3254  * Routines for importance donation/inheritance/boosting
3255  */
3256
3257 static void
3258 task_importance_update_live_donor(task_t target_task)
3259 {
3260 #if IMPORTANCE_INHERITANCE
3261
3262         ipc_importance_task_t task_imp;
3263
3264         task_imp = ipc_importance_for_task(target_task, FALSE);
3265         if (IIT_NULL != task_imp) {
3266                 ipc_importance_task_update_live_donor(task_imp);
3267                 ipc_importance_task_release(task_imp);
3268         }
3269 #endif /* IMPORTANCE_INHERITANCE */
3270 }
3271
3272 void
3273 task_importance_mark_donor(task_t task, boolean_t donating)
3274 {
3275 #if IMPORTANCE_INHERITANCE
3276         ipc_importance_task_t task_imp;
3277
3278         task_imp = ipc_importance_for_task(task, FALSE);
3279         if (IIT_NULL != task_imp) {
3280                 ipc_importance_task_mark_donor(task_imp, donating);
3281                 ipc_importance_task_release(task_imp);
3282         }
3283 #endif /* IMPORTANCE_INHERITANCE */
3284 }
3285
3286 void
3287 task_importance_mark_live_donor(task_t task, boolean_t live_donating)
3288 {
3289 #if IMPORTANCE_INHERITANCE
3290         ipc_importance_task_t task_imp;
3291
3292         task_imp = ipc_importance_for_task(task, FALSE);
3293         if (IIT_NULL != task_imp) {
3294                 ipc_importance_task_mark_live_donor(task_imp, live_donating);
3295                 ipc_importance_task_release(task_imp);
3296         }
3297 #endif /* IMPORTANCE_INHERITANCE */
3298 }
3299
3300 void
3301 task_importance_mark_receiver(task_t task, boolean_t receiving)
3302 {
3303 #if IMPORTANCE_INHERITANCE
3304         ipc_importance_task_t task_imp;
3305
3306         task_imp = ipc_importance_for_task(task, FALSE);
3307         if (IIT_NULL != task_imp) {
3308                 ipc_importance_task_mark_receiver(task_imp, receiving);
3309                 ipc_importance_task_release(task_imp);
3310         }
3311 #endif /* IMPORTANCE_INHERITANCE */
3312 }
3313
3314 void
3315 task_importance_mark_denap_receiver(task_t task, boolean_t denap)
3316 {
3317 #if IMPORTANCE_INHERITANCE
3318         ipc_importance_task_t task_imp;
3319
3320         task_imp = ipc_importance_for_task(task, FALSE);
3321         if (IIT_NULL != task_imp) {
3322                 ipc_importance_task_mark_denap_receiver(task_imp, denap);
3323                 ipc_importance_task_release(task_imp);
3324         }
3325 #endif /* IMPORTANCE_INHERITANCE */
3326 }
3327
3328 void
3329 task_importance_reset(__imp_only task_t task)
3330 {
3331 #if IMPORTANCE_INHERITANCE
3332         ipc_importance_task_t task_imp;
3333
3334         /* TODO: Lower importance downstream before disconnect */
3335         task_imp = task->task_imp_base;
3336         ipc_importance_reset(task_imp, FALSE);
3337         task_importance_update_live_donor(task);
3338 #endif /* IMPORTANCE_INHERITANCE */
3339 }
3340
3341 void
3342 task_importance_init_from_parent(__imp_only task_t new_task, __imp_only task_t parent_task)
3343 {
3344 #if IMPORTANCE_INHERITANCE
3345         ipc_importance_task_t new_task_imp = IIT_NULL;
3346
3347         new_task->task_imp_base = NULL;
3348         if (!parent_task) {
3349                 return;
3350         }
3351
3352         if (task_is_marked_importance_donor(parent_task)) {
3353                 new_task_imp = ipc_importance_for_task(new_task, FALSE);
3354                 assert(IIT_NULL != new_task_imp);
3355                 ipc_importance_task_mark_donor(new_task_imp, TRUE);
3356         }
3357         if (task_is_marked_live_importance_donor(parent_task)) {
3358                 if (IIT_NULL == new_task_imp) {
3359                         new_task_imp = ipc_importance_for_task(new_task, FALSE);
3360                 }
3361                 assert(IIT_NULL != new_task_imp);
3362                 ipc_importance_task_mark_live_donor(new_task_imp, TRUE);
3363         }
3364         /* Do not inherit 'receiver' on fork, vfexec or true spawn */
3365         if (task_is_exec_copy(new_task) &&
3366             task_is_marked_importance_receiver(parent_task)) {
3367                 if (IIT_NULL == new_task_imp) {
3368                         new_task_imp = ipc_importance_for_task(new_task, FALSE);
3369                 }
3370                 assert(IIT_NULL != new_task_imp);
3371                 ipc_importance_task_mark_receiver(new_task_imp, TRUE);
3372         }
3373         if (task_is_marked_importance_denap_receiver(parent_task)) {
3374                 if (IIT_NULL == new_task_imp) {
3375                         new_task_imp = ipc_importance_for_task(new_task, FALSE);
3376                 }
3377                 assert(IIT_NULL != new_task_imp);
3378                 ipc_importance_task_mark_denap_receiver(new_task_imp, TRUE);
3379         }
3380         if (IIT_NULL != new_task_imp) {
3381                 assert(new_task->task_imp_base == new_task_imp);
3382                 ipc_importance_task_release(new_task_imp);
3383         }
3384 #endif /* IMPORTANCE_INHERITANCE */
3385 }
3386
3387 #if IMPORTANCE_INHERITANCE
3388 /*
3389  * Sets the task boost bit to the provided value.  Does NOT run the update function.
3390  *
3391  * Task lock must be held.
3392  */
3393 static void
3394 task_set_boost_locked(task_t task, boolean_t boost_active)
3395 {
3396 #if IMPORTANCE_TRACE
3397         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_BOOST, (boost_active ? IMP_BOOSTED : IMP_UNBOOSTED)) | DBG_FUNC_START),
3398             proc_selfpid(), task_pid(task), trequested_0(task), trequested_1(task), 0);
3399 #endif /* IMPORTANCE_TRACE */
3400
3401         task->requested_policy.trp_boosted = boost_active;
3402
3403 #if IMPORTANCE_TRACE
3404         if (boost_active == TRUE) {
3405                 DTRACE_BOOST2(boost, task_t, task, int, task_pid(task));
3406         } else {
3407                 DTRACE_BOOST2(unboost, task_t, task, int, task_pid(task));
3408         }
3409         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_BOOST, (boost_active ? IMP_BOOSTED : IMP_UNBOOSTED)) | DBG_FUNC_END),
3410             proc_selfpid(), task_pid(task),
3411             trequested_0(task), trequested_1(task), 0);
3412 #endif /* IMPORTANCE_TRACE */
3413 }
3414
3415 /*
3416  * Sets the task boost bit to the provided value and applies the update.
3417  *
3418  * Task lock must be held.  Must call update complete after unlocking the task.
3419  */
3420 void
3421 task_update_boost_locked(task_t task, boolean_t boost_active, task_pend_token_t pend_token)
3422 {
3423         task_set_boost_locked(task, boost_active);
3424
3425         task_policy_update_locked(task, pend_token);
3426 }
3427
3428 /*
3429  * Check if this task should donate importance.
3430  *
3431  * May be called without taking the task lock. In that case, donor status can change
3432  * so you must check only once for each donation event.
3433  */
3434 boolean_t
3435 task_is_importance_donor(task_t task)
3436 {
3437         if (task->task_imp_base == IIT_NULL) {
3438                 return FALSE;
3439         }
3440         return ipc_importance_task_is_donor(task->task_imp_base);
3441 }
3442
3443 /*
3444  * Query the status of the task's donor mark.
3445  */
3446 boolean_t
3447 task_is_marked_importance_donor(task_t task)
3448 {
3449         if (task->task_imp_base == IIT_NULL) {
3450                 return FALSE;
3451         }
3452         return ipc_importance_task_is_marked_donor(task->task_imp_base);
3453 }
3454
3455 /*
3456  * Query the status of the task's live donor and donor mark.
3457  */
3458 boolean_t
3459 task_is_marked_live_importance_donor(task_t task)
3460 {
3461         if (task->task_imp_base == IIT_NULL) {
3462                 return FALSE;
3463         }
3464         return ipc_importance_task_is_marked_live_donor(task->task_imp_base);
3465 }
3466
3467
3468 /*
3469  * This routine may be called without holding task lock
3470  * since the value of imp_receiver can never be unset.
3471  */
3472 boolean_t
3473 task_is_importance_receiver(task_t task)
3474 {
3475         if (task->task_imp_base == IIT_NULL) {
3476                 return FALSE;
3477         }
3478         return ipc_importance_task_is_marked_receiver(task->task_imp_base);
3479 }
3480
3481 /*
3482  * Query the task's receiver mark.
3483  */
3484 boolean_t
3485 task_is_marked_importance_receiver(task_t task)
3486 {
3487         if (task->task_imp_base == IIT_NULL) {
3488                 return FALSE;
3489         }
3490         return ipc_importance_task_is_marked_receiver(task->task_imp_base);
3491 }
3492
3493 /*
3494  * This routine may be called without holding task lock
3495  * since the value of de-nap receiver can never be unset.
3496  */
3497 boolean_t
3498 task_is_importance_denap_receiver(task_t task)
3499 {
3500         if (task->task_imp_base == IIT_NULL) {
3501                 return FALSE;
3502         }
3503         return ipc_importance_task_is_denap_receiver(task->task_imp_base);
3504 }
3505
3506 /*
3507  * Query the task's de-nap receiver mark.
3508  */
3509 boolean_t
3510 task_is_marked_importance_denap_receiver(task_t task)
3511 {
3512         if (task->task_imp_base == IIT_NULL) {
3513                 return FALSE;
3514         }
3515         return ipc_importance_task_is_marked_denap_receiver(task->task_imp_base);
3516 }
3517
3518 /*
3519  * This routine may be called without holding task lock
3520  * since the value of imp_receiver can never be unset.
3521  */
3522 boolean_t
3523 task_is_importance_receiver_type(task_t task)
3524 {
3525         if (task->task_imp_base == IIT_NULL) {
3526                 return FALSE;
3527         }
3528         return task_is_importance_receiver(task) ||
3529                task_is_importance_denap_receiver(task);
3530 }
3531
3532 /*
3533  * External importance assertions are managed by the process in userspace
3534  * Internal importance assertions are the responsibility of the kernel
3535  * Assertions are changed from internal to external via task_importance_externalize_assertion
3536  */
3537
3538 int
3539 task_importance_hold_internal_assertion(task_t target_task, uint32_t count)
3540 {
3541         ipc_importance_task_t task_imp;
3542         kern_return_t ret;
3543
3544         /* may be first time, so allow for possible importance setup */
3545         task_imp = ipc_importance_for_task(target_task, FALSE);
3546         if (IIT_NULL == task_imp) {
3547                 return EOVERFLOW;
3548         }
3549         ret = ipc_importance_task_hold_internal_assertion(task_imp, count);
3550         ipc_importance_task_release(task_imp);
3551
3552         return (KERN_SUCCESS != ret) ? ENOTSUP : 0;
3553 }
3554
3555 int
3556 task_importance_hold_file_lock_assertion(task_t target_task, uint32_t count)
3557 {
3558         ipc_importance_task_t task_imp;
3559         kern_return_t ret;
3560
3561         /* may be first time, so allow for possible importance setup */
3562         task_imp = ipc_importance_for_task(target_task, FALSE);
3563         if (IIT_NULL == task_imp) {
3564                 return EOVERFLOW;
3565         }
3566         ret = ipc_importance_task_hold_file_lock_assertion(task_imp, count);
3567         ipc_importance_task_release(task_imp);
3568
3569         return (KERN_SUCCESS != ret) ? ENOTSUP : 0;
3570 }
3571
3572 int
3573 task_importance_hold_legacy_external_assertion(task_t target_task, uint32_t count)
3574 {
3575         ipc_importance_task_t task_imp;
3576         kern_return_t ret;
3577
3578         /* must already have set up an importance */
3579         task_imp = target_task->task_imp_base;
3580         if (IIT_NULL == task_imp) {
3581                 return EOVERFLOW;
3582         }
3583         ret = ipc_importance_task_hold_legacy_external_assertion(task_imp, count);
3584         return (KERN_SUCCESS != ret) ? ENOTSUP : 0;
3585 }
3586
3587 int
3588 task_importance_drop_file_lock_assertion(task_t target_task, uint32_t count)
3589 {
3590         ipc_importance_task_t task_imp;
3591         kern_return_t ret;
3592
3593         /* must already have set up an importance */
3594         task_imp = target_task->task_imp_base;
3595         if (IIT_NULL == task_imp) {
3596                 return EOVERFLOW;
3597         }
3598         ret = ipc_importance_task_drop_file_lock_assertion(target_task->task_imp_base, count);
3599         return (KERN_SUCCESS != ret) ? EOVERFLOW : 0;
3600 }
3601
3602 int
3603 task_importance_drop_legacy_external_assertion(task_t target_task, uint32_t count)
3604 {
3605         ipc_importance_task_t task_imp;
3606         kern_return_t ret;
3607
3608         /* must already have set up an importance */
3609         task_imp = target_task->task_imp_base;
3610         if (IIT_NULL == task_imp) {
3611                 return EOVERFLOW;
3612         }
3613         ret = ipc_importance_task_drop_legacy_external_assertion(task_imp, count);
3614         return (KERN_SUCCESS != ret) ? EOVERFLOW : 0;
3615 }
3616
3617 static void
3618 task_add_importance_watchport(task_t task, mach_port_t port, int *boostp)
3619 {
3620         int boost = 0;
3621
3622         __imptrace_only int released_pid = 0;
3623         __imptrace_only int pid = task_pid(task);
3624
3625         ipc_importance_task_t release_imp_task = IIT_NULL;
3626
3627         if (IP_VALID(port) != 0) {
3628                 ipc_importance_task_t new_imp_task = ipc_importance_for_task(task, FALSE);
3629
3630                 ip_lock(port);
3631
3632                 /*
3633                  * The port must have been marked tempowner already.
3634                  * This also filters out ports whose receive rights
3635                  * are already enqueued in a message, as you can't
3636                  * change the right's destination once it's already
3637                  * on its way.
3638                  */
3639                 if (port->ip_tempowner != 0) {
3640                         assert(port->ip_impdonation != 0);
3641
3642                         boost = port->ip_impcount;
3643                         if (IIT_NULL != port->ip_imp_task) {
3644                                 /*
3645                                  * if this port is already bound to a task,
3646                                  * release the task reference and drop any
3647                                  * watchport-forwarded boosts
3648                                  */
3649                                 release_imp_task = port->ip_imp_task;
3650                                 port->ip_imp_task = IIT_NULL;
3651                         }
3652
3653                         /* mark the port is watching another task (reference held in port->ip_imp_task) */
3654                         if (ipc_importance_task_is_marked_receiver(new_imp_task)) {
3655                                 port->ip_imp_task = new_imp_task;
3656                                 new_imp_task = IIT_NULL;
3657                         }
3658                 }
3659                 ip_unlock(port);
3660
3661                 if (IIT_NULL != new_imp_task) {
3662                         ipc_importance_task_release(new_imp_task);
3663                 }
3664
3665                 if (IIT_NULL != release_imp_task) {
3666                         if (boost > 0) {
3667                                 ipc_importance_task_drop_internal_assertion(release_imp_task, boost);
3668                         }
3669
3670                         // released_pid = task_pid(release_imp_task); /* TODO: Need ref-safe way to get pid */
3671                         ipc_importance_task_release(release_imp_task);
3672                 }
3673 #if IMPORTANCE_TRACE
3674                 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_WATCHPORT, 0)) | DBG_FUNC_NONE,
3675                     proc_selfpid(), pid, boost, released_pid, 0);
3676 #endif /* IMPORTANCE_TRACE */
3677         }
3678
3679         *boostp = boost;
3680         return;
3681 }
3682
3683 #endif /* IMPORTANCE_INHERITANCE */
3684
3685 /*
3686  * Routines for VM to query task importance
3687  */
3688
3689
/*
 * Weights summed by task_importance_estimate() when ranking tasks
 * for low memory notification and purging purgeable memory.
 * A larger weight dominates the ordering.
 */
#define TASK_IMPORTANCE_FOREGROUND     4
#define TASK_IMPORTANCE_NOTDARWINBG    1
3696
3697
3698 /*
3699  * (Un)Mark the task as a privileged listener for memory notifications.
3700  * if marked, this task will be among the first to be notified amongst
3701  * the bulk of all other tasks when the system enters a pressure level
3702  * of interest to this task.
3703  */
3704 int
3705 task_low_mem_privileged_listener(task_t task, boolean_t new_value, boolean_t *old_value)
3706 {
3707         if (old_value != NULL) {
3708                 *old_value = (boolean_t)task->low_mem_privileged_listener;
3709         } else {
3710                 task_lock(task);
3711                 task->low_mem_privileged_listener = (uint32_t)new_value;
3712                 task_unlock(task);
3713         }
3714
3715         return 0;
3716 }
3717
3718 /*
3719  * Checks if the task is already notified.
3720  *
3721  * Condition: task lock should be held while calling this function.
3722  */
3723 boolean_t
3724 task_has_been_notified(task_t task, int pressurelevel)
3725 {
3726         if (task == NULL) {
3727                 return FALSE;
3728         }
3729
3730         if (pressurelevel == kVMPressureWarning) {
3731                 return task->low_mem_notified_warn ? TRUE : FALSE;
3732         } else if (pressurelevel == kVMPressureCritical) {
3733                 return task->low_mem_notified_critical ? TRUE : FALSE;
3734         } else {
3735                 return TRUE;
3736         }
3737 }
3738
3739
3740 /*
3741  * Checks if the task is used for purging.
3742  *
3743  * Condition: task lock should be held while calling this function.
3744  */
3745 boolean_t
3746 task_used_for_purging(task_t task, int pressurelevel)
3747 {
3748         if (task == NULL) {
3749                 return FALSE;
3750         }
3751
3752         if (pressurelevel == kVMPressureWarning) {
3753                 return task->purged_memory_warn ? TRUE : FALSE;
3754         } else if (pressurelevel == kVMPressureCritical) {
3755                 return task->purged_memory_critical ? TRUE : FALSE;
3756         } else {
3757                 return TRUE;
3758         }
3759 }
3760
3761
3762 /*
3763  * Mark the task as notified with memory notification.
3764  *
3765  * Condition: task lock should be held while calling this function.
3766  */
3767 void
3768 task_mark_has_been_notified(task_t task, int pressurelevel)
3769 {
3770         if (task == NULL) {
3771                 return;
3772         }
3773
3774         if (pressurelevel == kVMPressureWarning) {
3775                 task->low_mem_notified_warn = 1;
3776         } else if (pressurelevel == kVMPressureCritical) {
3777                 task->low_mem_notified_critical = 1;
3778         }
3779 }
3780
3781
3782 /*
3783  * Mark the task as purged.
3784  *
3785  * Condition: task lock should be held while calling this function.
3786  */
3787 void
3788 task_mark_used_for_purging(task_t task, int pressurelevel)
3789 {
3790         if (task == NULL) {
3791                 return;
3792         }
3793
3794         if (pressurelevel == kVMPressureWarning) {
3795                 task->purged_memory_warn = 1;
3796         } else if (pressurelevel == kVMPressureCritical) {
3797                 task->purged_memory_critical = 1;
3798         }
3799 }
3800
3801
3802 /*
3803  * Mark the task eligible for low memory notification.
3804  *
3805  * Condition: task lock should be held while calling this function.
3806  */
3807 void
3808 task_clear_has_been_notified(task_t task, int pressurelevel)
3809 {
3810         if (task == NULL) {
3811                 return;
3812         }
3813
3814         if (pressurelevel == kVMPressureWarning) {
3815                 task->low_mem_notified_warn = 0;
3816         } else if (pressurelevel == kVMPressureCritical) {
3817                 task->low_mem_notified_critical = 0;
3818         }
3819 }
3820
3821
3822 /*
3823  * Mark the task eligible for purging its purgeable memory.
3824  *
3825  * Condition: task lock should be held while calling this function.
3826  */
3827 void
3828 task_clear_used_for_purging(task_t task)
3829 {
3830         if (task == NULL) {
3831                 return;
3832         }
3833
3834         task->purged_memory_warn = 0;
3835         task->purged_memory_critical = 0;
3836 }
3837
3838
3839 /*
3840  * Estimate task importance for purging its purgeable memory
3841  * and low memory notification.
3842  *
3843  * Importance is calculated in the following order of criteria:
3844  * -Task role : Background vs Foreground
3845  * -Boost status: Not boosted vs Boosted
3846  * -Darwin BG status.
3847  *
3848  * Returns: Estimated task importance. Less important task will have lower
3849  *          estimated importance.
3850  */
3851 int
3852 task_importance_estimate(task_t task)
3853 {
3854         int task_importance = 0;
3855
3856         if (task == NULL) {
3857                 return 0;
3858         }
3859
3860         if (proc_get_effective_task_policy(task, TASK_POLICY_ROLE) == TASK_FOREGROUND_APPLICATION) {
3861                 task_importance += TASK_IMPORTANCE_FOREGROUND;
3862         }
3863
3864         if (proc_get_effective_task_policy(task, TASK_POLICY_DARWIN_BG) == 0) {
3865                 task_importance += TASK_IMPORTANCE_NOTDARWINBG;
3866         }
3867
3868         return task_importance;
3869 }
3870
3871 boolean_t
3872 task_has_assertions(task_t task)
3873 {
3874         return task->task_imp_base->iit_assertcnt? TRUE : FALSE;
3875 }
3876
3877
3878 kern_return_t
3879 send_resource_violation(typeof(send_cpu_usage_violation) sendfunc,
3880     task_t violator,
3881     struct ledger_entry_info *linfo,
3882     resource_notify_flags_t flags)
3883 {
3884 #ifndef MACH_BSD
3885         return KERN_NOT_SUPPORTED;
3886 #else
3887         kern_return_t   kr = KERN_SUCCESS;
3888         proc_t          proc = NULL;
3889         posix_path_t    proc_path = "";
3890         proc_name_t     procname = "<unknown>";
3891         int             pid = -1;
3892         clock_sec_t     secs;
3893         clock_nsec_t    nsecs;
3894         mach_timespec_t timestamp;
3895         thread_t        curthread = current_thread();
3896         ipc_port_t      dstport = MACH_PORT_NULL;
3897
3898         if (!violator) {
3899                 kr = KERN_INVALID_ARGUMENT; goto finish;
3900         }
3901
3902         /* extract violator information */
3903         task_lock(violator);
3904         if (!(proc = get_bsdtask_info(violator))) {
3905                 task_unlock(violator);
3906                 kr = KERN_INVALID_ARGUMENT; goto finish;
3907         }
3908         (void)mig_strncpy(procname, proc_best_name(proc), sizeof(procname));
3909         pid = task_pid(violator);
3910         if (flags & kRNFatalLimitFlag) {
3911                 kr = proc_pidpathinfo_internal(proc, 0, proc_path,
3912                     sizeof(proc_path), NULL);
3913         }
3914         task_unlock(violator);
3915         if (kr) {
3916                 goto finish;
3917         }
3918
3919         /* violation time ~ now */
3920         clock_get_calendar_nanotime(&secs, &nsecs);
3921         timestamp.tv_sec = (int32_t)secs;
3922         timestamp.tv_nsec = (int32_t)nsecs;
3923         /* 25567702 tracks widening mach_timespec_t */
3924
3925         /* send message */
3926         kr = host_get_special_port(host_priv_self(), HOST_LOCAL_NODE,
3927             HOST_RESOURCE_NOTIFY_PORT, &dstport);
3928         if (kr) {
3929                 goto finish;
3930         }
3931
3932         thread_set_honor_qlimit(curthread);
3933         kr = sendfunc(dstport,
3934             procname, pid, proc_path, timestamp,
3935             linfo->lei_balance, linfo->lei_last_refill,
3936             linfo->lei_limit, linfo->lei_refill_period,
3937             flags);
3938         thread_clear_honor_qlimit(curthread);
3939
3940         ipc_port_release_send(dstport);
3941
3942 finish:
3943         return kr;
3944 #endif      /* MACH_BSD */
3945 }
3946
3947
3948 /*
3949  * Resource violations trace four 64-bit integers.  For K32, two additional
3950  * codes are allocated, the first with the low nibble doubled.  So if the K64
3951  * code is 0x042, the K32 codes would be 0x044 and 0x45.
3952  */
3953 #ifdef __LP64__
3954 void
3955 trace_resource_violation(uint16_t code,
3956     struct ledger_entry_info *linfo)
3957 {
3958         KERNEL_DBG_IST_SANE(KDBG_CODE(DBG_MACH, DBG_MACH_RESOURCE, code),
3959             linfo->lei_balance, linfo->lei_last_refill,
3960             linfo->lei_limit, linfo->lei_refill_period);
3961 }
3962 #else /* K32 */
3963 /* TODO: create/find a trace_two_LLs() for K32 systems */
3964 #define MASK32 0xffffffff
3965 void
3966 trace_resource_violation(uint16_t code,
3967     struct ledger_entry_info *linfo)
3968 {
3969         int8_t lownibble = (code & 0x3) * 2;
3970         int16_t codeA = (code & 0xffc) | lownibble;
3971         int16_t codeB = codeA + 1;
3972
3973         int32_t balance_high = (linfo->lei_balance >> 32) & MASK32;
3974         int32_t balance_low = linfo->lei_balance & MASK32;
3975         int32_t last_refill_high = (linfo->lei_last_refill >> 32) & MASK32;
3976         int32_t last_refill_low = linfo->lei_last_refill & MASK32;
3977
3978         int32_t limit_high = (linfo->lei_limit >> 32) & MASK32;
3979         int32_t limit_low = linfo->lei_limit & MASK32;
3980         int32_t refill_period_high = (linfo->lei_refill_period >> 32) & MASK32;
3981         int32_t refill_period_low = linfo->lei_refill_period & MASK32;
3982
3983         KERNEL_DBG_IST_SANE(KDBG_CODE(DBG_MACH, DBG_MACH_RESOURCE, codeA),
3984             balance_high, balance_low,
3985             last_refill_high, last_refill_low);
3986         KERNEL_DBG_IST_SANE(KDBG_CODE(DBG_MACH, DBG_MACH_RESOURCE, codeB),
3987             limit_high, limit_low,
3988             refill_period_high, refill_period_low);
3989 }
3990 #endif /* K64/K32 */