osfmk/kern/task_policy.c

   1 /*
   2  * Copyright (c) 2000-2016 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28
  29 #include <kern/policy_internal.h>
  30 #include <mach/task_policy.h>
  31
  32 #include <mach/mach_types.h>
  33 #include <mach/task_server.h>
  34
  35 #include <kern/host.h>                  /* host_priv_self()        */
  36 #include <mach/host_priv.h>             /* host_get_special_port() */
  37 #include <mach/host_special_ports.h>    /* RESOURCE_NOTIFY_PORT    */
  38 #include <kern/sched.h>
  39 #include <kern/task.h>
  40 #include <mach/thread_policy.h>
  41 #include <sys/errno.h>
  42 #include <sys/resource.h>
  43 #include <machine/limits.h>
  44 #include <kern/ledger.h>
  45 #include <kern/thread_call.h>
  46 #include <kern/sfi.h>
  47 #include <kern/coalition.h>
  48 #if CONFIG_TELEMETRY
  49 #include <kern/telemetry.h>
  50 #endif
  51 #if CONFIG_EMBEDDED
  52 #include <kern/kalloc.h>
  53 #include <sys/errno.h>
  54 #endif /* CONFIG_EMBEDDED */
  55
  56 #if IMPORTANCE_INHERITANCE
  57 #include <ipc/ipc_importance.h>
  58 #if IMPORTANCE_TRACE
  59 #include <mach/machine/sdt.h>
  60 #endif /* IMPORTANCE_TRACE */
   61 #endif /* IMPORTANCE_INHERITANCE */
  62
  63 #include <sys/kdebug.h>
  64
  65 /*
  66  *  Task Policy
  67  *
  68  *  This subsystem manages task and thread IO priority and backgrounding,
  69  *  as well as importance inheritance, process suppression, task QoS, and apptype.
   70  *  These properties have a surprising number of complex interactions, so they are
  71  *  centralized here in one state machine to simplify the implementation of those interactions.
  72  *
  73  *  Architecture:
  74  *  Threads and tasks have two policy fields: requested, effective.
  75  *  Requested represents the wishes of each interface that influences task policy.
  76  *  Effective represents the distillation of that policy into a set of behaviors.
  77  *
  78  *  Each thread making a modification in the policy system passes a 'pending' struct,
  79  *  which tracks updates that will be applied after dropping the policy engine lock.
  80  *
  81  *  Each interface that has an input into the task policy state machine controls a field in requested.
  82  *  If the interface has a getter, it returns what is in the field in requested, but that is
  83  *  not necessarily what is actually in effect.
  84  *
  85  *  All kernel subsystems that behave differently based on task policy call into
  86  *  the proc_get_effective_(task|thread)_policy functions, which return the decision of the task policy state machine
  87  *  for that subsystem by querying only the 'effective' field.
  88  *
  89  *  Policy change operations:
  90  *  Here are the steps to change a policy on a task or thread:
  91  *  1) Lock task
  92  *  2) Change requested field for the relevant policy
  93  *  3) Run a task policy update, which recalculates effective based on requested,
  94  *     then takes a diff between the old and new versions of requested and calls the relevant
  95  *     other subsystems to apply these changes, and updates the pending field.
  96  *  4) Unlock task
  97  *  5) Run task policy update complete, which looks at the pending field to update
  98  *     subsystems which cannot be touched while holding the task lock.
  99  *
 100  *  To add a new requested policy, add the field in the requested struct, the flavor in task.h,
 101  *  the setter and getter in proc_(set|get)_task_policy*,
 102  *  then set up the effects of that behavior in task_policy_update*. If the policy manifests
 103  *  itself as a distinct effective policy, add it to the effective struct and add it to the
 104  *  proc_get_effective_task_policy accessor.
 105  *
 106  *  Most policies are set via proc_set_task_policy, but policies that don't fit that interface
 107  *  roll their own lock/set/update/unlock/complete code inside this file.
 108  *
 109  *
 110  *  Suppression policy
 111  *
 112  *  These are a set of behaviors that can be requested for a task.  They currently have specific
 113  *  implied actions when they're enabled, but they may be made customizable in the future.
 114  *
 115  *  When the affected task is boosted, we temporarily disable the suppression behaviors
 116  *  so that the affected process has a chance to run so it can call the API to permanently
 117  *  disable the suppression behaviors.
 118  *
 119  *  Locking
 120  *
 121  *  Changing task policy on a task takes the task lock.
 122  *  Changing task policy on a thread takes the thread mutex.
 123  *  Task policy changes that affect threads will take each thread's mutex to update it if necessary.
 124  *
 125  *  Querying the effective policy does not take a lock, because callers
 126  *  may run in interrupt context or other place where locks are not OK.
 127  *
 128  *  This means that any notification of state change needs to be externally synchronized.
 129  *  We do this by idempotent callouts after the state has changed to ask
 130  *  other subsystems to update their view of the world.
 131  *
 132  * TODO: Move all cpu/wakes/io monitor code into a separate file
 133  * TODO: Move all importance code over to importance subsystem
 134  * TODO: Move all taskwatch code into a separate file
 135  * TODO: Move all VM importance code into a separate file
 136  */
 137
 138 /* Task policy related helper functions */
 139 static void proc_set_task_policy_locked(task_t task, int category, int flavor, int value, int value2);
 140
 141 static void task_policy_update_locked(task_t task, task_pend_token_t pend_token);
 142 static void task_policy_update_internal_locked(task_t task, boolean_t in_create, task_pend_token_t pend_token);
 143
 144 /* For attributes that have two scalars as input/output */
 145 static void proc_set_task_policy2(task_t task, int category, int flavor, int value1, int value2);
 146 static void proc_get_task_policy2(task_t task, int category, int flavor, int *value1, int *value2);
 147
 148 static boolean_t task_policy_update_coalition_focal_tasks(task_t task, int prev_role, int next_role, task_pend_token_t pend_token);
 149
 150 static uint64_t task_requested_bitfield(task_t task);
 151 static uint64_t task_effective_bitfield(task_t task);
 152
 153 /* Convenience functions for munging a policy bitfield into a tracepoint */
 154 static uintptr_t trequested_0(task_t task);
 155 static uintptr_t trequested_1(task_t task);
 156 static uintptr_t teffective_0(task_t task);
 157 static uintptr_t teffective_1(task_t task);
 158
 159 /* CPU limits helper functions */
 160 static int task_set_cpuusage(task_t task, uint8_t percentage, uint64_t interval, uint64_t deadline, int scope, int entitled);
 161 static int task_get_cpuusage(task_t task, uint8_t *percentagep, uint64_t *intervalp, uint64_t *deadlinep, int *scope);
 162 static int task_enable_cpumon_locked(task_t task);
 163 static int task_disable_cpumon(task_t task);
 164 static int task_clear_cpuusage_locked(task_t task, int cpumon_entitled);
 165 static int task_apply_resource_actions(task_t task, int type);
 166 static void task_action_cpuusage(thread_call_param_t param0, thread_call_param_t param1);
 167
 168 #ifdef MACH_BSD
 169 typedef struct proc *   proc_t;
 170 int                     proc_pid(void *proc);
 171 extern int              proc_selfpid(void);
 172 extern char *           proc_name_address(void *p);
 173 extern char *           proc_best_name(proc_t proc);
 174
 175 extern int proc_pidpathinfo_internal(proc_t p, uint64_t arg,
 176     char *buffer, uint32_t buffersize,
 177     int32_t *retval);
 178 #endif /* MACH_BSD */
 179
 180
 181 #if CONFIG_EMBEDDED
 182 /* TODO: make CONFIG_TASKWATCH */
 183 /* Taskwatch related helper functions */
 184 static void set_thread_appbg(thread_t thread, int setbg, int importance);
 185 static void add_taskwatch_locked(task_t task, task_watch_t * twp);
 186 static void remove_taskwatch_locked(task_t task, task_watch_t * twp);
 187 static void task_watch_lock(void);
 188 static void task_watch_unlock(void);
 189 static void apply_appstate_watchers(task_t task);
 190
/*
 * Record tying a watching thread to the task it watches.  Instances are
 * linked through tw_links; the list they live on is not visible in this
 * part of the file.
 */
typedef struct task_watcher {
        queue_chain_t   tw_links;       /* queueing of threads */
        task_t          tw_task;        /* task that is being watched */
        thread_t        tw_thread;      /* thread that is watching the watch_task */
        int             tw_state;       /* the current app state of the thread */
        int             tw_importance;  /* importance prior to backgrounding */
} task_watch_t;
 198
/*
 * Per-thread work item used while applying a taskwatch action: the thread
 * to act on plus the importance value to restore for it.
 */
typedef struct thread_watchlist {
        thread_t        thread;         /* thread being worked on for taskwatch action */
        int             importance;     /* importance to be restored if thread is being made active */
} thread_watchlist_t;
 203
 204 #endif /* CONFIG_EMBEDDED */
 205
 206 extern int memorystatus_update_priority_for_appnap(proc_t p, boolean_t is_appnap);
 207
 208 /* Importance Inheritance related helper functions */
 209
 210 #if IMPORTANCE_INHERITANCE
 211
 212 static void task_importance_mark_live_donor(task_t task, boolean_t donating);
 213 static void task_importance_mark_receiver(task_t task, boolean_t receiving);
 214 static void task_importance_mark_denap_receiver(task_t task, boolean_t denap);
 215
 216 static boolean_t task_is_marked_live_importance_donor(task_t task);
 217 static boolean_t task_is_importance_receiver(task_t task);
 218 static boolean_t task_is_importance_denap_receiver(task_t task);
 219
 220 static int task_importance_hold_internal_assertion(task_t target_task, uint32_t count);
 221
 222 static void task_add_importance_watchport(task_t task, mach_port_t port, int *boostp);
 223 static void task_importance_update_live_donor(task_t target_task);
 224
 225 static void task_set_boost_locked(task_t task, boolean_t boost_active);
 226
 227 #endif /* IMPORTANCE_INHERITANCE */
 228
 229 #if IMPORTANCE_TRACE
 230 #define __imptrace_only
 231 #else /* IMPORTANCE_TRACE */
 232 #define __imptrace_only __unused
 233 #endif /* !IMPORTANCE_TRACE */
 234
 235 #if IMPORTANCE_INHERITANCE
 236 #define __imp_only
 237 #else
 238 #define __imp_only __unused
 239 #endif
 240
 241 /*
 242  * Default parameters for certain policies
 243  */
 244
/*
 * Tunable IO throttle tiers; all three start out at THROTTLE_LEVEL_TIER1.
 * NOTE(review): presumably consumed by the policy update code further down
 * the file - confirm against the users of each variable.
 */
int proc_standard_daemon_tier = THROTTLE_LEVEL_TIER1;
int proc_suppressed_disk_tier = THROTTLE_LEVEL_TIER1;
int proc_tal_disk_tier        = THROTTLE_LEVEL_TIER1;

/* Low byte of LATENCY_QOS_TIER_0, i.e. the same value qos_extract() would yield for it. */
int proc_graphics_timer_qos   = (LATENCY_QOS_TIER_0 & 0xFF);

/* IO tier placed in trp_bg_iotier of the default requested policy below. */
const int proc_default_bg_iotier  = THROTTLE_LEVEL_TIER2;

/* Latency/throughput QoS fields remain zeroed, i.e. TIER_UNSPECIFIED at creation */
const struct task_requested_policy default_task_requested_policy = {
        .trp_bg_iotier = proc_default_bg_iotier
};
/* Every field of the default effective policy starts out zero. */
const struct task_effective_policy default_task_effective_policy = {};
 258
 259 /*
 260  * Default parameters for CPU usage monitor.
 261  *
 262  * Default setting is 50% over 3 minutes.
 263  */
 264 #define         DEFAULT_CPUMON_PERCENTAGE 50
 265 #define         DEFAULT_CPUMON_INTERVAL   (3 * 60)
 266
 267 uint8_t         proc_max_cpumon_percentage;
 268 uint64_t        proc_max_cpumon_interval;
 269
 270
 271 kern_return_t
 272 qos_latency_policy_validate(task_latency_qos_t ltier)
 273 {
 274         if ((ltier != LATENCY_QOS_TIER_UNSPECIFIED) &&
 275             ((ltier > LATENCY_QOS_TIER_5) || (ltier < LATENCY_QOS_TIER_0))) {
 276                 return KERN_INVALID_ARGUMENT;
 277         }
 278
 279         return KERN_SUCCESS;
 280 }
 281
 282 kern_return_t
 283 qos_throughput_policy_validate(task_throughput_qos_t ttier)
 284 {
 285         if ((ttier != THROUGHPUT_QOS_TIER_UNSPECIFIED) &&
 286             ((ttier > THROUGHPUT_QOS_TIER_5) || (ttier < THROUGHPUT_QOS_TIER_0))) {
 287                 return KERN_INVALID_ARGUMENT;
 288         }
 289
 290         return KERN_SUCCESS;
 291 }
 292
 293 static kern_return_t
 294 task_qos_policy_validate(task_qos_policy_t qosinfo, mach_msg_type_number_t count)
 295 {
 296         if (count < TASK_QOS_POLICY_COUNT) {
 297                 return KERN_INVALID_ARGUMENT;
 298         }
 299
 300         task_latency_qos_t ltier = qosinfo->task_latency_qos_tier;
 301         task_throughput_qos_t ttier = qosinfo->task_throughput_qos_tier;
 302
 303         kern_return_t kr = qos_latency_policy_validate(ltier);
 304
 305         if (kr != KERN_SUCCESS) {
 306                 return kr;
 307         }
 308
 309         kr = qos_throughput_policy_validate(ttier);
 310
 311         return kr;
 312 }
 313
 314 uint32_t
 315 qos_extract(uint32_t qv)
 316 {
 317         return qv & 0xFF;
 318 }
 319
 320 uint32_t
 321 qos_latency_policy_package(uint32_t qv)
 322 {
 323         return (qv == LATENCY_QOS_TIER_UNSPECIFIED) ? LATENCY_QOS_TIER_UNSPECIFIED : ((0xFF << 16) | qv);
 324 }
 325
 326 uint32_t
 327 qos_throughput_policy_package(uint32_t qv)
 328 {
 329         return (qv == THROUGHPUT_QOS_TIER_UNSPECIFIED) ? THROUGHPUT_QOS_TIER_UNSPECIFIED : ((0xFE << 16) | qv);
 330 }
 331
/*
 * Bit values for task_policy_suppression_flags.  task_policy_set() tests
 * TASK_POLICY_SUPPRESSION_DISABLE and returns early for an activation request
 * when it is set; the other two bits form the initial value below.
 */
#define TASK_POLICY_SUPPRESSION_DISABLE  0x1
#define TASK_POLICY_SUPPRESSION_IOTIER2  0x2
#define TASK_POLICY_SUPPRESSION_NONDONOR 0x4
/* TEMPORARY boot-arg controlling task_policy suppression (App Nap) */
/* NOTE(review): typed boolean_t but holds an OR of the bit values above - confirm whether an unsigned integer type was intended. */
static boolean_t task_policy_suppression_flags = TASK_POLICY_SUPPRESSION_IOTIER2 |
    TASK_POLICY_SUPPRESSION_NONDONOR;
 338
 339 kern_return_t
 340 task_policy_set(
 341         task_t                                  task,
 342         task_policy_flavor_t    flavor,
 343         task_policy_t                   policy_info,
 344         mach_msg_type_number_t  count)
 345 {
 346         kern_return_t           result = KERN_SUCCESS;
 347
 348         if (task == TASK_NULL || task == kernel_task) {
 349                 return KERN_INVALID_ARGUMENT;
 350         }
 351
 352         switch (flavor) {
 353         case TASK_CATEGORY_POLICY: {
 354                 task_category_policy_t info = (task_category_policy_t)policy_info;
 355
 356                 if (count < TASK_CATEGORY_POLICY_COUNT) {
 357                         return KERN_INVALID_ARGUMENT;
 358                 }
 359
 360 #if CONFIG_EMBEDDED
 361                 /* On embedded, you can't modify your own role. */
 362                 if (current_task() == task) {
 363                         return KERN_INVALID_ARGUMENT;
 364                 }
 365 #endif
 366
 367                 switch (info->role) {
 368                 case TASK_FOREGROUND_APPLICATION:
 369                 case TASK_BACKGROUND_APPLICATION:
 370                 case TASK_DEFAULT_APPLICATION:
 371                         proc_set_task_policy(task,
 372                             TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE,
 373                             info->role);
 374                         break;
 375
 376                 case TASK_CONTROL_APPLICATION:
 377                         if (task != current_task() || task->sec_token.val[0] != 0) {
 378                                 result = KERN_INVALID_ARGUMENT;
 379                         } else {
 380                                 proc_set_task_policy(task,
 381                                     TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE,
 382                                     info->role);
 383                         }
 384                         break;
 385
 386                 case TASK_GRAPHICS_SERVER:
 387                         /* TODO: Restrict this role to FCFS <rdar://problem/12552788> */
 388                         if (task != current_task() || task->sec_token.val[0] != 0) {
 389                                 result = KERN_INVALID_ARGUMENT;
 390                         } else {
 391                                 proc_set_task_policy(task,
 392                                     TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE,
 393                                     info->role);
 394                         }
 395                         break;
 396                 default:
 397                         result = KERN_INVALID_ARGUMENT;
 398                         break;
 399                 } /* switch (info->role) */
 400
 401                 break;
 402         }
 403
 404 /* Desired energy-efficiency/performance "quality-of-service" */
 405         case TASK_BASE_QOS_POLICY:
 406         case TASK_OVERRIDE_QOS_POLICY:
 407         {
 408                 task_qos_policy_t qosinfo = (task_qos_policy_t)policy_info;
 409                 kern_return_t kr = task_qos_policy_validate(qosinfo, count);
 410
 411                 if (kr != KERN_SUCCESS) {
 412                         return kr;
 413                 }
 414
 415
 416                 uint32_t lqos = qos_extract(qosinfo->task_latency_qos_tier);
 417                 uint32_t tqos = qos_extract(qosinfo->task_throughput_qos_tier);
 418
 419                 proc_set_task_policy2(task, TASK_POLICY_ATTRIBUTE,
 420                     flavor == TASK_BASE_QOS_POLICY ? TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS : TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS,
 421                     lqos, tqos);
 422         }
 423         break;
 424
 425         case TASK_BASE_LATENCY_QOS_POLICY:
 426         {
 427                 task_qos_policy_t qosinfo = (task_qos_policy_t)policy_info;
 428                 kern_return_t kr = task_qos_policy_validate(qosinfo, count);
 429
 430                 if (kr != KERN_SUCCESS) {
 431                         return kr;
 432                 }
 433
 434                 uint32_t lqos = qos_extract(qosinfo->task_latency_qos_tier);
 435
 436                 proc_set_task_policy(task, TASK_POLICY_ATTRIBUTE, TASK_BASE_LATENCY_QOS_POLICY, lqos);
 437         }
 438         break;
 439
 440         case TASK_BASE_THROUGHPUT_QOS_POLICY:
 441         {
 442                 task_qos_policy_t qosinfo = (task_qos_policy_t)policy_info;
 443                 kern_return_t kr = task_qos_policy_validate(qosinfo, count);
 444
 445                 if (kr != KERN_SUCCESS) {
 446                         return kr;
 447                 }
 448
 449                 uint32_t tqos = qos_extract(qosinfo->task_throughput_qos_tier);
 450
 451                 proc_set_task_policy(task, TASK_POLICY_ATTRIBUTE, TASK_BASE_THROUGHPUT_QOS_POLICY, tqos);
 452         }
 453         break;
 454
 455         case TASK_SUPPRESSION_POLICY:
 456         {
 457 #if CONFIG_EMBEDDED
 458                 /*
 459                  * Suppression policy is not enabled for embedded
 460                  * because apps aren't marked as denap receivers
 461                  */
 462                 result = KERN_INVALID_ARGUMENT;
 463                 break;
 464 #else /* CONFIG_EMBEDDED */
 465
 466                 task_suppression_policy_t info = (task_suppression_policy_t)policy_info;
 467
 468                 if (count < TASK_SUPPRESSION_POLICY_COUNT) {
 469                         return KERN_INVALID_ARGUMENT;
 470                 }
 471
 472                 struct task_qos_policy qosinfo;
 473
 474                 qosinfo.task_latency_qos_tier = info->timer_throttle;
 475                 qosinfo.task_throughput_qos_tier = info->throughput_qos;
 476
 477                 kern_return_t kr = task_qos_policy_validate(&qosinfo, TASK_QOS_POLICY_COUNT);
 478
 479                 if (kr != KERN_SUCCESS) {
 480                         return kr;
 481                 }
 482
 483                 /* TEMPORARY disablement of task suppression */
 484                 if (info->active &&
 485                     (task_policy_suppression_flags & TASK_POLICY_SUPPRESSION_DISABLE)) {
 486                         return KERN_SUCCESS;
 487                 }
 488
 489                 struct task_pend_token pend_token = {};
 490
 491                 task_lock(task);
 492
 493                 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
 494                     (IMPORTANCE_CODE(IMP_TASK_SUPPRESSION, info->active)) | DBG_FUNC_START,
 495                     proc_selfpid(), task_pid(task), trequested_0(task),
 496                     trequested_1(task), 0);
 497
 498                 task->requested_policy.trp_sup_active      = (info->active)         ? 1 : 0;
 499                 task->requested_policy.trp_sup_lowpri_cpu  = (info->lowpri_cpu)     ? 1 : 0;
 500                 task->requested_policy.trp_sup_timer       = qos_extract(info->timer_throttle);
 501                 task->requested_policy.trp_sup_disk        = (info->disk_throttle)  ? 1 : 0;
 502                 task->requested_policy.trp_sup_throughput  = qos_extract(info->throughput_qos);
 503                 task->requested_policy.trp_sup_cpu         = (info->suppressed_cpu) ? 1 : 0;
 504                 task->requested_policy.trp_sup_bg_sockets  = (info->background_sockets) ? 1 : 0;
 505
 506                 task_policy_update_locked(task, &pend_token);
 507
 508                 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
 509                     (IMPORTANCE_CODE(IMP_TASK_SUPPRESSION, info->active)) | DBG_FUNC_END,
 510                     proc_selfpid(), task_pid(task), trequested_0(task),
 511                     trequested_1(task), 0);
 512
 513                 task_unlock(task);
 514
 515                 task_policy_update_complete_unlocked(task, &pend_token);
 516
 517                 break;
 518
 519 #endif /* CONFIG_EMBEDDED */
 520         }
 521
 522         default:
 523                 result = KERN_INVALID_ARGUMENT;
 524                 break;
 525         }
 526
 527         return result;
 528 }
 529
 530 /* Sets BSD 'nice' value on the task */
 531 kern_return_t
 532 task_importance(
 533         task_t                          task,
 534         integer_t                       importance)
 535 {
 536         if (task == TASK_NULL || task == kernel_task) {
 537                 return KERN_INVALID_ARGUMENT;
 538         }
 539
 540         task_lock(task);
 541
 542         if (!task->active) {
 543                 task_unlock(task);
 544
 545                 return KERN_TERMINATED;
 546         }
 547
 548         if (proc_get_effective_task_policy(task, TASK_POLICY_ROLE) >= TASK_CONTROL_APPLICATION) {
 549                 task_unlock(task);
 550
 551                 return KERN_INVALID_ARGUMENT;
 552         }
 553
 554         task->importance = importance;
 555
 556         struct task_pend_token pend_token = {};
 557
 558         task_policy_update_locked(task, &pend_token);
 559
 560         task_unlock(task);
 561
 562         task_policy_update_complete_unlocked(task, &pend_token);
 563
 564         return KERN_SUCCESS;
 565 }
 566
 567 kern_return_t
 568 task_policy_get(
 569         task_t                                  task,
 570         task_policy_flavor_t    flavor,
 571         task_policy_t                   policy_info,
 572         mach_msg_type_number_t  *count,
 573         boolean_t                               *get_default)
 574 {
 575         if (task == TASK_NULL || task == kernel_task) {
 576                 return KERN_INVALID_ARGUMENT;
 577         }
 578
 579         switch (flavor) {
 580         case TASK_CATEGORY_POLICY:
 581         {
 582                 task_category_policy_t          info = (task_category_policy_t)policy_info;
 583
 584                 if (*count < TASK_CATEGORY_POLICY_COUNT) {
 585                         return KERN_INVALID_ARGUMENT;
 586                 }
 587
 588                 if (*get_default) {
 589                         info->role = TASK_UNSPECIFIED;
 590                 } else {
 591                         info->role = proc_get_task_policy(task, TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE);
 592                 }
 593                 break;
 594         }
 595
 596         case TASK_BASE_QOS_POLICY: /* FALLTHRU */
 597         case TASK_OVERRIDE_QOS_POLICY:
 598         {
 599                 task_qos_policy_t info = (task_qos_policy_t)policy_info;
 600
 601                 if (*count < TASK_QOS_POLICY_COUNT) {
 602                         return KERN_INVALID_ARGUMENT;
 603                 }
 604
 605                 if (*get_default) {
 606                         info->task_latency_qos_tier = LATENCY_QOS_TIER_UNSPECIFIED;
 607                         info->task_throughput_qos_tier = THROUGHPUT_QOS_TIER_UNSPECIFIED;
 608                 } else if (flavor == TASK_BASE_QOS_POLICY) {
 609                         int value1, value2;
 610
 611                         proc_get_task_policy2(task, TASK_POLICY_ATTRIBUTE, TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS, &value1, &value2);
 612
 613                         info->task_latency_qos_tier = qos_latency_policy_package(value1);
 614                         info->task_throughput_qos_tier = qos_throughput_policy_package(value2);
 615                 } else if (flavor == TASK_OVERRIDE_QOS_POLICY) {
 616                         int value1, value2;
 617
 618                         proc_get_task_policy2(task, TASK_POLICY_ATTRIBUTE, TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS, &value1, &value2);
 619
 620                         info->task_latency_qos_tier = qos_latency_policy_package(value1);
 621                         info->task_throughput_qos_tier = qos_throughput_policy_package(value2);
 622                 }
 623
 624                 break;
 625         }
 626
 627         case TASK_POLICY_STATE:
 628         {
 629                 task_policy_state_t info = (task_policy_state_t)policy_info;
 630
 631                 if (*count < TASK_POLICY_STATE_COUNT) {
 632                         return KERN_INVALID_ARGUMENT;
 633                 }
 634
 635                 /* Only root can get this info */
 636                 if (current_task()->sec_token.val[0] != 0) {
 637                         return KERN_PROTECTION_FAILURE;
 638                 }
 639
 640                 if (*get_default) {
 641                         info->requested = 0;
 642                         info->effective = 0;
 643                         info->pending = 0;
 644                         info->imp_assertcnt = 0;
 645                         info->imp_externcnt = 0;
 646                         info->flags = 0;
 647                         info->imp_transitions = 0;
 648                 } else {
 649                         task_lock(task);
 650
 651                         info->requested = task_requested_bitfield(task);
 652                         info->effective = task_effective_bitfield(task);
 653                         info->pending   = 0;
 654
 655                         info->tps_requested_policy = *(uint64_t*)(&task->requested_policy);
 656                         info->tps_effective_policy = *(uint64_t*)(&task->effective_policy);
 657
 658                         info->flags = 0;
 659                         if (task->task_imp_base != NULL) {
 660                                 info->imp_assertcnt = task->task_imp_base->iit_assertcnt;
 661                                 info->imp_externcnt = IIT_EXTERN(task->task_imp_base);
 662                                 info->flags |= (task_is_marked_importance_receiver(task) ? TASK_IMP_RECEIVER : 0);
 663                                 info->flags |= (task_is_marked_importance_denap_receiver(task) ? TASK_DENAP_RECEIVER : 0);
 664                                 info->flags |= (task_is_marked_importance_donor(task) ? TASK_IMP_DONOR : 0);
 665                                 info->flags |= (task_is_marked_live_importance_donor(task) ? TASK_IMP_LIVE_DONOR : 0);
 666                                 info->imp_transitions = task->task_imp_base->iit_transitions;
 667                         } else {
 668                                 info->imp_assertcnt = 0;
 669                                 info->imp_externcnt = 0;
 670                                 info->imp_transitions = 0;
 671                         }
 672                         task_unlock(task);
 673                 }
 674
 675                 break;
 676         }
 677
 678         case TASK_SUPPRESSION_POLICY:
 679         {
 680                 task_suppression_policy_t info = (task_suppression_policy_t)policy_info;
 681
 682                 if (*count < TASK_SUPPRESSION_POLICY_COUNT) {
 683                         return KERN_INVALID_ARGUMENT;
 684                 }
 685
 686                 task_lock(task);
 687
 688                 if (*get_default) {
 689                         info->active            = 0;
 690                         info->lowpri_cpu        = 0;
 691                         info->timer_throttle    = LATENCY_QOS_TIER_UNSPECIFIED;
 692                         info->disk_throttle     = 0;
 693                         info->cpu_limit         = 0;
 694                         info->suspend           = 0;
 695                         info->throughput_qos    = 0;
 696                         info->suppressed_cpu    = 0;
 697                 } else {
 698                         info->active            = task->requested_policy.trp_sup_active;
 699                         info->lowpri_cpu        = task->requested_policy.trp_sup_lowpri_cpu;
 700                         info->timer_throttle    = qos_latency_policy_package(task->requested_policy.trp_sup_timer);
 701                         info->disk_throttle     = task->requested_policy.trp_sup_disk;
 702                         info->cpu_limit         = 0;
 703                         info->suspend           = 0;
 704                         info->throughput_qos    = qos_throughput_policy_package(task->requested_policy.trp_sup_throughput);
 705                         info->suppressed_cpu    = task->requested_policy.trp_sup_cpu;
 706                         info->background_sockets = task->requested_policy.trp_sup_bg_sockets;
 707                 }
 708
 709                 task_unlock(task);
 710                 break;
 711         }
 712
 713         default:
 714                 return KERN_INVALID_ARGUMENT;
 715         }
 716
 717         return KERN_SUCCESS;
 718 }
 719
 720 /*
 721  * Called at task creation
 722  * We calculate the correct effective but don't apply it to anything yet.
 723  * The threads, etc will inherit from the task as they get created.
 724  */
 725 void
 726 task_policy_create(task_t task, task_t parent_task)
 727 {
 728         task->requested_policy.trp_apptype          = parent_task->requested_policy.trp_apptype;
 729
 730         task->requested_policy.trp_int_darwinbg     = parent_task->requested_policy.trp_int_darwinbg;
 731         task->requested_policy.trp_ext_darwinbg     = parent_task->requested_policy.trp_ext_darwinbg;
 732         task->requested_policy.trp_int_iotier       = parent_task->requested_policy.trp_int_iotier;
 733         task->requested_policy.trp_ext_iotier       = parent_task->requested_policy.trp_ext_iotier;
 734         task->requested_policy.trp_int_iopassive    = parent_task->requested_policy.trp_int_iopassive;
 735         task->requested_policy.trp_ext_iopassive    = parent_task->requested_policy.trp_ext_iopassive;
 736         task->requested_policy.trp_bg_iotier        = parent_task->requested_policy.trp_bg_iotier;
 737         task->requested_policy.trp_terminated       = parent_task->requested_policy.trp_terminated;
 738         task->requested_policy.trp_qos_clamp        = parent_task->requested_policy.trp_qos_clamp;
 739
 740         if (task->requested_policy.trp_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE && !task_is_exec_copy(task)) {
 741                 /* Do not update the apptype for exec copy task */
 742                 if (parent_task->requested_policy.trp_boosted) {
 743                         task->requested_policy.trp_apptype = TASK_APPTYPE_DAEMON_INTERACTIVE;
 744                         task_importance_mark_donor(task, TRUE);
 745                 } else {
 746                         task->requested_policy.trp_apptype = TASK_APPTYPE_DAEMON_BACKGROUND;
 747                         task_importance_mark_receiver(task, FALSE);
 748                 }
 749         }
 750
 751         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
 752             (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_TASK))) | DBG_FUNC_START,
 753             task_pid(task), teffective_0(task),
 754             teffective_1(task), task->priority, 0);
 755
 756         task_policy_update_internal_locked(task, TRUE, NULL);
 757
 758         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
 759             (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_TASK))) | DBG_FUNC_END,
 760             task_pid(task), teffective_0(task),
 761             teffective_1(task), task->priority, 0);
 762
 763         task_importance_update_live_donor(task);
 764 }
 765
 766
 767 static void
 768 task_policy_update_locked(task_t task, task_pend_token_t pend_token)
 769 {
 770         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
 771             (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_TASK) | DBG_FUNC_START),
 772             task_pid(task), teffective_0(task),
 773             teffective_1(task), task->priority, 0);
 774
 775         task_policy_update_internal_locked(task, FALSE, pend_token);
 776
 777         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
 778             (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_TASK)) | DBG_FUNC_END,
 779             task_pid(task), teffective_0(task),
 780             teffective_1(task), task->priority, 0);
 781 }
 782
 783 /*
 784  * One state update function TO RULE THEM ALL
 785  *
 786  * This function updates the task or thread effective policy fields
 787  * and pushes the results to the relevant subsystems.
 788  *
 789  * Must call update_complete after unlocking the task,
 790  * as some subsystems cannot be updated while holding the task lock.
 791  *
 792  * Called with task locked, not thread
 793  */
 794
 795 static void
 796 task_policy_update_internal_locked(task_t task, boolean_t in_create, task_pend_token_t pend_token)
 797 {
 798         /*
 799          * Step 1:
 800          *  Gather requested policy
 801          */
 802
 803         struct task_requested_policy requested = task->requested_policy;
 804
 805         /*
 806          * Step 2:
 807          *  Calculate new effective policies from requested policy and task state
 808          *  Rules:
 809          *      Don't change requested, it won't take effect
 810          */
 811
 812         struct task_effective_policy next = {};
 813
 814         /* Update task role */
 815         next.tep_role = requested.trp_role;
 816
 817         /* Set task qos clamp and ceiling */
 818         next.tep_qos_clamp = requested.trp_qos_clamp;
 819
 820         if (requested.trp_apptype == TASK_APPTYPE_APP_DEFAULT ||
 821             requested.trp_apptype == TASK_APPTYPE_APP_TAL) {
 822                 switch (next.tep_role) {
 823                 case TASK_FOREGROUND_APPLICATION:
 824                         /* Foreground apps get urgent scheduler priority */
 825                         next.tep_qos_ui_is_urgent = 1;
 826                         next.tep_qos_ceiling = THREAD_QOS_UNSPECIFIED;
 827                         break;
 828
 829                 case TASK_BACKGROUND_APPLICATION:
 830                         /* This is really 'non-focal but on-screen' */
 831                         next.tep_qos_ceiling = THREAD_QOS_UNSPECIFIED;
 832                         break;
 833
 834                 case TASK_DEFAULT_APPLICATION:
 835                         /* This is 'may render UI but we don't know if it's focal/nonfocal' */
 836                         next.tep_qos_ceiling = THREAD_QOS_UNSPECIFIED;
 837                         break;
 838
 839                 case TASK_NONUI_APPLICATION:
 840                         /* i.e. 'off-screen' */
 841                         next.tep_qos_ceiling = THREAD_QOS_LEGACY;
 842                         break;
 843
 844                 case TASK_CONTROL_APPLICATION:
 845                 case TASK_GRAPHICS_SERVER:
 846                         next.tep_qos_ui_is_urgent = 1;
 847                         next.tep_qos_ceiling = THREAD_QOS_UNSPECIFIED;
 848                         break;
 849
 850                 case TASK_THROTTLE_APPLICATION:
 851                         /* i.e. 'TAL launch' */
 852                         next.tep_qos_ceiling = THREAD_QOS_UTILITY;
 853                         break;
 854
 855                 case TASK_DARWINBG_APPLICATION:
 856                         /* i.e. 'DARWIN_BG throttled background application' */
 857                         next.tep_qos_ceiling = THREAD_QOS_BACKGROUND;
 858                         break;
 859
 860                 case TASK_UNSPECIFIED:
 861                 default:
 862                         /* Apps that don't have an application role get
 863                          * USER_INTERACTIVE and USER_INITIATED squashed to LEGACY */
 864                         next.tep_qos_ceiling = THREAD_QOS_LEGACY;
 865                         break;
 866                 }
 867         } else {
 868                 /* Daemons get USER_INTERACTIVE squashed to USER_INITIATED */
 869                 next.tep_qos_ceiling = THREAD_QOS_USER_INITIATED;
 870         }
 871
 872         /* Calculate DARWIN_BG */
 873         boolean_t wants_darwinbg        = FALSE;
 874         boolean_t wants_all_sockets_bg  = FALSE; /* Do I want my existing sockets to be bg */
 875         boolean_t wants_watchersbg      = FALSE; /* Do I want my pidbound threads to be bg */
 876
 877         /*
 878          * If DARWIN_BG has been requested at either level, it's engaged.
 879          * Only true DARWIN_BG changes cause watchers to transition.
 880          *
 881          * Backgrounding due to apptype does.
 882          */
 883         if (requested.trp_int_darwinbg || requested.trp_ext_darwinbg ||
 884             next.tep_role == TASK_DARWINBG_APPLICATION) {
 885                 wants_watchersbg = wants_all_sockets_bg = wants_darwinbg = TRUE;
 886         }
 887
 888         /*
 889          * Deprecated TAL implementation for TAL apptype
 890          * Background TAL apps are throttled when TAL is enabled
 891          */
 892         if (requested.trp_apptype == TASK_APPTYPE_APP_TAL &&
 893             requested.trp_role == TASK_BACKGROUND_APPLICATION &&
 894             requested.trp_tal_enabled == 1) {
 895                 next.tep_tal_engaged = 1;
 896         }
 897
 898         /* New TAL implementation based on TAL role alone, works for all apps */
 899         if ((requested.trp_apptype == TASK_APPTYPE_APP_DEFAULT ||
 900             requested.trp_apptype == TASK_APPTYPE_APP_TAL) &&
 901             requested.trp_role == TASK_THROTTLE_APPLICATION) {
 902                 next.tep_tal_engaged = 1;
 903         }
 904
 905         /* Adaptive daemons are DARWIN_BG unless boosted, and don't get network throttled. */
 906         if (requested.trp_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE &&
 907             requested.trp_boosted == 0) {
 908                 wants_darwinbg = TRUE;
 909         }
 910
 911         /* Background daemons are always DARWIN_BG, no exceptions, and don't get network throttled. */
 912         if (requested.trp_apptype == TASK_APPTYPE_DAEMON_BACKGROUND) {
 913                 wants_darwinbg = TRUE;
 914         }
 915
 916         if (next.tep_qos_clamp == THREAD_QOS_BACKGROUND || next.tep_qos_clamp == THREAD_QOS_MAINTENANCE) {
 917                 wants_darwinbg = TRUE;
 918         }
 919
 920         /* Calculate side effects of DARWIN_BG */
 921
 922         if (wants_darwinbg) {
 923                 next.tep_darwinbg = 1;
 924                 /* darwinbg tasks always create bg sockets, but we don't always loop over all sockets */
 925                 next.tep_new_sockets_bg = 1;
 926                 next.tep_lowpri_cpu = 1;
 927         }
 928
 929         if (wants_all_sockets_bg) {
 930                 next.tep_all_sockets_bg = 1;
 931         }
 932
 933         if (wants_watchersbg) {
 934                 next.tep_watchers_bg = 1;
 935         }
 936
 937         /* Calculate low CPU priority */
 938
 939         boolean_t wants_lowpri_cpu = FALSE;
 940
 941         if (wants_darwinbg) {
 942                 wants_lowpri_cpu = TRUE;
 943         }
 944
 945         if (next.tep_tal_engaged) {
 946                 wants_lowpri_cpu = TRUE;
 947         }
 948
 949         if (requested.trp_sup_lowpri_cpu && requested.trp_boosted == 0) {
 950                 wants_lowpri_cpu = TRUE;
 951         }
 952
 953         if (wants_lowpri_cpu) {
 954                 next.tep_lowpri_cpu = 1;
 955         }
 956
 957         /* Calculate IO policy */
 958
 959         /* Update BG IO policy (so we can see if it has changed) */
 960         next.tep_bg_iotier = requested.trp_bg_iotier;
 961
 962         int iopol = THROTTLE_LEVEL_TIER0;
 963
 964         if (wants_darwinbg) {
 965                 iopol = MAX(iopol, requested.trp_bg_iotier);
 966         }
 967
 968         if (requested.trp_apptype == TASK_APPTYPE_DAEMON_STANDARD) {
 969                 iopol = MAX(iopol, proc_standard_daemon_tier);
 970         }
 971
 972         if (requested.trp_sup_disk && requested.trp_boosted == 0) {
 973                 iopol = MAX(iopol, proc_suppressed_disk_tier);
 974         }
 975
 976         if (next.tep_tal_engaged) {
 977                 iopol = MAX(iopol, proc_tal_disk_tier);
 978         }
 979
 980         if (next.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
 981                 iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.tep_qos_clamp]);
 982         }
 983
 984         iopol = MAX(iopol, requested.trp_int_iotier);
 985         iopol = MAX(iopol, requested.trp_ext_iotier);
 986
 987         next.tep_io_tier = iopol;
 988
 989         /* Calculate Passive IO policy */
 990
 991         if (requested.trp_ext_iopassive || requested.trp_int_iopassive) {
 992                 next.tep_io_passive = 1;
 993         }
 994
 995         /* Calculate suppression-active flag */
 996         boolean_t appnap_transition = FALSE;
 997
 998         if (requested.trp_sup_active && requested.trp_boosted == 0) {
 999                 next.tep_sup_active = 1;
1000         }
1001
1002         if (task->effective_policy.tep_sup_active != next.tep_sup_active) {
1003                 appnap_transition = TRUE;
1004         }
1005
1006         /* Calculate timer QOS */
1007         int latency_qos = requested.trp_base_latency_qos;
1008
1009         if (requested.trp_sup_timer && requested.trp_boosted == 0) {
1010                 latency_qos = requested.trp_sup_timer;
1011         }
1012
1013         if (next.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
1014                 latency_qos = MAX(latency_qos, (int)thread_qos_policy_params.qos_latency_qos[next.tep_qos_clamp]);
1015         }
1016
1017         if (requested.trp_over_latency_qos != 0) {
1018                 latency_qos = requested.trp_over_latency_qos;
1019         }
1020
1021         /* Treat the windowserver special */
1022         if (requested.trp_role == TASK_GRAPHICS_SERVER) {
1023                 latency_qos = proc_graphics_timer_qos;
1024         }
1025
1026         next.tep_latency_qos = latency_qos;
1027
1028         /* Calculate throughput QOS */
1029         int through_qos = requested.trp_base_through_qos;
1030
1031         if (requested.trp_sup_throughput && requested.trp_boosted == 0) {
1032                 through_qos = requested.trp_sup_throughput;
1033         }
1034
1035         if (next.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
1036                 through_qos = MAX(through_qos, (int)thread_qos_policy_params.qos_through_qos[next.tep_qos_clamp]);
1037         }
1038
1039         if (requested.trp_over_through_qos != 0) {
1040                 through_qos = requested.trp_over_through_qos;
1041         }
1042
1043         next.tep_through_qos = through_qos;
1044
1045         /* Calculate suppressed CPU priority */
1046         if (requested.trp_sup_cpu && requested.trp_boosted == 0) {
1047                 next.tep_suppressed_cpu = 1;
1048         }
1049
1050         /*
1051          * Calculate background sockets
1052          * Don't take into account boosting to limit transition frequency.
1053          */
1054         if (requested.trp_sup_bg_sockets) {
1055                 next.tep_all_sockets_bg = 1;
1056                 next.tep_new_sockets_bg = 1;
1057         }
1058
1059         /* Apply SFI Managed class bit */
1060         next.tep_sfi_managed = requested.trp_sfi_managed;
1061
1062         /* Calculate 'live donor' status for live importance */
1063         switch (requested.trp_apptype) {
1064         case TASK_APPTYPE_APP_TAL:
1065         case TASK_APPTYPE_APP_DEFAULT:
1066                 if (requested.trp_ext_darwinbg == 1 ||
1067                     (next.tep_sup_active == 1 &&
1068                     (task_policy_suppression_flags & TASK_POLICY_SUPPRESSION_NONDONOR)) ||
1069                     next.tep_role == TASK_DARWINBG_APPLICATION) {
1070                         next.tep_live_donor = 0;
1071                 } else {
1072                         next.tep_live_donor = 1;
1073                 }
1074                 break;
1075
1076         case TASK_APPTYPE_DAEMON_INTERACTIVE:
1077         case TASK_APPTYPE_DAEMON_STANDARD:
1078         case TASK_APPTYPE_DAEMON_ADAPTIVE:
1079         case TASK_APPTYPE_DAEMON_BACKGROUND:
1080         default:
1081                 next.tep_live_donor = 0;
1082                 break;
1083         }
1084
1085         if (requested.trp_terminated) {
1086                 /*
1087                  * Shoot down the throttles that slow down exit or response to SIGTERM
1088                  * We don't need to shoot down:
1089                  * passive        (don't want to cause others to throttle)
1090                  * all_sockets_bg (don't need to iterate FDs on every exit)
1091                  * new_sockets_bg (doesn't matter for exiting process)
1092                  * pidsuspend     (jetsam-ed BG process shouldn't run again)
1093                  * watchers_bg    (watcher threads don't need to be unthrottled)
1094                  * latency_qos    (affects userspace timers only)
1095                  */
1096
1097                 next.tep_terminated     = 1;
1098                 next.tep_darwinbg       = 0;
1099                 next.tep_lowpri_cpu     = 0;
1100                 next.tep_io_tier        = THROTTLE_LEVEL_TIER0;
1101                 next.tep_tal_engaged    = 0;
1102                 next.tep_role           = TASK_UNSPECIFIED;
1103                 next.tep_suppressed_cpu = 0;
1104         }
1105
1106         /*
1107          * Step 3:
1108          *  Swap out old policy for new policy
1109          */
1110
1111         struct task_effective_policy prev = task->effective_policy;
1112
1113         /* This is the point where the new values become visible to other threads */
1114         task->effective_policy = next;
1115
1116         /* Don't do anything further to a half-formed task */
1117         if (in_create) {
1118                 return;
1119         }
1120
1121         if (task == kernel_task) {
1122                 panic("Attempting to set task policy on kernel_task");
1123         }
1124
1125         /*
1126          * Step 4:
1127          *  Pend updates that can't be done while holding the task lock
1128          */
1129
1130         if (prev.tep_all_sockets_bg != next.tep_all_sockets_bg) {
1131                 pend_token->tpt_update_sockets = 1;
1132         }
1133
1134         /* Only re-scan the timer list if the qos level is getting less strong */
1135         if (prev.tep_latency_qos > next.tep_latency_qos) {
1136                 pend_token->tpt_update_timers = 1;
1137         }
1138
1139 #if CONFIG_EMBEDDED
1140         if (prev.tep_watchers_bg != next.tep_watchers_bg) {
1141                 pend_token->tpt_update_watchers = 1;
1142         }
1143 #endif /* CONFIG_EMBEDDED */
1144
1145         if (prev.tep_live_donor != next.tep_live_donor) {
1146                 pend_token->tpt_update_live_donor = 1;
1147         }
1148
1149         /*
1150          * Step 5:
1151          *  Update other subsystems as necessary if something has changed
1152          */
1153
1154         boolean_t update_threads = FALSE, update_sfi = FALSE;
1155
1156         /*
1157          * Check for the attributes that thread_policy_update_internal_locked() consults,
1158          *  and trigger thread policy re-evaluation.
1159          */
1160         if (prev.tep_io_tier != next.tep_io_tier ||
1161             prev.tep_bg_iotier != next.tep_bg_iotier ||
1162             prev.tep_io_passive != next.tep_io_passive ||
1163             prev.tep_darwinbg != next.tep_darwinbg ||
1164             prev.tep_qos_clamp != next.tep_qos_clamp ||
1165             prev.tep_qos_ceiling != next.tep_qos_ceiling ||
1166             prev.tep_qos_ui_is_urgent != next.tep_qos_ui_is_urgent ||
1167             prev.tep_latency_qos != next.tep_latency_qos ||
1168             prev.tep_through_qos != next.tep_through_qos ||
1169             prev.tep_lowpri_cpu != next.tep_lowpri_cpu ||
1170             prev.tep_new_sockets_bg != next.tep_new_sockets_bg ||
1171             prev.tep_terminated != next.tep_terminated) {
1172                 update_threads = TRUE;
1173         }
1174
1175         /*
1176          * Check for the attributes that sfi_thread_classify() consults,
1177          *  and trigger SFI re-evaluation.
1178          */
1179         if (prev.tep_latency_qos != next.tep_latency_qos ||
1180             prev.tep_role != next.tep_role ||
1181             prev.tep_sfi_managed != next.tep_sfi_managed) {
1182                 update_sfi = TRUE;
1183         }
1184
1185         /* Reflect task role transitions into the coalition role counters */
1186         if (prev.tep_role != next.tep_role) {
1187                 if (task_policy_update_coalition_focal_tasks(task, prev.tep_role, next.tep_role, pend_token)) {
1188                         update_sfi = TRUE;
1189                 }
1190         }
1191
1192         boolean_t update_priority = FALSE;
1193
1194         int priority     = BASEPRI_DEFAULT;
1195         int max_priority = MAXPRI_USER;
1196
1197         if (next.tep_lowpri_cpu) {
1198                 priority = MAXPRI_THROTTLE;
1199                 max_priority = MAXPRI_THROTTLE;
1200         } else if (next.tep_suppressed_cpu) {
1201                 priority = MAXPRI_SUPPRESSED;
1202                 max_priority = MAXPRI_SUPPRESSED;
1203         } else {
1204                 switch (next.tep_role) {
1205                 case TASK_CONTROL_APPLICATION:
1206                         priority = BASEPRI_CONTROL;
1207                         break;
1208                 case TASK_GRAPHICS_SERVER:
1209                         priority = BASEPRI_GRAPHICS;
1210                         max_priority = MAXPRI_RESERVED;
1211                         break;
1212                 default:
1213                         break;
1214                 }
1215
1216                 /* factor in 'nice' value */
1217                 priority += task->importance;
1218
1219                 if (task->effective_policy.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
1220                         int qos_clamp_priority = thread_qos_policy_params.qos_pri[task->effective_policy.tep_qos_clamp];
1221
1222                         priority        = MIN(priority, qos_clamp_priority);
1223                         max_priority    = MIN(max_priority, qos_clamp_priority);
1224                 }
1225
1226                 if (priority > max_priority) {
1227                         priority = max_priority;
1228                 } else if (priority < MINPRI) {
1229                         priority = MINPRI;
1230                 }
1231         }
1232
1233         assert(priority <= max_priority);
1234
1235         /* avoid extra work if priority isn't changing */
1236         if (priority != task->priority ||
1237             max_priority != task->max_priority) {
1238                 /* update the scheduling priority for the task */
1239                 task->max_priority  = max_priority;
1240                 task->priority      = priority;
1241                 update_priority     = TRUE;
1242         }
1243
1244         /* Loop over the threads in the task:
1245          * only once
1246          * only if necessary
1247          * with one thread mutex hold per thread
1248          */
1249         if (update_threads || update_priority || update_sfi) {
1250                 thread_t thread;
1251
1252                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1253                         struct task_pend_token thread_pend_token = {};
1254
1255                         if (update_sfi) {
1256                                 thread_pend_token.tpt_update_thread_sfi = 1;
1257                         }
1258
1259                         if (update_priority || update_threads) {
1260                                 thread_policy_update_tasklocked(thread,
1261                                     task->priority, task->max_priority,
1262                                     &thread_pend_token);
1263                         }
1264
1265                         assert(!thread_pend_token.tpt_update_sockets);
1266
1267                         // Slightly risky, as we still hold the task lock...
1268                         thread_policy_update_complete_unlocked(thread, &thread_pend_token);
1269                 }
1270         }
1271
1272         /*
1273          * Use the app-nap transitions to influence the
1274          * transition of the process within the jetsam band
1275          * [and optionally its live-donor status]
1276          * On macOS only.
1277          */
1278         if (appnap_transition == TRUE) {
1279                 if (task->effective_policy.tep_sup_active == 1) {
1280                         memorystatus_update_priority_for_appnap(((proc_t) task->bsd_info), TRUE);
1281                 } else {
1282                         memorystatus_update_priority_for_appnap(((proc_t) task->bsd_info), FALSE);
1283                 }
1284         }
1285 }
1286
1287
1288 /*
1289  * Yet another layering violation. We reach out and bang on the coalition directly.
1290  */
1291 static boolean_t
1292 task_policy_update_coalition_focal_tasks(task_t            task,
1293     int               prev_role,
1294     int               next_role,
1295     task_pend_token_t pend_token)
1296 {
1297         boolean_t sfi_transition = FALSE;
1298         uint32_t new_count = 0;
1299
1300         /* task moving into/out-of the foreground */
1301         if (prev_role != TASK_FOREGROUND_APPLICATION && next_role == TASK_FOREGROUND_APPLICATION) {
1302                 if (task_coalition_adjust_focal_count(task, 1, &new_count) && (new_count == 1)) {
1303                         sfi_transition = TRUE;
1304                         pend_token->tpt_update_tg_ui_flag = TRUE;
1305                 }
1306         } else if (prev_role == TASK_FOREGROUND_APPLICATION && next_role != TASK_FOREGROUND_APPLICATION) {
1307                 if (task_coalition_adjust_focal_count(task, -1, &new_count) && (new_count == 0)) {
1308                         sfi_transition = TRUE;
1309                         pend_token->tpt_update_tg_ui_flag = TRUE;
1310                 }
1311         }
1312
1313         /* task moving into/out-of background */
1314         if (prev_role != TASK_BACKGROUND_APPLICATION && next_role == TASK_BACKGROUND_APPLICATION) {
1315                 if (task_coalition_adjust_nonfocal_count(task, 1, &new_count) && (new_count == 1)) {
1316                         sfi_transition = TRUE;
1317                 }
1318         } else if (prev_role == TASK_BACKGROUND_APPLICATION && next_role != TASK_BACKGROUND_APPLICATION) {
1319                 if (task_coalition_adjust_nonfocal_count(task, -1, &new_count) && (new_count == 0)) {
1320                         sfi_transition = TRUE;
1321                 }
1322         }
1323
1324         if (sfi_transition) {
1325                 pend_token->tpt_update_coal_sfi = 1;
1326         }
1327         return sfi_transition;
1328 }
1329
1330 #if CONFIG_SCHED_SFI
1331
1332 /* coalition object is locked */
1333 static void
1334 task_sfi_reevaluate_cb(coalition_t coal, void *ctx, task_t task)
1335 {
1336         thread_t thread;
1337
1338         /* unused for now */
1339         (void)coal;
1340
1341         /* skip the task we're re-evaluating on behalf of: it's already updated */
1342         if (task == (task_t)ctx) {
1343                 return;
1344         }
1345
1346         task_lock(task);
1347
1348         queue_iterate(&task->threads, thread, thread_t, task_threads) {
1349                 sfi_reevaluate(thread);
1350         }
1351
1352         task_unlock(task);
1353 }
1354 #endif /* CONFIG_SCHED_SFI */
1355
1356 /*
1357  * Called with task unlocked to do things that can't be done while holding the task lock
1358  */
1359 void
1360 task_policy_update_complete_unlocked(task_t task, task_pend_token_t pend_token)
1361 {
1362 #ifdef MACH_BSD
1363         if (pend_token->tpt_update_sockets) {
1364                 proc_apply_task_networkbg(task->bsd_info, THREAD_NULL);
1365         }
1366 #endif /* MACH_BSD */
1367
1368         /* The timer throttle has been removed or reduced, we need to look for expired timers and fire them */
1369         if (pend_token->tpt_update_timers) {
1370                 ml_timer_evaluate();
1371         }
1372
1373 #if CONFIG_EMBEDDED
1374         if (pend_token->tpt_update_watchers) {
1375                 apply_appstate_watchers(task);
1376         }
1377 #endif /* CONFIG_EMBEDDED */
1378
1379         if (pend_token->tpt_update_live_donor) {
1380                 task_importance_update_live_donor(task);
1381         }
1382
1383 #if CONFIG_SCHED_SFI
1384         /* use the resource coalition for SFI re-evaluation */
1385         if (pend_token->tpt_update_coal_sfi) {
1386                 coalition_for_each_task(task->coalition[COALITION_TYPE_RESOURCE],
1387                     (void *)task, task_sfi_reevaluate_cb);
1388         }
1389 #endif /* CONFIG_SCHED_SFI */
1390
1391 }
1392
1393 /*
1394  * Initiate a task policy state transition
1395  *
1396  * Everything that modifies requested except functions that need to hold the task lock
1397  * should use this function
1398  *
1399  * Argument validation should be performed before reaching this point.
1400  *
1401  * TODO: Do we need to check task->active?
1402  */
1403 void
1404 proc_set_task_policy(task_t     task,
1405     int        category,
1406     int        flavor,
1407     int        value)
1408 {
1409         struct task_pend_token pend_token = {};
1410
1411         task_lock(task);
1412
1413         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1414             (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_TASK))) | DBG_FUNC_START,
1415             task_pid(task), trequested_0(task),
1416             trequested_1(task), value, 0);
1417
1418         proc_set_task_policy_locked(task, category, flavor, value, 0);
1419
1420         task_policy_update_locked(task, &pend_token);
1421
1422
1423         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1424             (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_TASK))) | DBG_FUNC_END,
1425             task_pid(task), trequested_0(task),
1426             trequested_1(task), tpending(&pend_token), 0);
1427
1428         task_unlock(task);
1429
1430         task_policy_update_complete_unlocked(task, &pend_token);
1431 }
1432
1433 /*
1434  * Variant of proc_set_task_policy() that sets two scalars in the requested policy structure.
1435  * Same locking rules apply.
1436  */
1437 void
1438 proc_set_task_policy2(task_t    task,
1439     int       category,
1440     int       flavor,
1441     int       value,
1442     int       value2)
1443 {
1444         struct task_pend_token pend_token = {};
1445
1446         task_lock(task);
1447
1448         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1449             (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_TASK))) | DBG_FUNC_START,
1450             task_pid(task), trequested_0(task),
1451             trequested_1(task), value, 0);
1452
1453         proc_set_task_policy_locked(task, category, flavor, value, value2);
1454
1455         task_policy_update_locked(task, &pend_token);
1456
1457         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1458             (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_TASK))) | DBG_FUNC_END,
1459             task_pid(task), trequested_0(task),
1460             trequested_1(task), tpending(&pend_token), 0);
1461
1462         task_unlock(task);
1463
1464         task_policy_update_complete_unlocked(task, &pend_token);
1465 }
1466
1467 /*
1468  * Set the requested state for a specific flavor to a specific value.
1469  *
1470  *  TODO:
1471  *  Verify that arguments to non iopol things are 1 or 0
1472  */
1473 static void
1474 proc_set_task_policy_locked(task_t      task,
1475     int         category,
1476     int         flavor,
1477     int         value,
1478     int         value2)
1479 {
1480         int tier, passive;
1481
1482         struct task_requested_policy requested = task->requested_policy;
1483
1484         switch (flavor) {
1485         /* Category: EXTERNAL and INTERNAL */
1486
1487         case TASK_POLICY_DARWIN_BG:
1488                 if (category == TASK_POLICY_EXTERNAL) {
1489                         requested.trp_ext_darwinbg = value;
1490                 } else {
1491                         requested.trp_int_darwinbg = value;
1492                 }
1493                 break;
1494
1495         case TASK_POLICY_IOPOL:
1496                 proc_iopol_to_tier(value, &tier, &passive);
1497                 if (category == TASK_POLICY_EXTERNAL) {
1498                         requested.trp_ext_iotier  = tier;
1499                         requested.trp_ext_iopassive = passive;
1500                 } else {
1501                         requested.trp_int_iotier  = tier;
1502                         requested.trp_int_iopassive = passive;
1503                 }
1504                 break;
1505
1506         case TASK_POLICY_IO:
1507                 if (category == TASK_POLICY_EXTERNAL) {
1508                         requested.trp_ext_iotier = value;
1509                 } else {
1510                         requested.trp_int_iotier = value;
1511                 }
1512                 break;
1513
1514         case TASK_POLICY_PASSIVE_IO:
1515                 if (category == TASK_POLICY_EXTERNAL) {
1516                         requested.trp_ext_iopassive = value;
1517                 } else {
1518                         requested.trp_int_iopassive = value;
1519                 }
1520                 break;
1521
1522         /* Category: INTERNAL */
1523
1524         case TASK_POLICY_DARWIN_BG_IOPOL:
1525                 assert(category == TASK_POLICY_INTERNAL);
1526                 proc_iopol_to_tier(value, &tier, &passive);
1527                 requested.trp_bg_iotier = tier;
1528                 break;
1529
1530         /* Category: ATTRIBUTE */
1531
1532         case TASK_POLICY_TAL:
1533                 assert(category == TASK_POLICY_ATTRIBUTE);
1534                 requested.trp_tal_enabled = value;
1535                 break;
1536
1537         case TASK_POLICY_BOOST:
1538                 assert(category == TASK_POLICY_ATTRIBUTE);
1539                 requested.trp_boosted = value;
1540                 break;
1541
1542         case TASK_POLICY_ROLE:
1543                 assert(category == TASK_POLICY_ATTRIBUTE);
1544                 requested.trp_role = value;
1545                 break;
1546
1547         case TASK_POLICY_TERMINATED:
1548                 assert(category == TASK_POLICY_ATTRIBUTE);
1549                 requested.trp_terminated = value;
1550                 break;
1551
1552         case TASK_BASE_LATENCY_QOS_POLICY:
1553                 assert(category == TASK_POLICY_ATTRIBUTE);
1554                 requested.trp_base_latency_qos = value;
1555                 break;
1556
1557         case TASK_BASE_THROUGHPUT_QOS_POLICY:
1558                 assert(category == TASK_POLICY_ATTRIBUTE);
1559                 requested.trp_base_through_qos = value;
1560                 break;
1561
1562         case TASK_POLICY_SFI_MANAGED:
1563                 assert(category == TASK_POLICY_ATTRIBUTE);
1564                 requested.trp_sfi_managed = value;
1565                 break;
1566
1567         case TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS:
1568                 assert(category == TASK_POLICY_ATTRIBUTE);
1569                 requested.trp_base_latency_qos = value;
1570                 requested.trp_base_through_qos = value2;
1571                 break;
1572
1573         case TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS:
1574                 assert(category == TASK_POLICY_ATTRIBUTE);
1575                 requested.trp_over_latency_qos = value;
1576                 requested.trp_over_through_qos = value2;
1577                 break;
1578
1579         default:
1580                 panic("unknown task policy: %d %d %d %d", category, flavor, value, value2);
1581                 break;
1582         }
1583
1584         task->requested_policy = requested;
1585 }
1586
1587 /*
1588  * Gets what you set. Effective values may be different.
1589  */
1590 int
1591 proc_get_task_policy(task_t     task,
1592     int        category,
1593     int        flavor)
1594 {
1595         int value = 0;
1596
1597         task_lock(task);
1598
1599         struct task_requested_policy requested = task->requested_policy;
1600
1601         switch (flavor) {
1602         case TASK_POLICY_DARWIN_BG:
1603                 if (category == TASK_POLICY_EXTERNAL) {
1604                         value = requested.trp_ext_darwinbg;
1605                 } else {
1606                         value = requested.trp_int_darwinbg;
1607                 }
1608                 break;
1609         case TASK_POLICY_IOPOL:
1610                 if (category == TASK_POLICY_EXTERNAL) {
1611                         value = proc_tier_to_iopol(requested.trp_ext_iotier,
1612                             requested.trp_ext_iopassive);
1613                 } else {
1614                         value = proc_tier_to_iopol(requested.trp_int_iotier,
1615                             requested.trp_int_iopassive);
1616                 }
1617                 break;
1618         case TASK_POLICY_IO:
1619                 if (category == TASK_POLICY_EXTERNAL) {
1620                         value = requested.trp_ext_iotier;
1621                 } else {
1622                         value = requested.trp_int_iotier;
1623                 }
1624                 break;
1625         case TASK_POLICY_PASSIVE_IO:
1626                 if (category == TASK_POLICY_EXTERNAL) {
1627                         value = requested.trp_ext_iopassive;
1628                 } else {
1629                         value = requested.trp_int_iopassive;
1630                 }
1631                 break;
1632         case TASK_POLICY_DARWIN_BG_IOPOL:
1633                 assert(category == TASK_POLICY_ATTRIBUTE);
1634                 value = proc_tier_to_iopol(requested.trp_bg_iotier, 0);
1635                 break;
1636         case TASK_POLICY_ROLE:
1637                 assert(category == TASK_POLICY_ATTRIBUTE);
1638                 value = requested.trp_role;
1639                 break;
1640         case TASK_POLICY_SFI_MANAGED:
1641                 assert(category == TASK_POLICY_ATTRIBUTE);
1642                 value = requested.trp_sfi_managed;
1643                 break;
1644         default:
1645                 panic("unknown policy_flavor %d", flavor);
1646                 break;
1647         }
1648
1649         task_unlock(task);
1650
1651         return value;
1652 }
1653
1654 /*
1655  * Variant of proc_get_task_policy() that returns two scalar outputs.
1656  */
1657 void
1658 proc_get_task_policy2(task_t task,
1659     __assert_only int category,
1660     int flavor,
1661     int *value1,
1662     int *value2)
1663 {
1664         task_lock(task);
1665
1666         struct task_requested_policy requested = task->requested_policy;
1667
1668         switch (flavor) {
1669         case TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS:
1670                 assert(category == TASK_POLICY_ATTRIBUTE);
1671                 *value1 = requested.trp_base_latency_qos;
1672                 *value2 = requested.trp_base_through_qos;
1673                 break;
1674
1675         case TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS:
1676                 assert(category == TASK_POLICY_ATTRIBUTE);
1677                 *value1 = requested.trp_over_latency_qos;
1678                 *value2 = requested.trp_over_through_qos;
1679                 break;
1680
1681         default:
1682                 panic("unknown policy_flavor %d", flavor);
1683                 break;
1684         }
1685
1686         task_unlock(task);
1687 }
1688
1689 /*
1690  * Function for querying effective state for relevant subsystems
1691  * Gets what is actually in effect, for subsystems which pull policy instead of receive updates.
1692  *
1693  * ONLY the relevant subsystem should query this.
1694  * NEVER take a value from the 'effective' function and stuff it into a setter.
1695  *
1696  * NOTE: This accessor does not take the task lock.
1697  * Notifications of state updates need to be externally synchronized with state queries.
1698  * This routine *MUST* remain interrupt safe, as it is potentially invoked
1699  * within the context of a timer interrupt.  It is also called in KDP context for stackshot.
1700  */
1701 int
1702 proc_get_effective_task_policy(task_t   task,
1703     int      flavor)
1704 {
1705         int value = 0;
1706
1707         switch (flavor) {
1708         case TASK_POLICY_DARWIN_BG:
1709                 /*
1710                  * This backs the KPI call proc_pidbackgrounded to find
1711                  * out if a pid is backgrounded.
1712                  * It is used to communicate state to the VM system, as well as
1713                  * prioritizing requests to the graphics system.
1714                  * Returns 1 for background mode, 0 for normal mode
1715                  */
1716                 value = task->effective_policy.tep_darwinbg;
1717                 break;
1718         case TASK_POLICY_ALL_SOCKETS_BG:
1719                 /*
1720                  * do_background_socket() calls this to determine what it should do to the proc's sockets
1721                  * Returns 1 for background mode, 0 for normal mode
1722                  *
1723                  * This consults both thread and task so un-DBGing a thread while the task is BG
1724                  * doesn't get you out of the network throttle.
1725                  */
1726                 value = task->effective_policy.tep_all_sockets_bg;
1727                 break;
1728         case TASK_POLICY_SUP_ACTIVE:
1729                 /*
1730                  * Is the task in AppNap? This is used to determine the urgency
1731                  * that's passed to the performance management subsystem for threads
1732                  * that are running at a priority <= MAXPRI_THROTTLE.
1733                  */
1734                 value = task->effective_policy.tep_sup_active;
1735                 break;
1736         case TASK_POLICY_LATENCY_QOS:
1737                 /*
1738                  * timer arming calls into here to find out the timer coalescing level
1739                  * Returns a QoS tier (0-6)
1740                  */
1741                 value = task->effective_policy.tep_latency_qos;
1742                 break;
1743         case TASK_POLICY_THROUGH_QOS:
1744                 /*
1745                  * This value is passed into the urgency callout from the scheduler
1746                  * to the performance management subsystem.
1747                  * Returns a QoS tier (0-6)
1748                  */
1749                 value = task->effective_policy.tep_through_qos;
1750                 break;
1751         case TASK_POLICY_ROLE:
1752                 /*
1753                  * This controls various things that ask whether a process is foreground,
1754                  * like SFI, VM, access to GPU, etc
1755                  */
1756                 value = task->effective_policy.tep_role;
1757                 break;
1758         case TASK_POLICY_WATCHERS_BG:
1759                 /*
1760                  * This controls whether or not a thread watching this process should be BG.
1761                  */
1762                 value = task->effective_policy.tep_watchers_bg;
1763                 break;
1764         case TASK_POLICY_SFI_MANAGED:
1765                 /*
1766                  * This controls whether or not a process is targeted for specific control by thermald.
1767                  */
1768                 value = task->effective_policy.tep_sfi_managed;
1769                 break;
1770         default:
1771                 panic("unknown policy_flavor %d", flavor);
1772                 break;
1773         }
1774
1775         return value;
1776 }
1777
1778 /*
1779  * Convert from IOPOL_* values to throttle tiers.
1780  *
1781  * TODO: Can this be made more compact, like an array lookup
1782  * Note that it is possible to support e.g. IOPOL_PASSIVE_STANDARD in the future
1783  */
1784
1785 void
1786 proc_iopol_to_tier(int iopolicy, int *tier, int *passive)
1787 {
1788         *passive = 0;
1789         *tier = 0;
1790         switch (iopolicy) {
1791         case IOPOL_IMPORTANT:
1792                 *tier = THROTTLE_LEVEL_TIER0;
1793                 break;
1794         case IOPOL_PASSIVE:
1795                 *tier = THROTTLE_LEVEL_TIER0;
1796                 *passive = 1;
1797                 break;
1798         case IOPOL_STANDARD:
1799                 *tier = THROTTLE_LEVEL_TIER1;
1800                 break;
1801         case IOPOL_UTILITY:
1802                 *tier = THROTTLE_LEVEL_TIER2;
1803                 break;
1804         case IOPOL_THROTTLE:
1805                 *tier = THROTTLE_LEVEL_TIER3;
1806                 break;
1807         default:
1808                 panic("unknown I/O policy %d", iopolicy);
1809                 break;
1810         }
1811 }
1812
1813 int
1814 proc_tier_to_iopol(int tier, int passive)
1815 {
1816         if (passive == 1) {
1817                 switch (tier) {
1818                 case THROTTLE_LEVEL_TIER0:
1819                         return IOPOL_PASSIVE;
1820                 default:
1821                         panic("unknown passive tier %d", tier);
1822                         return IOPOL_DEFAULT;
1823                 }
1824         } else {
1825                 switch (tier) {
1826                 case THROTTLE_LEVEL_NONE:
1827                 case THROTTLE_LEVEL_TIER0:
1828                         return IOPOL_DEFAULT;
1829                 case THROTTLE_LEVEL_TIER1:
1830                         return IOPOL_STANDARD;
1831                 case THROTTLE_LEVEL_TIER2:
1832                         return IOPOL_UTILITY;
1833                 case THROTTLE_LEVEL_TIER3:
1834                         return IOPOL_THROTTLE;
1835                 default:
1836                         panic("unknown tier %d", tier);
1837                         return IOPOL_DEFAULT;
1838                 }
1839         }
1840 }
1841
1842 int
1843 proc_darwin_role_to_task_role(int darwin_role, int* task_role)
1844 {
1845         integer_t role = TASK_UNSPECIFIED;
1846
1847         switch (darwin_role) {
1848         case PRIO_DARWIN_ROLE_DEFAULT:
1849                 role = TASK_UNSPECIFIED;
1850                 break;
1851         case PRIO_DARWIN_ROLE_UI_FOCAL:
1852                 role = TASK_FOREGROUND_APPLICATION;
1853                 break;
1854         case PRIO_DARWIN_ROLE_UI:
1855                 role = TASK_DEFAULT_APPLICATION;
1856                 break;
1857         case PRIO_DARWIN_ROLE_NON_UI:
1858                 role = TASK_NONUI_APPLICATION;
1859                 break;
1860         case PRIO_DARWIN_ROLE_UI_NON_FOCAL:
1861                 role = TASK_BACKGROUND_APPLICATION;
1862                 break;
1863         case PRIO_DARWIN_ROLE_TAL_LAUNCH:
1864                 role = TASK_THROTTLE_APPLICATION;
1865                 break;
1866         case PRIO_DARWIN_ROLE_DARWIN_BG:
1867                 role = TASK_DARWINBG_APPLICATION;
1868                 break;
1869         default:
1870                 return EINVAL;
1871         }
1872
1873         *task_role = role;
1874
1875         return 0;
1876 }
1877
1878 int
1879 proc_task_role_to_darwin_role(int task_role)
1880 {
1881         switch (task_role) {
1882         case TASK_FOREGROUND_APPLICATION:
1883                 return PRIO_DARWIN_ROLE_UI_FOCAL;
1884         case TASK_BACKGROUND_APPLICATION:
1885                 return PRIO_DARWIN_ROLE_UI_NON_FOCAL;
1886         case TASK_NONUI_APPLICATION:
1887                 return PRIO_DARWIN_ROLE_NON_UI;
1888         case TASK_DEFAULT_APPLICATION:
1889                 return PRIO_DARWIN_ROLE_UI;
1890         case TASK_THROTTLE_APPLICATION:
1891                 return PRIO_DARWIN_ROLE_TAL_LAUNCH;
1892         case TASK_DARWINBG_APPLICATION:
1893                 return PRIO_DARWIN_ROLE_DARWIN_BG;
1894         case TASK_UNSPECIFIED:
1895         default:
1896                 return PRIO_DARWIN_ROLE_DEFAULT;
1897         }
1898 }
1899
1900
1901 /* TODO: remove this variable when interactive daemon audit period is over */
1902 extern boolean_t ipc_importance_interactive_receiver;
1903
1904 /*
1905  * Called at process exec to initialize the apptype, qos clamp, and qos seed of a process
1906  *
1907  * TODO: Make this function more table-driven instead of ad-hoc
1908  */
1909 void
1910 proc_set_task_spawnpolicy(task_t task, int apptype, int qos_clamp, int role,
1911     ipc_port_t * portwatch_ports, int portwatch_count)
1912 {
1913         struct task_pend_token pend_token = {};
1914
1915         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1916             (IMPORTANCE_CODE(IMP_TASK_APPTYPE, apptype)) | DBG_FUNC_START,
1917             task_pid(task), trequested_0(task), trequested_1(task),
1918             apptype, 0);
1919
1920         switch (apptype) {
1921         case TASK_APPTYPE_APP_TAL:
1922         case TASK_APPTYPE_APP_DEFAULT:
1923                 /* Apps become donors via the 'live-donor' flag instead of the static donor flag */
1924                 task_importance_mark_donor(task, FALSE);
1925                 task_importance_mark_live_donor(task, TRUE);
1926                 task_importance_mark_receiver(task, FALSE);
1927 #if CONFIG_EMBEDDED
1928                 task_importance_mark_denap_receiver(task, FALSE);
1929 #else
1930                 /* Apps are de-nap recievers on desktop for suppression behaviors */
1931                 task_importance_mark_denap_receiver(task, TRUE);
1932 #endif /* CONFIG_EMBEDDED */
1933                 break;
1934
1935         case TASK_APPTYPE_DAEMON_INTERACTIVE:
1936                 task_importance_mark_donor(task, TRUE);
1937                 task_importance_mark_live_donor(task, FALSE);
1938
1939                 /*
1940                  * A boot arg controls whether interactive daemons are importance receivers.
1941                  * Normally, they are not.  But for testing their behavior as an adaptive
1942                  * daemon, the boot-arg can be set.
1943                  *
1944                  * TODO: remove this when the interactive daemon audit period is over.
1945                  */
1946                 task_importance_mark_receiver(task, /* FALSE */ ipc_importance_interactive_receiver);
1947                 task_importance_mark_denap_receiver(task, FALSE);
1948                 break;
1949
1950         case TASK_APPTYPE_DAEMON_STANDARD:
1951                 task_importance_mark_donor(task, TRUE);
1952                 task_importance_mark_live_donor(task, FALSE);
1953                 task_importance_mark_receiver(task, FALSE);
1954                 task_importance_mark_denap_receiver(task, FALSE);
1955                 break;
1956
1957         case TASK_APPTYPE_DAEMON_ADAPTIVE:
1958                 task_importance_mark_donor(task, FALSE);
1959                 task_importance_mark_live_donor(task, FALSE);
1960                 task_importance_mark_receiver(task, TRUE);
1961                 task_importance_mark_denap_receiver(task, FALSE);
1962                 break;
1963
1964         case TASK_APPTYPE_DAEMON_BACKGROUND:
1965                 task_importance_mark_donor(task, FALSE);
1966                 task_importance_mark_live_donor(task, FALSE);
1967                 task_importance_mark_receiver(task, FALSE);
1968                 task_importance_mark_denap_receiver(task, FALSE);
1969                 break;
1970
1971         case TASK_APPTYPE_NONE:
1972                 break;
1973         }
1974
1975         if (portwatch_ports != NULL && apptype == TASK_APPTYPE_DAEMON_ADAPTIVE) {
1976                 int portwatch_boosts = 0;
1977
1978                 for (int i = 0; i < portwatch_count; i++) {
1979                         ipc_port_t port = NULL;
1980
1981                         if ((port = portwatch_ports[i]) != NULL) {
1982                                 int boost = 0;
1983                                 task_add_importance_watchport(task, port, &boost);
1984                                 portwatch_boosts += boost;
1985                         }
1986                 }
1987
1988                 if (portwatch_boosts > 0) {
1989                         task_importance_hold_internal_assertion(task, portwatch_boosts);
1990                 }
1991         }
1992
1993         task_lock(task);
1994
1995         if (apptype == TASK_APPTYPE_APP_TAL) {
1996                 /* TAL starts off enabled by default */
1997                 task->requested_policy.trp_tal_enabled = 1;
1998         }
1999
2000         if (apptype != TASK_APPTYPE_NONE) {
2001                 task->requested_policy.trp_apptype = apptype;
2002         }
2003
2004 #if CONFIG_EMBEDDED
2005         /* Remove this after launchd starts setting it properly */
2006         if (apptype == TASK_APPTYPE_APP_DEFAULT && role == TASK_UNSPECIFIED) {
2007                 task->requested_policy.trp_role = TASK_FOREGROUND_APPLICATION;
2008         } else
2009 #endif
2010         if (role != TASK_UNSPECIFIED) {
2011                 task->requested_policy.trp_role = role;
2012         }
2013
2014         if (qos_clamp != THREAD_QOS_UNSPECIFIED) {
2015                 task->requested_policy.trp_qos_clamp = qos_clamp;
2016         }
2017
2018         task_policy_update_locked(task, &pend_token);
2019
2020         task_unlock(task);
2021
2022         /* Ensure the donor bit is updated to be in sync with the new live donor status */
2023         pend_token.tpt_update_live_donor = 1;
2024
2025         task_policy_update_complete_unlocked(task, &pend_token);
2026
2027         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2028             (IMPORTANCE_CODE(IMP_TASK_APPTYPE, apptype)) | DBG_FUNC_END,
2029             task_pid(task), trequested_0(task), trequested_1(task),
2030             task_is_importance_receiver(task), 0);
2031 }
2032
2033 /*
2034  * Inherit task role across exec
2035  */
2036 void
2037 proc_inherit_task_role(task_t new_task,
2038     task_t old_task)
2039 {
2040         int role;
2041
2042         /* inherit the role from old task to new task */
2043         role = proc_get_task_policy(old_task, TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE);
2044         proc_set_task_policy(new_task, TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE, role);
2045 }
2046
2047 extern void *initproc;
2048
2049 /*
2050  * Compute the default main thread qos for a task
2051  */
2052 int
2053 task_compute_main_thread_qos(task_t task)
2054 {
2055         int primordial_qos = THREAD_QOS_UNSPECIFIED;
2056
2057         int qos_clamp = task->requested_policy.trp_qos_clamp;
2058
2059         switch (task->requested_policy.trp_apptype) {
2060         case TASK_APPTYPE_APP_TAL:
2061         case TASK_APPTYPE_APP_DEFAULT:
2062                 primordial_qos = THREAD_QOS_USER_INTERACTIVE;
2063                 break;
2064
2065         case TASK_APPTYPE_DAEMON_INTERACTIVE:
2066         case TASK_APPTYPE_DAEMON_STANDARD:
2067         case TASK_APPTYPE_DAEMON_ADAPTIVE:
2068                 primordial_qos = THREAD_QOS_LEGACY;
2069                 break;
2070
2071         case TASK_APPTYPE_DAEMON_BACKGROUND:
2072                 primordial_qos = THREAD_QOS_BACKGROUND;
2073                 break;
2074         }
2075
2076         if (task->bsd_info == initproc) {
2077                 /* PID 1 gets a special case */
2078                 primordial_qos = MAX(primordial_qos, THREAD_QOS_USER_INITIATED);
2079         }
2080
2081         if (qos_clamp != THREAD_QOS_UNSPECIFIED) {
2082                 if (primordial_qos != THREAD_QOS_UNSPECIFIED) {
2083                         primordial_qos = MIN(qos_clamp, primordial_qos);
2084                 } else {
2085                         primordial_qos = qos_clamp;
2086                 }
2087         }
2088
2089         return primordial_qos;
2090 }
2091
2092
2093 /* for process_policy to check before attempting to set */
2094 boolean_t
2095 proc_task_is_tal(task_t task)
2096 {
2097         return (task->requested_policy.trp_apptype == TASK_APPTYPE_APP_TAL) ? TRUE : FALSE;
2098 }
2099
2100 int
2101 task_get_apptype(task_t task)
2102 {
2103         return task->requested_policy.trp_apptype;
2104 }
2105
2106 boolean_t
2107 task_is_daemon(task_t task)
2108 {
2109         switch (task->requested_policy.trp_apptype) {
2110         case TASK_APPTYPE_DAEMON_INTERACTIVE:
2111         case TASK_APPTYPE_DAEMON_STANDARD:
2112         case TASK_APPTYPE_DAEMON_ADAPTIVE:
2113         case TASK_APPTYPE_DAEMON_BACKGROUND:
2114                 return TRUE;
2115         default:
2116                 return FALSE;
2117         }
2118 }
2119
2120 boolean_t
2121 task_is_app(task_t task)
2122 {
2123         switch (task->requested_policy.trp_apptype) {
2124         case TASK_APPTYPE_APP_DEFAULT:
2125         case TASK_APPTYPE_APP_TAL:
2126                 return TRUE;
2127         default:
2128                 return FALSE;
2129         }
2130 }
2131
2132 /* for telemetry */
2133 integer_t
2134 task_grab_latency_qos(task_t task)
2135 {
2136         return qos_latency_policy_package(proc_get_effective_task_policy(task, TASK_POLICY_LATENCY_QOS));
2137 }
2138
2139 /* update the darwin background action state in the flags field for libproc */
2140 int
2141 proc_get_darwinbgstate(task_t task, uint32_t * flagsp)
2142 {
2143         if (task->requested_policy.trp_ext_darwinbg) {
2144                 *flagsp |= PROC_FLAG_EXT_DARWINBG;
2145         }
2146
2147         if (task->requested_policy.trp_int_darwinbg) {
2148                 *flagsp |= PROC_FLAG_DARWINBG;
2149         }
2150
2151 #if CONFIG_EMBEDDED
2152         if (task->requested_policy.trp_apptype == TASK_APPTYPE_DAEMON_BACKGROUND) {
2153                 *flagsp |= PROC_FLAG_IOS_APPLEDAEMON;
2154         }
2155
2156         if (task->requested_policy.trp_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE) {
2157                 *flagsp |= PROC_FLAG_IOS_IMPPROMOTION;
2158         }
2159 #endif /* CONFIG_EMBEDDED */
2160
2161         if (task->requested_policy.trp_apptype == TASK_APPTYPE_APP_DEFAULT ||
2162             task->requested_policy.trp_apptype == TASK_APPTYPE_APP_TAL) {
2163                 *flagsp |= PROC_FLAG_APPLICATION;
2164         }
2165
2166         if (task->requested_policy.trp_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE) {
2167                 *flagsp |= PROC_FLAG_ADAPTIVE;
2168         }
2169
2170         if (task->requested_policy.trp_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE &&
2171             task->requested_policy.trp_boosted == 1) {
2172                 *flagsp |= PROC_FLAG_ADAPTIVE_IMPORTANT;
2173         }
2174
2175         if (task_is_importance_donor(task)) {
2176                 *flagsp |= PROC_FLAG_IMPORTANCE_DONOR;
2177         }
2178
2179         if (task->effective_policy.tep_sup_active) {
2180                 *flagsp |= PROC_FLAG_SUPPRESSED;
2181         }
2182
2183         return 0;
2184 }
2185
2186 /*
2187  * Tracepoint data... Reading the tracepoint data can be somewhat complicated.
2188  * The current scheme packs as much data into a single tracepoint as it can.
2189  *
2190  * Each task/thread requested/effective structure is 64 bits in size. Any
2191  * given tracepoint will emit either requested or effective data, but not both.
2192  *
2193  * A tracepoint may emit any of task, thread, or task & thread data.
2194  *
 * The type of data emitted varies with pointer size. Where possible, both
 * task and thread data are emitted. On LP32 systems, the first and second
 * halves of either the task or the thread data are emitted.
2198  *
2199  * The code uses uintptr_t array indexes instead of high/low to avoid
2200  * confusion WRT big vs little endian.
2201  *
2202  * The truth table for the tracepoint data functions is below, and has the
2203  * following invariants:
2204  *
2205  * 1) task and thread are uintptr_t*
2206  * 2) task may never be NULL
2207  *
2208  *
2209  *                                     LP32            LP64
2210  * trequested_0(task, NULL)            task[0]         task[0]
2211  * trequested_1(task, NULL)            task[1]         NULL
2212  * trequested_0(task, thread)          thread[0]       task[0]
2213  * trequested_1(task, thread)          thread[1]       thread[0]
2214  *
2215  * Basically, you get a full task or thread on LP32, and both on LP64.
2216  *
2217  * The uintptr_t munging here is squicky enough to deserve a comment.
2218  *
2219  * The variables we are accessing are laid out in memory like this:
2220  *
2221  * [            LP64 uintptr_t  0          ]
2222  * [ LP32 uintptr_t 0 ] [ LP32 uintptr_t 1 ]
2223  *
2224  *      1   2   3   4     5   6   7   8
2225  *
2226  */
2227
2228 static uintptr_t
2229 trequested_0(task_t task)
2230 {
2231         static_assert(sizeof(struct task_requested_policy) == sizeof(uint64_t), "size invariant violated");
2232
2233         uintptr_t* raw = (uintptr_t*)&task->requested_policy;
2234
2235         return raw[0];
2236 }
2237
2238 static uintptr_t
2239 trequested_1(task_t task)
2240 {
2241 #if defined __LP64__
2242         (void)task;
2243         return 0;
2244 #else
2245         uintptr_t* raw = (uintptr_t*)(&task->requested_policy);
2246         return raw[1];
2247 #endif
2248 }
2249
2250 static uintptr_t
2251 teffective_0(task_t task)
2252 {
2253         uintptr_t* raw = (uintptr_t*)&task->effective_policy;
2254
2255         return raw[0];
2256 }
2257
2258 static uintptr_t
2259 teffective_1(task_t task)
2260 {
2261 #if defined __LP64__
2262         (void)task;
2263         return 0;
2264 #else
2265         uintptr_t* raw = (uintptr_t*)(&task->effective_policy);
2266         return raw[1];
2267 #endif
2268 }
2269
2270 /* dump pending for tracepoint */
2271 uint32_t
2272 tpending(task_pend_token_t pend_token)
2273 {
2274         return *(uint32_t*)(void*)(pend_token);
2275 }
2276
2277 uint64_t
2278 task_requested_bitfield(task_t task)
2279 {
2280         uint64_t bits = 0;
2281         struct task_requested_policy requested = task->requested_policy;
2282
2283         bits |= (requested.trp_int_darwinbg     ? POLICY_REQ_INT_DARWIN_BG  : 0);
2284         bits |= (requested.trp_ext_darwinbg     ? POLICY_REQ_EXT_DARWIN_BG  : 0);
2285         bits |= (requested.trp_int_iotier       ? (((uint64_t)requested.trp_int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0);
2286         bits |= (requested.trp_ext_iotier       ? (((uint64_t)requested.trp_ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0);
2287         bits |= (requested.trp_int_iopassive    ? POLICY_REQ_INT_PASSIVE_IO : 0);
2288         bits |= (requested.trp_ext_iopassive    ? POLICY_REQ_EXT_PASSIVE_IO : 0);
2289         bits |= (requested.trp_bg_iotier        ? (((uint64_t)requested.trp_bg_iotier) << POLICY_REQ_BG_IOTIER_SHIFT)   : 0);
2290         bits |= (requested.trp_terminated       ? POLICY_REQ_TERMINATED     : 0);
2291
2292         bits |= (requested.trp_boosted          ? POLICY_REQ_BOOSTED        : 0);
2293         bits |= (requested.trp_tal_enabled      ? POLICY_REQ_TAL_ENABLED    : 0);
2294         bits |= (requested.trp_apptype          ? (((uint64_t)requested.trp_apptype) << POLICY_REQ_APPTYPE_SHIFT)  : 0);
2295         bits |= (requested.trp_role             ? (((uint64_t)requested.trp_role) << POLICY_REQ_ROLE_SHIFT)     : 0);
2296
2297         bits |= (requested.trp_sup_active       ? POLICY_REQ_SUP_ACTIVE         : 0);
2298         bits |= (requested.trp_sup_lowpri_cpu   ? POLICY_REQ_SUP_LOWPRI_CPU     : 0);
2299         bits |= (requested.trp_sup_cpu          ? POLICY_REQ_SUP_CPU            : 0);
2300         bits |= (requested.trp_sup_timer        ? (((uint64_t)requested.trp_sup_timer) << POLICY_REQ_SUP_TIMER_THROTTLE_SHIFT) : 0);
2301         bits |= (requested.trp_sup_throughput   ? (((uint64_t)requested.trp_sup_throughput) << POLICY_REQ_SUP_THROUGHPUT_SHIFT)     : 0);
2302         bits |= (requested.trp_sup_disk         ? POLICY_REQ_SUP_DISK_THROTTLE  : 0);
2303         bits |= (requested.trp_sup_bg_sockets   ? POLICY_REQ_SUP_BG_SOCKETS     : 0);
2304
2305         bits |= (requested.trp_base_latency_qos ? (((uint64_t)requested.trp_base_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0);
2306         bits |= (requested.trp_over_latency_qos ? (((uint64_t)requested.trp_over_latency_qos) << POLICY_REQ_OVER_LATENCY_QOS_SHIFT) : 0);
2307         bits |= (requested.trp_base_through_qos ? (((uint64_t)requested.trp_base_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0);
2308         bits |= (requested.trp_over_through_qos ? (((uint64_t)requested.trp_over_through_qos) << POLICY_REQ_OVER_THROUGH_QOS_SHIFT) : 0);
2309         bits |= (requested.trp_sfi_managed      ? POLICY_REQ_SFI_MANAGED        : 0);
2310         bits |= (requested.trp_qos_clamp        ? (((uint64_t)requested.trp_qos_clamp) << POLICY_REQ_QOS_CLAMP_SHIFT)        : 0);
2311
2312         return bits;
2313 }
2314
2315 uint64_t
2316 task_effective_bitfield(task_t task)
2317 {
2318         uint64_t bits = 0;
2319         struct task_effective_policy effective = task->effective_policy;
2320
2321         bits |= (effective.tep_io_tier          ? (((uint64_t)effective.tep_io_tier) << POLICY_EFF_IO_TIER_SHIFT) : 0);
2322         bits |= (effective.tep_io_passive       ? POLICY_EFF_IO_PASSIVE     : 0);
2323         bits |= (effective.tep_darwinbg         ? POLICY_EFF_DARWIN_BG      : 0);
2324         bits |= (effective.tep_lowpri_cpu       ? POLICY_EFF_LOWPRI_CPU     : 0);
2325         bits |= (effective.tep_terminated       ? POLICY_EFF_TERMINATED     : 0);
2326         bits |= (effective.tep_all_sockets_bg   ? POLICY_EFF_ALL_SOCKETS_BG : 0);
2327         bits |= (effective.tep_new_sockets_bg   ? POLICY_EFF_NEW_SOCKETS_BG : 0);
2328         bits |= (effective.tep_bg_iotier        ? (((uint64_t)effective.tep_bg_iotier) << POLICY_EFF_BG_IOTIER_SHIFT) : 0);
2329         bits |= (effective.tep_qos_ui_is_urgent ? POLICY_EFF_QOS_UI_IS_URGENT : 0);
2330
2331         bits |= (effective.tep_tal_engaged      ? POLICY_EFF_TAL_ENGAGED    : 0);
2332         bits |= (effective.tep_watchers_bg      ? POLICY_EFF_WATCHERS_BG    : 0);
2333         bits |= (effective.tep_sup_active       ? POLICY_EFF_SUP_ACTIVE     : 0);
2334         bits |= (effective.tep_suppressed_cpu   ? POLICY_EFF_SUP_CPU        : 0);
2335         bits |= (effective.tep_role             ? (((uint64_t)effective.tep_role) << POLICY_EFF_ROLE_SHIFT)        : 0);
2336         bits |= (effective.tep_latency_qos      ? (((uint64_t)effective.tep_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0);
2337         bits |= (effective.tep_through_qos      ? (((uint64_t)effective.tep_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0);
2338         bits |= (effective.tep_sfi_managed      ? POLICY_EFF_SFI_MANAGED    : 0);
2339         bits |= (effective.tep_qos_ceiling      ? (((uint64_t)effective.tep_qos_ceiling) << POLICY_EFF_QOS_CEILING_SHIFT) : 0);
2340
2341         return bits;
2342 }
2343
2344
2345 /*
2346  * Resource usage and CPU related routines
2347  */
2348
2349 int
2350 proc_get_task_ruse_cpu(task_t task, uint32_t *policyp, uint8_t *percentagep, uint64_t *intervalp, uint64_t *deadlinep)
2351 {
2352         int error = 0;
2353         int scope;
2354
2355         task_lock(task);
2356
2357
2358         error = task_get_cpuusage(task, percentagep, intervalp, deadlinep, &scope);
2359         task_unlock(task);
2360
2361         /*
2362          * Reverse-map from CPU resource limit scopes back to policies (see comment below).
2363          */
2364         if (scope == TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
2365                 *policyp = TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC;
2366         } else if (scope == TASK_RUSECPU_FLAGS_PROC_LIMIT) {
2367                 *policyp = TASK_POLICY_RESOURCE_ATTRIBUTE_THROTTLE;
2368         } else if (scope == TASK_RUSECPU_FLAGS_DEADLINE) {
2369                 *policyp = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE;
2370         }
2371
2372         return error;
2373 }
2374
2375 /*
2376  * Configure the default CPU usage monitor parameters.
2377  *
2378  * For tasks which have this mechanism activated: if any thread in the
2379  * process consumes more CPU than this, an EXC_RESOURCE exception will be generated.
2380  */
2381 void
2382 proc_init_cpumon_params(void)
2383 {
2384         /*
2385          * The max CPU percentage can be configured via the boot-args and
2386          * a key in the device tree. The boot-args are honored first, then the
2387          * device tree.
2388          */
2389         if (!PE_parse_boot_argn("max_cpumon_percentage", &proc_max_cpumon_percentage,
2390             sizeof(proc_max_cpumon_percentage))) {
2391                 uint64_t max_percentage = 0ULL;
2392
2393                 if (!PE_get_default("kern.max_cpumon_percentage", &max_percentage,
2394                     sizeof(max_percentage))) {
2395                         max_percentage = DEFAULT_CPUMON_PERCENTAGE;
2396                 }
2397
2398                 assert(max_percentage <= UINT8_MAX);
2399                 proc_max_cpumon_percentage = (uint8_t) max_percentage;
2400         }
2401
2402         if (proc_max_cpumon_percentage > 100) {
2403                 proc_max_cpumon_percentage = 100;
2404         }
2405
2406         /*
2407          * The interval should be specified in seconds.
2408          *
2409          * Like the max CPU percentage, the max CPU interval can be configured
2410          * via boot-args and the device tree.
2411          */
2412         if (!PE_parse_boot_argn("max_cpumon_interval", &proc_max_cpumon_interval,
2413             sizeof(proc_max_cpumon_interval))) {
2414                 if (!PE_get_default("kern.max_cpumon_interval", &proc_max_cpumon_interval,
2415                     sizeof(proc_max_cpumon_interval))) {
2416                         proc_max_cpumon_interval = DEFAULT_CPUMON_INTERVAL;
2417                 }
2418         }
2419
2420         proc_max_cpumon_interval *= NSEC_PER_SEC;
2421
2422         /* TEMPORARY boot arg to control App suppression */
2423         PE_parse_boot_argn("task_policy_suppression_flags",
2424             &task_policy_suppression_flags,
2425             sizeof(task_policy_suppression_flags));
2426
2427         /* adjust suppression disk policy if called for in boot arg */
2428         if (task_policy_suppression_flags & TASK_POLICY_SUPPRESSION_IOTIER2) {
2429                 proc_suppressed_disk_tier = THROTTLE_LEVEL_TIER2;
2430         }
2431 }
2432
2433 /*
2434  * Currently supported configurations for CPU limits.
2435  *
2436  * Policy                               | Deadline-based CPU limit | Percentage-based CPU limit
2437  * -------------------------------------+--------------------------+------------------------------
2438  * PROC_POLICY_RSRCACT_THROTTLE         | ENOTSUP                  | Task-wide scope only
2439  * PROC_POLICY_RSRCACT_SUSPEND          | Task-wide scope only     | ENOTSUP
2440  * PROC_POLICY_RSRCACT_TERMINATE        | Task-wide scope only     | ENOTSUP
2441  * PROC_POLICY_RSRCACT_NOTIFY_KQ        | Task-wide scope only     | ENOTSUP
2442  * PROC_POLICY_RSRCACT_NOTIFY_EXC       | ENOTSUP                  | Per-thread scope only
2443  *
2444  * A deadline-based CPU limit is actually a simple wallclock timer - the requested action is performed
2445  * after the specified amount of wallclock time has elapsed.
2446  *
2447  * A percentage-based CPU limit performs the requested action after the specified amount of actual CPU time
2448  * has been consumed -- regardless of how much wallclock time has elapsed -- by either the task as an
2449  * aggregate entity (so-called "Task-wide" or "Proc-wide" scope, whereby the CPU time consumed by all threads
2450  * in the task are added together), or by any one thread in the task (so-called "per-thread" scope).
2451  *
2452  * We support either deadline != 0 OR percentage != 0, but not both. The original intention in having them
2453  * share an API was to use actual CPU time as the basis of the deadline-based limit (as in: perform an action
2454  * after I have used some amount of CPU time; this is different than the recurring percentage/interval model)
2455  * but the potential consumer of the API at the time was insisting on wallclock time instead.
2456  *
2457  * Currently, requesting notification via an exception is the only way to get per-thread scope for a
2458  * CPU limit. All other types of notifications force task-wide scope for the limit.
2459  */
2460 int
2461 proc_set_task_ruse_cpu(task_t task, uint32_t policy, uint8_t percentage, uint64_t interval, uint64_t deadline,
2462     int cpumon_entitled)
2463 {
2464         int error = 0;
2465         int scope;
2466
2467         /*
2468          * Enforce the matrix of supported configurations for policy, percentage, and deadline.
2469          */
2470         switch (policy) {
2471         // If no policy is explicitly given, the default is to throttle.
2472         case TASK_POLICY_RESOURCE_ATTRIBUTE_NONE:
2473         case TASK_POLICY_RESOURCE_ATTRIBUTE_THROTTLE:
2474                 if (deadline != 0) {
2475                         return ENOTSUP;
2476                 }
2477                 scope = TASK_RUSECPU_FLAGS_PROC_LIMIT;
2478                 break;
2479         case TASK_POLICY_RESOURCE_ATTRIBUTE_SUSPEND:
2480         case TASK_POLICY_RESOURCE_ATTRIBUTE_TERMINATE:
2481         case TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_KQ:
2482                 if (percentage != 0) {
2483                         return ENOTSUP;
2484                 }
2485                 scope = TASK_RUSECPU_FLAGS_DEADLINE;
2486                 break;
2487         case TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC:
2488                 if (deadline != 0) {
2489                         return ENOTSUP;
2490                 }
2491                 scope = TASK_RUSECPU_FLAGS_PERTHR_LIMIT;
2492 #ifdef CONFIG_NOMONITORS
2493                 return error;
2494 #endif /* CONFIG_NOMONITORS */
2495                 break;
2496         default:
2497                 return EINVAL;
2498         }
2499
2500         task_lock(task);
2501         if (task != current_task()) {
2502                 task->policy_ru_cpu_ext = policy;
2503         } else {
2504                 task->policy_ru_cpu = policy;
2505         }
2506         error = task_set_cpuusage(task, percentage, interval, deadline, scope, cpumon_entitled);
2507         task_unlock(task);
2508         return error;
2509 }
2510
2511 /* TODO: get rid of these */
2512 #define TASK_POLICY_CPU_RESOURCE_USAGE          0
2513 #define TASK_POLICY_WIREDMEM_RESOURCE_USAGE     1
2514 #define TASK_POLICY_VIRTUALMEM_RESOURCE_USAGE   2
2515 #define TASK_POLICY_DISK_RESOURCE_USAGE         3
2516 #define TASK_POLICY_NETWORK_RESOURCE_USAGE      4
2517 #define TASK_POLICY_POWER_RESOURCE_USAGE        5
2518
2519 #define TASK_POLICY_RESOURCE_USAGE_COUNT        6
2520
2521 int
2522 proc_clear_task_ruse_cpu(task_t task, int cpumon_entitled)
2523 {
2524         int error = 0;
2525         int action;
2526         void * bsdinfo = NULL;
2527
2528         task_lock(task);
2529         if (task != current_task()) {
2530                 task->policy_ru_cpu_ext = TASK_POLICY_RESOURCE_ATTRIBUTE_DEFAULT;
2531         } else {
2532                 task->policy_ru_cpu = TASK_POLICY_RESOURCE_ATTRIBUTE_DEFAULT;
2533         }
2534
2535         error = task_clear_cpuusage_locked(task, cpumon_entitled);
2536         if (error != 0) {
2537                 goto out;
2538         }
2539
2540         action = task->applied_ru_cpu;
2541         if (task->applied_ru_cpu_ext != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
2542                 /* reset action */
2543                 task->applied_ru_cpu_ext = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE;
2544         }
2545         if (action != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
2546                 bsdinfo = task->bsd_info;
2547                 task_unlock(task);
2548                 proc_restore_resource_actions(bsdinfo, TASK_POLICY_CPU_RESOURCE_USAGE, action);
2549                 goto out1;
2550         }
2551
2552 out:
2553         task_unlock(task);
2554 out1:
2555         return error;
2556 }
2557
2558 /* used to apply resource limit related actions */
2559 static int
2560 task_apply_resource_actions(task_t task, int type)
2561 {
2562         int action = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE;
2563         void * bsdinfo = NULL;
2564
2565         switch (type) {
2566         case TASK_POLICY_CPU_RESOURCE_USAGE:
2567                 break;
2568         case TASK_POLICY_WIREDMEM_RESOURCE_USAGE:
2569         case TASK_POLICY_VIRTUALMEM_RESOURCE_USAGE:
2570         case TASK_POLICY_DISK_RESOURCE_USAGE:
2571         case TASK_POLICY_NETWORK_RESOURCE_USAGE:
2572         case TASK_POLICY_POWER_RESOURCE_USAGE:
2573                 return 0;
2574
2575         default:
2576                 return 1;
2577         }
2578         ;
2579
2580         /* only cpu actions for now */
2581         task_lock(task);
2582
2583         if (task->applied_ru_cpu_ext == TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
2584                 /* apply action */
2585                 task->applied_ru_cpu_ext = task->policy_ru_cpu_ext;
2586                 action = task->applied_ru_cpu_ext;
2587         } else {
2588                 action = task->applied_ru_cpu_ext;
2589         }
2590
2591         if (action != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
2592                 bsdinfo = task->bsd_info;
2593                 task_unlock(task);
2594                 proc_apply_resource_actions(bsdinfo, TASK_POLICY_CPU_RESOURCE_USAGE, action);
2595         } else {
2596                 task_unlock(task);
2597         }
2598
2599         return 0;
2600 }
2601
2602 /*
2603  * XXX This API is somewhat broken; we support multiple simultaneous CPU limits, but the get/set API
2604  * only allows for one at a time. This means that if there is a per-thread limit active, the other
2605  * "scopes" will not be accessible via this API. We could change it to pass in the scope of interest
2606  * to the caller, and prefer that, but there's no need for that at the moment.
2607  */
2608 static int
2609 task_get_cpuusage(task_t task, uint8_t *percentagep, uint64_t *intervalp, uint64_t *deadlinep, int *scope)
2610 {
2611         *percentagep = 0;
2612         *intervalp = 0;
2613         *deadlinep = 0;
2614
2615         if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) != 0) {
2616                 *scope = TASK_RUSECPU_FLAGS_PERTHR_LIMIT;
2617                 *percentagep = task->rusage_cpu_perthr_percentage;
2618                 *intervalp = task->rusage_cpu_perthr_interval;
2619         } else if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PROC_LIMIT) != 0) {
2620                 *scope = TASK_RUSECPU_FLAGS_PROC_LIMIT;
2621                 *percentagep = task->rusage_cpu_percentage;
2622                 *intervalp = task->rusage_cpu_interval;
2623         } else if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_DEADLINE) != 0) {
2624                 *scope = TASK_RUSECPU_FLAGS_DEADLINE;
2625                 *deadlinep = task->rusage_cpu_deadline;
2626         } else {
2627                 *scope = 0;
2628         }
2629
2630         return 0;
2631 }
2632
2633 /*
2634  * Suspend the CPU usage monitor for the task.  Return value indicates
2635  * if the mechanism was actually enabled.
2636  */
2637 int
2638 task_suspend_cpumon(task_t task)
2639 {
2640         thread_t thread;
2641
2642         task_lock_assert_owned(task);
2643
2644         if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) == 0) {
2645                 return KERN_INVALID_ARGUMENT;
2646         }
2647
2648 #if CONFIG_TELEMETRY
2649         /*
2650          * Disable task-wide telemetry if it was ever enabled by the CPU usage
2651          * monitor's warning zone.
2652          */
2653         telemetry_task_ctl_locked(task, TF_CPUMON_WARNING, 0);
2654 #endif
2655
2656         /*
2657          * Suspend monitoring for the task, and propagate that change to each thread.
2658          */
2659         task->rusage_cpu_flags &= ~(TASK_RUSECPU_FLAGS_PERTHR_LIMIT | TASK_RUSECPU_FLAGS_FATAL_CPUMON);
2660         queue_iterate(&task->threads, thread, thread_t, task_threads) {
2661                 act_set_astledger(thread);
2662         }
2663
2664         return KERN_SUCCESS;
2665 }
2666
2667 /*
2668  * Remove all traces of the CPU monitor.
2669  */
2670 int
2671 task_disable_cpumon(task_t task)
2672 {
2673         int kret;
2674
2675         task_lock_assert_owned(task);
2676
2677         kret = task_suspend_cpumon(task);
2678         if (kret) {
2679                 return kret;
2680         }
2681
2682         /* Once we clear these values, the monitor can't be resumed */
2683         task->rusage_cpu_perthr_percentage = 0;
2684         task->rusage_cpu_perthr_interval = 0;
2685
2686         return KERN_SUCCESS;
2687 }
2688
2689
2690 static int
2691 task_enable_cpumon_locked(task_t task)
2692 {
2693         thread_t thread;
2694         task_lock_assert_owned(task);
2695
2696         if (task->rusage_cpu_perthr_percentage == 0 ||
2697             task->rusage_cpu_perthr_interval == 0) {
2698                 return KERN_INVALID_ARGUMENT;
2699         }
2700
2701         task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_PERTHR_LIMIT;
2702         queue_iterate(&task->threads, thread, thread_t, task_threads) {
2703                 act_set_astledger(thread);
2704         }
2705
2706         return KERN_SUCCESS;
2707 }
2708
2709 int
2710 task_resume_cpumon(task_t task)
2711 {
2712         kern_return_t kret;
2713
2714         if (!task) {
2715                 return EINVAL;
2716         }
2717
2718         task_lock(task);
2719         kret = task_enable_cpumon_locked(task);
2720         task_unlock(task);
2721
2722         return kret;
2723 }
2724
2725
2726 /* duplicate values from bsd/sys/process_policy.h */
2727 #define PROC_POLICY_CPUMON_DISABLE      0xFF
2728 #define PROC_POLICY_CPUMON_DEFAULTS     0xFE
2729
2730 static int
2731 task_set_cpuusage(task_t task, uint8_t percentage, uint64_t interval, uint64_t deadline, int scope, int cpumon_entitled)
2732 {
2733         uint64_t abstime = 0;
2734         uint64_t limittime = 0;
2735
2736         lck_mtx_assert(&task->lock, LCK_MTX_ASSERT_OWNED);
2737
2738         /* By default, refill once per second */
2739         if (interval == 0) {
2740                 interval = NSEC_PER_SEC;
2741         }
2742
2743         if (percentage != 0) {
2744                 if (scope == TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
2745                         boolean_t warn = FALSE;
2746
2747                         /*
2748                          * A per-thread CPU limit on a task generates an exception
2749                          * (LEDGER_ACTION_EXCEPTION) if any one thread in the task
2750                          * exceeds the limit.
2751                          */
2752
2753                         if (percentage == PROC_POLICY_CPUMON_DISABLE) {
2754                                 if (cpumon_entitled) {
2755                                         /* 25095698 - task_disable_cpumon() should be reliable */
2756                                         task_disable_cpumon(task);
2757                                         return 0;
2758                                 }
2759
2760                                 /*
2761                                  * This task wishes to disable the CPU usage monitor, but it's
2762                                  * missing the required entitlement:
2763                                  *     com.apple.private.kernel.override-cpumon
2764                                  *
2765                                  * Instead, treat this as a request to reset its params
2766                                  * back to the defaults.
2767                                  */
2768                                 warn = TRUE;
2769                                 percentage = PROC_POLICY_CPUMON_DEFAULTS;
2770                         }
2771
2772                         if (percentage == PROC_POLICY_CPUMON_DEFAULTS) {
2773                                 percentage = proc_max_cpumon_percentage;
2774                                 interval   = proc_max_cpumon_interval;
2775                         }
2776
2777                         if (percentage > 100) {
2778                                 percentage = 100;
2779                         }
2780
2781                         /*
2782                          * Passing in an interval of -1 means either:
2783                          * - Leave the interval as-is, if there's already a per-thread
2784                          *   limit configured
2785                          * - Use the system default.
2786                          */
2787                         if (interval == -1ULL) {
2788                                 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
2789                                         interval = task->rusage_cpu_perthr_interval;
2790                                 } else {
2791                                         interval = proc_max_cpumon_interval;
2792                                 }
2793                         }
2794
2795                         /*
2796                          * Enforce global caps on CPU usage monitor here if the process is not
2797                          * entitled to escape the global caps.
2798                          */
2799                         if ((percentage > proc_max_cpumon_percentage) && (cpumon_entitled == 0)) {
2800                                 warn = TRUE;
2801                                 percentage = proc_max_cpumon_percentage;
2802                         }
2803
2804                         if ((interval > proc_max_cpumon_interval) && (cpumon_entitled == 0)) {
2805                                 warn = TRUE;
2806                                 interval = proc_max_cpumon_interval;
2807                         }
2808
2809                         if (warn) {
2810                                 int       pid = 0;
2811                                 const char *procname = "unknown";
2812
2813 #ifdef MACH_BSD
2814                                 pid = proc_selfpid();
2815                                 if (current_task()->bsd_info != NULL) {
2816                                         procname = proc_name_address(current_task()->bsd_info);
2817                                 }
2818 #endif
2819
2820                                 printf("process %s[%d] denied attempt to escape CPU monitor"
2821                                     " (missing required entitlement).\n", procname, pid);
2822                         }
2823
2824                         /* configure the limit values */
2825                         task->rusage_cpu_perthr_percentage = percentage;
2826                         task->rusage_cpu_perthr_interval = interval;
2827
2828                         /* and enable the CPU monitor */
2829                         (void)task_enable_cpumon_locked(task);
2830                 } else if (scope == TASK_RUSECPU_FLAGS_PROC_LIMIT) {
2831                         /*
2832                          * Currently, a proc-wide CPU limit always blocks if the limit is
2833                          * exceeded (LEDGER_ACTION_BLOCK).
2834                          */
2835                         task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_PROC_LIMIT;
2836                         task->rusage_cpu_percentage = percentage;
2837                         task->rusage_cpu_interval = interval;
2838
2839                         limittime = (interval * percentage) / 100;
2840                         nanoseconds_to_absolutetime(limittime, &abstime);
2841
2842                         ledger_set_limit(task->ledger, task_ledgers.cpu_time, abstime, 0);
2843                         ledger_set_period(task->ledger, task_ledgers.cpu_time, interval);
2844                         ledger_set_action(task->ledger, task_ledgers.cpu_time, LEDGER_ACTION_BLOCK);
2845                 }
2846         }
2847
2848         if (deadline != 0) {
2849                 assert(scope == TASK_RUSECPU_FLAGS_DEADLINE);
2850
2851                 /* if already in use, cancel and wait for it to cleanout */
2852                 if (task->rusage_cpu_callt != NULL) {
2853                         task_unlock(task);
2854                         thread_call_cancel_wait(task->rusage_cpu_callt);
2855                         task_lock(task);
2856                 }
2857                 if (task->rusage_cpu_callt == NULL) {
2858                         task->rusage_cpu_callt = thread_call_allocate_with_priority(task_action_cpuusage, (thread_call_param_t)task, THREAD_CALL_PRIORITY_KERNEL);
2859                 }
2860                 /* setup callout */
2861                 if (task->rusage_cpu_callt != 0) {
2862                         uint64_t save_abstime = 0;
2863
2864                         task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_DEADLINE;
2865                         task->rusage_cpu_deadline = deadline;
2866
2867                         nanoseconds_to_absolutetime(deadline, &abstime);
2868                         save_abstime = abstime;
2869                         clock_absolutetime_interval_to_deadline(save_abstime, &abstime);
2870                         thread_call_enter_delayed(task->rusage_cpu_callt, abstime);
2871                 }
2872         }
2873
2874         return 0;
2875 }
2876
2877 int
2878 task_clear_cpuusage(task_t task, int cpumon_entitled)
2879 {
2880         int retval = 0;
2881
2882         task_lock(task);
2883         retval = task_clear_cpuusage_locked(task, cpumon_entitled);
2884         task_unlock(task);
2885
2886         return retval;
2887 }
2888
2889 static int
2890 task_clear_cpuusage_locked(task_t task, int cpumon_entitled)
2891 {
2892         thread_call_t savecallt;
2893
2894         /* cancel percentage handling if set */
2895         if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PROC_LIMIT) {
2896                 task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_PROC_LIMIT;
2897                 ledger_set_limit(task->ledger, task_ledgers.cpu_time, LEDGER_LIMIT_INFINITY, 0);
2898                 task->rusage_cpu_percentage = 0;
2899                 task->rusage_cpu_interval = 0;
2900         }
2901
2902         /*
2903          * Disable the CPU usage monitor.
2904          */
2905         if (cpumon_entitled) {
2906                 task_disable_cpumon(task);
2907         }
2908
2909         /* cancel deadline handling if set */
2910         if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_DEADLINE) {
2911                 task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_DEADLINE;
2912                 if (task->rusage_cpu_callt != 0) {
2913                         savecallt = task->rusage_cpu_callt;
2914                         task->rusage_cpu_callt = NULL;
2915                         task->rusage_cpu_deadline = 0;
2916                         task_unlock(task);
2917                         thread_call_cancel_wait(savecallt);
2918                         thread_call_free(savecallt);
2919                         task_lock(task);
2920                 }
2921         }
2922         return 0;
2923 }
2924
2925 /* called by ledger unit to enforce action due to resource usage criteria being met */
2926 static void
2927 task_action_cpuusage(thread_call_param_t param0, __unused thread_call_param_t param1)
2928 {
2929         task_t task = (task_t)param0;
2930         (void)task_apply_resource_actions(task, TASK_POLICY_CPU_RESOURCE_USAGE);
2931         return;
2932 }
2933
2934
2935 /*
2936  * Routines for taskwatch and pidbind
2937  */
2938
2939 #if CONFIG_EMBEDDED
2940
2941 lck_mtx_t       task_watch_mtx;
2942
2943 void
2944 task_watch_init(void)
2945 {
2946         lck_mtx_init(&task_watch_mtx, &task_lck_grp, &task_lck_attr);
2947 }
2948
2949 static void
2950 task_watch_lock(void)
2951 {
2952         lck_mtx_lock(&task_watch_mtx);
2953 }
2954
2955 static void
2956 task_watch_unlock(void)
2957 {
2958         lck_mtx_unlock(&task_watch_mtx);
2959 }
2960
2961 static void
2962 add_taskwatch_locked(task_t task, task_watch_t * twp)
2963 {
2964         queue_enter(&task->task_watchers, twp, task_watch_t *, tw_links);
2965         task->num_taskwatchers++;
2966 }
2967
2968 static void
2969 remove_taskwatch_locked(task_t task, task_watch_t * twp)
2970 {
2971         queue_remove(&task->task_watchers, twp, task_watch_t *, tw_links);
2972         task->num_taskwatchers--;
2973 }
2974
2975
2976 int
2977 proc_lf_pidbind(task_t curtask, uint64_t tid, task_t target_task, int bind)
2978 {
2979         thread_t target_thread = NULL;
2980         int ret = 0, setbg = 0;
2981         task_watch_t *twp = NULL;
2982         task_t task = TASK_NULL;
2983
2984         target_thread = task_findtid(curtask, tid);
2985         if (target_thread == NULL) {
2986                 return ESRCH;
2987         }
2988         /* holds thread reference */
2989
2990         if (bind != 0) {
2991                 /* task is still active ? */
2992                 task_lock(target_task);
2993                 if (target_task->active == 0) {
2994                         task_unlock(target_task);
2995                         ret = ESRCH;
2996                         goto out;
2997                 }
2998                 task_unlock(target_task);
2999
3000                 twp = (task_watch_t *)kalloc(sizeof(task_watch_t));
3001                 if (twp == NULL) {
3002                         task_watch_unlock();
3003                         ret = ENOMEM;
3004                         goto out;
3005                 }
3006
3007                 bzero(twp, sizeof(task_watch_t));
3008
3009                 task_watch_lock();
3010
3011                 if (target_thread->taskwatch != NULL) {
3012                         /* already bound to another task */
3013                         task_watch_unlock();
3014
3015                         kfree(twp, sizeof(task_watch_t));
3016                         ret = EBUSY;
3017                         goto out;
3018                 }
3019
3020                 task_reference(target_task);
3021
3022                 setbg = proc_get_effective_task_policy(target_task, TASK_POLICY_WATCHERS_BG);
3023
3024                 twp->tw_task = target_task;             /* holds the task reference */
3025                 twp->tw_thread = target_thread;         /* holds the thread reference */
3026                 twp->tw_state = setbg;
3027                 twp->tw_importance = target_thread->importance;
3028
3029                 add_taskwatch_locked(target_task, twp);
3030
3031                 target_thread->taskwatch = twp;
3032
3033                 task_watch_unlock();
3034
3035                 if (setbg) {
3036                         set_thread_appbg(target_thread, setbg, INT_MIN);
3037                 }
3038
3039                 /* retain the thread reference as it is in twp */
3040                 target_thread = NULL;
3041         } else {
3042                 /* unbind */
3043                 task_watch_lock();
3044                 if ((twp = target_thread->taskwatch) != NULL) {
3045                         task = twp->tw_task;
3046                         target_thread->taskwatch = NULL;
3047                         remove_taskwatch_locked(task, twp);
3048
3049                         task_watch_unlock();
3050
3051                         task_deallocate(task);                  /* drop task ref in twp */
3052                         set_thread_appbg(target_thread, 0, twp->tw_importance);
3053                         thread_deallocate(target_thread);       /* drop thread ref in twp */
3054                         kfree(twp, sizeof(task_watch_t));
3055                 } else {
3056                         task_watch_unlock();
3057                         ret = 0;                /* return success if it not alredy bound */
3058                         goto out;
3059                 }
3060         }
3061 out:
3062         thread_deallocate(target_thread);       /* drop thread ref acquired in this routine */
3063         return ret;
3064 }
3065
3066 static void
3067 set_thread_appbg(thread_t thread, int setbg, __unused int importance)
3068 {
3069         int enable = (setbg ? TASK_POLICY_ENABLE : TASK_POLICY_DISABLE);
3070
3071         proc_set_thread_policy(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_PIDBIND_BG, enable);
3072 }
3073
3074 static void
3075 apply_appstate_watchers(task_t task)
3076 {
3077         int numwatchers = 0, i, j, setbg;
3078         thread_watchlist_t * threadlist;
3079         task_watch_t * twp;
3080
3081 retry:
3082         /* if no watchers on the list return */
3083         if ((numwatchers = task->num_taskwatchers) == 0) {
3084                 return;
3085         }
3086
3087         threadlist = (thread_watchlist_t *)kalloc(numwatchers * sizeof(thread_watchlist_t));
3088         if (threadlist == NULL) {
3089                 return;
3090         }
3091
3092         bzero(threadlist, numwatchers * sizeof(thread_watchlist_t));
3093
3094         task_watch_lock();
3095         /*serialize application of app state changes */
3096
3097         if (task->watchapplying != 0) {
3098                 lck_mtx_sleep(&task_watch_mtx, LCK_SLEEP_DEFAULT, &task->watchapplying, THREAD_UNINT);
3099                 task_watch_unlock();
3100                 kfree(threadlist, numwatchers * sizeof(thread_watchlist_t));
3101                 goto retry;
3102         }
3103
3104         if (numwatchers != task->num_taskwatchers) {
3105                 task_watch_unlock();
3106                 kfree(threadlist, numwatchers * sizeof(thread_watchlist_t));
3107                 goto retry;
3108         }
3109
3110         setbg = proc_get_effective_task_policy(task, TASK_POLICY_WATCHERS_BG);
3111
3112         task->watchapplying = 1;
3113         i = 0;
3114         queue_iterate(&task->task_watchers, twp, task_watch_t *, tw_links) {
3115                 threadlist[i].thread = twp->tw_thread;
3116                 thread_reference(threadlist[i].thread);
3117                 if (setbg != 0) {
3118                         twp->tw_importance = twp->tw_thread->importance;
3119                         threadlist[i].importance = INT_MIN;
3120                 } else {
3121                         threadlist[i].importance = twp->tw_importance;
3122                 }
3123                 i++;
3124                 if (i > numwatchers) {
3125                         break;
3126                 }
3127         }
3128
3129         task_watch_unlock();
3130
3131         for (j = 0; j < i; j++) {
3132                 set_thread_appbg(threadlist[j].thread, setbg, threadlist[j].importance);
3133                 thread_deallocate(threadlist[j].thread);
3134         }
3135         kfree(threadlist, numwatchers * sizeof(thread_watchlist_t));
3136
3137
3138         task_watch_lock();
3139         task->watchapplying = 0;
3140         thread_wakeup_one(&task->watchapplying);
3141         task_watch_unlock();
3142 }
3143
3144 void
3145 thead_remove_taskwatch(thread_t thread)
3146 {
3147         task_watch_t * twp;
3148         int importance = 0;
3149
3150         task_watch_lock();
3151         if ((twp = thread->taskwatch) != NULL) {
3152                 thread->taskwatch = NULL;
3153                 remove_taskwatch_locked(twp->tw_task, twp);
3154         }
3155         task_watch_unlock();
3156         if (twp != NULL) {
3157                 thread_deallocate(twp->tw_thread);
3158                 task_deallocate(twp->tw_task);
3159                 importance = twp->tw_importance;
3160                 kfree(twp, sizeof(task_watch_t));
3161                 /* remove the thread and networkbg */
3162                 set_thread_appbg(thread, 0, importance);
3163         }
3164 }
3165
3166 void
3167 task_removewatchers(task_t task)
3168 {
3169         int numwatchers = 0, i, j;
3170         task_watch_t ** twplist = NULL;
3171         task_watch_t * twp = NULL;
3172
3173 retry:
3174         if ((numwatchers = task->num_taskwatchers) == 0) {
3175                 return;
3176         }
3177
3178         twplist = (task_watch_t **)kalloc(numwatchers * sizeof(task_watch_t *));
3179         if (twplist == NULL) {
3180                 return;
3181         }
3182
3183         bzero(twplist, numwatchers * sizeof(task_watch_t *));
3184
3185         task_watch_lock();
3186         if (task->num_taskwatchers == 0) {
3187                 task_watch_unlock();
3188                 goto out;
3189         }
3190
3191         if (numwatchers != task->num_taskwatchers) {
3192                 task_watch_unlock();
3193                 kfree(twplist, numwatchers * sizeof(task_watch_t *));
3194                 numwatchers = 0;
3195                 goto retry;
3196         }
3197
3198         i = 0;
3199         while ((twp = (task_watch_t *)dequeue_head(&task->task_watchers)) != NULL) {
3200                 twplist[i] = twp;
3201                 task->num_taskwatchers--;
3202
3203                 /*
3204                  * Since the linkage is removed and thead state cleanup is already set up,
3205                  * remove the refernce from the thread.
3206                  */
3207                 twp->tw_thread->taskwatch = NULL;       /* removed linkage, clear thread holding ref */
3208                 i++;
3209                 if ((task->num_taskwatchers == 0) || (i > numwatchers)) {
3210                         break;
3211                 }
3212         }
3213
3214         task_watch_unlock();
3215
3216         for (j = 0; j < i; j++) {
3217                 twp = twplist[j];
3218                 /* remove thread and network bg */
3219                 set_thread_appbg(twp->tw_thread, 0, twp->tw_importance);
3220                 thread_deallocate(twp->tw_thread);
3221                 task_deallocate(twp->tw_task);
3222                 kfree(twp, sizeof(task_watch_t));
3223         }
3224
3225 out:
3226         kfree(twplist, numwatchers * sizeof(task_watch_t *));
3227 }
3228 #endif /* CONFIG_EMBEDDED */
3229
3230 /*
3231  * Routines for importance donation/inheritance/boosting
3232  */
3233
3234 static void
3235 task_importance_update_live_donor(task_t target_task)
3236 {
3237 #if IMPORTANCE_INHERITANCE
3238
3239         ipc_importance_task_t task_imp;
3240
3241         task_imp = ipc_importance_for_task(target_task, FALSE);
3242         if (IIT_NULL != task_imp) {
3243                 ipc_importance_task_update_live_donor(task_imp);
3244                 ipc_importance_task_release(task_imp);
3245         }
3246 #endif /* IMPORTANCE_INHERITANCE */
3247 }
3248
3249 void
3250 task_importance_mark_donor(task_t task, boolean_t donating)
3251 {
3252 #if IMPORTANCE_INHERITANCE
3253         ipc_importance_task_t task_imp;
3254
3255         task_imp = ipc_importance_for_task(task, FALSE);
3256         if (IIT_NULL != task_imp) {
3257                 ipc_importance_task_mark_donor(task_imp, donating);
3258                 ipc_importance_task_release(task_imp);
3259         }
3260 #endif /* IMPORTANCE_INHERITANCE */
3261 }
3262
3263 void
3264 task_importance_mark_live_donor(task_t task, boolean_t live_donating)
3265 {
3266 #if IMPORTANCE_INHERITANCE
3267         ipc_importance_task_t task_imp;
3268
3269         task_imp = ipc_importance_for_task(task, FALSE);
3270         if (IIT_NULL != task_imp) {
3271                 ipc_importance_task_mark_live_donor(task_imp, live_donating);
3272                 ipc_importance_task_release(task_imp);
3273         }
3274 #endif /* IMPORTANCE_INHERITANCE */
3275 }
3276
3277 void
3278 task_importance_mark_receiver(task_t task, boolean_t receiving)
3279 {
3280 #if IMPORTANCE_INHERITANCE
3281         ipc_importance_task_t task_imp;
3282
3283         task_imp = ipc_importance_for_task(task, FALSE);
3284         if (IIT_NULL != task_imp) {
3285                 ipc_importance_task_mark_receiver(task_imp, receiving);
3286                 ipc_importance_task_release(task_imp);
3287         }
3288 #endif /* IMPORTANCE_INHERITANCE */
3289 }
3290
3291 void
3292 task_importance_mark_denap_receiver(task_t task, boolean_t denap)
3293 {
3294 #if IMPORTANCE_INHERITANCE
3295         ipc_importance_task_t task_imp;
3296
3297         task_imp = ipc_importance_for_task(task, FALSE);
3298         if (IIT_NULL != task_imp) {
3299                 ipc_importance_task_mark_denap_receiver(task_imp, denap);
3300                 ipc_importance_task_release(task_imp);
3301         }
3302 #endif /* IMPORTANCE_INHERITANCE */
3303 }
3304
3305 void
3306 task_importance_reset(__imp_only task_t task)
3307 {
3308 #if IMPORTANCE_INHERITANCE
3309         ipc_importance_task_t task_imp;
3310
3311         /* TODO: Lower importance downstream before disconnect */
3312         task_imp = task->task_imp_base;
3313         ipc_importance_reset(task_imp, FALSE);
3314         task_importance_update_live_donor(task);
3315 #endif /* IMPORTANCE_INHERITANCE */
3316 }
3317
3318 void
3319 task_importance_init_from_parent(__imp_only task_t new_task, __imp_only task_t parent_task)
3320 {
3321 #if IMPORTANCE_INHERITANCE
3322         ipc_importance_task_t new_task_imp = IIT_NULL;
3323
3324         new_task->task_imp_base = NULL;
3325         if (!parent_task) {
3326                 return;
3327         }
3328
3329         if (task_is_marked_importance_donor(parent_task)) {
3330                 new_task_imp = ipc_importance_for_task(new_task, FALSE);
3331                 assert(IIT_NULL != new_task_imp);
3332                 ipc_importance_task_mark_donor(new_task_imp, TRUE);
3333         }
3334         if (task_is_marked_live_importance_donor(parent_task)) {
3335                 if (IIT_NULL == new_task_imp) {
3336                         new_task_imp = ipc_importance_for_task(new_task, FALSE);
3337                 }
3338                 assert(IIT_NULL != new_task_imp);
3339                 ipc_importance_task_mark_live_donor(new_task_imp, TRUE);
3340         }
3341         /* Do not inherit 'receiver' on fork, vfexec or true spawn */
3342         if (task_is_exec_copy(new_task) &&
3343             task_is_marked_importance_receiver(parent_task)) {
3344                 if (IIT_NULL == new_task_imp) {
3345                         new_task_imp = ipc_importance_for_task(new_task, FALSE);
3346                 }
3347                 assert(IIT_NULL != new_task_imp);
3348                 ipc_importance_task_mark_receiver(new_task_imp, TRUE);
3349         }
3350         if (task_is_marked_importance_denap_receiver(parent_task)) {
3351                 if (IIT_NULL == new_task_imp) {
3352                         new_task_imp = ipc_importance_for_task(new_task, FALSE);
3353                 }
3354                 assert(IIT_NULL != new_task_imp);
3355                 ipc_importance_task_mark_denap_receiver(new_task_imp, TRUE);
3356         }
3357         if (IIT_NULL != new_task_imp) {
3358                 assert(new_task->task_imp_base == new_task_imp);
3359                 ipc_importance_task_release(new_task_imp);
3360         }
3361 #endif /* IMPORTANCE_INHERITANCE */
3362 }
3363
3364 #if IMPORTANCE_INHERITANCE
3365 /*
3366  * Sets the task boost bit to the provided value.  Does NOT run the update function.
3367  *
3368  * Task lock must be held.
3369  */
3370 static void
3371 task_set_boost_locked(task_t task, boolean_t boost_active)
3372 {
3373 #if IMPORTANCE_TRACE
3374         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_BOOST, (boost_active ? IMP_BOOSTED : IMP_UNBOOSTED)) | DBG_FUNC_START),
3375             proc_selfpid(), task_pid(task), trequested_0(task), trequested_1(task), 0);
3376 #endif /* IMPORTANCE_TRACE */
3377
3378         task->requested_policy.trp_boosted = boost_active;
3379
3380 #if IMPORTANCE_TRACE
3381         if (boost_active == TRUE) {
3382                 DTRACE_BOOST2(boost, task_t, task, int, task_pid(task));
3383         } else {
3384                 DTRACE_BOOST2(unboost, task_t, task, int, task_pid(task));
3385         }
3386         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_BOOST, (boost_active ? IMP_BOOSTED : IMP_UNBOOSTED)) | DBG_FUNC_END),
3387             proc_selfpid(), task_pid(task),
3388             trequested_0(task), trequested_1(task), 0);
3389 #endif /* IMPORTANCE_TRACE */
3390 }
3391
3392 /*
3393  * Sets the task boost bit to the provided value and applies the update.
3394  *
3395  * Task lock must be held.  Must call update complete after unlocking the task.
3396  */
3397 void
3398 task_update_boost_locked(task_t task, boolean_t boost_active, task_pend_token_t pend_token)
3399 {
3400         task_set_boost_locked(task, boost_active);
3401
3402         task_policy_update_locked(task, pend_token);
3403 }
3404
3405 /*
3406  * Check if this task should donate importance.
3407  *
3408  * May be called without taking the task lock. In that case, donor status can change
3409  * so you must check only once for each donation event.
3410  */
3411 boolean_t
3412 task_is_importance_donor(task_t task)
3413 {
3414         if (task->task_imp_base == IIT_NULL) {
3415                 return FALSE;
3416         }
3417         return ipc_importance_task_is_donor(task->task_imp_base);
3418 }
3419
3420 /*
3421  * Query the status of the task's donor mark.
3422  */
3423 boolean_t
3424 task_is_marked_importance_donor(task_t task)
3425 {
3426         if (task->task_imp_base == IIT_NULL) {
3427                 return FALSE;
3428         }
3429         return ipc_importance_task_is_marked_donor(task->task_imp_base);
3430 }
3431
3432 /*
3433  * Query the status of the task's live donor and donor mark.
3434  */
3435 boolean_t
3436 task_is_marked_live_importance_donor(task_t task)
3437 {
3438         if (task->task_imp_base == IIT_NULL) {
3439                 return FALSE;
3440         }
3441         return ipc_importance_task_is_marked_live_donor(task->task_imp_base);
3442 }
3443
3444
3445 /*
3446  * This routine may be called without holding task lock
3447  * since the value of imp_receiver can never be unset.
3448  */
3449 boolean_t
3450 task_is_importance_receiver(task_t task)
3451 {
3452         if (task->task_imp_base == IIT_NULL) {
3453                 return FALSE;
3454         }
3455         return ipc_importance_task_is_marked_receiver(task->task_imp_base);
3456 }
3457
3458 /*
3459  * Query the task's receiver mark.
3460  */
3461 boolean_t
3462 task_is_marked_importance_receiver(task_t task)
3463 {
3464         if (task->task_imp_base == IIT_NULL) {
3465                 return FALSE;
3466         }
3467         return ipc_importance_task_is_marked_receiver(task->task_imp_base);
3468 }
3469
3470 /*
3471  * This routine may be called without holding task lock
3472  * since the value of de-nap receiver can never be unset.
3473  */
3474 boolean_t
3475 task_is_importance_denap_receiver(task_t task)
3476 {
3477         if (task->task_imp_base == IIT_NULL) {
3478                 return FALSE;
3479         }
3480         return ipc_importance_task_is_denap_receiver(task->task_imp_base);
3481 }
3482
3483 /*
3484  * Query the task's de-nap receiver mark.
3485  */
3486 boolean_t
3487 task_is_marked_importance_denap_receiver(task_t task)
3488 {
3489         if (task->task_imp_base == IIT_NULL) {
3490                 return FALSE;
3491         }
3492         return ipc_importance_task_is_marked_denap_receiver(task->task_imp_base);
3493 }
3494
3495 /*
3496  * This routine may be called without holding task lock
3497  * since the value of imp_receiver can never be unset.
3498  */
3499 boolean_t
3500 task_is_importance_receiver_type(task_t task)
3501 {
3502         if (task->task_imp_base == IIT_NULL) {
3503                 return FALSE;
3504         }
3505         return task_is_importance_receiver(task) ||
3506                task_is_importance_denap_receiver(task);
3507 }
3508
/*
 * External importance assertions are managed by the process in userspace.
 * Internal importance assertions are the responsibility of the kernel.
 * Assertions are changed from internal to external via
 * task_importance_externalize_assertion.
 */
3514
3515 int
3516 task_importance_hold_internal_assertion(task_t target_task, uint32_t count)
3517 {
3518         ipc_importance_task_t task_imp;
3519         kern_return_t ret;
3520
3521         /* may be first time, so allow for possible importance setup */
3522         task_imp = ipc_importance_for_task(target_task, FALSE);
3523         if (IIT_NULL == task_imp) {
3524                 return EOVERFLOW;
3525         }
3526         ret = ipc_importance_task_hold_internal_assertion(task_imp, count);
3527         ipc_importance_task_release(task_imp);
3528
3529         return (KERN_SUCCESS != ret) ? ENOTSUP : 0;
3530 }
3531
3532 int
3533 task_importance_hold_file_lock_assertion(task_t target_task, uint32_t count)
3534 {
3535         ipc_importance_task_t task_imp;
3536         kern_return_t ret;
3537
3538         /* may be first time, so allow for possible importance setup */
3539         task_imp = ipc_importance_for_task(target_task, FALSE);
3540         if (IIT_NULL == task_imp) {
3541                 return EOVERFLOW;
3542         }
3543         ret = ipc_importance_task_hold_file_lock_assertion(task_imp, count);
3544         ipc_importance_task_release(task_imp);
3545
3546         return (KERN_SUCCESS != ret) ? ENOTSUP : 0;
3547 }
3548
3549 int
3550 task_importance_hold_legacy_external_assertion(task_t target_task, uint32_t count)
3551 {
3552         ipc_importance_task_t task_imp;
3553         kern_return_t ret;
3554
3555         /* must already have set up an importance */
3556         task_imp = target_task->task_imp_base;
3557         if (IIT_NULL == task_imp) {
3558                 return EOVERFLOW;
3559         }
3560         ret = ipc_importance_task_hold_legacy_external_assertion(task_imp, count);
3561         return (KERN_SUCCESS != ret) ? ENOTSUP : 0;
3562 }
3563
3564 int
3565 task_importance_drop_file_lock_assertion(task_t target_task, uint32_t count)
3566 {
3567         ipc_importance_task_t task_imp;
3568         kern_return_t ret;
3569
3570         /* must already have set up an importance */
3571         task_imp = target_task->task_imp_base;
3572         if (IIT_NULL == task_imp) {
3573                 return EOVERFLOW;
3574         }
3575         ret = ipc_importance_task_drop_file_lock_assertion(target_task->task_imp_base, count);
3576         return (KERN_SUCCESS != ret) ? EOVERFLOW : 0;
3577 }
3578
3579 int
3580 task_importance_drop_legacy_external_assertion(task_t target_task, uint32_t count)
3581 {
3582         ipc_importance_task_t task_imp;
3583         kern_return_t ret;
3584
3585         /* must already have set up an importance */
3586         task_imp = target_task->task_imp_base;
3587         if (IIT_NULL == task_imp) {
3588                 return EOVERFLOW;
3589         }
3590         ret = ipc_importance_task_drop_legacy_external_assertion(task_imp, count);
3591         return (KERN_SUCCESS != ret) ? EOVERFLOW : 0;
3592 }
3593
3594 static void
3595 task_add_importance_watchport(task_t task, mach_port_t port, int *boostp)
3596 {
3597         int boost = 0;
3598
3599         __imptrace_only int released_pid = 0;
3600         __imptrace_only int pid = task_pid(task);
3601
3602         ipc_importance_task_t release_imp_task = IIT_NULL;
3603
3604         if (IP_VALID(port) != 0) {
3605                 ipc_importance_task_t new_imp_task = ipc_importance_for_task(task, FALSE);
3606
3607                 ip_lock(port);
3608
3609                 /*
3610                  * The port must have been marked tempowner already.
3611                  * This also filters out ports whose receive rights
3612                  * are already enqueued in a message, as you can't
3613                  * change the right's destination once it's already
3614                  * on its way.
3615                  */
3616                 if (port->ip_tempowner != 0) {
3617                         assert(port->ip_impdonation != 0);
3618
3619                         boost = port->ip_impcount;
3620                         if (IIT_NULL != port->ip_imp_task) {
3621                                 /*
3622                                  * if this port is already bound to a task,
3623                                  * release the task reference and drop any
3624                                  * watchport-forwarded boosts
3625                                  */
3626                                 release_imp_task = port->ip_imp_task;
3627                                 port->ip_imp_task = IIT_NULL;
3628                         }
3629
3630                         /* mark the port is watching another task (reference held in port->ip_imp_task) */
3631                         if (ipc_importance_task_is_marked_receiver(new_imp_task)) {
3632                                 port->ip_imp_task = new_imp_task;
3633                                 new_imp_task = IIT_NULL;
3634                         }
3635                 }
3636                 ip_unlock(port);
3637
3638                 if (IIT_NULL != new_imp_task) {
3639                         ipc_importance_task_release(new_imp_task);
3640                 }
3641
3642                 if (IIT_NULL != release_imp_task) {
3643                         if (boost > 0) {
3644                                 ipc_importance_task_drop_internal_assertion(release_imp_task, boost);
3645                         }
3646
3647                         // released_pid = task_pid(release_imp_task); /* TODO: Need ref-safe way to get pid */
3648                         ipc_importance_task_release(release_imp_task);
3649                 }
3650 #if IMPORTANCE_TRACE
3651                 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_WATCHPORT, 0)) | DBG_FUNC_NONE,
3652                     proc_selfpid(), pid, boost, released_pid, 0);
3653 #endif /* IMPORTANCE_TRACE */
3654         }
3655
3656         *boostp = boost;
3657         return;
3658 }
3659
3660 #endif /* IMPORTANCE_INHERITANCE */
3661
3662 /*
3663  * Routines for VM to query task importance
3664  */
3665
3666
/*
 * Order to be considered while estimating importance
 * for low memory notification and purging purgeable memory.
 *
 * These weights are summed by task_importance_estimate(); a larger sum
 * means a more important task.
 */
/* Added when the task's effective role is TASK_FOREGROUND_APPLICATION. */
#define TASK_IMPORTANCE_FOREGROUND     4
/* Added when the task's effective TASK_POLICY_DARWIN_BG value is 0. */
#define TASK_IMPORTANCE_NOTDARWINBG    1
3673
3674
3675 /*
3676  * (Un)Mark the task as a privileged listener for memory notifications.
3677  * if marked, this task will be among the first to be notified amongst
3678  * the bulk of all other tasks when the system enters a pressure level
3679  * of interest to this task.
3680  */
3681 int
3682 task_low_mem_privileged_listener(task_t task, boolean_t new_value, boolean_t *old_value)
3683 {
3684         if (old_value != NULL) {
3685                 *old_value = (boolean_t)task->low_mem_privileged_listener;
3686         } else {
3687                 task_lock(task);
3688                 task->low_mem_privileged_listener = (uint32_t)new_value;
3689                 task_unlock(task);
3690         }
3691
3692         return 0;
3693 }
3694
3695 /*
3696  * Checks if the task is already notified.
3697  *
3698  * Condition: task lock should be held while calling this function.
3699  */
3700 boolean_t
3701 task_has_been_notified(task_t task, int pressurelevel)
3702 {
3703         if (task == NULL) {
3704                 return FALSE;
3705         }
3706
3707         if (pressurelevel == kVMPressureWarning) {
3708                 return task->low_mem_notified_warn ? TRUE : FALSE;
3709         } else if (pressurelevel == kVMPressureCritical) {
3710                 return task->low_mem_notified_critical ? TRUE : FALSE;
3711         } else {
3712                 return TRUE;
3713         }
3714 }
3715
3716
3717 /*
3718  * Checks if the task is used for purging.
3719  *
3720  * Condition: task lock should be held while calling this function.
3721  */
3722 boolean_t
3723 task_used_for_purging(task_t task, int pressurelevel)
3724 {
3725         if (task == NULL) {
3726                 return FALSE;
3727         }
3728
3729         if (pressurelevel == kVMPressureWarning) {
3730                 return task->purged_memory_warn ? TRUE : FALSE;
3731         } else if (pressurelevel == kVMPressureCritical) {
3732                 return task->purged_memory_critical ? TRUE : FALSE;
3733         } else {
3734                 return TRUE;
3735         }
3736 }
3737
3738
3739 /*
3740  * Mark the task as notified with memory notification.
3741  *
3742  * Condition: task lock should be held while calling this function.
3743  */
3744 void
3745 task_mark_has_been_notified(task_t task, int pressurelevel)
3746 {
3747         if (task == NULL) {
3748                 return;
3749         }
3750
3751         if (pressurelevel == kVMPressureWarning) {
3752                 task->low_mem_notified_warn = 1;
3753         } else if (pressurelevel == kVMPressureCritical) {
3754                 task->low_mem_notified_critical = 1;
3755         }
3756 }
3757
3758
3759 /*
3760  * Mark the task as purged.
3761  *
3762  * Condition: task lock should be held while calling this function.
3763  */
3764 void
3765 task_mark_used_for_purging(task_t task, int pressurelevel)
3766 {
3767         if (task == NULL) {
3768                 return;
3769         }
3770
3771         if (pressurelevel == kVMPressureWarning) {
3772                 task->purged_memory_warn = 1;
3773         } else if (pressurelevel == kVMPressureCritical) {
3774                 task->purged_memory_critical = 1;
3775         }
3776 }
3777
3778
3779 /*
3780  * Mark the task eligible for low memory notification.
3781  *
3782  * Condition: task lock should be held while calling this function.
3783  */
3784 void
3785 task_clear_has_been_notified(task_t task, int pressurelevel)
3786 {
3787         if (task == NULL) {
3788                 return;
3789         }
3790
3791         if (pressurelevel == kVMPressureWarning) {
3792                 task->low_mem_notified_warn = 0;
3793         } else if (pressurelevel == kVMPressureCritical) {
3794                 task->low_mem_notified_critical = 0;
3795         }
3796 }
3797
3798
3799 /*
3800  * Mark the task eligible for purging its purgeable memory.
3801  *
3802  * Condition: task lock should be held while calling this function.
3803  */
3804 void
3805 task_clear_used_for_purging(task_t task)
3806 {
3807         if (task == NULL) {
3808                 return;
3809         }
3810
3811         task->purged_memory_warn = 0;
3812         task->purged_memory_critical = 0;
3813 }
3814
3815
3816 /*
3817  * Estimate task importance for purging its purgeable memory
3818  * and low memory notification.
3819  *
3820  * Importance is calculated in the following order of criteria:
3821  * -Task role : Background vs Foreground
3822  * -Boost status: Not boosted vs Boosted
3823  * -Darwin BG status.
3824  *
3825  * Returns: Estimated task importance. Less important task will have lower
3826  *          estimated importance.
3827  */
3828 int
3829 task_importance_estimate(task_t task)
3830 {
3831         int task_importance = 0;
3832
3833         if (task == NULL) {
3834                 return 0;
3835         }
3836
3837         if (proc_get_effective_task_policy(task, TASK_POLICY_ROLE) == TASK_FOREGROUND_APPLICATION) {
3838                 task_importance += TASK_IMPORTANCE_FOREGROUND;
3839         }
3840
3841         if (proc_get_effective_task_policy(task, TASK_POLICY_DARWIN_BG) == 0) {
3842                 task_importance += TASK_IMPORTANCE_NOTDARWINBG;
3843         }
3844
3845         return task_importance;
3846 }
3847
3848 boolean_t
3849 task_has_assertions(task_t task)
3850 {
3851         return task->task_imp_base->iit_assertcnt? TRUE : FALSE;
3852 }
3853
3854
3855 kern_return_t
3856 send_resource_violation(typeof(send_cpu_usage_violation) sendfunc,
3857     task_t violator,
3858     struct ledger_entry_info *linfo,
3859     resource_notify_flags_t flags)
3860 {
3861 #ifndef MACH_BSD
3862         return KERN_NOT_SUPPORTED;
3863 #else
3864         kern_return_t   kr = KERN_SUCCESS;
3865         proc_t          proc = NULL;
3866         posix_path_t    proc_path = "";
3867         proc_name_t     procname = "<unknown>";
3868         int             pid = -1;
3869         clock_sec_t     secs;
3870         clock_nsec_t    nsecs;
3871         mach_timespec_t timestamp;
3872         thread_t        curthread = current_thread();
3873         ipc_port_t      dstport = MACH_PORT_NULL;
3874
3875         if (!violator) {
3876                 kr = KERN_INVALID_ARGUMENT; goto finish;
3877         }
3878
3879         /* extract violator information */
3880         task_lock(violator);
3881         if (!(proc = get_bsdtask_info(violator))) {
3882                 task_unlock(violator);
3883                 kr = KERN_INVALID_ARGUMENT; goto finish;
3884         }
3885         (void)mig_strncpy(procname, proc_best_name(proc), sizeof(procname));
3886         pid = task_pid(violator);
3887         if (flags & kRNFatalLimitFlag) {
3888                 kr = proc_pidpathinfo_internal(proc, 0, proc_path,
3889                     sizeof(proc_path), NULL);
3890         }
3891         task_unlock(violator);
3892         if (kr) {
3893                 goto finish;
3894         }
3895
3896         /* violation time ~ now */
3897         clock_get_calendar_nanotime(&secs, &nsecs);
3898         timestamp.tv_sec = (int32_t)secs;
3899         timestamp.tv_nsec = (int32_t)nsecs;
3900         /* 25567702 tracks widening mach_timespec_t */
3901
3902         /* send message */
3903         kr = host_get_special_port(host_priv_self(), HOST_LOCAL_NODE,
3904             HOST_RESOURCE_NOTIFY_PORT, &dstport);
3905         if (kr) {
3906                 goto finish;
3907         }
3908
3909         thread_set_honor_qlimit(curthread);
3910         kr = sendfunc(dstport,
3911             procname, pid, proc_path, timestamp,
3912             linfo->lei_balance, linfo->lei_last_refill,
3913             linfo->lei_limit, linfo->lei_refill_period,
3914             flags);
3915         thread_clear_honor_qlimit(curthread);
3916
3917         ipc_port_release_send(dstport);
3918
3919 finish:
3920         return kr;
3921 #endif      /* MACH_BSD */
3922 }
3923
3924
/*
 * Resource violations trace four 64-bit integers.  For K32, two additional
 * codes are allocated, the first with the low two bits doubled.  So if the
 * K64 code is 0x042, the K32 codes would be 0x044 and 0x045.
 */
3930 #ifdef __LP64__
3931 void
3932 trace_resource_violation(uint16_t code,
3933     struct ledger_entry_info *linfo)
3934 {
3935         KERNEL_DBG_IST_SANE(KDBG_CODE(DBG_MACH, DBG_MACH_RESOURCE, code),
3936             linfo->lei_balance, linfo->lei_last_refill,
3937             linfo->lei_limit, linfo->lei_refill_period);
3938 }
3939 #else /* K32 */
3940 /* TODO: create/find a trace_two_LLs() for K32 systems */
3941 #define MASK32 0xffffffff
3942 void
3943 trace_resource_violation(uint16_t code,
3944     struct ledger_entry_info *linfo)
3945 {
3946         int8_t lownibble = (code & 0x3) * 2;
3947         int16_t codeA = (code & 0xffc) | lownibble;
3948         int16_t codeB = codeA + 1;
3949
3950         int32_t balance_high = (linfo->lei_balance >> 32) & MASK32;
3951         int32_t balance_low = linfo->lei_balance & MASK32;
3952         int32_t last_refill_high = (linfo->lei_last_refill >> 32) & MASK32;
3953         int32_t last_refill_low = linfo->lei_last_refill & MASK32;
3954
3955         int32_t limit_high = (linfo->lei_limit >> 32) & MASK32;
3956         int32_t limit_low = linfo->lei_limit & MASK32;
3957         int32_t refill_period_high = (linfo->lei_refill_period >> 32) & MASK32;
3958         int32_t refill_period_low = linfo->lei_refill_period & MASK32;
3959
3960         KERNEL_DBG_IST_SANE(KDBG_CODE(DBG_MACH, DBG_MACH_RESOURCE, codeA),
3961             balance_high, balance_low,
3962             last_refill_high, last_refill_low);
3963         KERNEL_DBG_IST_SANE(KDBG_CODE(DBG_MACH, DBG_MACH_RESOURCE, codeB),
3964             limit_high, limit_low,
3965             refill_period_high, refill_period_low);
3966 }
3967 #endif /* K64/K32 */