xnu-2782.40.9.tar.gz

[apple/xnu.git] / osfmk / kern / task.c
diff --git a/osfmk/kern/task.c b/osfmk/kern/task.c

index aa5d2b36044d20c4ff97ec5491fa4e5a945fb3b4..999dfefcf89c0df40050ec72ca85afee189a52a9 100644 (file)
--- a/osfmk/kern/task.c
+++ b/osfmk/kern/task.c
@@ -86,9 +86,6 @@
   * Copyright (c) 2005 SPARTA, Inc.
   */
  
-#include <fast_tas.h>
-#include <platforms.h>
-
  #include <mach/mach_types.h>
  #include <mach/boolean.h>
  #include <mach/host_priv.h>
@@ -98,6 +95,7 @@
  #include <mach/task_info.h>
  #include <mach/task_special_ports.h>
  
+#include <ipc/ipc_importance.h>
  #include <ipc/ipc_types.h>
  #include <ipc/ipc_space.h>
  #include <ipc/ipc_entry.h>
@@ -108,6 +106,7 @@
  #include <kern/misc_protos.h>
  #include <kern/task.h>
  #include <kern/thread.h>
+#include <kern/coalition.h>
  #include <kern/zalloc.h>
  #include <kern/kalloc.h>
  #include <kern/processor.h>
@@ -140,14 +139,9 @@
  #include <mach/mach_host_server.h>
  #include <mach/host_security_server.h>
  #include <mach/mach_port_server.h>
-#include <mach/security_server.h>
  
  #include <vm/vm_shared_region.h>
  
-#if CONFIG_MACF_MACH
-#include <security/mac_mach_internal.h>
-#endif
-
  #if CONFIG_COUNTERS
  #include <pmc/pmc.h>
  #endif /* CONFIG_COUNTERS */
@@ -155,6 +149,18 @@
  #include <libkern/OSDebug.h>
  #include <libkern/OSAtomic.h>
  
+#if CONFIG_ATM
+#include <atm/atm_internal.h>
+#endif
+
+#include <kern/sfi.h>
+
+#if KPERF
+extern int kpc_force_all_ctrs(task_t, int);
+#endif
+
+uint32_t qos_override_mode;
+
  task_t                 kernel_task;
  zone_t                 task_zone;
  lck_attr_t      task_lck_attr;
@@ -171,8 +177,16 @@ zinfo_usage_store_t tasks_tkm_shared;
  expired_task_statistics_t              dead_task_statistics;
  lck_spin_t             dead_task_statistics_lock;
  
-static ledger_template_t task_ledger_template = NULL;
-struct _task_ledger_indices task_ledgers __attribute__((used)) = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1};
+ledger_template_t task_ledger_template = NULL;
+
+struct _task_ledger_indices task_ledgers __attribute__((used)) =
+       {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        { 0 /* initialized at runtime */},
+#ifdef CONFIG_BANK
+        -1, -1,
+#endif
+       };
+
  void init_task_ledgers(void);
  void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1);
  void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1);
@@ -207,6 +221,10 @@ int task_wakeups_monitor_ustackshots_trigger_pct; /* Percentage. Level at which
  int disable_exc_resource; /* Global override to supress EXC_RESOURCE for resource monitor violations. */
  
  int max_task_footprint = 0; /* Per-task limit on physical memory consumption */
+#if MACH_ASSERT
+int pmap_ledgers_panic = 1;
+#endif /* MACH_ASSERT */
+
  int task_max = CONFIG_TASK_MAX; /* Max number of tasks */
  
  int hwm_user_cores = 0; /* high watermark violations generate user core files */
@@ -220,6 +238,9 @@ extern char *proc_name_address(struct proc *p);
  extern void    memorystatus_on_ledger_footprint_exceeded(int warning, const int max_footprint_mb);
  #endif
  #endif
+#if MACH_ASSERT
+extern int pmap_ledgers_panic;
+#endif /* MACH_ASSERT */
  
  /* Forwards */
  
@@ -254,9 +275,9 @@ task_set_64bit(
                 task_t task,
                 boolean_t is64bit)
  {
-#if defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
         thread_t thread;
-#endif /* defined(__i386__) || defined(__x86_64__) */
+#endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */
  
         task_lock(task);
  
@@ -276,13 +297,13 @@ task_set_64bit(
          * state with respect to its task's 64-bitness.
          */
  
-#if defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)     
         queue_iterate(&task->threads, thread, thread_t, task_threads) {
                 thread_mtx_lock(thread);
                 machine_thread_switch_addrmode(thread);
                 thread_mtx_unlock(thread);
         }
-#endif /* defined(__i386__) || defined(__x86_64__) */  
+#endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */    
  
  out:
         task_unlock(task);
@@ -298,6 +319,18 @@ task_set_dyld_info(task_t task, mach_vm_address_t addr, mach_vm_size_t size)
         task_unlock(task);
  }
  
+void
+task_atm_reset(__unused task_t task) {
+
+#if CONFIG_ATM
+       if (task->atm_context != NULL) {
+                atm_task_descriptor_destroy(task->atm_context);
+                task->atm_context = NULL;
+       }
+#endif
+
+}
+
  #if TASK_REFERENCE_LEAK_DEBUG
  #include <kern/btlog.h>
  
@@ -365,20 +398,21 @@ task_init(void)
         zone_change(task_zone, Z_NOENCRYPT, TRUE);
  
         /*
-        * Configure per-task memory limit. The boot arg takes precedence over the 
-        * device tree.
+        * Configure per-task memory limit.
+        * The boot-arg is interpreted as Megabytes,
+        * and takes precedence over the device tree.
+        * Setting the boot-arg to 0 disables task limits.
          */
         if (!PE_parse_boot_argn("max_task_pmem", &max_task_footprint,
                         sizeof (max_task_footprint))) {
-               max_task_footprint = 0;
-       }
-
-       if (max_task_footprint == 0) {
                 /*
                  * No limit was found in boot-args, so go look in the device tree.
                  */
                 if (!PE_get_default("kern.max_task_pmem", &max_task_footprint,
                                 sizeof(max_task_footprint))) {
+                       /*
+                        * No limit was found in device tree.
+                        */
                         max_task_footprint = 0;
                 }
         }
@@ -398,11 +432,22 @@ task_init(void)
  #endif
         }
  
+#if MACH_ASSERT
+       PE_parse_boot_argn("pmap_ledgers_panic", &pmap_ledgers_panic,
+                         sizeof (pmap_ledgers_panic));
+#endif /* MACH_ASSERT */
+
         if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores,
                         sizeof (hwm_user_cores))) {
                 hwm_user_cores = 0;
         }
  
+       if (PE_parse_boot_argn("qos_override_mode", &qos_override_mode, sizeof(qos_override_mode))) {
+               printf("QOS override mode: 0x%08x\n", qos_override_mode);
+       } else {
+               qos_override_mode = QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE;
+       }
+
         proc_init_cpumon_params();
  
         if (!PE_parse_boot_argn("task_wakeups_monitor_rate", &task_wakeups_monitor_rate, sizeof (task_wakeups_monitor_rate))) {
@@ -423,7 +468,16 @@ task_init(void)
                 disable_exc_resource = 0;
         }
  
+/*
+ * If we have coalitions, coalition_init() will call init_task_ledgers() as it
+ * sets up the ledgers for the default coalition. If we don't have coalitions,
+ * then we have to call it now.
+ */
+#if CONFIG_COALITIONS
+       assert(task_ledger_template);
+#else /* CONFIG_COALITIONS */
         init_task_ledgers();
+#endif /* CONFIG_COALITIONS */
  
  #if TASK_REFERENCE_LEAK_DEBUG
         simple_lock_init(&task_ref_lock, 0);
@@ -439,15 +493,16 @@ task_init(void)
          * Create the kernel task as the first task.
          */
  #ifdef __LP64__
-       if (task_create_internal(TASK_NULL, FALSE, TRUE, &kernel_task) != KERN_SUCCESS)
+       if (task_create_internal(TASK_NULL, COALITION_NULL, FALSE, TRUE, &kernel_task) != KERN_SUCCESS)
  #else
-       if (task_create_internal(TASK_NULL, FALSE, FALSE, &kernel_task) != KERN_SUCCESS)
+       if (task_create_internal(TASK_NULL, COALITION_NULL, FALSE, FALSE, &kernel_task) != KERN_SUCCESS)
  #endif
                 panic("task_init\n");
  
         vm_map_deallocate(kernel_task->map);
         kernel_task->map = kernel_map;
         lck_spin_init(&dead_task_statistics_lock, &task_lck_grp, &task_lck_attr);
+
  }
  
  /*
@@ -512,18 +567,28 @@ host_security_create_task_token(
   *
   * phys_footprint
   *   Physical footprint: This is the sum of:
- *     + phys_mem [task's resident memory]
- *     + phys_compressed
- *     + iokit_mem
+ *     + internal
+ *     + internal_compressed
+ *     + iokit_mapped
+ *     - alternate_accounting
   *
- * iokit_mem
- *   IOKit mappings: The total size of all IOKit mappings in this task [regardless of clean/dirty state].
- * 
- * phys_compressed
- *   Physical compressed: Amount of this task's resident memory which is held by the compressor.
+ * internal
+ *   The task's anonymous memory, which on iOS is always resident.
+ *
+ * internal_compressed
+ *   Amount of this task's internal memory which is held by the compressor.
   *   Such memory is no longer actually resident for the task [i.e., resident in its pmap],
   *   and could be either decompressed back into memory, or paged out to storage, depending
   *   on our implementation.
+ *
+ * iokit_mapped
+ *   IOKit mappings: The total size of all IOKit mappings in this task, regardless of
+     clean/dirty or internal/external state].
+ *
+ * alternate_accounting
+ *   The number of internal dirty pages which are part of IOKit mappings. By definition, these pages
+ *   are counted in both internal *and* iokit_mapped, so we must subtract them from the total to avoid
+ *   double counting.
   */
  void
  init_task_ledgers(void)
@@ -545,26 +610,94 @@ init_task_ledgers(void)
             "bytes");
         task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem",
             "bytes");
-       task_ledgers.iokit_mem = ledger_entry_add(t, "iokit_mem", "mappings",
+       task_ledgers.internal = ledger_entry_add(t, "internal", "physmem",
+           "bytes");
+       task_ledgers.iokit_mapped = ledger_entry_add(t, "iokit_mapped", "mappings",
+           "bytes");
+       task_ledgers.alternate_accounting = ledger_entry_add(t, "alternate_accounting", "physmem",
             "bytes");
         task_ledgers.phys_footprint = ledger_entry_add(t, "phys_footprint", "physmem",
             "bytes");
-       task_ledgers.phys_compressed = ledger_entry_add(t, "phys_compressed", "physmem",
+       task_ledgers.internal_compressed = ledger_entry_add(t, "internal_compressed", "physmem",
             "bytes");
+       task_ledgers.purgeable_volatile = ledger_entry_add(t, "purgeable_volatile", "physmem", "bytes");
+       task_ledgers.purgeable_nonvolatile = ledger_entry_add(t, "purgeable_nonvolatile", "physmem", "bytes");
+       task_ledgers.purgeable_volatile_compressed = ledger_entry_add(t, "purgeable_volatile_compress", "physmem", "bytes");
+       task_ledgers.purgeable_nonvolatile_compressed = ledger_entry_add(t, "purgeable_nonvolatile_compress", "physmem", "bytes");
         task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power",
             "count");
         task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power",
             "count");
+       
+       sfi_class_id_t class_id, ledger_alias;
+       for (class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
+               task_ledgers.sfi_wait_times[class_id] = -1;
+       }
+
+       /* don't account for UNSPECIFIED */
+       for (class_id = SFI_CLASS_UNSPECIFIED + 1; class_id < MAX_SFI_CLASS_ID; class_id++) {
+               ledger_alias = sfi_get_ledger_alias_for_class(class_id);
+               if (ledger_alias != SFI_CLASS_UNSPECIFIED) {
+                       /* Check to see if alias has been registered yet */
+                       if (task_ledgers.sfi_wait_times[ledger_alias] != -1) {
+                               task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias];
+                       } else {
+                               /* Otherwise, initialize it first */
+                               task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias] = sfi_ledger_entry_add(t, ledger_alias);
+                       }
+               } else {
+                       task_ledgers.sfi_wait_times[class_id] = sfi_ledger_entry_add(t, class_id);
+               }
  
-       if ((task_ledgers.cpu_time < 0) || (task_ledgers.tkm_private < 0) ||
-           (task_ledgers.tkm_shared < 0) || (task_ledgers.phys_mem < 0) ||
-           (task_ledgers.wired_mem < 0) || (task_ledgers.iokit_mem < 0) ||
-           (task_ledgers.phys_footprint < 0) || (task_ledgers.phys_compressed < 0) ||
-           (task_ledgers.platform_idle_wakeups < 0) || (task_ledgers.interrupt_wakeups < 0)) {
+               if (task_ledgers.sfi_wait_times[class_id] < 0) {
+                       panic("couldn't create entries for task ledger template for SFI class 0x%x", class_id);
+               }
+       }
+
+#ifdef CONFIG_BANK
+       task_ledgers.cpu_time_billed_to_me = ledger_entry_add(t, "cpu_time_billed_to_me", "sched", "ns");
+       task_ledgers.cpu_time_billed_to_others = ledger_entry_add(t, "cpu_time_billed_to_others", "sched", "ns");
+#endif
+
+       assert(task_ledgers.sfi_wait_times[MAX_SFI_CLASS_ID -1] != -1);
+
+       if ((task_ledgers.cpu_time < 0) ||
+           (task_ledgers.tkm_private < 0) ||
+           (task_ledgers.tkm_shared < 0) ||
+           (task_ledgers.phys_mem < 0) ||
+           (task_ledgers.wired_mem < 0) ||
+           (task_ledgers.internal < 0) ||
+           (task_ledgers.iokit_mapped < 0) ||
+           (task_ledgers.alternate_accounting < 0) ||
+           (task_ledgers.phys_footprint < 0) ||
+           (task_ledgers.internal_compressed < 0) ||
+           (task_ledgers.purgeable_volatile < 0) ||
+           (task_ledgers.purgeable_nonvolatile < 0) ||
+           (task_ledgers.purgeable_volatile_compressed < 0) ||
+           (task_ledgers.purgeable_nonvolatile_compressed < 0) ||
+           (task_ledgers.platform_idle_wakeups < 0) ||
+           (task_ledgers.interrupt_wakeups < 0)
+#ifdef CONFIG_BANK
+           || (task_ledgers.cpu_time_billed_to_me < 0) || (task_ledgers.cpu_time_billed_to_others < 0)
+#endif
+           ) {
                 panic("couldn't create entries for task ledger template");
         }
  
         ledger_track_maximum(t, task_ledgers.phys_footprint, 60);
+#if MACH_ASSERT
+       if (pmap_ledgers_panic) {
+               ledger_panic_on_negative(t, task_ledgers.phys_footprint);
+               ledger_panic_on_negative(t, task_ledgers.internal);
+               ledger_panic_on_negative(t, task_ledgers.internal_compressed);
+               ledger_panic_on_negative(t, task_ledgers.iokit_mapped);
+               ledger_panic_on_negative(t, task_ledgers.alternate_accounting);
+               ledger_panic_on_negative(t, task_ledgers.purgeable_volatile);
+               ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile);
+               ledger_panic_on_negative(t, task_ledgers.purgeable_volatile_compressed);
+               ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile_compressed);
+       }
+#endif /* MACH_ASSERT */
  
  #if CONFIG_JETSAM
         ledger_set_callback(t, task_ledgers.phys_footprint, task_footprint_exceeded, NULL, NULL);
@@ -579,6 +712,7 @@ init_task_ledgers(void)
  kern_return_t
  task_create_internal(
         task_t          parent_task,
+       coalition_t     parent_coalition __unused,
         boolean_t       inherit_memory,
         boolean_t       is_64bit,
         task_t          *child_task)            /* OUT */
@@ -605,6 +739,10 @@ task_create_internal(
  
         new_task->ledger = ledger;
  
+#if defined(CONFIG_SCHED_MULTIQ)
+       new_task->sched_group = sched_group_create();
+#endif
+
         /* if inherit_memory is true, parent_task MUST not be NULL */
         if (inherit_memory)
                 new_task->map = vm_map_fork(ledger, parent_task->map);
@@ -639,6 +777,13 @@ task_create_internal(
         new_task->t_flags = 0;
         new_task->importance = 0;
  
+#if CONFIG_ATM
+       new_task->atm_context = NULL;
+#endif
+#if CONFIG_BANK
+       new_task->bank_context = NULL;
+#endif
+
         zinfo_task_init(new_task);
  
  #ifdef MACH_BSD
@@ -666,11 +811,6 @@ task_create_internal(
         queue_init(&new_task->semaphore_list);
         new_task->semaphores_owned = 0;
  
-#if CONFIG_MACF_MACH
-       new_task->label = labelh_new(1);
-       mac_task_label_init (&new_task->maclabel);
-#endif
-
         ipc_task_init(new_task, parent_task);
  
         new_task->total_user_time = 0;
@@ -698,6 +838,10 @@ task_create_internal(
         new_task->suspends_outstanding = 0;
  #endif
  
+#if HYPERVISOR
+       new_task->hv_task_target = NULL;
+#endif /* HYPERVISOR */
+
  
         new_task->low_mem_notified_warn = 0;
         new_task->low_mem_notified_critical = 0;
@@ -705,11 +849,7 @@ task_create_internal(
         new_task->purged_memory_critical = 0;
         new_task->mem_notify_reserved = 0;
  #if IMPORTANCE_INHERITANCE
-       new_task->imp_receiver = 0;
-       new_task->imp_donor = 0;
-       new_task->imp_reserved = 0;
-       new_task->task_imp_assertcnt = 0;
-       new_task->task_imp_externcnt = 0;
+       new_task->task_imp_base = NULL;
  #endif /* IMPORTANCE_INHERITANCE */
  
  #if    defined(__x86_64__)     
@@ -743,11 +883,36 @@ task_create_internal(
                 new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task);
  
  #if IMPORTANCE_INHERITANCE
-               new_task->imp_donor = parent_task->imp_donor;
+               ipc_importance_task_t new_task_imp = IIT_NULL;
+
+               if (task_is_marked_importance_donor(parent_task)) {
+                       new_task_imp = ipc_importance_for_task(new_task, FALSE);
+                       assert(IIT_NULL != new_task_imp);
+                       ipc_importance_task_mark_donor(new_task_imp, TRUE);
+               }
                 /* Embedded doesn't want this to inherit */
-               new_task->imp_receiver = parent_task->imp_receiver;
+               if (task_is_marked_importance_receiver(parent_task)) {
+                       if (IIT_NULL == new_task_imp)
+                               new_task_imp = ipc_importance_for_task(new_task, FALSE);
+                       assert(IIT_NULL != new_task_imp);
+                       ipc_importance_task_mark_receiver(new_task_imp, TRUE);
+               }
+               if (task_is_marked_importance_denap_receiver(parent_task)) {
+                       if (IIT_NULL == new_task_imp)
+                               new_task_imp = ipc_importance_for_task(new_task, FALSE);
+                       assert(IIT_NULL != new_task_imp);
+                       ipc_importance_task_mark_denap_receiver(new_task_imp, TRUE);
+               }
+               
+               if (IIT_NULL != new_task_imp) {
+                       assert(new_task->task_imp_base == new_task_imp);
+                       ipc_importance_task_release(new_task_imp);
+               }
  #endif /* IMPORTANCE_INHERITANCE */
  
+               new_task->priority = BASEPRI_DEFAULT;
+               new_task->max_priority = MAXPRI_USER;
+
                 new_task->requested_policy.t_apptype     = parent_task->requested_policy.t_apptype;
  
                 new_task->requested_policy.int_darwinbg  = parent_task->requested_policy.int_darwinbg;
@@ -758,6 +923,7 @@ task_create_internal(
                 new_task->requested_policy.ext_iopassive = parent_task->requested_policy.ext_iopassive;
                 new_task->requested_policy.bg_iotier     = parent_task->requested_policy.bg_iotier;
                 new_task->requested_policy.terminated    = parent_task->requested_policy.terminated;
+               new_task->requested_policy.t_qos_clamp   = parent_task->requested_policy.t_qos_clamp;
  
                 task_policy_create(new_task, parent_task->requested_policy.t_boosted);
         } else {
@@ -771,21 +937,42 @@ task_create_internal(
                 new_task->all_image_info_size = (mach_vm_size_t)0;
  
                 new_task->pset_hint = PROCESSOR_SET_NULL;
+
+               if (kernel_task == TASK_NULL) {
+                       new_task->priority = BASEPRI_KERNEL;
+                       new_task->max_priority = MAXPRI_KERNEL;
+               } else {
+                       new_task->priority = BASEPRI_DEFAULT;
+                       new_task->max_priority = MAXPRI_USER;
+               }
         }
  
-       if (kernel_task == TASK_NULL) {
-               new_task->priority = BASEPRI_KERNEL;
-               new_task->max_priority = MAXPRI_KERNEL;
-       } else if (proc_get_effective_task_policy(new_task, TASK_POLICY_LOWPRI_CPU)) {
-               new_task->priority = MAXPRI_THROTTLE;
-               new_task->max_priority = MAXPRI_THROTTLE;
+       new_task->coalition = COALITION_NULL;
+
+#if CONFIG_COALITIONS
+       if (parent_coalition) {
+               coalition_adopt_task(parent_coalition, new_task);
+       } else if (parent_task && parent_task->coalition) {
+               coalition_adopt_task(parent_task->coalition, new_task);
         } else {
-               new_task->priority = BASEPRI_DEFAULT;
-               new_task->max_priority = MAXPRI_USER;
+               coalition_default_adopt_task(new_task);
+       }
+
+       if (new_task->coalition == COALITION_NULL) {
+               panic("created task is not a member of any coalition");
         }
+#endif /* CONFIG_COALITIONS */
+
+       /* Allocate I/O Statistics */
+       new_task->task_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info));
+       assert(new_task->task_io_stats != NULL);
+       bzero(new_task->task_io_stats, sizeof(struct io_stat_info));
+
+       bzero(&(new_task->cpu_time_qos_stats), sizeof(struct _cpu_time_qos_stats));
  
         bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics));
         new_task->task_timer_wakeups_bin_1 = new_task->task_timer_wakeups_bin_2 = 0;
+       new_task->task_gpu_ns = 0;
         lck_mtx_lock(&tasks_threads_lock);
         queue_enter(&tasks, new_task, task_t, tasks);
         tasks_count++;
@@ -795,6 +982,9 @@ task_create_internal(
                 new_task->priv_flags |= (parent_task->priv_flags&VM_BACKING_STORE_PRIV);
  
         new_task->task_volatile_objects = 0;
+       new_task->task_nonvolatile_objects = 0;
+       new_task->task_purgeable_disowning = FALSE;
+       new_task->task_purgeable_disowned = FALSE;
  
         ipc_task_enable(new_task);
  
@@ -802,6 +992,8 @@ task_create_internal(
         return(KERN_SUCCESS);
  }
  
+int task_dropped_imp_count = 0;
+
  /*
   *     task_deallocate:
   *
@@ -812,18 +1004,57 @@ task_deallocate(
         task_t          task)
  {
         ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups;
+       uint32_t refs;
  
         if (task == TASK_NULL)
             return;
  
-       if (task_deallocate_internal(task) > 0)
+       refs = task_deallocate_internal(task);
+
+#if IMPORTANCE_INHERITANCE
+       if (refs > 1)
                 return;
+       
+       if (refs == 1) {
+               /*
+                * If last ref potentially comes from the task's importance,
+                * disconnect it.  But more task refs may be added before
+                * that completes, so wait for the reference to go to zero
+                * naturually (it may happen on a recursive task_deallocate()
+                * from the ipc_importance_disconnect_task() call).
+                */
+               if (IIT_NULL != task->task_imp_base)
+                       ipc_importance_disconnect_task(task);
+               return;
+       }
+#else
+       if (refs > 0)
+               return;
+#endif /* IMPORTANCE_INHERITANCE */
  
         lck_mtx_lock(&tasks_threads_lock);
         queue_remove(&terminated_tasks, task, task_t, tasks);
         terminated_tasks_count--;
         lck_mtx_unlock(&tasks_threads_lock);
  
+       /*
+        * remove the reference on atm descriptor
+        */
+        task_atm_reset(task);
+
+#if CONFIG_BANK
+       /*
+        * remove the reference on bank context
+        */
+       if (task->bank_context != NULL) {
+               bank_task_destroy(task->bank_context);
+               task->bank_context = NULL;
+       }
+#endif
+
+       if (task->task_io_stats)
+               kfree(task->task_io_stats, sizeof(struct io_stat_info));
+
         /*
          *      Give the machine dependent code a chance
          *      to perform cleanup before ripping apart
@@ -836,6 +1067,26 @@ task_deallocate(
         if (task->affinity_space)
                 task_affinity_deallocate(task);
  
+#if MACH_ASSERT
+       if (task->ledger != NULL &&
+           task->map != NULL &&
+           task->map->pmap != NULL &&
+           task->map->pmap->ledger != NULL) {
+               assert(task->ledger == task->map->pmap->ledger);
+       }
+#endif /* MACH_ASSERT */
+
+       vm_purgeable_disown(task);
+       assert(task->task_purgeable_disowned);
+       if (task->task_volatile_objects != 0 ||
+           task->task_nonvolatile_objects != 0) {
+               panic("task_deallocate(%p): "
+                     "volatile_objects=%d nonvolatile_objects=%d\n",
+                     task,
+                     task->task_volatile_objects,
+                     task->task_nonvolatile_objects);
+       }
+
         vm_map_deallocate(task->map);
         is_release(task->itk_space);
  
@@ -844,6 +1095,10 @@ task_deallocate(
         ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
                            &platform_idle_wakeups, &debit);
  
+#if defined(CONFIG_SCHED_MULTIQ)
+       sched_group_destroy(task->sched_group);
+#endif
+
         /* Accumulate statistics for dead tasks */
         lck_spin_lock(&dead_task_statistics_lock);
         dead_task_statistics.total_user_time += task->total_user_time;
@@ -858,10 +1113,6 @@ task_deallocate(
         lck_spin_unlock(&dead_task_statistics_lock);
         lck_mtx_destroy(&task->lock, &task_lck_grp);
  
-#if CONFIG_MACF_MACH
-       labelh_release(task->label);
-#endif
-
         if (!ledger_get_entries(task->ledger, task_ledgers.tkm_private, &credit,
             &debit)) {
                 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc);
@@ -879,15 +1130,14 @@ task_deallocate(
         btlog_remove_entries_for_element(task_ref_btlog, task);
  #endif
  
-       if (task->task_volatile_objects) {
-               /*
-                * This task still "owns" some volatile VM objects.
-                * Disown them now to avoid leaving them pointing back at
-                * an invalid task.
-                */
-               vm_purgeable_disown(task);
-               assert(task->task_volatile_objects == 0);
+#if CONFIG_COALITIONS
+       if (!task->coalition) {
+               panic("deallocating task was not a member of any coalition");
         }
+       coalition_release(task->coalition);
+#endif /* CONFIG_COALITIONS */
+
+       task->coalition = COALITION_NULL;
  
         zfree(task_zone, task);
  }
@@ -936,6 +1186,30 @@ task_terminate(
         return (task_terminate_internal(task));
  }
  
+#if MACH_ASSERT
+extern int proc_pid(struct proc *);
+extern void proc_name_kdp(task_t t, char *buf, int size);
+#endif /* MACH_ASSERT */
+
+#define VM_MAP_PARTIAL_REAP 0x54  /* 0x150 */
+static void
+__unused task_partial_reap(task_t task, __unused int pid)
+{
+        unsigned int    reclaimed_resident = 0;
+        unsigned int    reclaimed_compressed = 0;
+       uint64_t        task_page_count;
+
+       task_page_count = (get_task_phys_footprint(task) / PAGE_SIZE_64);
+
+       KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_START),
+                              pid, task_page_count, 0, 0, 0);
+
+       vm_map_partial_reap(task->map, &reclaimed_resident, &reclaimed_compressed);
+
+        KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_END),
+                              pid, reclaimed_resident, reclaimed_compressed, 0, 0);
+}
+
  kern_return_t
  task_terminate_internal(
         task_t                  task)
@@ -943,6 +1217,7 @@ task_terminate_internal(
         thread_t                        thread, self;
         task_t                          self_task;
         boolean_t                       interrupt_save;
+       int                             pid = 0;
  
         assert(task != kernel_task);
  
@@ -979,14 +1254,6 @@ task_terminate_internal(
                 return (KERN_FAILURE);
         }
  
-#if MACH_ASSERT
-       if (task->suspends_outstanding != 0) {
-               printf("WARNING: %s (%d) exiting with %d outstanding suspensions\n",
-                       proc_name_address(task->bsd_info), proc_pid(task->bsd_info),
-                       task->suspends_outstanding);
-       }
-#endif
-
         if (self_task != task)
                 task_unlock(self_task);
  
@@ -1022,8 +1289,22 @@ task_terminate_internal(
                         thread_terminate_internal(thread);
         }
  
+#ifdef MACH_BSD
+       if (task->bsd_info != NULL) {
+               pid = proc_pid(task->bsd_info);
+       }
+#endif /* MACH_BSD */
+
         task_unlock(task);
  
+       proc_set_task_policy(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE,
+                            TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);
+
+        /* Early object reap phase */
+
+// PR-17045188: Revisit implementation
+//        task_partial_reap(task, pid);
+
  
         /*
          *      Destroy all synchronizers owned by the task.
@@ -1035,8 +1316,19 @@ task_terminate_internal(
          */
         ipc_space_terminate(task->itk_space);
  
-       if (vm_map_has_4GB_pagezero(task->map))
-               vm_map_clear_4GB_pagezero(task->map);
+#if 00
+       /* if some ledgers go negative on tear-down again... */
+       ledger_disable_panic_on_negative(task->map->pmap->ledger,
+                                        task_ledgers.phys_footprint);
+       ledger_disable_panic_on_negative(task->map->pmap->ledger,
+                                        task_ledgers.internal);
+       ledger_disable_panic_on_negative(task->map->pmap->ledger,
+                                        task_ledgers.internal_compressed);
+       ledger_disable_panic_on_negative(task->map->pmap->ledger,
+                                        task_ledgers.iokit_mapped);
+       ledger_disable_panic_on_negative(task->map->pmap->ledger,
+                                        task_ledgers.alternate_accounting);
+#endif
  
         /*
          * If the current thread is a member of the task
@@ -1054,6 +1346,22 @@ task_terminate_internal(
         /* release our shared region */
         vm_shared_region_set(task, NULL);
  
+#if MACH_ASSERT
+       /*
+        * Identify the pmap's process, in case the pmap ledgers drift
+        * and we have to report it.
+        */
+       char procname[17];
+       if (task->bsd_info) {
+               pid = proc_pid(task->bsd_info);
+               proc_name_kdp(task, procname, sizeof (procname));
+       } else {
+               pid = 0;
+               strlcpy(procname, "<unknown>", sizeof (procname));
+       }
+       pmap_set_process(task->map->pmap, pid, procname);
+#endif /* MACH_ASSERT */
+
         lck_mtx_lock(&tasks_threads_lock);
         queue_remove(&tasks, task, task_t, tasks);
         queue_enter(&terminated_tasks, task, task_t, tasks);
@@ -1067,6 +1375,19 @@ task_terminate_internal(
          */
         thread_interrupt_level(interrupt_save);
  
+#if KPERF
+       /* force the task to release all ctrs */
+       if (task->t_chud & TASK_KPC_FORCED_ALL_CTRS)
+               kpc_force_all_ctrs(task, 0);
+#endif
+
+#if CONFIG_COALITIONS
+       /*
+        * Leave our coalition. (drop activation but not reference)
+        */
+       coalition_remove_task(task);
+#endif
+
         /*
          * Get rid of the task active reference on itself.
          */
@@ -2114,6 +2435,24 @@ host_security_set_task_token(
          return(kr);
  }
  
+kern_return_t
+task_send_trace_memory(
+       task_t        target_task,
+       __unused uint32_t pid,
+       __unused uint64_t uniqueid)
+{
+       kern_return_t kr = KERN_INVALID_ARGUMENT;
+       if (target_task == TASK_NULL)
+               return (KERN_INVALID_ARGUMENT);
+
+#if CONFIG_ATM
+       kr = atm_send_proc_inspect_notification(target_task,
+                                 pid,
+                                 uniqueid);
+
+#endif
+       return (kr);
+}
  /*
   * This routine was added, pretty much exclusively, for registering the
   * RPC glue vector for in-kernel short circuited tasks.  Rather than
@@ -2133,6 +2472,25 @@ task_set_info(
                 return(KERN_INVALID_ARGUMENT);
  
         switch (flavor) {
+
+#if CONFIG_ATM
+               case TASK_TRACE_MEMORY_INFO:
+               {
+                       if (task_info_count != TASK_TRACE_MEMORY_INFO_COUNT)
+                               return (KERN_INVALID_ARGUMENT);
+                       
+                       assert(task_info_in != NULL);
+                       task_trace_memory_info_t mem_info;
+                       mem_info = (task_trace_memory_info_t) task_info_in;
+                       kern_return_t kr = atm_register_trace_memory(task,
+                                               mem_info->user_memory_address,
+                                               mem_info->buffer_size,
+                                               mem_info->mailbox_array_size);
+                       return kr;
+                       break;
+               }
+
+#endif
             default:
                 return (KERN_INVALID_ARGUMENT);
         }
@@ -2650,7 +3008,18 @@ task_info(
                         break;
                 }
  
-               task_power_info_locked(task, (task_power_info_t)task_info_out);
+               task_power_info_locked(task, (task_power_info_t)task_info_out, NULL);
+               break;
+       }
+
+       case TASK_POWER_INFO_V2:
+       {
+               if (*task_info_count < TASK_POWER_INFO_V2_COUNT) {
+                       error = KERN_INVALID_ARGUMENT;
+                       break;
+               }
+               task_power_info_v2_t tpiv2 = (task_power_info_v2_t) task_info_out;
+               task_power_info_locked(task, &tpiv2->cpu_energy, &tpiv2->gpu_energy);
                 break;
         }
  
@@ -2748,6 +3117,49 @@ task_info(
                 break;
         }
  
+       case TASK_WAIT_STATE_INFO:
+       {
+               /* 
+                * Deprecated flavor. Currently allowing some results until all users 
+                * stop calling it. The results may not be accurate.
+         */
+               task_wait_state_info_t  wait_state_info;
+               uint64_t total_sfi_ledger_val = 0;
+
+               if (*task_info_count < TASK_WAIT_STATE_INFO_COUNT) {
+                  error = KERN_INVALID_ARGUMENT;
+                  break;
+               }
+
+               wait_state_info = (task_wait_state_info_t) task_info_out;
+
+               wait_state_info->total_wait_state_time = 0;
+               bzero(wait_state_info->_reserved, sizeof(wait_state_info->_reserved));
+
+               int i, prev_lentry = -1;
+               int64_t  val_credit, val_debit;
+
+               for (i = 0; i < MAX_SFI_CLASS_ID; i++){
+                       val_credit =0;
+                       /*
+                        * checking with prev_lentry != entry ensures adjacent classes 
+                        * which share the same ledger do not add wait times twice.
+                        * Note: Use ledger() call to get data for each individual sfi class.
+                        */
+                       if (prev_lentry != task_ledgers.sfi_wait_times[i] &&
+                               KERN_SUCCESS == ledger_get_entries(task->ledger, 
+                                               task_ledgers.sfi_wait_times[i], &val_credit, &val_debit)) {
+                               total_sfi_ledger_val += val_credit;
+                       }
+                       prev_lentry = task_ledgers.sfi_wait_times[i];
+               }
+
+               wait_state_info->total_wait_sfi_state_time = total_sfi_ledger_val; 
+               *task_info_count = TASK_WAIT_STATE_INFO_COUNT;
+
+               break;
+       }
+
         default:
                 error = KERN_INVALID_ARGUMENT;
         }
@@ -2765,7 +3177,8 @@ task_info(
  void
  task_power_info_locked(
         task_t                  task,
-       task_power_info_t       info)
+       task_power_info_t       info,
+       gpu_energy_data_t       ginfo)
  {
         thread_t                thread;
         ledger_amount_t         tmp;
@@ -2783,6 +3196,10 @@ task_power_info_locked(
         info->total_user = task->total_user_time;
         info->total_system = task->total_system_time;
  
+       if (ginfo) {
+               ginfo->task_gpu_utilisation = task->task_gpu_ns;
+       }
+
         queue_iterate(&task->threads, thread, thread_t, task_threads) {
                 uint64_t        tval;
                 spl_t           x;
@@ -2807,11 +3224,43 @@ task_power_info_locked(
                         info->total_user += tval;
                 }
  
+               if (ginfo) {
+                       ginfo->task_gpu_utilisation += ml_gpu_stat(thread);
+               }
                 thread_unlock(thread);
                 splx(x);
         }
  }
  
+/* 
+ *     task_gpu_utilisation
+ *
+ *     Returns the total gpu time used by the all the threads of the task
+ *  (both dead and alive)
+ */
+uint64_t
+task_gpu_utilisation(
+       task_t  task)
+{
+       uint64_t gpu_time = 0;
+       thread_t thread;
+
+       task_lock(task);
+       gpu_time += task->task_gpu_ns;
+
+       queue_iterate(&task->threads, thread, thread_t, task_threads) {
+               spl_t x;
+               x = splsched();
+               thread_lock(thread);
+               gpu_time += ml_gpu_stat(thread);
+               thread_unlock(thread);
+               splx(x);
+       }
+
+       task_unlock(task);
+       return gpu_time;
+}
+
  kern_return_t
  task_purgable_info(
         task_t                  task,
@@ -3035,26 +3484,6 @@ task_set_policy(
         return(KERN_FAILURE);
  }
  
-#if    FAST_TAS
-kern_return_t
-task_set_ras_pc(
-       task_t          task,
-       vm_offset_t     pc,
-       vm_offset_t     endpc)
-{
-       extern int fast_tas_debug;
- 
-       if (fast_tas_debug) {
-               printf("task 0x%x: setting fast_tas to [0x%x, 0x%x]\n",
-                      task, pc, endpc);
-       }
-       task_lock(task);
-       task->fast_tas_base = pc;
-       task->fast_tas_end =  endpc;
-       task_unlock(task);
-       return KERN_SUCCESS;
-} 
-#else  /* FAST_TAS */
  kern_return_t
  task_set_ras_pc(
         __unused task_t task,
@@ -3063,7 +3492,6 @@ task_set_ras_pc(
  {
         return KERN_FAILURE;
  }
-#endif /* FAST_TAS */
  
  void
  task_synchronizer_destroy_all(task_t task)
@@ -3156,6 +3584,15 @@ THIS_PROCESS_CROSSED_HIGH_WATERMARK__SENDING_EXC_RESOURCE(int max_footprint_mb)
  
  #ifdef MACH_BSD
         pid = proc_selfpid();
+
+       if (pid == 1) {
+               /*
+                * Cannot have ReportCrash analyzing
+                * a suspended initproc.
+                */
+               return;
+       }
+
         if (task->bsd_info != NULL)
                 procname = proc_name_address(current_task()->bsd_info);
  #endif
@@ -3199,7 +3636,14 @@ THIS_PROCESS_CROSSED_HIGH_WATERMARK__SENDING_EXC_RESOURCE(int max_footprint_mb)
         EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY);
         EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK);
         EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb);
+       
+       /*
+        * Use the _internal_ variant so that no user-space
+        * process can resume our task from under us.
+        */
+       task_suspend_internal(task);
         exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
+       task_resume_internal(task);
  }
  
  /*
@@ -3208,7 +3652,9 @@ THIS_PROCESS_CROSSED_HIGH_WATERMARK__SENDING_EXC_RESOURCE(int max_footprint_mb)
  void
  task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1)
  {
-       ledger_amount_t max_footprint_mb;
+       ledger_amount_t max_footprint, max_footprint_mb;
+       ledger_amount_t footprint_after_purge;
+       task_t task;
  
         if (warning == LEDGER_WARNING_DIPPED_BELOW) {
                 /*
@@ -3217,14 +3663,36 @@ task_footprint_exceeded(int warning, __unused const void *param0, __unused const
                 return;
         }
  
-       ledger_get_limit(current_task()->ledger, task_ledgers.phys_footprint, &max_footprint_mb);
-       max_footprint_mb >>= 20;
+       task = current_task();
+
+       ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &max_footprint);
+       max_footprint_mb = max_footprint >> 20;
+
+       /*
+        * Try and purge all "volatile" memory in that task first.
+        */
+       (void) task_purge_volatile_memory(task);
+       /* are we still over the limit ? */
+       ledger_get_balance(task->ledger,
+                          task_ledgers.phys_footprint,
+                          &footprint_after_purge);
+       if ((!warning &&
+            footprint_after_purge <= max_footprint) ||
+           (warning &&
+            footprint_after_purge <= ((max_footprint *
+                                       PHYS_FOOTPRINT_WARNING_LEVEL) / 100))) {
+               /* all better now */
+               ledger_reset_callback_state(task->ledger,
+                                           task_ledgers.phys_footprint);
+               return;
+       }
+       /* still over the limit after purging... */
  
         /*
          * If this an actual violation (not a warning),
          * generate a non-fatal high watermark EXC_RESOURCE.
          */
-       if ((warning == 0) && (current_task()->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION)) {
+       if ((warning == 0) && (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION)) {
                 THIS_PROCESS_CROSSED_HIGH_WATERMARK__SENDING_EXC_RESOURCE((int)max_footprint_mb);
         }
  
@@ -3382,68 +3850,6 @@ task_findtid(task_t task, uint64_t tid)
         return(THREAD_NULL);
  }
  
-
-#if CONFIG_MACF_MACH
-/*
- * Protect 2 task labels against modification by adding a reference on
- * both label handles. The locks do not actually have to be held while
- * using the labels as only labels with one reference can be modified
- * in place.
- */
-
-void
-tasklabel_lock2(
-       task_t a,
-       task_t b)
-{
-       labelh_reference(a->label);
-       labelh_reference(b->label);
-}
-
-void
-tasklabel_unlock2(
-       task_t a,
-       task_t b)
-{
-       labelh_release(a->label);
-       labelh_release(b->label);
-}
-
-void
-mac_task_label_update_internal(
-       struct label    *pl,
-       struct task     *task)
-{
-
-       tasklabel_lock(task);
-       task->label = labelh_modify(task->label);
-       mac_task_label_update(pl, &task->maclabel);
-       tasklabel_unlock(task);
-       ip_lock(task->itk_self);
-       mac_port_label_update_cred(pl, &task->itk_self->ip_label);
-       ip_unlock(task->itk_self);
-}
-
-void
-mac_task_label_modify(
-       struct task     *task,
-       void            *arg,
-       void (*f)       (struct label *l, void *arg))
-{
-
-       tasklabel_lock(task);
-       task->label = labelh_modify(task->label);
-       (*f)(&task->maclabel, arg);
-       tasklabel_unlock(task);
-}
-
-struct label *
-mac_task_get_label(struct task *task)
-{
-       return (&task->maclabel);
-}
-#endif
-
  /*
   * Control the CPU usage monitor for a task.
   */
@@ -3624,3 +4030,91 @@ THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE(void)
                 task_terminate_internal(task);
         }
  }
+
+kern_return_t
+task_purge_volatile_memory(
+       task_t  task)
+{
+       vm_map_t        map;
+       int             num_object_purged;
+
+       if (task == TASK_NULL)
+               return KERN_INVALID_TASK;
+
+       task_lock(task);
+
+       if (!task->active) {
+               task_unlock(task);
+               return KERN_INVALID_TASK;
+       }
+       map = task->map;
+       if (map == VM_MAP_NULL) {
+               task_unlock(task);
+               return KERN_INVALID_TASK;
+       }
+       vm_map_reference(task->map);
+
+       task_unlock(task);
+
+       num_object_purged = vm_map_purge(map);
+       vm_map_deallocate(map);
+
+       return KERN_SUCCESS;
+}
+
+/* Placeholders for the task set/get voucher interfaces */
+kern_return_t 
+task_get_mach_voucher(
+       task_t                  task,
+       mach_voucher_selector_t __unused which,
+       ipc_voucher_t           *voucher)
+{
+       if (TASK_NULL == task)
+               return KERN_INVALID_TASK;
+
+       *voucher = NULL;
+       return KERN_SUCCESS;
+}
+
+kern_return_t 
+task_set_mach_voucher(
+       task_t                  task,
+       ipc_voucher_t           __unused voucher)
+{
+       if (TASK_NULL == task)
+               return KERN_INVALID_TASK;
+
+       return KERN_SUCCESS;
+}
+
+kern_return_t
+task_swap_mach_voucher(
+       task_t                  task,
+       ipc_voucher_t           new_voucher,
+       ipc_voucher_t           *in_out_old_voucher)
+{
+       if (TASK_NULL == task)
+               return KERN_INVALID_TASK;
+
+       *in_out_old_voucher = new_voucher;
+       return KERN_SUCCESS;
+}
+
+void task_set_gpu_denied(task_t task, boolean_t denied)
+{
+       task_lock(task);
+
+       if (denied) {
+               task->t_flags |= TF_GPU_DENIED;
+       } else {
+               task->t_flags &= ~TF_GPU_DENIED;
+       }
+
+       task_unlock(task);
+}
+
+boolean_t task_is_gpu_denied(task_t task)
+{
+       /* We don't need the lock to read this flag */
+       return (task->t_flags & TF_GPU_DENIED) ? TRUE : FALSE;
+}