X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/15129b1c8dbb3650c63b70adb1cad9af601c6c17..c18c124eaa464aaaa5549e99e5a70fc9cbb50944:/osfmk/kern/task.c

diff --git a/osfmk/kern/task.c b/osfmk/kern/task.c
index aa5d2b360..999dfefcf 100644
--- a/osfmk/kern/task.c
+++ b/osfmk/kern/task.c
@@ -86,9 +86,6 @@
  * Copyright (c) 2005 SPARTA, Inc.
  */
 
-#include <fast_tas.h>
-#include <platforms.h>
-
 #include <mach/mach_types.h>
 #include <mach/boolean.h>
 #include <mach/host_priv.h>
@@ -98,6 +95,7 @@
 #include <mach/task_info.h>
 #include <mach/task_special_ports.h>
 
+#include <ipc/ipc_importance.h>
 #include <ipc/ipc_types.h>
 #include <ipc/ipc_space.h>
 #include <ipc/ipc_entry.h>
@@ -108,6 +106,7 @@
 #include <kern/misc_protos.h>
 #include <kern/task.h>
 #include <kern/thread.h>
+#include <kern/coalition.h>
 #include <kern/zalloc.h>
 #include <kern/kalloc.h>
 #include <kern/processor.h>
@@ -140,14 +139,9 @@
 #include <mach/mach_host_server.h>
 #include <mach/host_security_server.h>
 #include <mach/mach_port_server.h>
-#include <mach/security_server.h>
 
 #include <vm/vm_shared_region.h>
 
-#if CONFIG_MACF_MACH
-#include <security/mac_mach_internal.h>
-#endif
-
 #if CONFIG_COUNTERS
 #include <pmc/pmc.h>
 #endif /* CONFIG_COUNTERS */
@@ -155,6 +149,18 @@
 #include <libkern/OSDebug.h>
 #include <libkern/OSAtomic.h>
 
+#if CONFIG_ATM
+#include <atm/atm_internal.h>
+#endif
+
+#include <kern/sfi.h>
+
+#if KPERF
+extern int kpc_force_all_ctrs(task_t, int);
+#endif
+
+uint32_t qos_override_mode;
+
 task_t			kernel_task;
 zone_t			task_zone;
 lck_attr_t      task_lck_attr;
@@ -171,8 +177,16 @@ zinfo_usage_store_t tasks_tkm_shared;
 expired_task_statistics_t		dead_task_statistics;
 lck_spin_t		dead_task_statistics_lock;
 
-static ledger_template_t task_ledger_template = NULL;
-struct _task_ledger_indices task_ledgers __attribute__((used)) = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1};
+ledger_template_t task_ledger_template = NULL;
+
+struct _task_ledger_indices task_ledgers __attribute__((used)) =
+	{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	 { 0 /* initialized at runtime */},
+#ifdef CONFIG_BANK
+	 -1, -1,
+#endif
+	};
+
 void init_task_ledgers(void);
 void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1);
 void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1);
@@ -207,6 +221,10 @@ int task_wakeups_monitor_ustackshots_trigger_pct; /* Percentage. Level at which
 int disable_exc_resource; /* Global override to supress EXC_RESOURCE for resource monitor violations. */
 
 int max_task_footprint = 0; /* Per-task limit on physical memory consumption */
+#if MACH_ASSERT
+int pmap_ledgers_panic = 1;
+#endif /* MACH_ASSERT */
+
 int task_max = CONFIG_TASK_MAX; /* Max number of tasks */
 
 int hwm_user_cores = 0; /* high watermark violations generate user core files */
@@ -220,6 +238,9 @@ extern char	*proc_name_address(struct proc *p);
 extern void	memorystatus_on_ledger_footprint_exceeded(int warning, const int max_footprint_mb);
 #endif
 #endif
+#if MACH_ASSERT
+extern int pmap_ledgers_panic;
+#endif /* MACH_ASSERT */
 
 /* Forwards */
 
@@ -254,9 +275,9 @@ task_set_64bit(
 		task_t task,
 		boolean_t is64bit)
 {
-#if defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
 	thread_t thread;
-#endif /* defined(__i386__) || defined(__x86_64__) */
+#endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */
 
 	task_lock(task);
 
@@ -276,13 +297,13 @@ task_set_64bit(
 	 * state with respect to its task's 64-bitness.
 	 */
 
-#if defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)	
 	queue_iterate(&task->threads, thread, thread_t, task_threads) {
 		thread_mtx_lock(thread);
 		machine_thread_switch_addrmode(thread);
 		thread_mtx_unlock(thread);
 	}
-#endif /* defined(__i386__) || defined(__x86_64__) */	
+#endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */	
 
 out:
 	task_unlock(task);
@@ -298,6 +319,18 @@ task_set_dyld_info(task_t task, mach_vm_address_t addr, mach_vm_size_t size)
 	task_unlock(task);
 }
 
+void
+task_atm_reset(__unused task_t task) {
+
+#if CONFIG_ATM
+	if (task->atm_context != NULL) {
+		 atm_task_descriptor_destroy(task->atm_context);
+		 task->atm_context = NULL;
+	}
+#endif
+
+}
+
 #if TASK_REFERENCE_LEAK_DEBUG
 #include <kern/btlog.h>
 
@@ -365,20 +398,21 @@ task_init(void)
 	zone_change(task_zone, Z_NOENCRYPT, TRUE);
 
 	/*
-	 * Configure per-task memory limit. The boot arg takes precedence over the 
-	 * device tree.
+	 * Configure per-task memory limit.
+	 * The boot-arg is interpreted as Megabytes,
+	 * and takes precedence over the device tree.
+	 * Setting the boot-arg to 0 disables task limits.
 	 */
 	if (!PE_parse_boot_argn("max_task_pmem", &max_task_footprint,
 			sizeof (max_task_footprint))) {
-		max_task_footprint = 0;
-	}
-
-	if (max_task_footprint == 0) {
 		/*
 		 * No limit was found in boot-args, so go look in the device tree.
 		 */
 		if (!PE_get_default("kern.max_task_pmem", &max_task_footprint,
 				sizeof(max_task_footprint))) {
+			/*
+			 * No limit was found in device tree.
+			 */
 			max_task_footprint = 0;
 		}
 	}
@@ -398,11 +432,22 @@ task_init(void)
 #endif
 	}
 
+#if MACH_ASSERT
+	PE_parse_boot_argn("pmap_ledgers_panic", &pmap_ledgers_panic,
+			  sizeof (pmap_ledgers_panic));
+#endif /* MACH_ASSERT */
+
 	if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores,
 			sizeof (hwm_user_cores))) {
 		hwm_user_cores = 0;
 	}
 
+	if (PE_parse_boot_argn("qos_override_mode", &qos_override_mode, sizeof(qos_override_mode))) {
+		printf("QOS override mode: 0x%08x\n", qos_override_mode);
+	} else {
+		qos_override_mode = QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE;
+	}
+
 	proc_init_cpumon_params();
 
 	if (!PE_parse_boot_argn("task_wakeups_monitor_rate", &task_wakeups_monitor_rate, sizeof (task_wakeups_monitor_rate))) {
@@ -423,7 +468,16 @@ task_init(void)
 		disable_exc_resource = 0;
 	}
 
+/*
+ * If we have coalitions, coalition_init() will call init_task_ledgers() as it
+ * sets up the ledgers for the default coalition. If we don't have coalitions,
+ * then we have to call it now.
+ */
+#if CONFIG_COALITIONS
+	assert(task_ledger_template);
+#else /* CONFIG_COALITIONS */
 	init_task_ledgers();
+#endif /* CONFIG_COALITIONS */
 
 #if TASK_REFERENCE_LEAK_DEBUG
 	simple_lock_init(&task_ref_lock, 0);
@@ -439,15 +493,16 @@ task_init(void)
 	 * Create the kernel task as the first task.
 	 */
 #ifdef __LP64__
-	if (task_create_internal(TASK_NULL, FALSE, TRUE, &kernel_task) != KERN_SUCCESS)
+	if (task_create_internal(TASK_NULL, COALITION_NULL, FALSE, TRUE, &kernel_task) != KERN_SUCCESS)
 #else
-	if (task_create_internal(TASK_NULL, FALSE, FALSE, &kernel_task) != KERN_SUCCESS)
+	if (task_create_internal(TASK_NULL, COALITION_NULL, FALSE, FALSE, &kernel_task) != KERN_SUCCESS)
 #endif
 		panic("task_init\n");
 
 	vm_map_deallocate(kernel_task->map);
 	kernel_task->map = kernel_map;
 	lck_spin_init(&dead_task_statistics_lock, &task_lck_grp, &task_lck_attr);
+
 }
 
 /*
@@ -512,18 +567,28 @@ host_security_create_task_token(
  *
  * phys_footprint
  *   Physical footprint: This is the sum of:
- *     + phys_mem [task's resident memory]
- *     + phys_compressed
- *     + iokit_mem
+ *     + internal
+ *     + internal_compressed
+ *     + iokit_mapped
+ *     - alternate_accounting
  *
- * iokit_mem
- *   IOKit mappings: The total size of all IOKit mappings in this task [regardless of clean/dirty state].
- * 
- * phys_compressed
- *   Physical compressed: Amount of this task's resident memory which is held by the compressor.
+ * internal
+ *   The task's anonymous memory, which on iOS is always resident.
+ *
+ * internal_compressed
+ *   Amount of this task's internal memory which is held by the compressor.
  *   Such memory is no longer actually resident for the task [i.e., resident in its pmap],
  *   and could be either decompressed back into memory, or paged out to storage, depending
  *   on our implementation.
+ *
+ * iokit_mapped
+ *   IOKit mappings: The total size of all IOKit mappings in this task, regardless of
+     clean/dirty or internal/external state].
+ *
+ * alternate_accounting
+ *   The number of internal dirty pages which are part of IOKit mappings. By definition, these pages
+ *   are counted in both internal *and* iokit_mapped, so we must subtract them from the total to avoid
+ *   double counting.
  */
 void
 init_task_ledgers(void)
@@ -545,26 +610,94 @@ init_task_ledgers(void)
 	    "bytes");
 	task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem",
 	    "bytes");
-	task_ledgers.iokit_mem = ledger_entry_add(t, "iokit_mem", "mappings",
+	task_ledgers.internal = ledger_entry_add(t, "internal", "physmem",
+	    "bytes");
+	task_ledgers.iokit_mapped = ledger_entry_add(t, "iokit_mapped", "mappings",
+ 	    "bytes");
+	task_ledgers.alternate_accounting = ledger_entry_add(t, "alternate_accounting", "physmem",
  	    "bytes");
 	task_ledgers.phys_footprint = ledger_entry_add(t, "phys_footprint", "physmem",
  	    "bytes");
-	task_ledgers.phys_compressed = ledger_entry_add(t, "phys_compressed", "physmem",
+	task_ledgers.internal_compressed = ledger_entry_add(t, "internal_compressed", "physmem",
  	    "bytes");
+	task_ledgers.purgeable_volatile = ledger_entry_add(t, "purgeable_volatile", "physmem", "bytes");
+	task_ledgers.purgeable_nonvolatile = ledger_entry_add(t, "purgeable_nonvolatile", "physmem", "bytes");
+	task_ledgers.purgeable_volatile_compressed = ledger_entry_add(t, "purgeable_volatile_compress", "physmem", "bytes");
+	task_ledgers.purgeable_nonvolatile_compressed = ledger_entry_add(t, "purgeable_nonvolatile_compress", "physmem", "bytes");
 	task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power",
  	    "count");
 	task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power",
  	    "count");
+	
+	sfi_class_id_t class_id, ledger_alias;
+	for (class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
+		task_ledgers.sfi_wait_times[class_id] = -1;
+	}
+
+	/* don't account for UNSPECIFIED */
+	for (class_id = SFI_CLASS_UNSPECIFIED + 1; class_id < MAX_SFI_CLASS_ID; class_id++) {
+		ledger_alias = sfi_get_ledger_alias_for_class(class_id);
+		if (ledger_alias != SFI_CLASS_UNSPECIFIED) {
+			/* Check to see if alias has been registered yet */
+			if (task_ledgers.sfi_wait_times[ledger_alias] != -1) {
+				task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias];
+			} else {
+				/* Otherwise, initialize it first */
+				task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias] = sfi_ledger_entry_add(t, ledger_alias);
+			}
+		} else {
+			task_ledgers.sfi_wait_times[class_id] = sfi_ledger_entry_add(t, class_id);
+		}
 
-	if ((task_ledgers.cpu_time < 0) || (task_ledgers.tkm_private < 0) ||
-	    (task_ledgers.tkm_shared < 0) || (task_ledgers.phys_mem < 0) ||
-	    (task_ledgers.wired_mem < 0) || (task_ledgers.iokit_mem < 0) ||
-	    (task_ledgers.phys_footprint < 0) || (task_ledgers.phys_compressed < 0) ||
-	    (task_ledgers.platform_idle_wakeups < 0) || (task_ledgers.interrupt_wakeups < 0)) {
+		if (task_ledgers.sfi_wait_times[class_id] < 0) {
+			panic("couldn't create entries for task ledger template for SFI class 0x%x", class_id);
+		}
+	}
+
+#ifdef CONFIG_BANK
+	task_ledgers.cpu_time_billed_to_me = ledger_entry_add(t, "cpu_time_billed_to_me", "sched", "ns");
+	task_ledgers.cpu_time_billed_to_others = ledger_entry_add(t, "cpu_time_billed_to_others", "sched", "ns");
+#endif
+
+	assert(task_ledgers.sfi_wait_times[MAX_SFI_CLASS_ID -1] != -1);
+
+	if ((task_ledgers.cpu_time < 0) ||
+	    (task_ledgers.tkm_private < 0) ||
+	    (task_ledgers.tkm_shared < 0) ||
+	    (task_ledgers.phys_mem < 0) ||
+	    (task_ledgers.wired_mem < 0) ||
+	    (task_ledgers.internal < 0) ||
+	    (task_ledgers.iokit_mapped < 0) ||
+	    (task_ledgers.alternate_accounting < 0) ||
+	    (task_ledgers.phys_footprint < 0) ||
+	    (task_ledgers.internal_compressed < 0) ||
+	    (task_ledgers.purgeable_volatile < 0) ||
+	    (task_ledgers.purgeable_nonvolatile < 0) ||
+	    (task_ledgers.purgeable_volatile_compressed < 0) ||
+	    (task_ledgers.purgeable_nonvolatile_compressed < 0) ||
+	    (task_ledgers.platform_idle_wakeups < 0) ||
+	    (task_ledgers.interrupt_wakeups < 0)
+#ifdef CONFIG_BANK
+	    || (task_ledgers.cpu_time_billed_to_me < 0) || (task_ledgers.cpu_time_billed_to_others < 0)
+#endif
+	    ) {
 		panic("couldn't create entries for task ledger template");
 	}
 
 	ledger_track_maximum(t, task_ledgers.phys_footprint, 60);
+#if MACH_ASSERT
+	if (pmap_ledgers_panic) {
+		ledger_panic_on_negative(t, task_ledgers.phys_footprint);
+		ledger_panic_on_negative(t, task_ledgers.internal);
+		ledger_panic_on_negative(t, task_ledgers.internal_compressed);
+		ledger_panic_on_negative(t, task_ledgers.iokit_mapped);
+		ledger_panic_on_negative(t, task_ledgers.alternate_accounting);
+		ledger_panic_on_negative(t, task_ledgers.purgeable_volatile);
+		ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile);
+		ledger_panic_on_negative(t, task_ledgers.purgeable_volatile_compressed);
+		ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile_compressed);
+	}
+#endif /* MACH_ASSERT */
 
 #if CONFIG_JETSAM
 	ledger_set_callback(t, task_ledgers.phys_footprint, task_footprint_exceeded, NULL, NULL);
@@ -579,6 +712,7 @@ init_task_ledgers(void)
 kern_return_t
 task_create_internal(
 	task_t		parent_task,
+	coalition_t	parent_coalition __unused,
 	boolean_t	inherit_memory,
 	boolean_t	is_64bit,
 	task_t		*child_task)		/* OUT */
@@ -605,6 +739,10 @@ task_create_internal(
 
 	new_task->ledger = ledger;
 
+#if defined(CONFIG_SCHED_MULTIQ)
+	new_task->sched_group = sched_group_create();
+#endif
+
 	/* if inherit_memory is true, parent_task MUST not be NULL */
 	if (inherit_memory)
 		new_task->map = vm_map_fork(ledger, parent_task->map);
@@ -639,6 +777,13 @@ task_create_internal(
 	new_task->t_flags = 0;
 	new_task->importance = 0;
 
+#if CONFIG_ATM
+	new_task->atm_context = NULL;
+#endif
+#if CONFIG_BANK
+	new_task->bank_context = NULL;
+#endif
+
 	zinfo_task_init(new_task);
 
 #ifdef MACH_BSD
@@ -666,11 +811,6 @@ task_create_internal(
 	queue_init(&new_task->semaphore_list);
 	new_task->semaphores_owned = 0;
 
-#if CONFIG_MACF_MACH
-	new_task->label = labelh_new(1);
-	mac_task_label_init (&new_task->maclabel);
-#endif
-
 	ipc_task_init(new_task, parent_task);
 
 	new_task->total_user_time = 0;
@@ -698,6 +838,10 @@ task_create_internal(
 	new_task->suspends_outstanding = 0;
 #endif
 
+#if HYPERVISOR
+	new_task->hv_task_target = NULL;
+#endif /* HYPERVISOR */
+
 
 	new_task->low_mem_notified_warn = 0;
 	new_task->low_mem_notified_critical = 0;
@@ -705,11 +849,7 @@ task_create_internal(
 	new_task->purged_memory_critical = 0;
 	new_task->mem_notify_reserved = 0;
 #if IMPORTANCE_INHERITANCE
-	new_task->imp_receiver = 0;
-	new_task->imp_donor = 0;
-	new_task->imp_reserved = 0;
-	new_task->task_imp_assertcnt = 0;
-	new_task->task_imp_externcnt = 0;
+	new_task->task_imp_base = NULL;
 #endif /* IMPORTANCE_INHERITANCE */
 
 #if	defined(__x86_64__)	
@@ -743,11 +883,36 @@ task_create_internal(
 		new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task);
 
 #if IMPORTANCE_INHERITANCE
-		new_task->imp_donor = parent_task->imp_donor;
+		ipc_importance_task_t new_task_imp = IIT_NULL;
+
+		if (task_is_marked_importance_donor(parent_task)) {
+			new_task_imp = ipc_importance_for_task(new_task, FALSE);
+			assert(IIT_NULL != new_task_imp);
+			ipc_importance_task_mark_donor(new_task_imp, TRUE);
+		}
 		/* Embedded doesn't want this to inherit */
-		new_task->imp_receiver = parent_task->imp_receiver;
+		if (task_is_marked_importance_receiver(parent_task)) {
+			if (IIT_NULL == new_task_imp)
+				new_task_imp = ipc_importance_for_task(new_task, FALSE);
+			assert(IIT_NULL != new_task_imp);
+			ipc_importance_task_mark_receiver(new_task_imp, TRUE);
+		}
+		if (task_is_marked_importance_denap_receiver(parent_task)) {
+			if (IIT_NULL == new_task_imp)
+				new_task_imp = ipc_importance_for_task(new_task, FALSE);
+			assert(IIT_NULL != new_task_imp);
+			ipc_importance_task_mark_denap_receiver(new_task_imp, TRUE);
+		}
+		
+		if (IIT_NULL != new_task_imp) {
+			assert(new_task->task_imp_base == new_task_imp);
+			ipc_importance_task_release(new_task_imp);
+		}
 #endif /* IMPORTANCE_INHERITANCE */
 
+		new_task->priority = BASEPRI_DEFAULT;
+		new_task->max_priority = MAXPRI_USER;
+
 		new_task->requested_policy.t_apptype     = parent_task->requested_policy.t_apptype;
 
 		new_task->requested_policy.int_darwinbg  = parent_task->requested_policy.int_darwinbg;
@@ -758,6 +923,7 @@ task_create_internal(
 		new_task->requested_policy.ext_iopassive = parent_task->requested_policy.ext_iopassive;
 		new_task->requested_policy.bg_iotier     = parent_task->requested_policy.bg_iotier;
 		new_task->requested_policy.terminated    = parent_task->requested_policy.terminated;
+		new_task->requested_policy.t_qos_clamp   = parent_task->requested_policy.t_qos_clamp;
 
 		task_policy_create(new_task, parent_task->requested_policy.t_boosted);
 	} else {
@@ -771,21 +937,42 @@ task_create_internal(
 		new_task->all_image_info_size = (mach_vm_size_t)0;
 
 		new_task->pset_hint = PROCESSOR_SET_NULL;
+
+		if (kernel_task == TASK_NULL) {
+			new_task->priority = BASEPRI_KERNEL;
+			new_task->max_priority = MAXPRI_KERNEL;
+		} else {
+			new_task->priority = BASEPRI_DEFAULT;
+			new_task->max_priority = MAXPRI_USER;
+		}
 	}
 
-	if (kernel_task == TASK_NULL) {
-		new_task->priority = BASEPRI_KERNEL;
-		new_task->max_priority = MAXPRI_KERNEL;
-	} else if (proc_get_effective_task_policy(new_task, TASK_POLICY_LOWPRI_CPU)) {
-		new_task->priority = MAXPRI_THROTTLE;
-		new_task->max_priority = MAXPRI_THROTTLE;
+	new_task->coalition = COALITION_NULL;
+
+#if CONFIG_COALITIONS
+	if (parent_coalition) {
+		coalition_adopt_task(parent_coalition, new_task);
+	} else if (parent_task && parent_task->coalition) {
+		coalition_adopt_task(parent_task->coalition, new_task);
 	} else {
-		new_task->priority = BASEPRI_DEFAULT;
-		new_task->max_priority = MAXPRI_USER;
+		coalition_default_adopt_task(new_task);
+	}
+
+	if (new_task->coalition == COALITION_NULL) {
+		panic("created task is not a member of any coalition");
 	}
+#endif /* CONFIG_COALITIONS */
+
+	/* Allocate I/O Statistics */
+	new_task->task_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info));
+	assert(new_task->task_io_stats != NULL);
+	bzero(new_task->task_io_stats, sizeof(struct io_stat_info));
+
+	bzero(&(new_task->cpu_time_qos_stats), sizeof(struct _cpu_time_qos_stats));
 
 	bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics));
 	new_task->task_timer_wakeups_bin_1 = new_task->task_timer_wakeups_bin_2 = 0;
+	new_task->task_gpu_ns = 0;
 	lck_mtx_lock(&tasks_threads_lock);
 	queue_enter(&tasks, new_task, task_t, tasks);
 	tasks_count++;
@@ -795,6 +982,9 @@ task_create_internal(
 		new_task->priv_flags |= (parent_task->priv_flags&VM_BACKING_STORE_PRIV);
 
 	new_task->task_volatile_objects = 0;
+	new_task->task_nonvolatile_objects = 0;
+	new_task->task_purgeable_disowning = FALSE;
+	new_task->task_purgeable_disowned = FALSE;
 
 	ipc_task_enable(new_task);
 
@@ -802,6 +992,8 @@ task_create_internal(
 	return(KERN_SUCCESS);
 }
 
+int task_dropped_imp_count = 0;
+
 /*
  *	task_deallocate:
  *
@@ -812,18 +1004,57 @@ task_deallocate(
 	task_t		task)
 {
 	ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups;
+	uint32_t refs;
 
 	if (task == TASK_NULL)
 	    return;
 
-	if (task_deallocate_internal(task) > 0)
+	refs = task_deallocate_internal(task);
+
+#if IMPORTANCE_INHERITANCE
+	if (refs > 1)
 		return;
+	
+	if (refs == 1) {
+		/*
+		 * If last ref potentially comes from the task's importance,
+		 * disconnect it.  But more task refs may be added before
+		 * that completes, so wait for the reference to go to zero
+		 * naturually (it may happen on a recursive task_deallocate()
+		 * from the ipc_importance_disconnect_task() call).
+		 */
+		if (IIT_NULL != task->task_imp_base)
+			ipc_importance_disconnect_task(task);
+		return;
+	}
+#else
+	if (refs > 0)
+		return;
+#endif /* IMPORTANCE_INHERITANCE */
 
 	lck_mtx_lock(&tasks_threads_lock);
 	queue_remove(&terminated_tasks, task, task_t, tasks);
 	terminated_tasks_count--;
 	lck_mtx_unlock(&tasks_threads_lock);
 
+	/*
+	 * remove the reference on atm descriptor
+	 */
+	 task_atm_reset(task);
+
+#if CONFIG_BANK
+	/*
+	 * remove the reference on bank context
+	 */
+	if (task->bank_context != NULL) {
+		bank_task_destroy(task->bank_context);
+		task->bank_context = NULL;
+	}
+#endif
+
+	if (task->task_io_stats)
+		kfree(task->task_io_stats, sizeof(struct io_stat_info));
+
 	/*
 	 *	Give the machine dependent code a chance
 	 *	to perform cleanup before ripping apart
@@ -836,6 +1067,26 @@ task_deallocate(
 	if (task->affinity_space)
 		task_affinity_deallocate(task);
 
+#if MACH_ASSERT
+	if (task->ledger != NULL &&
+	    task->map != NULL &&
+	    task->map->pmap != NULL &&
+	    task->map->pmap->ledger != NULL) {
+		assert(task->ledger == task->map->pmap->ledger);
+	}
+#endif /* MACH_ASSERT */
+
+	vm_purgeable_disown(task);
+	assert(task->task_purgeable_disowned);
+	if (task->task_volatile_objects != 0 ||
+	    task->task_nonvolatile_objects != 0) {
+		panic("task_deallocate(%p): "
+		      "volatile_objects=%d nonvolatile_objects=%d\n",
+		      task,
+		      task->task_volatile_objects,
+		      task->task_nonvolatile_objects);
+	}
+
 	vm_map_deallocate(task->map);
 	is_release(task->itk_space);
 
@@ -844,6 +1095,10 @@ task_deallocate(
 	ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
 	                   &platform_idle_wakeups, &debit);
 
+#if defined(CONFIG_SCHED_MULTIQ)
+	sched_group_destroy(task->sched_group);
+#endif
+
 	/* Accumulate statistics for dead tasks */
 	lck_spin_lock(&dead_task_statistics_lock);
 	dead_task_statistics.total_user_time += task->total_user_time;
@@ -858,10 +1113,6 @@ task_deallocate(
 	lck_spin_unlock(&dead_task_statistics_lock);
 	lck_mtx_destroy(&task->lock, &task_lck_grp);
 
-#if CONFIG_MACF_MACH
-	labelh_release(task->label);
-#endif
-
 	if (!ledger_get_entries(task->ledger, task_ledgers.tkm_private, &credit,
 	    &debit)) {
 		OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc);
@@ -879,15 +1130,14 @@ task_deallocate(
 	btlog_remove_entries_for_element(task_ref_btlog, task);
 #endif
 
-	if (task->task_volatile_objects) {
-		/*
-		 * This task still "owns" some volatile VM objects.
-		 * Disown them now to avoid leaving them pointing back at
-		 * an invalid task.
-		 */
-		vm_purgeable_disown(task);
-		assert(task->task_volatile_objects == 0);
+#if CONFIG_COALITIONS
+	if (!task->coalition) {
+		panic("deallocating task was not a member of any coalition");
 	}
+	coalition_release(task->coalition);
+#endif /* CONFIG_COALITIONS */
+
+	task->coalition = COALITION_NULL;
 
 	zfree(task_zone, task);
 }
@@ -936,6 +1186,30 @@ task_terminate(
 	return (task_terminate_internal(task));
 }
 
+#if MACH_ASSERT
+extern int proc_pid(struct proc *);
+extern void proc_name_kdp(task_t t, char *buf, int size);
+#endif /* MACH_ASSERT */
+
+#define VM_MAP_PARTIAL_REAP 0x54  /* 0x150 */
+static void
+__unused task_partial_reap(task_t task, __unused int pid)
+{
+        unsigned int    reclaimed_resident = 0;
+        unsigned int    reclaimed_compressed = 0;
+	uint64_t        task_page_count;
+
+	task_page_count = (get_task_phys_footprint(task) / PAGE_SIZE_64);
+
+	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_START),
+                              pid, task_page_count, 0, 0, 0);
+
+	vm_map_partial_reap(task->map, &reclaimed_resident, &reclaimed_compressed);
+
+        KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_END),
+                              pid, reclaimed_resident, reclaimed_compressed, 0, 0);
+}
+
 kern_return_t
 task_terminate_internal(
 	task_t			task)
@@ -943,6 +1217,7 @@ task_terminate_internal(
 	thread_t			thread, self;
 	task_t				self_task;
 	boolean_t			interrupt_save;
+	int				pid = 0;
 
 	assert(task != kernel_task);
 
@@ -979,14 +1254,6 @@ task_terminate_internal(
 		return (KERN_FAILURE);
 	}
 
-#if MACH_ASSERT
-	if (task->suspends_outstanding != 0) {
-		printf("WARNING: %s (%d) exiting with %d outstanding suspensions\n",
-			proc_name_address(task->bsd_info), proc_pid(task->bsd_info),
-			task->suspends_outstanding);
-	}
-#endif
-
 	if (self_task != task)
 		task_unlock(self_task);
 
@@ -1022,8 +1289,22 @@ task_terminate_internal(
 			thread_terminate_internal(thread);
 	}
 
+#ifdef MACH_BSD
+	if (task->bsd_info != NULL) {
+		pid = proc_pid(task->bsd_info);
+	}
+#endif /* MACH_BSD */
+
 	task_unlock(task);
 
+	proc_set_task_policy(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE,
+	                     TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);
+
+        /* Early object reap phase */
+
+// PR-17045188: Revisit implementation
+//        task_partial_reap(task, pid);
+
 
 	/*
 	 *	Destroy all synchronizers owned by the task.
@@ -1035,8 +1316,19 @@ task_terminate_internal(
 	 */
 	ipc_space_terminate(task->itk_space);
 
-	if (vm_map_has_4GB_pagezero(task->map))
-		vm_map_clear_4GB_pagezero(task->map);
+#if 00
+	/* if some ledgers go negative on tear-down again... */
+	ledger_disable_panic_on_negative(task->map->pmap->ledger,
+					 task_ledgers.phys_footprint);
+	ledger_disable_panic_on_negative(task->map->pmap->ledger,
+					 task_ledgers.internal);
+	ledger_disable_panic_on_negative(task->map->pmap->ledger,
+					 task_ledgers.internal_compressed);
+	ledger_disable_panic_on_negative(task->map->pmap->ledger,
+					 task_ledgers.iokit_mapped);
+	ledger_disable_panic_on_negative(task->map->pmap->ledger,
+					 task_ledgers.alternate_accounting);
+#endif
 
 	/*
 	 * If the current thread is a member of the task
@@ -1054,6 +1346,22 @@ task_terminate_internal(
 	/* release our shared region */
 	vm_shared_region_set(task, NULL);
 
+#if MACH_ASSERT
+	/*
+	 * Identify the pmap's process, in case the pmap ledgers drift
+	 * and we have to report it.
+	 */
+	char procname[17];
+	if (task->bsd_info) {
+		pid = proc_pid(task->bsd_info);
+		proc_name_kdp(task, procname, sizeof (procname));
+	} else {
+		pid = 0;
+		strlcpy(procname, "<unknown>", sizeof (procname));
+	}
+	pmap_set_process(task->map->pmap, pid, procname);
+#endif /* MACH_ASSERT */
+
 	lck_mtx_lock(&tasks_threads_lock);
 	queue_remove(&tasks, task, task_t, tasks);
 	queue_enter(&terminated_tasks, task, task_t, tasks);
@@ -1067,6 +1375,19 @@ task_terminate_internal(
 	 */
 	thread_interrupt_level(interrupt_save);
 
+#if KPERF
+	/* force the task to release all ctrs */
+	if (task->t_chud & TASK_KPC_FORCED_ALL_CTRS)
+		kpc_force_all_ctrs(task, 0);
+#endif
+
+#if CONFIG_COALITIONS
+	/*
+	 * Leave our coalition. (drop activation but not reference)
+	 */
+	coalition_remove_task(task);
+#endif
+
 	/*
 	 * Get rid of the task active reference on itself.
 	 */
@@ -2114,6 +2435,24 @@ host_security_set_task_token(
         return(kr);
 }
 
+kern_return_t
+task_send_trace_memory(
+	task_t        target_task,
+	__unused uint32_t pid,
+	__unused uint64_t uniqueid)
+{
+	kern_return_t kr = KERN_INVALID_ARGUMENT;
+	if (target_task == TASK_NULL)
+		return (KERN_INVALID_ARGUMENT);
+
+#if CONFIG_ATM
+	kr = atm_send_proc_inspect_notification(target_task,
+				  pid,
+				  uniqueid);
+
+#endif
+	return (kr);
+}
 /*
  * This routine was added, pretty much exclusively, for registering the
  * RPC glue vector for in-kernel short circuited tasks.  Rather than
@@ -2133,6 +2472,25 @@ task_set_info(
 		return(KERN_INVALID_ARGUMENT);
 
 	switch (flavor) {
+
+#if CONFIG_ATM
+		case TASK_TRACE_MEMORY_INFO:
+		{
+			if (task_info_count != TASK_TRACE_MEMORY_INFO_COUNT)
+				return (KERN_INVALID_ARGUMENT);
+			
+			assert(task_info_in != NULL);
+			task_trace_memory_info_t mem_info;
+			mem_info = (task_trace_memory_info_t) task_info_in;
+			kern_return_t kr = atm_register_trace_memory(task,
+						mem_info->user_memory_address,
+						mem_info->buffer_size,
+						mem_info->mailbox_array_size);
+			return kr;
+			break;
+		}
+
+#endif
 	    default:
 		return (KERN_INVALID_ARGUMENT);
 	}
@@ -2650,7 +3008,18 @@ task_info(
 			break;
 		}
 
-		task_power_info_locked(task, (task_power_info_t)task_info_out);
+		task_power_info_locked(task, (task_power_info_t)task_info_out, NULL);
+		break;
+	}
+
+	case TASK_POWER_INFO_V2:
+	{
+		if (*task_info_count < TASK_POWER_INFO_V2_COUNT) {
+			error = KERN_INVALID_ARGUMENT;
+			break;
+		}
+		task_power_info_v2_t tpiv2 = (task_power_info_v2_t) task_info_out;
+		task_power_info_locked(task, &tpiv2->cpu_energy, &tpiv2->gpu_energy);
 		break;
 	}
 
@@ -2748,6 +3117,49 @@ task_info(
 		break;
 	}
 
+	case TASK_WAIT_STATE_INFO:
+	{
+		/* 
+		 * Deprecated flavor. Currently allowing some results until all users 
+		 * stop calling it. The results may not be accurate.
+         */
+		task_wait_state_info_t	wait_state_info;
+		uint64_t total_sfi_ledger_val = 0;
+
+		if (*task_info_count < TASK_WAIT_STATE_INFO_COUNT) {
+		   error = KERN_INVALID_ARGUMENT;
+		   break;
+		}
+
+		wait_state_info = (task_wait_state_info_t) task_info_out;
+
+		wait_state_info->total_wait_state_time = 0;
+		bzero(wait_state_info->_reserved, sizeof(wait_state_info->_reserved));
+
+		int i, prev_lentry = -1;
+		int64_t  val_credit, val_debit;
+
+		for (i = 0; i < MAX_SFI_CLASS_ID; i++){
+			val_credit =0;
+			/*
+			 * checking with prev_lentry != entry ensures adjacent classes 
+			 * which share the same ledger do not add wait times twice.
+			 * Note: Use ledger() call to get data for each individual sfi class.
+			 */
+			if (prev_lentry != task_ledgers.sfi_wait_times[i] &&
+				KERN_SUCCESS == ledger_get_entries(task->ledger, 
+				                task_ledgers.sfi_wait_times[i], &val_credit, &val_debit)) {
+				total_sfi_ledger_val += val_credit;
+			}
+			prev_lentry = task_ledgers.sfi_wait_times[i];
+		}
+
+		wait_state_info->total_wait_sfi_state_time = total_sfi_ledger_val; 
+		*task_info_count = TASK_WAIT_STATE_INFO_COUNT;
+
+		break;
+	}
+
 	default:
 		error = KERN_INVALID_ARGUMENT;
 	}
@@ -2765,7 +3177,8 @@ task_info(
 void
 task_power_info_locked(
 	task_t			task,
-	task_power_info_t	info)
+	task_power_info_t	info,
+	gpu_energy_data_t	ginfo)
 {
 	thread_t		thread;
 	ledger_amount_t		tmp;
@@ -2783,6 +3196,10 @@ task_power_info_locked(
 	info->total_user = task->total_user_time;
 	info->total_system = task->total_system_time;
 
+	if (ginfo) {
+		ginfo->task_gpu_utilisation = task->task_gpu_ns;
+	}
+
 	queue_iterate(&task->threads, thread, thread_t, task_threads) {
 		uint64_t	tval;
 		spl_t 		x;
@@ -2807,11 +3224,43 @@ task_power_info_locked(
 			info->total_user += tval;
 		}
 
+		if (ginfo) {
+			ginfo->task_gpu_utilisation += ml_gpu_stat(thread);
+		}
 		thread_unlock(thread);
 		splx(x);
 	}
 }
 
+/* 
+ *	task_gpu_utilisation
+ *
+ *	Returns the total gpu time used by the all the threads of the task
+ *  (both dead and alive)
+ */
+uint64_t
+task_gpu_utilisation(
+	task_t	task)
+{
+	uint64_t gpu_time = 0;
+	thread_t thread;
+
+	task_lock(task);
+	gpu_time += task->task_gpu_ns;
+
+	queue_iterate(&task->threads, thread, thread_t, task_threads) {
+		spl_t x;
+		x = splsched();
+		thread_lock(thread);
+		gpu_time += ml_gpu_stat(thread);
+		thread_unlock(thread);
+		splx(x);
+	}
+
+	task_unlock(task);
+	return gpu_time;
+}
+
 kern_return_t
 task_purgable_info(
 	task_t			task,
@@ -3035,26 +3484,6 @@ task_set_policy(
 	return(KERN_FAILURE);
 }
 
-#if	FAST_TAS
-kern_return_t
-task_set_ras_pc(
- 	task_t		task,
- 	vm_offset_t	pc,
- 	vm_offset_t	endpc)
-{
-	extern int fast_tas_debug;
- 
-	if (fast_tas_debug) {
-		printf("task 0x%x: setting fast_tas to [0x%x, 0x%x]\n",
-		       task, pc, endpc);
-	}
-	task_lock(task);
-	task->fast_tas_base = pc;
-	task->fast_tas_end =  endpc;
-	task_unlock(task);
-	return KERN_SUCCESS;
-} 
-#else	/* FAST_TAS */
 kern_return_t
 task_set_ras_pc(
  	__unused task_t	task,
@@ -3063,7 +3492,6 @@ task_set_ras_pc(
 {
 	return KERN_FAILURE;
 }
-#endif	/* FAST_TAS */
 
 void
 task_synchronizer_destroy_all(task_t task)
@@ -3156,6 +3584,15 @@ THIS_PROCESS_CROSSED_HIGH_WATERMARK__SENDING_EXC_RESOURCE(int max_footprint_mb)
 
 #ifdef MACH_BSD
 	pid = proc_selfpid();
+
+	if (pid == 1) {
+		/*
+		 * Cannot have ReportCrash analyzing
+		 * a suspended initproc.
+		 */
+		return;
+	}
+
 	if (task->bsd_info != NULL)
 		procname = proc_name_address(current_task()->bsd_info);
 #endif
@@ -3199,7 +3636,14 @@ THIS_PROCESS_CROSSED_HIGH_WATERMARK__SENDING_EXC_RESOURCE(int max_footprint_mb)
 	EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY);
 	EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK);
 	EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb);
+	
+	/*
+	 * Use the _internal_ variant so that no user-space
+	 * process can resume our task from under us.
+	 */
+	task_suspend_internal(task);
 	exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
+	task_resume_internal(task);
 }
 
 /*
@@ -3208,7 +3652,9 @@ THIS_PROCESS_CROSSED_HIGH_WATERMARK__SENDING_EXC_RESOURCE(int max_footprint_mb)
 void
 task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1)
 {
-	ledger_amount_t max_footprint_mb;
+	ledger_amount_t max_footprint, max_footprint_mb;
+	ledger_amount_t footprint_after_purge;
+	task_t task;
 
 	if (warning == LEDGER_WARNING_DIPPED_BELOW) {
 		/*
@@ -3217,14 +3663,36 @@ task_footprint_exceeded(int warning, __unused const void *param0, __unused const
 		return;
 	}
 
-	ledger_get_limit(current_task()->ledger, task_ledgers.phys_footprint, &max_footprint_mb);
-	max_footprint_mb >>= 20;
+	task = current_task();
+
+	ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &max_footprint);
+	max_footprint_mb = max_footprint >> 20;
+
+	/*
+	 * Try and purge all "volatile" memory in that task first.
+	 */
+	(void) task_purge_volatile_memory(task);
+	/* are we still over the limit ? */
+	ledger_get_balance(task->ledger,
+			   task_ledgers.phys_footprint,
+			   &footprint_after_purge);
+	if ((!warning &&
+	     footprint_after_purge <= max_footprint) ||
+	    (warning &&
+	     footprint_after_purge <= ((max_footprint *
+					PHYS_FOOTPRINT_WARNING_LEVEL) / 100))) {
+		/* all better now */
+		ledger_reset_callback_state(task->ledger,
+					    task_ledgers.phys_footprint);
+		return;
+	}
+	/* still over the limit after purging... */
 
 	/*
 	 * If this an actual violation (not a warning),
 	 * generate a non-fatal high watermark EXC_RESOURCE.
 	 */
-	if ((warning == 0) && (current_task()->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION)) {
+	if ((warning == 0) && (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION)) {
 		THIS_PROCESS_CROSSED_HIGH_WATERMARK__SENDING_EXC_RESOURCE((int)max_footprint_mb);
 	}
 
@@ -3382,68 +3850,6 @@ task_findtid(task_t task, uint64_t tid)
 	return(THREAD_NULL);
 }
 
-
-#if CONFIG_MACF_MACH
-/*
- * Protect 2 task labels against modification by adding a reference on
- * both label handles. The locks do not actually have to be held while
- * using the labels as only labels with one reference can be modified
- * in place.
- */
-
-void
-tasklabel_lock2(
-	task_t a,
-	task_t b)
-{
-	labelh_reference(a->label);
-	labelh_reference(b->label);
-}
-
-void
-tasklabel_unlock2(
-	task_t a,
-	task_t b)
-{
-	labelh_release(a->label);
-	labelh_release(b->label);
-}
-
-void
-mac_task_label_update_internal(
-	struct label	*pl,
-	struct task	*task)
-{
-
-	tasklabel_lock(task);
-	task->label = labelh_modify(task->label);
-	mac_task_label_update(pl, &task->maclabel);
-	tasklabel_unlock(task);
-	ip_lock(task->itk_self);
-	mac_port_label_update_cred(pl, &task->itk_self->ip_label);
-	ip_unlock(task->itk_self);
-}
-
-void
-mac_task_label_modify(
-	struct task	*task,
-	void		*arg,
-	void (*f)	(struct label *l, void *arg))
-{
-
-	tasklabel_lock(task);
-	task->label = labelh_modify(task->label);
-	(*f)(&task->maclabel, arg);
-	tasklabel_unlock(task);
-}
-
-struct label *
-mac_task_get_label(struct task *task)
-{
-	return (&task->maclabel);
-}
-#endif
-
 /*
  * Control the CPU usage monitor for a task.
  */
@@ -3624,3 +4030,91 @@ THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE(void)
 		task_terminate_internal(task);
 	}
 }
+
+kern_return_t
+task_purge_volatile_memory(
+	task_t	task)
+{
+	vm_map_t	map;
+	int		num_object_purged;
+
+	if (task == TASK_NULL)
+		return KERN_INVALID_TASK;
+
+	task_lock(task);
+
+	if (!task->active) {
+		task_unlock(task);
+		return KERN_INVALID_TASK;
+	}
+	map = task->map;
+	if (map == VM_MAP_NULL) {
+		task_unlock(task);
+		return KERN_INVALID_TASK;
+	}
+	vm_map_reference(task->map);
+
+	task_unlock(task);
+
+	num_object_purged = vm_map_purge(map);
+	vm_map_deallocate(map);
+
+	return KERN_SUCCESS;
+}
+
+/* Placeholders for the task set/get voucher interfaces */
+kern_return_t 
+task_get_mach_voucher(
+	task_t			task,
+	mach_voucher_selector_t __unused which,
+	ipc_voucher_t		*voucher)
+{
+	if (TASK_NULL == task)
+		return KERN_INVALID_TASK;
+
+	*voucher = NULL;
+	return KERN_SUCCESS;
+}
+
+kern_return_t 
+task_set_mach_voucher(
+	task_t			task,
+	ipc_voucher_t		__unused voucher)
+{
+	if (TASK_NULL == task)
+		return KERN_INVALID_TASK;
+
+	return KERN_SUCCESS;
+}
+
+kern_return_t
+task_swap_mach_voucher(
+	task_t			task,
+	ipc_voucher_t		new_voucher,
+	ipc_voucher_t		*in_out_old_voucher)
+{
+	if (TASK_NULL == task)
+		return KERN_INVALID_TASK;
+
+	*in_out_old_voucher = new_voucher;
+	return KERN_SUCCESS;
+}
+
+void task_set_gpu_denied(task_t task, boolean_t denied)
+{
+	task_lock(task);
+
+	if (denied) {
+		task->t_flags |= TF_GPU_DENIED;
+	} else {
+		task->t_flags &= ~TF_GPU_DENIED;
+	}
+
+	task_unlock(task);
+}
+
+boolean_t task_is_gpu_denied(task_t task)
+{
+	/* We don't need the lock to read this flag */
+	return (task->t_flags & TF_GPU_DENIED) ? TRUE : FALSE;
+}