1 /*
2 * Copyright (c) 2000-2010, 2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_FREE_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 * File: kern/task.c
58 * Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub,
59 * David Black
60 *
61 * Task management primitives implementation.
62 */
63 /*
64 * Copyright (c) 1993 The University of Utah and
65 * the Computer Systems Laboratory (CSL). All rights reserved.
66 *
67 * Permission to use, copy, modify and distribute this software and its
68 * documentation is hereby granted, provided that both the copyright
69 * notice and this permission notice appear in all copies of the
70 * software, derivative works or modified versions, and any portions
71 * thereof, and that both notices appear in supporting documentation.
72 *
73 * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
74 * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
75 * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
76 *
77 * CSL requests users of this software to return to csl-dist@cs.utah.edu any
78 * improvements that they make and grant CSL redistribution rights.
79 *
80 */
81 /*
82 * NOTICE: This file was modified by McAfee Research in 2004 to introduce
83 * support for mandatory and extensible security protections. This notice
84 * is included in support of clause 2.2 (b) of the Apple Public License,
85 * Version 2.0.
86 * Copyright (c) 2005 SPARTA, Inc.
87 */
88
89 #include <mach/mach_types.h>
90 #include <mach/boolean.h>
91 #include <mach/host_priv.h>
92 #include <mach/machine/vm_types.h>
93 #include <mach/vm_param.h>
94 #include <mach/mach_vm.h>
95 #include <mach/semaphore.h>
96 #include <mach/task_info.h>
97 #include <mach/task_special_ports.h>
98
99 #include <ipc/ipc_importance.h>
100 #include <ipc/ipc_types.h>
101 #include <ipc/ipc_space.h>
102 #include <ipc/ipc_entry.h>
103 #include <ipc/ipc_hash.h>
104
105 #include <kern/kern_types.h>
106 #include <kern/mach_param.h>
107 #include <kern/misc_protos.h>
108 #include <kern/task.h>
109 #include <kern/thread.h>
110 #include <kern/coalition.h>
111 #include <kern/zalloc.h>
112 #include <kern/kalloc.h>
113 #include <kern/kern_cdata.h>
114 #include <kern/processor.h>
115 #include <kern/sched_prim.h> /* for thread_wakeup */
116 #include <kern/ipc_tt.h>
117 #include <kern/host.h>
118 #include <kern/clock.h>
119 #include <kern/timer.h>
120 #include <kern/assert.h>
121 #include <kern/sync_lock.h>
122 #include <kern/affinity.h>
123 #include <kern/exc_resource.h>
124 #include <kern/machine.h>
125 #include <corpses/task_corpse.h>
126 #if CONFIG_TELEMETRY
127 #include <kern/telemetry.h>
128 #endif
129
130 #include <vm/pmap.h>
131 #include <vm/vm_map.h>
132 #include <vm/vm_kern.h> /* for kernel_map, ipc_kernel_map */
133 #include <vm/vm_pageout.h>
134 #include <vm/vm_protos.h>
135 #include <vm/vm_purgeable_internal.h>
136
137 #include <sys/resource.h>
138 #include <sys/signalvar.h> /* for coredump */
139
140 /*
141 * Exported interfaces
142 */
143
144 #include <mach/task_server.h>
145 #include <mach/mach_host_server.h>
146 #include <mach/host_security_server.h>
147 #include <mach/mach_port_server.h>
148
149 #include <vm/vm_shared_region.h>
150
151 #include <libkern/OSDebug.h>
152 #include <libkern/OSAtomic.h>
153
154 #if CONFIG_ATM
155 #include <atm/atm_internal.h>
156 #endif
157
158 #include <kern/sfi.h>
159
160 #if KPERF
161 extern int kpc_force_all_ctrs(task_t, int);
162 #endif
163
164 uint32_t qos_override_mode;
165
166 task_t kernel_task;
167 zone_t task_zone;
168 lck_attr_t task_lck_attr;
169 lck_grp_t task_lck_grp;
170 lck_grp_attr_t task_lck_grp_attr;
171
172 /* Flag set by core audio when audio is playing. Used to stifle EXC_RESOURCE generation when active. */
173 int audio_active = 0;
174
175 zinfo_usage_store_t tasks_tkm_private;
176 zinfo_usage_store_t tasks_tkm_shared;
177
178 /* A container to accumulate statistics for expired tasks */
179 expired_task_statistics_t dead_task_statistics;
180 lck_spin_t dead_task_statistics_lock;
181
182 ledger_template_t task_ledger_template = NULL;
183
184 struct _task_ledger_indices task_ledgers __attribute__((used)) =
185 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
186 { 0 /* initialized at runtime */},
187 #ifdef CONFIG_BANK
188 -1, -1,
189 #endif
190 };
191
192 /* System sleep state */
193 boolean_t tasks_suspend_state;
194
195
196 void init_task_ledgers(void);
197 void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1);
198 void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1);
199 void __attribute__((noinline)) THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE(void);
200 void __attribute__((noinline)) PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb);
201
202 kern_return_t task_suspend_internal(task_t);
203 kern_return_t task_resume_internal(task_t);
204 static kern_return_t task_start_halt_locked(task_t task, boolean_t should_mark_corpse);
205
206
207 void proc_init_cpumon_params(void);
208 extern kern_return_t exception_deliver(thread_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t, struct exception_action *, lck_mtx_t *);
209
210 // Warn tasks when they hit 80% of their memory limit.
211 #define PHYS_FOOTPRINT_WARNING_LEVEL 80
212
213 #define TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT 150 /* wakeups per second */
214 #define TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL 300 /* in seconds. */
215
216 /*
217 * Level (in terms of percentage of the limit) at which the wakeups monitor triggers telemetry.
218 *
219  * (i.e., when the task's wakeups rate exceeds 70% of the limit, start taking user
220 * stacktraces, aka micro-stackshots)
221 */
222 #define TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER 70
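/*
 * Editorial note, not part of the original source: taken together, the
 * defaults above mean that with a limit of TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT
 * (150 wakeups/sec) observed over TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL
 * (300 seconds), micro-stackshot telemetry starts once a task sustains about
 * 70% of the limit, i.e. roughly 105 wakeups/sec, while EXC_RESOURCE is only
 * considered once the full 150 wakeups/sec average is exceeded.
 */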
223
224 int task_wakeups_monitor_interval; /* In seconds. Time period over which wakeups rate is observed */
225 int task_wakeups_monitor_rate; /* In hz. Maximum allowable wakeups per task before EXC_RESOURCE is sent */
226
227 int task_wakeups_monitor_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
228
229 int disable_exc_resource; /* Global override to suppress EXC_RESOURCE for resource monitor violations. */
230
231 ledger_amount_t max_task_footprint = 0; /* Per-task limit on physical memory consumption in bytes */
232 int max_task_footprint_mb = 0; /* Per-task limit on physical memory consumption in megabytes */
233
234 #if MACH_ASSERT
235 int pmap_ledgers_panic = 1;
236 #endif /* MACH_ASSERT */
237
238 int task_max = CONFIG_TASK_MAX; /* Max number of tasks */
239
240 int hwm_user_cores = 0; /* high watermark violations generate user core files */
241
242 #ifdef MACH_BSD
243 extern void proc_getexecutableuuid(void *, unsigned char *, unsigned long);
244 extern int proc_pid(struct proc *p);
245 extern int proc_selfpid(void);
246 extern char *proc_name_address(struct proc *p);
247 extern uint64_t get_dispatchqueue_offset_from_proc(void *);
248 #if CONFIG_JETSAM
249 extern void proc_memstat_terminated(struct proc* p, boolean_t set);
250 extern void memorystatus_on_ledger_footprint_exceeded(int warning, const int max_footprint_mb);
251 #endif
252 #endif
253 #if MACH_ASSERT
254 extern int pmap_ledgers_panic;
255 #endif /* MACH_ASSERT */
256
257 /* Forwards */
258
259 void task_hold_locked(
260 task_t task);
261 void task_wait_locked(
262 task_t task,
263 boolean_t until_not_runnable);
264 void task_release_locked(
265 task_t task);
266 void task_free(
267 task_t task );
268 void task_synchronizer_destroy_all(
269 task_t task);
270
271 int check_for_tasksuspend(
272 task_t task);
273
274 void
275 task_backing_store_privileged(
276 task_t task)
277 {
278 task_lock(task);
279 task->priv_flags |= VM_BACKING_STORE_PRIV;
280 task_unlock(task);
281 return;
282 }
283
284
285 void
286 task_set_64bit(
287 task_t task,
288 boolean_t is64bit)
289 {
290 #if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
291 thread_t thread;
292 #endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */
293
294 task_lock(task);
295
296 if (is64bit) {
297 if (task_has_64BitAddr(task))
298 goto out;
299 task_set_64BitAddr(task);
300 } else {
301 if ( !task_has_64BitAddr(task))
302 goto out;
303 task_clear_64BitAddr(task);
304 }
305 /* FIXME: On x86, the thread save state flavor can diverge from the
306 * task's 64-bit feature flag due to the 32-bit/64-bit register save
307 * state dichotomy. Since we can be pre-empted in this interval,
308 * certain routines may observe the thread as being in an inconsistent
309 * state with respect to its task's 64-bitness.
310 */
311
312 #if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
313 queue_iterate(&task->threads, thread, thread_t, task_threads) {
314 thread_mtx_lock(thread);
315 machine_thread_switch_addrmode(thread);
316 thread_mtx_unlock(thread);
317 }
318 #endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */
319
320 out:
321 task_unlock(task);
322 }
323
324
325 void
326 task_set_dyld_info(task_t task, mach_vm_address_t addr, mach_vm_size_t size)
327 {
328 task_lock(task);
329 task->all_image_info_addr = addr;
330 task->all_image_info_size = size;
331 task_unlock(task);
332 }
333
334 void
335 task_atm_reset(__unused task_t task) {
336
337 #if CONFIG_ATM
338 if (task->atm_context != NULL) {
339 atm_task_descriptor_destroy(task->atm_context);
340 task->atm_context = NULL;
341 }
342 #endif
343
344 }
345
346 #if TASK_REFERENCE_LEAK_DEBUG
347 #include <kern/btlog.h>
348
349 decl_simple_lock_data(static,task_ref_lock);
350 static btlog_t *task_ref_btlog;
351 #define TASK_REF_OP_INCR 0x1
352 #define TASK_REF_OP_DECR 0x2
353
354 #define TASK_REF_BTDEPTH 7
355
356 static void
357 task_ref_lock_lock(void *context)
358 {
359 simple_lock((simple_lock_t)context);
360 }
361 static void
362 task_ref_lock_unlock(void *context)
363 {
364 simple_unlock((simple_lock_t)context);
365 }
366
367 void
368 task_reference_internal(task_t task)
369 {
370 void * bt[TASK_REF_BTDEPTH];
371 int numsaved = 0;
372
373 numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
374
375 (void)hw_atomic_add(&(task)->ref_count, 1);
376 btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_INCR,
377 bt, numsaved);
378 }
379
380 uint32_t
381 task_deallocate_internal(task_t task)
382 {
383 void * bt[TASK_REF_BTDEPTH];
384 int numsaved = 0;
385
386 numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
387
388 btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_DECR,
389 bt, numsaved);
390 return hw_atomic_sub(&(task)->ref_count, 1);
391 }
392
393 #endif /* TASK_REFERENCE_LEAK_DEBUG */
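/*
 * Editorial note, not part of the original source: the backtraces recorded by
 * task_reference_internal()/task_deallocate_internal() above are purged in
 * task_deallocate() via btlog_remove_entries_for_element(task_ref_btlog, task),
 * so entries that remain in the btlog correspond to tasks whose references
 * were never fully dropped (i.e. suspected reference leaks).
 */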
394
395 void
396 task_init(void)
397 {
398
399 lck_grp_attr_setdefault(&task_lck_grp_attr);
400 lck_grp_init(&task_lck_grp, "task", &task_lck_grp_attr);
401 lck_attr_setdefault(&task_lck_attr);
402 lck_mtx_init(&tasks_threads_lock, &task_lck_grp, &task_lck_attr);
403
404 task_zone = zinit(
405 sizeof(struct task),
406 task_max * sizeof(struct task),
407 TASK_CHUNK * sizeof(struct task),
408 "tasks");
409
410 zone_change(task_zone, Z_NOENCRYPT, TRUE);
411
412 /*
413 * Configure per-task memory limit.
414 * The boot-arg is interpreted as Megabytes,
415 * and takes precedence over the device tree.
416 * Setting the boot-arg to 0 disables task limits.
417 */
418 if (!PE_parse_boot_argn("max_task_pmem", &max_task_footprint_mb,
419 sizeof (max_task_footprint_mb))) {
420 /*
421 * No limit was found in boot-args, so go look in the device tree.
422 */
423 if (!PE_get_default("kern.max_task_pmem", &max_task_footprint_mb,
424 sizeof(max_task_footprint_mb))) {
425 /*
426 * No limit was found in device tree.
427 */
428 max_task_footprint_mb = 0;
429 }
430 }
431
432 if (max_task_footprint_mb != 0) {
433 #if CONFIG_JETSAM
434 if (max_task_footprint_mb < 50) {
435 printf("Warning: max_task_pmem %d below minimum.\n",
436 max_task_footprint_mb);
437 max_task_footprint_mb = 50;
438 }
439 printf("Limiting task physical memory footprint to %d MB\n",
440 max_task_footprint_mb);
441
442 max_task_footprint = (ledger_amount_t)max_task_footprint_mb * 1024 * 1024; // Convert MB to bytes
443 #else
444 printf("Warning: max_task_footprint specified, but jetsam not configured; ignoring.\n");
445 #endif
446 }
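/*
 * Editorial example, not part of the original source: booting with
 * "max_task_pmem=500" would apply a 500 MB per-task physical footprint limit
 * on a CONFIG_JETSAM kernel (values below 50 are clamped up to 50 MB), while
 * "max_task_pmem=0" leaves per-task limits disabled. The value 500 here is
 * purely illustrative.
 */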
447
448 #if MACH_ASSERT
449 PE_parse_boot_argn("pmap_ledgers_panic", &pmap_ledgers_panic,
450 sizeof (pmap_ledgers_panic));
451 #endif /* MACH_ASSERT */
452
453 if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores,
454 sizeof (hwm_user_cores))) {
455 hwm_user_cores = 0;
456 }
457
458 if (PE_parse_boot_argn("qos_override_mode", &qos_override_mode, sizeof(qos_override_mode))) {
459 printf("QOS override mode: 0x%08x\n", qos_override_mode);
460 } else {
461 qos_override_mode = QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE;
462 }
463
464 proc_init_cpumon_params();
465
466 if (!PE_parse_boot_argn("task_wakeups_monitor_rate", &task_wakeups_monitor_rate, sizeof (task_wakeups_monitor_rate))) {
467 task_wakeups_monitor_rate = TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT;
468 }
469
470 if (!PE_parse_boot_argn("task_wakeups_monitor_interval", &task_wakeups_monitor_interval, sizeof (task_wakeups_monitor_interval))) {
471 task_wakeups_monitor_interval = TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL;
472 }
473
474 if (!PE_parse_boot_argn("task_wakeups_monitor_ustackshots_trigger_pct", &task_wakeups_monitor_ustackshots_trigger_pct,
475 sizeof (task_wakeups_monitor_ustackshots_trigger_pct))) {
476 task_wakeups_monitor_ustackshots_trigger_pct = TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER;
477 }
478
479 if (!PE_parse_boot_argn("disable_exc_resource", &disable_exc_resource,
480 sizeof (disable_exc_resource))) {
481 disable_exc_resource = 0;
482 }
483
484 /*
485 * If we have coalitions, coalition_init() will call init_task_ledgers() as it
486 * sets up the ledgers for the default coalition. If we don't have coalitions,
487 * then we have to call it now.
488 */
489 #if CONFIG_COALITIONS
490 assert(task_ledger_template);
491 #else /* CONFIG_COALITIONS */
492 init_task_ledgers();
493 #endif /* CONFIG_COALITIONS */
494
495 #if TASK_REFERENCE_LEAK_DEBUG
496 simple_lock_init(&task_ref_lock, 0);
497 task_ref_btlog = btlog_create(100000,
498 TASK_REF_BTDEPTH,
499 task_ref_lock_lock,
500 task_ref_lock_unlock,
501 &task_ref_lock);
502 assert(task_ref_btlog);
503 #endif
504
505 /*
506 * Create the kernel task as the first task.
507 */
508 #ifdef __LP64__
509 if (task_create_internal(TASK_NULL, NULL, FALSE, TRUE, &kernel_task) != KERN_SUCCESS)
510 #else
511 if (task_create_internal(TASK_NULL, NULL, FALSE, FALSE, &kernel_task) != KERN_SUCCESS)
512 #endif
513 panic("task_init\n");
514
515 vm_map_deallocate(kernel_task->map);
516 kernel_task->map = kernel_map;
517 lck_spin_init(&dead_task_statistics_lock, &task_lck_grp, &task_lck_attr);
518
519 }
520
521 /*
522 * Create a task running in the kernel address space. It may
523 * have its own map of size mem_size and may have ipc privileges.
524 */
525 kern_return_t
526 kernel_task_create(
527 __unused task_t parent_task,
528 __unused vm_offset_t map_base,
529 __unused vm_size_t map_size,
530 __unused task_t *child_task)
531 {
532 return (KERN_INVALID_ARGUMENT);
533 }
534
535 kern_return_t
536 task_create(
537 task_t parent_task,
538 __unused ledger_port_array_t ledger_ports,
539 __unused mach_msg_type_number_t num_ledger_ports,
540 __unused boolean_t inherit_memory,
541 __unused task_t *child_task) /* OUT */
542 {
543 if (parent_task == TASK_NULL)
544 return(KERN_INVALID_ARGUMENT);
545
546 /*
547 * No longer supported: too many calls assume that a task has a valid
548 * process attached.
549 */
550 return(KERN_FAILURE);
551 }
552
553 kern_return_t
554 host_security_create_task_token(
555 host_security_t host_security,
556 task_t parent_task,
557 __unused security_token_t sec_token,
558 __unused audit_token_t audit_token,
559 __unused host_priv_t host_priv,
560 __unused ledger_port_array_t ledger_ports,
561 __unused mach_msg_type_number_t num_ledger_ports,
562 __unused boolean_t inherit_memory,
563 __unused task_t *child_task) /* OUT */
564 {
565 if (parent_task == TASK_NULL)
566 return(KERN_INVALID_ARGUMENT);
567
568 if (host_security == HOST_NULL)
569 return(KERN_INVALID_SECURITY);
570
571 /*
572 * No longer supported.
573 */
574 return(KERN_FAILURE);
575 }
576
577 /*
578 * Task ledgers
579 * ------------
580 *
581 * phys_footprint
582 * Physical footprint: This is the sum of:
583 * + (internal - alternate_accounting)
584 * + (internal_compressed - alternate_accounting_compressed)
585 * + iokit_mapped
586 * + purgeable_nonvolatile
587 * + purgeable_nonvolatile_compressed
588 *
589 * internal
590 * The task's anonymous memory, which on iOS is always resident.
591 *
592 * internal_compressed
593 * Amount of this task's internal memory which is held by the compressor.
594 * Such memory is no longer actually resident for the task [i.e., resident in its pmap],
595 * and could be either decompressed back into memory, or paged out to storage, depending
596 * on our implementation.
597 *
598 * iokit_mapped
599  *	IOKit mappings: The total size of all IOKit mappings in this task [regardless of
600  *	clean/dirty or internal/external state].
601 *
602 * alternate_accounting
603 * The number of internal dirty pages which are part of IOKit mappings. By definition, these pages
604 * are counted in both internal *and* iokit_mapped, so we must subtract them from the total to avoid
605 * double counting.
606 */
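/*
 * Editorial sketch, not part of the original source: given the accounting
 * described above, code holding a task reference could sample the footprint
 * entry the same way task_deallocate() samples its wakeup entries below:
 *
 *	ledger_amount_t credit, debit;
 *	ledger_get_entries(task->ledger, task_ledgers.phys_footprint,
 *	    &credit, &debit);
 *	// credit accumulates bytes charged, debit bytes released; their
 *	// difference is the task's current physical footprint in bytes.
 */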
607 void
608 init_task_ledgers(void)
609 {
610 ledger_template_t t;
611
612 assert(task_ledger_template == NULL);
613 assert(kernel_task == TASK_NULL);
614
615 if ((t = ledger_template_create("Per-task ledger")) == NULL)
616 panic("couldn't create task ledger template");
617
618 task_ledgers.cpu_time = ledger_entry_add(t, "cpu_time", "sched", "ns");
619 task_ledgers.tkm_private = ledger_entry_add(t, "tkm_private",
620 "physmem", "bytes");
621 task_ledgers.tkm_shared = ledger_entry_add(t, "tkm_shared", "physmem",
622 "bytes");
623 task_ledgers.phys_mem = ledger_entry_add(t, "phys_mem", "physmem",
624 "bytes");
625 task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem",
626 "bytes");
627 task_ledgers.internal = ledger_entry_add(t, "internal", "physmem",
628 "bytes");
629 task_ledgers.iokit_mapped = ledger_entry_add(t, "iokit_mapped", "mappings",
630 "bytes");
631 task_ledgers.alternate_accounting = ledger_entry_add(t, "alternate_accounting", "physmem",
632 "bytes");
633 task_ledgers.alternate_accounting_compressed = ledger_entry_add(t, "alternate_accounting_compressed", "physmem",
634 "bytes");
635 task_ledgers.phys_footprint = ledger_entry_add(t, "phys_footprint", "physmem",
636 "bytes");
637 task_ledgers.internal_compressed = ledger_entry_add(t, "internal_compressed", "physmem",
638 "bytes");
639 task_ledgers.purgeable_volatile = ledger_entry_add(t, "purgeable_volatile", "physmem", "bytes");
640 task_ledgers.purgeable_nonvolatile = ledger_entry_add(t, "purgeable_nonvolatile", "physmem", "bytes");
641 task_ledgers.purgeable_volatile_compressed = ledger_entry_add(t, "purgeable_volatile_compress", "physmem", "bytes");
642 task_ledgers.purgeable_nonvolatile_compressed = ledger_entry_add(t, "purgeable_nonvolatile_compress", "physmem", "bytes");
643 task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power",
644 "count");
645 task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power",
646 "count");
647
648 #if CONFIG_SCHED_SFI
649 sfi_class_id_t class_id, ledger_alias;
650 for (class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
651 task_ledgers.sfi_wait_times[class_id] = -1;
652 }
653
654 /* don't account for UNSPECIFIED */
655 for (class_id = SFI_CLASS_UNSPECIFIED + 1; class_id < MAX_SFI_CLASS_ID; class_id++) {
656 ledger_alias = sfi_get_ledger_alias_for_class(class_id);
657 if (ledger_alias != SFI_CLASS_UNSPECIFIED) {
658 /* Check to see if alias has been registered yet */
659 if (task_ledgers.sfi_wait_times[ledger_alias] != -1) {
660 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias];
661 } else {
662 /* Otherwise, initialize it first */
663 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias] = sfi_ledger_entry_add(t, ledger_alias);
664 }
665 } else {
666 task_ledgers.sfi_wait_times[class_id] = sfi_ledger_entry_add(t, class_id);
667 }
668
669 if (task_ledgers.sfi_wait_times[class_id] < 0) {
670 panic("couldn't create entries for task ledger template for SFI class 0x%x", class_id);
671 }
672 }
673
674 assert(task_ledgers.sfi_wait_times[MAX_SFI_CLASS_ID -1] != -1);
675 #endif /* CONFIG_SCHED_SFI */
676
677 #ifdef CONFIG_BANK
678 task_ledgers.cpu_time_billed_to_me = ledger_entry_add(t, "cpu_time_billed_to_me", "sched", "ns");
679 task_ledgers.cpu_time_billed_to_others = ledger_entry_add(t, "cpu_time_billed_to_others", "sched", "ns");
680 #endif
681 if ((task_ledgers.cpu_time < 0) ||
682 (task_ledgers.tkm_private < 0) ||
683 (task_ledgers.tkm_shared < 0) ||
684 (task_ledgers.phys_mem < 0) ||
685 (task_ledgers.wired_mem < 0) ||
686 (task_ledgers.internal < 0) ||
687 (task_ledgers.iokit_mapped < 0) ||
688 (task_ledgers.alternate_accounting < 0) ||
689 (task_ledgers.alternate_accounting_compressed < 0) ||
690 (task_ledgers.phys_footprint < 0) ||
691 (task_ledgers.internal_compressed < 0) ||
692 (task_ledgers.purgeable_volatile < 0) ||
693 (task_ledgers.purgeable_nonvolatile < 0) ||
694 (task_ledgers.purgeable_volatile_compressed < 0) ||
695 (task_ledgers.purgeable_nonvolatile_compressed < 0) ||
696 (task_ledgers.platform_idle_wakeups < 0) ||
697 (task_ledgers.interrupt_wakeups < 0)
698 #ifdef CONFIG_BANK
699 || (task_ledgers.cpu_time_billed_to_me < 0) || (task_ledgers.cpu_time_billed_to_others < 0)
700 #endif
701 ) {
702 panic("couldn't create entries for task ledger template");
703 }
704
705 ledger_track_maximum(t, task_ledgers.phys_footprint, 60);
706 #if MACH_ASSERT
707 if (pmap_ledgers_panic) {
708 ledger_panic_on_negative(t, task_ledgers.phys_footprint);
709 ledger_panic_on_negative(t, task_ledgers.internal);
710 ledger_panic_on_negative(t, task_ledgers.internal_compressed);
711 ledger_panic_on_negative(t, task_ledgers.iokit_mapped);
712 ledger_panic_on_negative(t, task_ledgers.alternate_accounting);
713 ledger_panic_on_negative(t, task_ledgers.alternate_accounting_compressed);
714 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile);
715 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile);
716 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile_compressed);
717 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile_compressed);
718 }
719 #endif /* MACH_ASSERT */
720
721 #if CONFIG_JETSAM
722 ledger_set_callback(t, task_ledgers.phys_footprint, task_footprint_exceeded, NULL, NULL);
723 #endif
724
725 ledger_set_callback(t, task_ledgers.interrupt_wakeups,
726 task_wakeups_rate_exceeded, NULL, NULL);
727
728 task_ledger_template = t;
729 }
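/*
 * Editorial note, not part of the original source: the template assembled
 * above is instantiated once per task in task_create_internal() via
 * ledger_instantiate(task_ledger_template, LEDGER_CREATE_ACTIVE_ENTRIES),
 * so every entry registered here is present in every task's ledger, and the
 * footprint/wakeups callbacks installed here fire on a per-task basis.
 */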
730
731 kern_return_t
732 task_create_internal(
733 task_t parent_task,
734 coalition_t *parent_coalitions __unused,
735 boolean_t inherit_memory,
736 boolean_t is_64bit,
737 task_t *child_task) /* OUT */
738 {
739 task_t new_task;
740 vm_shared_region_t shared_region;
741 ledger_t ledger = NULL;
742
743 new_task = (task_t) zalloc(task_zone);
744
745 if (new_task == TASK_NULL)
746 return(KERN_RESOURCE_SHORTAGE);
747
748 /* one ref for just being alive; one for our caller */
749 new_task->ref_count = 2;
750
751 /* allocate with active entries */
752 assert(task_ledger_template != NULL);
753 if ((ledger = ledger_instantiate(task_ledger_template,
754 LEDGER_CREATE_ACTIVE_ENTRIES)) == NULL) {
755 zfree(task_zone, new_task);
756 return(KERN_RESOURCE_SHORTAGE);
757 }
758
759 new_task->ledger = ledger;
760
761 #if defined(CONFIG_SCHED_MULTIQ)
762 new_task->sched_group = sched_group_create();
763 #endif
764
765 /* if inherit_memory is true, parent_task MUST not be NULL */
766 if (inherit_memory)
767 new_task->map = vm_map_fork(ledger, parent_task->map);
768 else
769 new_task->map = vm_map_create(pmap_create(ledger, 0, is_64bit),
770 (vm_map_offset_t)(VM_MIN_ADDRESS),
771 (vm_map_offset_t)(VM_MAX_ADDRESS), TRUE);
772
773 /* Inherit memlock limit from parent */
774 if (parent_task)
775 vm_map_set_user_wire_limit(new_task->map, (vm_size_t)parent_task->map->user_wire_limit);
776
777 lck_mtx_init(&new_task->lock, &task_lck_grp, &task_lck_attr);
778 queue_init(&new_task->threads);
779 new_task->suspend_count = 0;
780 new_task->thread_count = 0;
781 new_task->active_thread_count = 0;
782 new_task->user_stop_count = 0;
783 new_task->legacy_stop_count = 0;
784 new_task->active = TRUE;
785 new_task->halting = FALSE;
786 new_task->user_data = NULL;
787 new_task->faults = 0;
788 new_task->cow_faults = 0;
789 new_task->pageins = 0;
790 new_task->messages_sent = 0;
791 new_task->messages_received = 0;
792 new_task->syscalls_mach = 0;
793 new_task->priv_flags = 0;
794 new_task->syscalls_unix=0;
795 new_task->c_switch = new_task->p_switch = new_task->ps_switch = 0;
796 new_task->t_flags = 0;
797 new_task->importance = 0;
798
799 #if CONFIG_ATM
800 new_task->atm_context = NULL;
801 #endif
802 #if CONFIG_BANK
803 new_task->bank_context = NULL;
804 #endif
805
806 zinfo_task_init(new_task);
807
808 #ifdef MACH_BSD
809 new_task->bsd_info = NULL;
810 new_task->corpse_info = NULL;
811 #endif /* MACH_BSD */
812
813 #if CONFIG_JETSAM
814 if (max_task_footprint != 0) {
815 ledger_set_limit(ledger, task_ledgers.phys_footprint, max_task_footprint, PHYS_FOOTPRINT_WARNING_LEVEL);
816 }
817 #endif
818
819 if (task_wakeups_monitor_rate != 0) {
820 uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
821 int32_t rate; // Ignored because of WAKEMON_SET_DEFAULTS
822 task_wakeups_monitor_ctl(new_task, &flags, &rate);
823 }
824
825 #if defined(__i386__) || defined(__x86_64__)
826 new_task->i386_ldt = 0;
827 #endif
828
829 new_task->task_debug = NULL;
830
831 queue_init(&new_task->semaphore_list);
832 new_task->semaphores_owned = 0;
833
834 ipc_task_init(new_task, parent_task);
835
836 new_task->total_user_time = 0;
837 new_task->total_system_time = 0;
838
839 new_task->vtimers = 0;
840
841 new_task->shared_region = NULL;
842
843 new_task->affinity_space = NULL;
844
845 new_task->pidsuspended = FALSE;
846 new_task->frozen = FALSE;
847 new_task->changing_freeze_state = FALSE;
848 new_task->rusage_cpu_flags = 0;
849 new_task->rusage_cpu_percentage = 0;
850 new_task->rusage_cpu_interval = 0;
851 new_task->rusage_cpu_deadline = 0;
852 new_task->rusage_cpu_callt = NULL;
853 #if MACH_ASSERT
854 new_task->suspends_outstanding = 0;
855 #endif
856
857 #if HYPERVISOR
858 new_task->hv_task_target = NULL;
859 #endif /* HYPERVISOR */
860
861
862 new_task->low_mem_notified_warn = 0;
863 new_task->low_mem_notified_critical = 0;
864 new_task->low_mem_privileged_listener = 0;
865 new_task->purged_memory_warn = 0;
866 new_task->purged_memory_critical = 0;
867 new_task->mem_notify_reserved = 0;
868 #if IMPORTANCE_INHERITANCE
869 new_task->task_imp_base = NULL;
870 #endif /* IMPORTANCE_INHERITANCE */
871
872 #if defined(__x86_64__)
873 new_task->uexc_range_start = new_task->uexc_range_size = new_task->uexc_handler = 0;
874 #endif
875
876 new_task->requested_policy = default_task_requested_policy;
877 new_task->effective_policy = default_task_effective_policy;
878 new_task->pended_policy = default_task_pended_policy;
879
880 if (parent_task != TASK_NULL) {
881 new_task->sec_token = parent_task->sec_token;
882 new_task->audit_token = parent_task->audit_token;
883
884 /* inherit the parent's shared region */
885 shared_region = vm_shared_region_get(parent_task);
886 vm_shared_region_set(new_task, shared_region);
887
888 if(task_has_64BitAddr(parent_task))
889 task_set_64BitAddr(new_task);
890 new_task->all_image_info_addr = parent_task->all_image_info_addr;
891 new_task->all_image_info_size = parent_task->all_image_info_size;
892
893 #if defined(__i386__) || defined(__x86_64__)
894 if (inherit_memory && parent_task->i386_ldt)
895 new_task->i386_ldt = user_ldt_copy(parent_task->i386_ldt);
896 #endif
897 if (inherit_memory && parent_task->affinity_space)
898 task_affinity_create(parent_task, new_task);
899
900 new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task);
901
902 #if IMPORTANCE_INHERITANCE
903 ipc_importance_task_t new_task_imp = IIT_NULL;
904
905 if (task_is_marked_importance_donor(parent_task)) {
906 new_task_imp = ipc_importance_for_task(new_task, FALSE);
907 assert(IIT_NULL != new_task_imp);
908 ipc_importance_task_mark_donor(new_task_imp, TRUE);
909 }
910 /* Embedded doesn't want this to inherit */
911 if (task_is_marked_importance_receiver(parent_task)) {
912 if (IIT_NULL == new_task_imp)
913 new_task_imp = ipc_importance_for_task(new_task, FALSE);
914 assert(IIT_NULL != new_task_imp);
915 ipc_importance_task_mark_receiver(new_task_imp, TRUE);
916 }
917 if (task_is_marked_importance_denap_receiver(parent_task)) {
918 if (IIT_NULL == new_task_imp)
919 new_task_imp = ipc_importance_for_task(new_task, FALSE);
920 assert(IIT_NULL != new_task_imp);
921 ipc_importance_task_mark_denap_receiver(new_task_imp, TRUE);
922 }
923
924 if (IIT_NULL != new_task_imp) {
925 assert(new_task->task_imp_base == new_task_imp);
926 ipc_importance_task_release(new_task_imp);
927 }
928 #endif /* IMPORTANCE_INHERITANCE */
929
930 new_task->priority = BASEPRI_DEFAULT;
931 new_task->max_priority = MAXPRI_USER;
932
933 new_task->requested_policy.t_apptype = parent_task->requested_policy.t_apptype;
934
935 new_task->requested_policy.int_darwinbg = parent_task->requested_policy.int_darwinbg;
936 new_task->requested_policy.ext_darwinbg = parent_task->requested_policy.ext_darwinbg;
937 new_task->requested_policy.int_iotier = parent_task->requested_policy.int_iotier;
938 new_task->requested_policy.ext_iotier = parent_task->requested_policy.ext_iotier;
939 new_task->requested_policy.int_iopassive = parent_task->requested_policy.int_iopassive;
940 new_task->requested_policy.ext_iopassive = parent_task->requested_policy.ext_iopassive;
941 new_task->requested_policy.bg_iotier = parent_task->requested_policy.bg_iotier;
942 new_task->requested_policy.terminated = parent_task->requested_policy.terminated;
943 new_task->requested_policy.t_qos_clamp = parent_task->requested_policy.t_qos_clamp;
944
945 task_policy_create(new_task, parent_task->requested_policy.t_boosted);
946 } else {
947 new_task->sec_token = KERNEL_SECURITY_TOKEN;
948 new_task->audit_token = KERNEL_AUDIT_TOKEN;
949 #ifdef __LP64__
950 if(is_64bit)
951 task_set_64BitAddr(new_task);
952 #endif
953 new_task->all_image_info_addr = (mach_vm_address_t)0;
954 new_task->all_image_info_size = (mach_vm_size_t)0;
955
956 new_task->pset_hint = PROCESSOR_SET_NULL;
957
958 if (kernel_task == TASK_NULL) {
959 new_task->priority = BASEPRI_KERNEL;
960 new_task->max_priority = MAXPRI_KERNEL;
961 } else {
962 new_task->priority = BASEPRI_DEFAULT;
963 new_task->max_priority = MAXPRI_USER;
964 }
965 }
966
967 bzero(new_task->coalition, sizeof(new_task->coalition));
968 for (int i = 0; i < COALITION_NUM_TYPES; i++)
969 queue_chain_init(new_task->task_coalition[i]);
970
971 /* Allocate I/O Statistics */
972 new_task->task_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info));
973 assert(new_task->task_io_stats != NULL);
974 bzero(new_task->task_io_stats, sizeof(struct io_stat_info));
975 new_task->task_immediate_writes = 0;
976 new_task->task_deferred_writes = 0;
977 new_task->task_invalidated_writes = 0;
978 new_task->task_metadata_writes = 0;
979
980 bzero(&(new_task->cpu_time_qos_stats), sizeof(struct _cpu_time_qos_stats));
981
982 bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics));
983 new_task->task_timer_wakeups_bin_1 = new_task->task_timer_wakeups_bin_2 = 0;
984 new_task->task_gpu_ns = 0;
985
986 #if CONFIG_COALITIONS
987
988 /* TODO: there is no graceful failure path here... */
989 if (parent_coalitions && parent_coalitions[COALITION_TYPE_RESOURCE]) {
990 coalitions_adopt_task(parent_coalitions, new_task);
991 } else if (parent_task && parent_task->coalition[COALITION_TYPE_RESOURCE]) {
992 /*
993 * all tasks at least have a resource coalition, so
994 * if the parent has one then inherit all coalitions
995 * the parent is a part of
996 */
997 coalitions_adopt_task(parent_task->coalition, new_task);
998 } else {
999 /* TODO: assert that new_task will be PID 1 (launchd) */
1000 coalitions_adopt_init_task(new_task);
1001 }
1002
1003 if (new_task->coalition[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
1004 panic("created task is not a member of a resource coalition");
1005 }
1006 #endif /* CONFIG_COALITIONS */
1007
1008 new_task->dispatchqueue_offset = 0;
1009 if (parent_task != NULL) {
1010 new_task->dispatchqueue_offset = parent_task->dispatchqueue_offset;
1011 }
1012
1013 if (vm_backing_store_low && parent_task != NULL)
1014 new_task->priv_flags |= (parent_task->priv_flags&VM_BACKING_STORE_PRIV);
1015
1016 new_task->task_volatile_objects = 0;
1017 new_task->task_nonvolatile_objects = 0;
1018 new_task->task_purgeable_disowning = FALSE;
1019 new_task->task_purgeable_disowned = FALSE;
1020
1021 ipc_task_enable(new_task);
1022
1023 lck_mtx_lock(&tasks_threads_lock);
1024 queue_enter(&tasks, new_task, task_t, tasks);
1025 tasks_count++;
1026 if (tasks_suspend_state) {
1027 task_suspend_internal(new_task);
1028 }
1029 lck_mtx_unlock(&tasks_threads_lock);
1030
1031 *child_task = new_task;
1032 return(KERN_SUCCESS);
1033 }
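/*
 * Editorial note, not part of the original source: the two references taken
 * at creation ("one for just being alive; one for our caller") are paired
 * with the task_deallocate() performed at the end of task_terminate_internal()
 * and with the eventual task_deallocate() by whoever received *child_task.
 */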
1034
1035 int task_dropped_imp_count = 0;
1036
1037 /*
1038 * task_deallocate:
1039 *
1040 * Drop a reference on a task.
1041 */
1042 void
1043 task_deallocate(
1044 task_t task)
1045 {
1046 ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups;
1047 uint32_t refs;
1048
1049 if (task == TASK_NULL)
1050 return;
1051
1052 refs = task_deallocate_internal(task);
1053
1054 #if IMPORTANCE_INHERITANCE
1055 if (refs > 1)
1056 return;
1057
1058 if (refs == 1) {
1059 /*
1060 * If last ref potentially comes from the task's importance,
1061 * disconnect it. But more task refs may be added before
1062 * that completes, so wait for the reference to go to zero
1063 * naturually (it may happen on a recursive task_deallocate()
1064 * from the ipc_importance_disconnect_task() call).
1065 */
1066 if (IIT_NULL != task->task_imp_base)
1067 ipc_importance_disconnect_task(task);
1068 return;
1069 }
1070 #else
1071 if (refs > 0)
1072 return;
1073 #endif /* IMPORTANCE_INHERITANCE */
1074
1075 lck_mtx_lock(&tasks_threads_lock);
1076 queue_remove(&terminated_tasks, task, task_t, tasks);
1077 terminated_tasks_count--;
1078 lck_mtx_unlock(&tasks_threads_lock);
1079
1080 /*
1081 * remove the reference on atm descriptor
1082 */
1083 task_atm_reset(task);
1084
1085 #if CONFIG_BANK
1086 /*
1087 * remove the reference on bank context
1088 */
1089 if (task->bank_context != NULL) {
1090 bank_task_destroy(task->bank_context);
1091 task->bank_context = NULL;
1092 }
1093 #endif
1094
1095 if (task->task_io_stats)
1096 kfree(task->task_io_stats, sizeof(struct io_stat_info));
1097
1098 /*
1099 * Give the machine dependent code a chance
1100 * to perform cleanup before ripping apart
1101 * the task.
1102 */
1103 machine_task_terminate(task);
1104
1105 ipc_task_terminate(task);
1106
1107 if (task->affinity_space)
1108 task_affinity_deallocate(task);
1109
1110 #if MACH_ASSERT
1111 if (task->ledger != NULL &&
1112 task->map != NULL &&
1113 task->map->pmap != NULL &&
1114 task->map->pmap->ledger != NULL) {
1115 assert(task->ledger == task->map->pmap->ledger);
1116 }
1117 #endif /* MACH_ASSERT */
1118
1119 vm_purgeable_disown(task);
1120 assert(task->task_purgeable_disowned);
1121 if (task->task_volatile_objects != 0 ||
1122 task->task_nonvolatile_objects != 0) {
1123 panic("task_deallocate(%p): "
1124 "volatile_objects=%d nonvolatile_objects=%d\n",
1125 task,
1126 task->task_volatile_objects,
1127 task->task_nonvolatile_objects);
1128 }
1129
1130 vm_map_deallocate(task->map);
1131 is_release(task->itk_space);
1132
1133 ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
1134 &interrupt_wakeups, &debit);
1135 ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
1136 &platform_idle_wakeups, &debit);
1137
1138 #if defined(CONFIG_SCHED_MULTIQ)
1139 sched_group_destroy(task->sched_group);
1140 #endif
1141
1142 /* Accumulate statistics for dead tasks */
1143 lck_spin_lock(&dead_task_statistics_lock);
1144 dead_task_statistics.total_user_time += task->total_user_time;
1145 dead_task_statistics.total_system_time += task->total_system_time;
1146
1147 dead_task_statistics.task_interrupt_wakeups += interrupt_wakeups;
1148 dead_task_statistics.task_platform_idle_wakeups += platform_idle_wakeups;
1149
1150 dead_task_statistics.task_timer_wakeups_bin_1 += task->task_timer_wakeups_bin_1;
1151 dead_task_statistics.task_timer_wakeups_bin_2 += task->task_timer_wakeups_bin_2;
1152
1153 lck_spin_unlock(&dead_task_statistics_lock);
1154 lck_mtx_destroy(&task->lock, &task_lck_grp);
1155
1156 if (!ledger_get_entries(task->ledger, task_ledgers.tkm_private, &credit,
1157 &debit)) {
1158 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc);
1159 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_private.free);
1160 }
1161 if (!ledger_get_entries(task->ledger, task_ledgers.tkm_shared, &credit,
1162 &debit)) {
1163 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_shared.alloc);
1164 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_shared.free);
1165 }
1166 ledger_dereference(task->ledger);
1167 zinfo_task_free(task);
1168
1169 #if TASK_REFERENCE_LEAK_DEBUG
1170 btlog_remove_entries_for_element(task_ref_btlog, task);
1171 #endif
1172
1173 #if CONFIG_COALITIONS
1174 if (!task->coalition[COALITION_TYPE_RESOURCE])
1175 panic("deallocating task was not a member of a resource coalition");
1176 task_release_coalitions(task);
1177 #endif /* CONFIG_COALITIONS */
1178
1179 bzero(task->coalition, sizeof(task->coalition));
1180
1181 #if MACH_BSD
1182 /* clean up collected information since last reference to task is gone */
1183 if (task->corpse_info) {
1184 task_crashinfo_destroy(task->corpse_info);
1185 task->corpse_info = NULL;
1186 }
1187 #endif
1188
1189 zfree(task_zone, task);
1190 }
1191
1192 /*
1193 * task_name_deallocate:
1194 *
1195 * Drop a reference on a task name.
1196 */
1197 void
1198 task_name_deallocate(
1199 task_name_t task_name)
1200 {
1201 return(task_deallocate((task_t)task_name));
1202 }
1203
1204 /*
1205 * task_suspension_token_deallocate:
1206 *
1207 * Drop a reference on a task suspension token.
1208 */
1209 void
1210 task_suspension_token_deallocate(
1211 task_suspension_token_t token)
1212 {
1213 return(task_deallocate((task_t)token));
1214 }
1215
1216
1217 /*
1218 * task_collect_crash_info:
1219 *
1220  *	Collect crash info from BSD and Mach-based data
1221 */
1222 kern_return_t
1223 task_collect_crash_info(task_t task)
1224 {
1225 kern_return_t kr = KERN_SUCCESS;
1226
1227 kcdata_descriptor_t crash_data = NULL;
1228 kcdata_descriptor_t crash_data_release = NULL;
1229 mach_msg_type_number_t size = CORPSEINFO_ALLOCATION_SIZE;
1230 mach_vm_offset_t crash_data_user_ptr = 0;
1231
1232 if (!corpses_enabled()) {
1233 return KERN_NOT_SUPPORTED;
1234 }
1235
1236 task_lock(task);
1237 assert(task->bsd_info != NULL);
1238 if (task->corpse_info == NULL && task->bsd_info != NULL) {
1239 task_unlock(task);
1240 /* map crash data memory in task's vm map */
1241 kr = mach_vm_allocate(task->map, &crash_data_user_ptr, size, (VM_MAKE_TAG(VM_MEMORY_CORPSEINFO) | VM_FLAGS_ANYWHERE));
1242
1243 if (kr != KERN_SUCCESS)
1244 goto out_no_lock;
1245
1246 crash_data = task_crashinfo_alloc_init((mach_vm_address_t)crash_data_user_ptr, size);
1247 if (crash_data) {
1248 task_lock(task);
1249 crash_data_release = task->corpse_info;
1250 task->corpse_info = crash_data;
1251 task_unlock(task);
1252 kr = KERN_SUCCESS;
1253 } else {
1254 /* if failed to create corpse info, free the mapping */
1255 if (KERN_SUCCESS != mach_vm_deallocate(task->map, crash_data_user_ptr, size)) {
1256 printf("mach_vm_deallocate failed to clear corpse_data for pid %d.\n", task_pid(task));
1257 }
1258 kr = KERN_FAILURE;
1259 }
1260
1261 if (crash_data_release != NULL) {
1262 task_crashinfo_destroy(crash_data_release);
1263 }
1264 } else {
1265 task_unlock(task);
1266 }
1267
1268 out_no_lock:
1269 return kr;
1270 }
1271
1272 /*
1273 * task_deliver_crash_notification:
1274 *
1275 * Makes outcall to registered host port for a corpse.
1276 */
1277 kern_return_t
1278 task_deliver_crash_notification(task_t task)
1279 {
1280 kcdata_descriptor_t crash_info = task->corpse_info;
1281 thread_t th_iter = NULL;
1282 kern_return_t kr = KERN_SUCCESS;
1283 wait_interrupt_t wsave;
1284 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
1285
1286 if (crash_info == NULL)
1287 return KERN_FAILURE;
1288
1289 code[0] = crash_info->kcd_addr_begin;
1290 code[1] = crash_info->kcd_length;
1291
1292 task_lock(task);
1293 queue_iterate(&task->threads, th_iter, thread_t, task_threads)
1294 {
1295 ipc_thread_reset(th_iter);
1296 }
1297 task_unlock(task);
1298
1299 wsave = thread_interrupt_level(THREAD_UNINT);
1300 kr = exception_triage(EXC_CORPSE_NOTIFY, code, EXCEPTION_CODE_MAX);
1301 if (kr != KERN_SUCCESS) {
1302 printf("Failed to send exception EXC_CORPSE_NOTIFY. error code: %d for pid %d\n", kr, task_pid(task));
1303 }
1304
1305 /*
1306 * crash reporting is done. Now release threads
1307 * for reaping by thread_terminate_daemon
1308 */
1309 task_lock(task);
1310 assert(task->active_thread_count == 0);
1311 queue_iterate(&task->threads, th_iter, thread_t, task_threads)
1312 {
1313 thread_mtx_lock(th_iter);
1314 assert(th_iter->inspection == TRUE);
1315 th_iter->inspection = FALSE;
1316 /* now that the corpse has been autopsied, dispose of the thread name */
1317 uthread_cleanup_name(th_iter->uthread);
1318 thread_mtx_unlock(th_iter);
1319 }
1320
1321 thread_terminate_crashed_threads();
1322 /* remove the pending corpse report flag */
1323 task_clear_corpse_pending_report(task);
1324
1325 task_unlock(task);
1326
1327 (void)thread_interrupt_level(wsave);
1328 task_terminate_internal(task);
1329
1330 return kr;
1331 }
1332
1333 /*
1334 * task_terminate:
1335 *
1336 * Terminate the specified task. See comments on thread_terminate
1337 * (kern/thread.c) about problems with terminating the "current task."
1338 */
1339
1340 kern_return_t
1341 task_terminate(
1342 task_t task)
1343 {
1344 if (task == TASK_NULL)
1345 return (KERN_INVALID_ARGUMENT);
1346
1347 if (task->bsd_info)
1348 return (KERN_FAILURE);
1349
1350 return (task_terminate_internal(task));
1351 }
1352
1353 #if MACH_ASSERT
1354 extern int proc_pid(struct proc *);
1355 extern void proc_name_kdp(task_t t, char *buf, int size);
1356 #endif /* MACH_ASSERT */
1357
1358 #define VM_MAP_PARTIAL_REAP 0x54 /* 0x150 */
1359 static void
1360 __unused task_partial_reap(task_t task, __unused int pid)
1361 {
1362 unsigned int reclaimed_resident = 0;
1363 unsigned int reclaimed_compressed = 0;
1364 uint64_t task_page_count;
1365
1366 task_page_count = (get_task_phys_footprint(task) / PAGE_SIZE_64);
1367
1368 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_START),
1369 pid, task_page_count, 0, 0, 0);
1370
1371 vm_map_partial_reap(task->map, &reclaimed_resident, &reclaimed_compressed);
1372
1373 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_END),
1374 pid, reclaimed_resident, reclaimed_compressed, 0, 0);
1375 }
1376
1377 kern_return_t
1378 task_mark_corpse(task_t task)
1379 {
1380 kern_return_t kr = KERN_SUCCESS;
1381 thread_t self_thread;
1382 (void) self_thread;
1383 wait_interrupt_t wsave;
1384
1385 assert(task != kernel_task);
1386 assert(task == current_task());
1387 assert(!task_is_a_corpse(task));
1388
1389 kr = task_collect_crash_info(task);
1390 if (kr != KERN_SUCCESS) {
1391 return kr;
1392 }
1393
1394 self_thread = current_thread();
1395
1396 wsave = thread_interrupt_level(THREAD_UNINT);
1397 task_lock(task);
1398
1399 task_set_corpse_pending_report(task);
1400 task_set_corpse(task);
1401
1402 kr = task_start_halt_locked(task, TRUE);
1403 assert(kr == KERN_SUCCESS);
1404 ipc_task_reset(task);
1405 ipc_task_enable(task);
1406
1407 task_unlock(task);
1408 /* terminate the ipc space */
1409 ipc_space_terminate(task->itk_space);
1410
1411 task_start_halt(task);
1412 thread_terminate_internal(self_thread);
1413 (void) thread_interrupt_level(wsave);
1414 assert(task->halting == TRUE);
1415 return kr;
1416 }
1417
1418 kern_return_t
1419 task_terminate_internal(
1420 task_t task)
1421 {
1422 thread_t thread, self;
1423 task_t self_task;
1424 boolean_t interrupt_save;
1425 int pid = 0;
1426
1427 assert(task != kernel_task);
1428
1429 self = current_thread();
1430 self_task = self->task;
1431
1432 /*
1433 * Get the task locked and make sure that we are not racing
1434 * with someone else trying to terminate us.
1435 */
1436 if (task == self_task)
1437 task_lock(task);
1438 else
1439 if (task < self_task) {
1440 task_lock(task);
1441 task_lock(self_task);
1442 }
1443 else {
1444 task_lock(self_task);
1445 task_lock(task);
1446 }
1447
1448 if (!task->active) {
1449 /*
1450 * Task is already being terminated.
1451 * Just return an error. If we are dying, this will
1452 * just get us to our AST special handler and that
1453 * will get us to finalize the termination of ourselves.
1454 */
1455 task_unlock(task);
1456 if (self_task != task)
1457 task_unlock(self_task);
1458
1459 return (KERN_FAILURE);
1460 }
1461
1462 if (task_corpse_pending_report(task)) {
1463 /*
1464 * Task is marked for reporting as corpse.
1465 * Just return an error. This will
1466 * just get us to our AST special handler and that
1467 * will get us to finish the path to death
1468 */
1469 task_unlock(task);
1470 if (self_task != task)
1471 task_unlock(self_task);
1472
1473 return (KERN_FAILURE);
1474 }
1475
1476 if (self_task != task)
1477 task_unlock(self_task);
1478
1479 /*
1480 * Make sure the current thread does not get aborted out of
1481 * the waits inside these operations.
1482 */
1483 interrupt_save = thread_interrupt_level(THREAD_UNINT);
1484
1485 /*
1486 * Indicate that we want all the threads to stop executing
1487 * at user space by holding the task (we would have held
1488 * each thread independently in thread_terminate_internal -
1489 * but this way we may be more likely to already find it
1490 * held there). Mark the task inactive, and prevent
1491 * further task operations via the task port.
1492 */
1493 task_hold_locked(task);
1494 task->active = FALSE;
1495 ipc_task_disable(task);
1496
1497 #if CONFIG_TELEMETRY
1498 /*
1499 * Notify telemetry that this task is going away.
1500 */
1501 telemetry_task_ctl_locked(task, TF_TELEMETRY, 0);
1502 #endif
1503
1504 /*
1505 * Terminate each thread in the task.
1506 */
1507 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1508 thread_terminate_internal(thread);
1509 }
1510
1511 #ifdef MACH_BSD
1512 if (task->bsd_info != NULL) {
1513 pid = proc_pid(task->bsd_info);
1514 }
1515 #endif /* MACH_BSD */
1516
1517 task_unlock(task);
1518
1519 proc_set_task_policy(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE,
1520 TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);
1521
1522 /* Early object reap phase */
1523
1524 // PR-17045188: Revisit implementation
1525 // task_partial_reap(task, pid);
1526
1527
1528 /*
1529 * Destroy all synchronizers owned by the task.
1530 */
1531 task_synchronizer_destroy_all(task);
1532
1533 /*
1534 * Destroy the IPC space, leaving just a reference for it.
1535 */
1536 ipc_space_terminate(task->itk_space);
1537
1538 #if 00
1539 /* if some ledgers go negative on tear-down again... */
1540 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1541 task_ledgers.phys_footprint);
1542 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1543 task_ledgers.internal);
1544 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1545 task_ledgers.internal_compressed);
1546 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1547 task_ledgers.iokit_mapped);
1548 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1549 task_ledgers.alternate_accounting);
1550 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1551 task_ledgers.alternate_accounting_compressed);
1552 #endif
1553
1554 /*
1555 * If the current thread is a member of the task
1556 * being terminated, then the last reference to
1557 * the task will not be dropped until the thread
1558 * is finally reaped. To avoid incurring the
1559 * expense of removing the address space regions
1560 	 * at reap time, we do it explicitly here.
1561 */
1562
1563 vm_map_lock(task->map);
1564 vm_map_disable_hole_optimization(task->map);
1565 vm_map_unlock(task->map);
1566
1567 vm_map_remove(task->map,
1568 task->map->min_offset,
1569 task->map->max_offset,
1570 /* no unnesting on final cleanup: */
1571 VM_MAP_REMOVE_NO_UNNESTING);
1572
1573 /* release our shared region */
1574 vm_shared_region_set(task, NULL);
1575
1576
1577 #if MACH_ASSERT
1578 /*
1579 * Identify the pmap's process, in case the pmap ledgers drift
1580 * and we have to report it.
1581 */
1582 char procname[17];
1583 if (task->bsd_info) {
1584 pid = proc_pid(task->bsd_info);
1585 proc_name_kdp(task, procname, sizeof (procname));
1586 } else {
1587 pid = 0;
1588 strlcpy(procname, "<unknown>", sizeof (procname));
1589 }
1590 pmap_set_process(task->map->pmap, pid, procname);
1591 #endif /* MACH_ASSERT */
1592
1593 lck_mtx_lock(&tasks_threads_lock);
1594 queue_remove(&tasks, task, task_t, tasks);
1595 queue_enter(&terminated_tasks, task, task_t, tasks);
1596 tasks_count--;
1597 terminated_tasks_count++;
1598 lck_mtx_unlock(&tasks_threads_lock);
1599
1600 /*
1601 * We no longer need to guard against being aborted, so restore
1602 * the previous interruptible state.
1603 */
1604 thread_interrupt_level(interrupt_save);
1605
1606 #if KPERF
1607 /* force the task to release all ctrs */
1608 if (task->t_chud & TASK_KPC_FORCED_ALL_CTRS)
1609 kpc_force_all_ctrs(task, 0);
1610 #endif
1611
1612 #if CONFIG_COALITIONS
1613 /*
1614 * Leave our coalitions. (drop activation but not reference)
1615 */
1616 coalitions_remove_task(task);
1617 #endif
1618
1619 /*
1620 * Get rid of the task active reference on itself.
1621 */
1622 task_deallocate(task);
1623
1624 return (KERN_SUCCESS);
1625 }
1626
1627 void
1628 tasks_system_suspend(boolean_t suspend)
1629 {
1630 task_t task;
1631
1632 lck_mtx_lock(&tasks_threads_lock);
1633 assert(tasks_suspend_state != suspend);
1634 tasks_suspend_state = suspend;
1635 queue_iterate(&tasks, task, task_t, tasks) {
1636 if (task == kernel_task) {
1637 continue;
1638 }
1639 suspend ? task_suspend_internal(task) : task_resume_internal(task);
1640 }
1641 lck_mtx_unlock(&tasks_threads_lock);
1642 }
1643
1644 /*
1645 * task_start_halt:
1646 *
1647 * Shut the current task down (except for the current thread) in
1648 * preparation for dramatic changes to the task (probably exec).
1649 * We hold the task and mark all other threads in the task for
1650 * termination.
1651 */
1652 kern_return_t
1653 task_start_halt(task_t task)
1654 {
1655 kern_return_t kr = KERN_SUCCESS;
1656 task_lock(task);
1657 kr = task_start_halt_locked(task, FALSE);
1658 task_unlock(task);
1659 return kr;
1660 }
1661
1662 static kern_return_t
1663 task_start_halt_locked(task_t task, boolean_t should_mark_corpse)
1664 {
1665 thread_t thread, self;
1666 uint64_t dispatchqueue_offset;
1667
1668 assert(task != kernel_task);
1669
1670 self = current_thread();
1671
1672 if (task != self->task)
1673 return (KERN_INVALID_ARGUMENT);
1674
1675 if (task->halting || !task->active || !self->active) {
1676 /*
1677 * Task or current thread is already being terminated.
1678 * Hurry up and return out of the current kernel context
1679 * so that we run our AST special handler to terminate
1680 * ourselves.
1681 */
1682 return (KERN_FAILURE);
1683 }
1684
1685 task->halting = TRUE;
1686
1687 /*
1688 * Mark all the threads to keep them from starting any more
1689 * user-level execution. The thread_terminate_internal code
1690 * would do this on a thread by thread basis anyway, but this
1691 * gives us a better chance of not having to wait there.
1692 */
1693 task_hold_locked(task);
1694 dispatchqueue_offset = get_dispatchqueue_offset_from_proc(task->bsd_info);
1695
1696 /*
1697 * Terminate all the other threads in the task.
1698 */
1699 queue_iterate(&task->threads, thread, thread_t, task_threads)
1700 {
1701 if (should_mark_corpse) {
1702 thread_mtx_lock(thread);
1703 thread->inspection = TRUE;
1704 thread_mtx_unlock(thread);
1705 }
1706 if (thread != self)
1707 thread_terminate_internal(thread);
1708 }
1709 task->dispatchqueue_offset = dispatchqueue_offset;
1710
1711 task_release_locked(task);
1712
1713 return KERN_SUCCESS;
1714 }
1715
1716
1717 /*
1718 * task_complete_halt:
1719 *
1720 * Complete task halt by waiting for threads to terminate, then clean
1721 * up task resources (VM, port namespace, etc...) and then let the
1722 * current thread go in the (practically empty) task context.
1723 */
1724 void
1725 task_complete_halt(task_t task)
1726 {
1727 task_lock(task);
1728 assert(task->halting);
1729 assert(task == current_task());
1730
1731 /*
1732 * Wait for the other threads to get shut down.
1733 * When the last other thread is reaped, we'll be
1734 * woken up.
1735 */
1736 if (task->thread_count > 1) {
1737 assert_wait((event_t)&task->halting, THREAD_UNINT);
1738 task_unlock(task);
1739 thread_block(THREAD_CONTINUE_NULL);
1740 } else {
1741 task_unlock(task);
1742 }
1743
1744 /*
1745 * Give the machine dependent code a chance
1746 * to perform cleanup of task-level resources
1747 * associated with the current thread before
1748 * ripping apart the task.
1749 */
1750 machine_task_terminate(task);
1751
1752 /*
1753 * Destroy all synchronizers owned by the task.
1754 */
1755 task_synchronizer_destroy_all(task);
1756
1757 /*
1758 * Destroy the contents of the IPC space, leaving just
1759 * a reference for it.
1760 */
1761 ipc_space_clean(task->itk_space);
1762
1763 /*
1764 * Clean out the address space, as we are going to be
1765 * getting a new one.
1766 */
1767 vm_map_remove(task->map, task->map->min_offset,
1768 task->map->max_offset,
1769 /* no unnesting on final cleanup: */
1770 VM_MAP_REMOVE_NO_UNNESTING);
1771
1772 task->halting = FALSE;
1773 }
1774
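/*
 * Illustrative sketch (editorial addition, not part of the original source):
 * the expected pairing of the two halt routines on an exec-like path, assuming
 * the caller runs as a thread of the task being torn down and error handling
 * is elided:
 *
 *	kern_return_t kr;
 *
 *	kr = task_start_halt(current_task());    // mark sibling threads for termination
 *	if (kr == KERN_SUCCESS) {
 *		task_complete_halt(current_task());  // wait for them, then strip VM/IPC state
 *		// ... build the new image in the now nearly-empty task ...
 *	}
 */
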
1775 /*
1776 * task_hold_locked:
1777 *
1778 * Suspend execution of the specified task.
1779 * This is a recursive-style suspension of the task, a count of
1780 * suspends is maintained.
1781 *
1782 * CONDITIONS: the task is locked and active.
1783 */
1784 void
1785 task_hold_locked(
1786 register task_t task)
1787 {
1788 register thread_t thread;
1789
1790 assert(task->active);
1791
1792 if (task->suspend_count++ > 0)
1793 return;
1794
1795 /*
1796 * Iterate through all the threads and hold them.
1797 */
1798 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1799 thread_mtx_lock(thread);
1800 thread_hold(thread);
1801 thread_mtx_unlock(thread);
1802 }
1803 }
1804
1805 /*
1806 * task_hold:
1807 *
1808 * Same as the internal routine above, except that it must lock
1809 * and verify that the task is active. This differs from task_suspend
1810 * in that it places a kernel hold on the task rather than just a
1811 * user-level hold. This keeps users from over-resuming and setting
1812 * it running out from under the kernel.
1813 *
1814 * CONDITIONS: the caller holds a reference on the task
1815 */
1816 kern_return_t
1817 task_hold(
1818 register task_t task)
1819 {
1820 if (task == TASK_NULL)
1821 return (KERN_INVALID_ARGUMENT);
1822
1823 task_lock(task);
1824
1825 if (!task->active) {
1826 task_unlock(task);
1827
1828 return (KERN_FAILURE);
1829 }
1830
1831 task_hold_locked(task);
1832 task_unlock(task);
1833
1834 return (KERN_SUCCESS);
1835 }
1836
1837 kern_return_t
1838 task_wait(
1839 task_t task,
1840 boolean_t until_not_runnable)
1841 {
1842 if (task == TASK_NULL)
1843 return (KERN_INVALID_ARGUMENT);
1844
1845 task_lock(task);
1846
1847 if (!task->active) {
1848 task_unlock(task);
1849
1850 return (KERN_FAILURE);
1851 }
1852
1853 task_wait_locked(task, until_not_runnable);
1854 task_unlock(task);
1855
1856 return (KERN_SUCCESS);
1857 }
1858
1859 /*
1860 * task_wait_locked:
1861 *
1862 * Wait for all threads in task to stop.
1863 *
1864 * Conditions:
1865 * Called with task locked, active, and held.
1866 */
1867 void
1868 task_wait_locked(
1869 register task_t task,
1870 boolean_t until_not_runnable)
1871 {
1872 register thread_t thread, self;
1873
1874 assert(task->active);
1875 assert(task->suspend_count > 0);
1876
1877 self = current_thread();
1878
1879 /*
1880 * Iterate through all the threads and wait for them to
1881 * stop. Do not wait for the current thread if it is within
1882 * the task.
1883 */
1884 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1885 if (thread != self)
1886 thread_wait(thread, until_not_runnable);
1887 }
1888 }
1889
1890 /*
1891 * task_release_locked:
1892 *
1893 * Release a kernel hold on a task.
1894 *
1895 * CONDITIONS: the task is locked and active
1896 */
1897 void
1898 task_release_locked(
1899 register task_t task)
1900 {
1901 register thread_t thread;
1902
1903 assert(task->active);
1904 assert(task->suspend_count > 0);
1905
1906 if (--task->suspend_count > 0)
1907 return;
1908
1909 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1910 thread_mtx_lock(thread);
1911 thread_release(thread);
1912 thread_mtx_unlock(thread);
1913 }
1914 }
1915
1916 /*
1917 * task_release:
1918 *
1919 * Same as the internal routine above, except that it must lock
1920 * and verify that the task is active.
1921 *
1922 * CONDITIONS: The caller holds a reference to the task
1923 */
1924 kern_return_t
1925 task_release(
1926 task_t task)
1927 {
1928 if (task == TASK_NULL)
1929 return (KERN_INVALID_ARGUMENT);
1930
1931 task_lock(task);
1932
1933 if (!task->active) {
1934 task_unlock(task);
1935
1936 return (KERN_FAILURE);
1937 }
1938
1939 task_release_locked(task);
1940 task_unlock(task);
1941
1942 return (KERN_SUCCESS);
1943 }
1944
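/*
 * Illustrative sketch (editorial addition): the kernel-internal stop/inspect/
 * resume pattern built from task_hold_locked(), task_wait_locked() and
 * task_release_locked(), essentially what place_task_hold() and
 * release_task_hold() below do on behalf of user-level suspensions.  Assumes
 * the task is active:
 *
 *	task_lock(task);
 *	task_hold_locked(task);          // one kernel-level hold on every thread
 *	task_wait_locked(task, FALSE);   // wait for threads to stop running user code
 *	// ... examine or modify the stopped task ...
 *	task_release_locked(task);
 *	task_unlock(task);
 */
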
1945 kern_return_t
1946 task_threads(
1947 task_t task,
1948 thread_act_array_t *threads_out,
1949 mach_msg_type_number_t *count)
1950 {
1951 mach_msg_type_number_t actual;
1952 thread_t *thread_list;
1953 thread_t thread;
1954 vm_size_t size, size_needed;
1955 void *addr;
1956 unsigned int i, j;
1957
1958 if (task == TASK_NULL)
1959 return (KERN_INVALID_ARGUMENT);
1960
1961 size = 0; addr = NULL;
1962
1963 for (;;) {
1964 task_lock(task);
1965 if (!task->active) {
1966 task_unlock(task);
1967
1968 if (size != 0)
1969 kfree(addr, size);
1970
1971 return (KERN_FAILURE);
1972 }
1973
1974 actual = task->thread_count;
1975
1976 /* do we have the memory we need? */
1977 size_needed = actual * sizeof (mach_port_t);
1978 if (size_needed <= size)
1979 break;
1980
1981 /* unlock the task and allocate more memory */
1982 task_unlock(task);
1983
1984 if (size != 0)
1985 kfree(addr, size);
1986
1987 assert(size_needed > 0);
1988 size = size_needed;
1989
1990 addr = kalloc(size);
1991 if (addr == 0)
1992 return (KERN_RESOURCE_SHORTAGE);
1993 }
1994
1995 /* OK, have memory and the task is locked & active */
1996 thread_list = (thread_t *)addr;
1997
1998 i = j = 0;
1999
2000 for (thread = (thread_t)queue_first(&task->threads); i < actual;
2001 ++i, thread = (thread_t)queue_next(&thread->task_threads)) {
2002 thread_reference_internal(thread);
2003 thread_list[j++] = thread;
2004 }
2005
2006 assert(queue_end(&task->threads, (queue_entry_t)thread));
2007
2008 actual = j;
2009 size_needed = actual * sizeof (mach_port_t);
2010
2011 /* can unlock task now that we've got the thread refs */
2012 task_unlock(task);
2013
2014 if (actual == 0) {
2015 /* no threads, so return null pointer and deallocate memory */
2016
2017 *threads_out = NULL;
2018 *count = 0;
2019
2020 if (size != 0)
2021 kfree(addr, size);
2022 }
2023 else {
2024 /* if we allocated too much, must copy */
2025
2026 if (size_needed < size) {
2027 void *newaddr;
2028
2029 newaddr = kalloc(size_needed);
2030 if (newaddr == 0) {
2031 for (i = 0; i < actual; ++i)
2032 thread_deallocate(thread_list[i]);
2033 kfree(addr, size);
2034 return (KERN_RESOURCE_SHORTAGE);
2035 }
2036
2037 bcopy(addr, newaddr, size_needed);
2038 kfree(addr, size);
2039 thread_list = (thread_t *)newaddr;
2040 }
2041
2042 *threads_out = thread_list;
2043 *count = actual;
2044
2045 /* do the conversion that Mig should handle */
2046
2047 for (i = 0; i < actual; ++i)
2048 ((ipc_port_t *) thread_list)[i] = convert_thread_to_port(thread_list[i]);
2049 }
2050
2051 return (KERN_SUCCESS);
2052 }
2053
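/*
 * Illustrative sketch (editorial addition): a user-space caller of
 * task_threads() receives one send right per thread plus an out-of-line array,
 * and is responsible for releasing both.  Minimal example, assuming
 * <mach/mach.h> and a task port the caller already holds:
 *
 *	thread_act_array_t threads;
 *	mach_msg_type_number_t count, i;
 *
 *	if (task_threads(mach_task_self(), &threads, &count) == KERN_SUCCESS) {
 *		for (i = 0; i < count; i++)
 *			mach_port_deallocate(mach_task_self(), threads[i]);
 *		vm_deallocate(mach_task_self(), (vm_address_t)threads,
 *		    count * sizeof(*threads));
 *	}
 */
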
2054 #define TASK_HOLD_NORMAL 0
2055 #define TASK_HOLD_PIDSUSPEND 1
2056 #define TASK_HOLD_LEGACY 2
2057 #define TASK_HOLD_LEGACY_ALL 3
2058
2059 static kern_return_t
2060 place_task_hold (
2061 register task_t task,
2062 int mode)
2063 {
2064 if (!task->active) {
2065 return (KERN_FAILURE);
2066 }
2067
2068 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2069 MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_SUSPEND) | DBG_FUNC_NONE,
2070 task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
2071 task->user_stop_count, task->user_stop_count + 1, 0);
2072
2073 #if MACH_ASSERT
2074 current_task()->suspends_outstanding++;
2075 #endif
2076
2077 if (mode == TASK_HOLD_LEGACY)
2078 task->legacy_stop_count++;
2079
2080 if (task->user_stop_count++ > 0) {
2081 /*
2082 * If the stop count was positive, the task is
2083 * already stopped and we can exit.
2084 */
2085 return (KERN_SUCCESS);
2086 }
2087
2088 /*
2089 * Put a kernel-level hold on the threads in the task (all
2090 * user-level task suspensions added together represent a
2091 * single kernel-level hold). We then wait for the threads
2092 * to stop executing user code.
2093 */
2094 task_hold_locked(task);
2095 task_wait_locked(task, FALSE);
2096
2097 return (KERN_SUCCESS);
2098 }
2099
2100 static kern_return_t
2101 release_task_hold (
2102 register task_t task,
2103 int mode)
2104 {
2105 register boolean_t release = FALSE;
2106
2107 if (!task->active) {
2108 return (KERN_FAILURE);
2109 }
2110
2111 if (mode == TASK_HOLD_PIDSUSPEND) {
2112 if (task->pidsuspended == FALSE) {
2113 return (KERN_FAILURE);
2114 }
2115 task->pidsuspended = FALSE;
2116 }
2117
2118 if (task->user_stop_count > (task->pidsuspended ? 1 : 0)) {
2119
2120 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2121 MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_RESUME) | DBG_FUNC_NONE,
2122 task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
2123 task->user_stop_count, mode, task->legacy_stop_count);
2124
2125 #if MACH_ASSERT
2126 /*
2127 * This is obviously not robust; if we suspend one task and then resume a different one,
2128 * we'll fly under the radar. This is only meant to catch the common case of a crashed
2129 * or buggy suspender.
2130 */
2131 current_task()->suspends_outstanding--;
2132 #endif
2133
2134 if (mode == TASK_HOLD_LEGACY_ALL) {
2135 if (task->legacy_stop_count >= task->user_stop_count) {
2136 task->user_stop_count = 0;
2137 release = TRUE;
2138 } else {
2139 task->user_stop_count -= task->legacy_stop_count;
2140 }
2141 task->legacy_stop_count = 0;
2142 } else {
2143 if (mode == TASK_HOLD_LEGACY && task->legacy_stop_count > 0)
2144 task->legacy_stop_count--;
2145 if (--task->user_stop_count == 0)
2146 release = TRUE;
2147 }
2148 }
2149 else {
2150 return (KERN_FAILURE);
2151 }
2152
2153 /*
2154 * Release the task if necessary.
2155 */
2156 if (release)
2157 task_release_locked(task);
2158
2159 return (KERN_SUCCESS);
2160 }
2161
2162
2163 /*
2164 * task_suspend:
2165 *
2166 * Implement an (old-fashioned) user-level suspension on a task.
2167 *
2168 * Because the user isn't expecting to have to manage a suspension
2169 * token, we'll track it for him in the kernel in the form of a naked
2170 * send right to the task's resume port. All such send rights
2171 * account for a single suspension against the task (unlike task_suspend2()
2172 * where each caller gets a unique suspension count represented by a
2173 * unique send-once right).
2174 *
2175 * Conditions:
2176 * The caller holds a reference to the task
2177 */
2178 kern_return_t
2179 task_suspend(
2180 register task_t task)
2181 {
2182 kern_return_t kr;
2183 mach_port_t port, send, old_notify;
2184 mach_port_name_t name;
2185
2186 if (task == TASK_NULL || task == kernel_task)
2187 return (KERN_INVALID_ARGUMENT);
2188
2189 task_lock(task);
2190
2191 /*
2192 * Claim a send right on the task resume port, and request a no-senders
2193 * notification on that port (if none outstanding).
2194 */
2195 if (task->itk_resume == IP_NULL) {
2196 task->itk_resume = ipc_port_alloc_kernel();
2197 if (!IP_VALID(task->itk_resume))
2198 panic("failed to create resume port");
2199 ipc_kobject_set(task->itk_resume, (ipc_kobject_t)task, IKOT_TASK_RESUME);
2200 }
2201
2202 port = task->itk_resume;
2203 ip_lock(port);
2204 assert(ip_active(port));
2205
2206 send = ipc_port_make_send_locked(port);
2207 assert(IP_VALID(send));
2208
2209 if (port->ip_nsrequest == IP_NULL) {
2210 ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
2211 assert(old_notify == IP_NULL);
2212 /* port unlocked */
2213 } else {
2214 ip_unlock(port);
2215 }
2216
2217 /*
2218 * place a legacy hold on the task.
2219 */
2220 kr = place_task_hold(task, TASK_HOLD_LEGACY);
2221 if (kr != KERN_SUCCESS) {
2222 task_unlock(task);
2223 ipc_port_release_send(send);
2224 return kr;
2225 }
2226
2227 task_unlock(task);
2228
2229 /*
2230 * Copyout the send right into the calling task's IPC space. It won't know it is there,
2231 * but we'll look it up when calling a traditional resume. Any IPC operations that
2232 * deallocate the send right will auto-release the suspension.
2233 */
2234 if ((kr = ipc_kmsg_copyout_object(current_task()->itk_space, (ipc_object_t)send,
2235 MACH_MSG_TYPE_MOVE_SEND, &name)) != KERN_SUCCESS) {
2236 printf("warning: %s(%d) failed to copyout suspension token for pid %d with error: %d\n",
2237 proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
2238 task_pid(task), kr);
2239 return (kr);
2240 }
2241
2242 return (kr);
2243 }
2244
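/*
 * Illustrative sketch (editorial addition): the legacy interface as seen from
 * user space.  Each successful task_suspend() quietly copies a send right on
 * the resume port into the caller's IPC space; a matching task_resume() (or
 * destruction of that right) drops the corresponding hold.  Assumes the caller
 * already holds a send right for the target task:
 *
 *	if (task_suspend(target) == KERN_SUCCESS) {
 *		// ... inspect the stopped task ...
 *		(void) task_resume(target);
 *	}
 */
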
2245 /*
2246 * task_resume:
2247 * Release a user hold on a task.
2248 *
2249 * Conditions:
2250 * The caller holds a reference to the task
2251 */
2252 kern_return_t
2253 task_resume(
2254 register task_t task)
2255 {
2256 kern_return_t kr;
2257 mach_port_name_t resume_port_name;
2258 ipc_entry_t resume_port_entry;
2259 ipc_space_t space = current_task()->itk_space;
2260
2261 if (task == TASK_NULL || task == kernel_task )
2262 return (KERN_INVALID_ARGUMENT);
2263
2264 /* release a legacy task hold */
2265 task_lock(task);
2266 kr = release_task_hold(task, TASK_HOLD_LEGACY);
2267 task_unlock(task);
2268
2269 is_write_lock(space);
2270 if (is_active(space) && IP_VALID(task->itk_resume) &&
2271 ipc_hash_lookup(space, (ipc_object_t)task->itk_resume, &resume_port_name, &resume_port_entry) == TRUE) {
2272 /*
2273 * We found a suspension token in the caller's IPC space. Release a send right to indicate that
2274 * we are holding one less legacy hold on the task from this caller. If the release failed,
2275 * go ahead and drop all the rights, as someone either already released our holds or the task
2276 * is gone.
2277 */
2278 if (kr == KERN_SUCCESS)
2279 ipc_right_dealloc(space, resume_port_name, resume_port_entry);
2280 else
2281 ipc_right_destroy(space, resume_port_name, resume_port_entry, FALSE, 0);
2282 /* space unlocked */
2283 } else {
2284 is_write_unlock(space);
2285 if (kr == KERN_SUCCESS)
2286 printf("warning: %s(%d) performed out-of-band resume on pid %d\n",
2287 proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
2288 task_pid(task));
2289 }
2290
2291 return kr;
2292 }
2293
2294 /*
2295 * Suspend the target task.
2296 * Making/holding a token/reference/port is the caller's responsibility.
2297 */
2298 kern_return_t
2299 task_suspend_internal(task_t task)
2300 {
2301 kern_return_t kr;
2302
2303 if (task == TASK_NULL || task == kernel_task)
2304 return (KERN_INVALID_ARGUMENT);
2305
2306 task_lock(task);
2307 kr = place_task_hold(task, TASK_HOLD_NORMAL);
2308 task_unlock(task);
2309 return (kr);
2310 }
2311
2312 /*
2313 * Suspend the target task, and return a suspension token. The token
2314 * represents a reference on the suspended task.
2315 */
2316 kern_return_t
2317 task_suspend2(
2318 register task_t task,
2319 task_suspension_token_t *suspend_token)
2320 {
2321 kern_return_t kr;
2322
2323 kr = task_suspend_internal(task);
2324 if (kr != KERN_SUCCESS) {
2325 *suspend_token = TASK_NULL;
2326 return (kr);
2327 }
2328
2329 /*
2330 * Take a reference on the target task and return that to the caller
2331 * as a "suspension token," which can be converted into an SO right to
2332 * the now-suspended task's resume port.
2333 */
2334 task_reference_internal(task);
2335 *suspend_token = task;
2336
2337 return (KERN_SUCCESS);
2338 }
2339
2340 /*
2341 * Resume the task
2342 * (reference/token/port management is caller's responsibility).
2343 */
2344 kern_return_t
2345 task_resume_internal(
2346 register task_suspension_token_t task)
2347 {
2348 kern_return_t kr;
2349
2350 if (task == TASK_NULL || task == kernel_task)
2351 return (KERN_INVALID_ARGUMENT);
2352
2353 task_lock(task);
2354 kr = release_task_hold(task, TASK_HOLD_NORMAL);
2355 task_unlock(task);
2356 return (kr);
2357 }
2358
2359 /*
2360 * Resume the task using a suspension token. Consumes the token's ref.
2361 */
2362 kern_return_t
2363 task_resume2(
2364 register task_suspension_token_t task)
2365 {
2366 kern_return_t kr;
2367
2368 kr = task_resume_internal(task);
2369 task_suspension_token_deallocate(task);
2370
2371 return (kr);
2372 }
2373
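/*
 * Illustrative sketch (editorial addition): the token-based interface.  Unlike
 * task_suspend(), every successful task_suspend2() returns its own suspension
 * token (a reference on the task), so independent callers cannot release each
 * other's holds:
 *
 *	task_suspension_token_t token;
 *
 *	if (task_suspend2(target, &token) == KERN_SUCCESS) {
 *		// ... target is stopped; 'token' carries one task reference ...
 *		(void) task_resume2(token);   // resumes and consumes the token
 *	}
 */
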
2374 boolean_t
2375 task_suspension_notify(mach_msg_header_t *request_header)
2376 {
2377 ipc_port_t port = (ipc_port_t) request_header->msgh_remote_port;
2378 task_t task = convert_port_to_task_suspension_token(port);
2379 mach_msg_type_number_t not_count;
2380
2381 if (task == TASK_NULL || task == kernel_task)
2382 return TRUE; /* nothing to do */
2383
2384 switch (request_header->msgh_id) {
2385
2386 case MACH_NOTIFY_SEND_ONCE:
2387 /* release the hold held by this specific send-once right */
2388 task_lock(task);
2389 release_task_hold(task, TASK_HOLD_NORMAL);
2390 task_unlock(task);
2391 break;
2392
2393 case MACH_NOTIFY_NO_SENDERS:
2394 not_count = ((mach_no_senders_notification_t *)request_header)->not_count;
2395
2396 task_lock(task);
2397 ip_lock(port);
2398 if (port->ip_mscount == not_count) {
2399
2400 /* release all the [remaining] outstanding legacy holds */
2401 assert(port->ip_nsrequest == IP_NULL);
2402 ip_unlock(port);
2403 release_task_hold(task, TASK_HOLD_LEGACY_ALL);
2404 task_unlock(task);
2405
2406 } else if (port->ip_nsrequest == IP_NULL) {
2407 ipc_port_t old_notify;
2408
2409 task_unlock(task);
2410 /* new send rights, re-arm notification at current make-send count */
2411 ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
2412 assert(old_notify == IP_NULL);
2413 /* port unlocked */
2414 } else {
2415 ip_unlock(port);
2416 task_unlock(task);
2417 }
2418 break;
2419
2420 default:
2421 break;
2422 }
2423
2424 task_suspension_token_deallocate(task); /* drop token reference */
2425 return TRUE;
2426 }
2427
2428 kern_return_t
2429 task_pidsuspend_locked(task_t task)
2430 {
2431 kern_return_t kr;
2432
2433 if (task->pidsuspended) {
2434 kr = KERN_FAILURE;
2435 goto out;
2436 }
2437
2438 task->pidsuspended = TRUE;
2439
2440 kr = place_task_hold(task, TASK_HOLD_PIDSUSPEND);
2441 if (kr != KERN_SUCCESS) {
2442 task->pidsuspended = FALSE;
2443 }
2444 out:
2445 return(kr);
2446 }
2447
2448
2449 /*
2450 * task_pidsuspend:
2451 *
2452 * Suspends a task by placing a hold on its threads.
2453 *
2454 * Conditions:
2455 * The caller holds a reference to the task
2456 */
2457 kern_return_t
2458 task_pidsuspend(
2459 register task_t task)
2460 {
2461 kern_return_t kr;
2462
2463 if (task == TASK_NULL || task == kernel_task)
2464 return (KERN_INVALID_ARGUMENT);
2465
2466 task_lock(task);
2467
2468 kr = task_pidsuspend_locked(task);
2469
2470 task_unlock(task);
2471
2472 return (kr);
2473 }
2474
2475 /* If enabled, we bring all the frozen pages back in prior to resumption; otherwise, they're faulted back in on demand */
2476 #define THAW_ON_RESUME 1
2477
2478 /*
2479 * task_pidresume:
2480 * Resumes a previously suspended task.
2481 *
2482 * Conditions:
2483 * The caller holds a reference to the task
2484 */
2485 kern_return_t
2486 task_pidresume(
2487 register task_t task)
2488 {
2489 kern_return_t kr;
2490
2491 if (task == TASK_NULL || task == kernel_task)
2492 return (KERN_INVALID_ARGUMENT);
2493
2494 task_lock(task);
2495
2496 #if (CONFIG_FREEZE && THAW_ON_RESUME)
2497
2498 while (task->changing_freeze_state) {
2499
2500 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
2501 task_unlock(task);
2502 thread_block(THREAD_CONTINUE_NULL);
2503
2504 task_lock(task);
2505 }
2506 task->changing_freeze_state = TRUE;
2507 #endif
2508
2509 kr = release_task_hold(task, TASK_HOLD_PIDSUSPEND);
2510
2511 task_unlock(task);
2512
2513 #if (CONFIG_FREEZE && THAW_ON_RESUME)
2514 if ((kr == KERN_SUCCESS) && (task->frozen == TRUE)) {
2515
2516 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
2517
2518 kr = KERN_SUCCESS;
2519 } else {
2520
2521 kr = vm_map_thaw(task->map);
2522 }
2523 }
2524 task_lock(task);
2525
2526 if (kr == KERN_SUCCESS)
2527 task->frozen = FALSE;
2528 task->changing_freeze_state = FALSE;
2529 thread_wakeup(&task->changing_freeze_state);
2530
2531 task_unlock(task);
2532 #endif
2533
2534 return (kr);
2535 }
2536
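/*
 * Illustrative sketch (editorial addition): task_pidsuspend() and
 * task_pidresume() implement a single, non-counting hold, assumed to be
 * reached through the pid_suspend()/pid_resume() BSD shims rather than
 * called directly:
 *
 *	kr = task_pidsuspend(task);   // fails with KERN_FAILURE if already pidsuspended
 *	...
 *	kr = task_pidresume(task);    // drops the hold; may also thaw a frozen task
 */
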
2537 #if CONFIG_FREEZE
2538
2539 /*
2540 * task_freeze:
2541 *
2542 * Freeze a task.
2543 *
2544 * Conditions:
2545 * The caller holds a reference to the task
2546 */
2547 extern void vm_wake_compactor_swapper();
2548 extern queue_head_t c_swapout_list_head;
2549
2550 kern_return_t
2551 task_freeze(
2552 register task_t task,
2553 uint32_t *purgeable_count,
2554 uint32_t *wired_count,
2555 uint32_t *clean_count,
2556 uint32_t *dirty_count,
2557 uint32_t dirty_budget,
2558 boolean_t *shared,
2559 boolean_t walk_only)
2560 {
2561 kern_return_t kr;
2562
2563 if (task == TASK_NULL || task == kernel_task)
2564 return (KERN_INVALID_ARGUMENT);
2565
2566 task_lock(task);
2567
2568 while (task->changing_freeze_state) {
2569
2570 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
2571 task_unlock(task);
2572 thread_block(THREAD_CONTINUE_NULL);
2573
2574 task_lock(task);
2575 }
2576 if (task->frozen) {
2577 task_unlock(task);
2578 return (KERN_FAILURE);
2579 }
2580 task->changing_freeze_state = TRUE;
2581
2582 task_unlock(task);
2583
2584 if (walk_only) {
2585 kr = vm_map_freeze_walk(task->map, purgeable_count, wired_count, clean_count, dirty_count, dirty_budget, shared);
2586 } else {
2587 kr = vm_map_freeze(task->map, purgeable_count, wired_count, clean_count, dirty_count, dirty_budget, shared);
2588 }
2589
2590 task_lock(task);
2591
2592 if (walk_only == FALSE && kr == KERN_SUCCESS)
2593 task->frozen = TRUE;
2594 task->changing_freeze_state = FALSE;
2595 thread_wakeup(&task->changing_freeze_state);
2596
2597 task_unlock(task);
2598
2599 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
2600 vm_wake_compactor_swapper();
2601 /*
2602 * We do an explicit wakeup of the swapout thread here
2603 * because the compact_and_swap routines don't have
2604 * knowledge about these kinds of "per-task packed c_segs"
2605 * and so will not be evaluating whether we need to do
2606 * a wakeup there.
2607 */
2608 thread_wakeup((event_t)&c_swapout_list_head);
2609 }
2610
2611 return (kr);
2612 }
2613
2614 /*
2615 * task_thaw:
2616 *
2617 * Thaw a currently frozen task.
2618 *
2619 * Conditions:
2620 * The caller holds a reference to the task
2621 */
2622 kern_return_t
2623 task_thaw(
2624 register task_t task)
2625 {
2626 kern_return_t kr;
2627
2628 if (task == TASK_NULL || task == kernel_task)
2629 return (KERN_INVALID_ARGUMENT);
2630
2631 task_lock(task);
2632
2633 while (task->changing_freeze_state) {
2634
2635 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
2636 task_unlock(task);
2637 thread_block(THREAD_CONTINUE_NULL);
2638
2639 task_lock(task);
2640 }
2641 if (!task->frozen) {
2642 task_unlock(task);
2643 return (KERN_FAILURE);
2644 }
2645 task->changing_freeze_state = TRUE;
2646
2647 if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE) {
2648 task_unlock(task);
2649
2650 kr = vm_map_thaw(task->map);
2651
2652 task_lock(task);
2653
2654 if (kr == KERN_SUCCESS)
2655 task->frozen = FALSE;
2656 } else {
2657 task->frozen = FALSE;
2658 kr = KERN_SUCCESS;
2659 }
2660
2661 task->changing_freeze_state = FALSE;
2662 thread_wakeup(&task->changing_freeze_state);
2663
2664 task_unlock(task);
2665
2666 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
2667 vm_wake_compactor_swapper();
2668 }
2669
2670 return (kr);
2671 }
2672
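/*
 * Illustrative sketch (editorial addition): the freezer entry points above are
 * assumed to be driven by the memorystatus/jetsam machinery, roughly as below;
 * 'dirty_budget' is a stand-in name for the caller's dirty-page budget:
 *
 *	uint32_t purgeable, wired, clean, dirty;
 *	boolean_t shared;
 *
 *	kr = task_freeze(task, &purgeable, &wired, &clean, &dirty,
 *	    dirty_budget, &shared, FALSE);   // FALSE: actually freeze, don't just walk
 *	...
 *	kr = task_thaw(task);
 */
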
2673 #endif /* CONFIG_FREEZE */
2674
2675 kern_return_t
2676 host_security_set_task_token(
2677 host_security_t host_security,
2678 task_t task,
2679 security_token_t sec_token,
2680 audit_token_t audit_token,
2681 host_priv_t host_priv)
2682 {
2683 ipc_port_t host_port;
2684 kern_return_t kr;
2685
2686 if (task == TASK_NULL)
2687 return(KERN_INVALID_ARGUMENT);
2688
2689 if (host_security == HOST_NULL)
2690 return(KERN_INVALID_SECURITY);
2691
2692 task_lock(task);
2693 task->sec_token = sec_token;
2694 task->audit_token = audit_token;
2695
2696 task_unlock(task);
2697
2698 if (host_priv != HOST_PRIV_NULL) {
2699 kr = host_get_host_priv_port(host_priv, &host_port);
2700 } else {
2701 kr = host_get_host_port(host_priv_self(), &host_port);
2702 }
2703 assert(kr == KERN_SUCCESS);
2704 kr = task_set_special_port(task, TASK_HOST_PORT, host_port);
2705 return(kr);
2706 }
2707
2708 kern_return_t
2709 task_send_trace_memory(
2710 task_t target_task,
2711 __unused uint32_t pid,
2712 __unused uint64_t uniqueid)
2713 {
2714 kern_return_t kr = KERN_INVALID_ARGUMENT;
2715 if (target_task == TASK_NULL)
2716 return (KERN_INVALID_ARGUMENT);
2717
2718 #if CONFIG_ATM
2719 kr = atm_send_proc_inspect_notification(target_task,
2720 pid,
2721 uniqueid);
2722
2723 #endif
2724 return (kr);
2725 }
2726 /*
2727 * This routine was added, pretty much exclusively, for registering the
2728 * RPC glue vector for in-kernel short circuited tasks. Rather than
2729 * removing it completely, I have only disabled that feature (which was
2730 * the only feature at the time). It just appears that we are going to
2731 * want to add some user data to tasks in the future (e.g. bsd info,
2732 * task names, etc...), so I left it in the formal task interface.
2733 */
2734 kern_return_t
2735 task_set_info(
2736 task_t task,
2737 task_flavor_t flavor,
2738 __unused task_info_t task_info_in, /* pointer to IN array */
2739 __unused mach_msg_type_number_t task_info_count)
2740 {
2741 if (task == TASK_NULL)
2742 return(KERN_INVALID_ARGUMENT);
2743
2744 switch (flavor) {
2745
2746 #if CONFIG_ATM
2747 case TASK_TRACE_MEMORY_INFO:
2748 {
2749 if (task_info_count != TASK_TRACE_MEMORY_INFO_COUNT)
2750 return (KERN_INVALID_ARGUMENT);
2751
2752 assert(task_info_in != NULL);
2753 task_trace_memory_info_t mem_info;
2754 mem_info = (task_trace_memory_info_t) task_info_in;
2755 kern_return_t kr = atm_register_trace_memory(task,
2756 mem_info->user_memory_address,
2757 mem_info->buffer_size);
2758 return kr;
2759 break;
2760 }
2761
2762 #endif
2763 default:
2764 return (KERN_INVALID_ARGUMENT);
2765 }
2766 return (KERN_SUCCESS);
2767 }
2768
2769 int radar_20146450 = 1;
2770 kern_return_t
2771 task_info(
2772 task_t task,
2773 task_flavor_t flavor,
2774 task_info_t task_info_out,
2775 mach_msg_type_number_t *task_info_count)
2776 {
2777 kern_return_t error = KERN_SUCCESS;
2778
2779 if (task == TASK_NULL)
2780 return (KERN_INVALID_ARGUMENT);
2781
2782 task_lock(task);
2783
2784 if ((task != current_task()) && (!task->active)) {
2785 task_unlock(task);
2786 return (KERN_INVALID_ARGUMENT);
2787 }
2788
2789 switch (flavor) {
2790
2791 case TASK_BASIC_INFO_32:
2792 case TASK_BASIC2_INFO_32:
2793 {
2794 task_basic_info_32_t basic_info;
2795 vm_map_t map;
2796 clock_sec_t secs;
2797 clock_usec_t usecs;
2798
2799 if (*task_info_count < TASK_BASIC_INFO_32_COUNT) {
2800 error = KERN_INVALID_ARGUMENT;
2801 break;
2802 }
2803
2804 basic_info = (task_basic_info_32_t)task_info_out;
2805
2806 map = (task == kernel_task)? kernel_map: task->map;
2807 basic_info->virtual_size = (typeof(basic_info->virtual_size))map->size;
2808 if (flavor == TASK_BASIC2_INFO_32) {
2809 /*
2810 * The "BASIC2" flavor gets the maximum resident
2811 * size instead of the current resident size...
2812 */
2813 basic_info->resident_size = pmap_resident_max(map->pmap);
2814 } else {
2815 basic_info->resident_size = pmap_resident_count(map->pmap);
2816 }
2817 basic_info->resident_size *= PAGE_SIZE;
2818
2819 basic_info->policy = ((task != kernel_task)?
2820 POLICY_TIMESHARE: POLICY_RR);
2821 basic_info->suspend_count = task->user_stop_count;
2822
2823 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
2824 basic_info->user_time.seconds =
2825 (typeof(basic_info->user_time.seconds))secs;
2826 basic_info->user_time.microseconds = usecs;
2827
2828 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
2829 basic_info->system_time.seconds =
2830 (typeof(basic_info->system_time.seconds))secs;
2831 basic_info->system_time.microseconds = usecs;
2832
2833 *task_info_count = TASK_BASIC_INFO_32_COUNT;
2834 break;
2835 }
2836
2837 case TASK_BASIC_INFO_64:
2838 {
2839 task_basic_info_64_t basic_info;
2840 vm_map_t map;
2841 clock_sec_t secs;
2842 clock_usec_t usecs;
2843
2844 if (*task_info_count < TASK_BASIC_INFO_64_COUNT) {
2845 error = KERN_INVALID_ARGUMENT;
2846 break;
2847 }
2848
2849 basic_info = (task_basic_info_64_t)task_info_out;
2850
2851 map = (task == kernel_task)? kernel_map: task->map;
2852 basic_info->virtual_size = map->size;
2853 basic_info->resident_size =
2854 (mach_vm_size_t)(pmap_resident_count(map->pmap))
2855 * PAGE_SIZE_64;
2856
2857 basic_info->policy = ((task != kernel_task)?
2858 POLICY_TIMESHARE: POLICY_RR);
2859 basic_info->suspend_count = task->user_stop_count;
2860
2861 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
2862 basic_info->user_time.seconds =
2863 (typeof(basic_info->user_time.seconds))secs;
2864 basic_info->user_time.microseconds = usecs;
2865
2866 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
2867 basic_info->system_time.seconds =
2868 (typeof(basic_info->system_time.seconds))secs;
2869 basic_info->system_time.microseconds = usecs;
2870
2871 *task_info_count = TASK_BASIC_INFO_64_COUNT;
2872 break;
2873 }
2874
2875 case MACH_TASK_BASIC_INFO:
2876 {
2877 mach_task_basic_info_t basic_info;
2878 vm_map_t map;
2879 clock_sec_t secs;
2880 clock_usec_t usecs;
2881
2882 if (*task_info_count < MACH_TASK_BASIC_INFO_COUNT) {
2883 error = KERN_INVALID_ARGUMENT;
2884 break;
2885 }
2886
2887 basic_info = (mach_task_basic_info_t)task_info_out;
2888
2889 map = (task == kernel_task) ? kernel_map : task->map;
2890
2891 basic_info->virtual_size = map->size;
2892
2893 basic_info->resident_size =
2894 (mach_vm_size_t)(pmap_resident_count(map->pmap));
2895 basic_info->resident_size *= PAGE_SIZE_64;
2896
2897 basic_info->resident_size_max =
2898 (mach_vm_size_t)(pmap_resident_max(map->pmap));
2899 basic_info->resident_size_max *= PAGE_SIZE_64;
2900
2901 basic_info->policy = ((task != kernel_task) ?
2902 POLICY_TIMESHARE : POLICY_RR);
2903
2904 basic_info->suspend_count = task->user_stop_count;
2905
2906 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
2907 basic_info->user_time.seconds =
2908 (typeof(basic_info->user_time.seconds))secs;
2909 basic_info->user_time.microseconds = usecs;
2910
2911 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
2912 basic_info->system_time.seconds =
2913 (typeof(basic_info->system_time.seconds))secs;
2914 basic_info->system_time.microseconds = usecs;
2915
2916 *task_info_count = MACH_TASK_BASIC_INFO_COUNT;
2917 break;
2918 }
2919
2920 case TASK_THREAD_TIMES_INFO:
2921 {
2922 register task_thread_times_info_t times_info;
2923 register thread_t thread;
2924
2925 if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) {
2926 error = KERN_INVALID_ARGUMENT;
2927 break;
2928 }
2929
2930 times_info = (task_thread_times_info_t) task_info_out;
2931 times_info->user_time.seconds = 0;
2932 times_info->user_time.microseconds = 0;
2933 times_info->system_time.seconds = 0;
2934 times_info->system_time.microseconds = 0;
2935
2936
2937 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2938 time_value_t user_time, system_time;
2939
2940 if (thread->options & TH_OPT_IDLE_THREAD)
2941 continue;
2942
2943 thread_read_times(thread, &user_time, &system_time);
2944
2945 time_value_add(&times_info->user_time, &user_time);
2946 time_value_add(&times_info->system_time, &system_time);
2947 }
2948
2949 *task_info_count = TASK_THREAD_TIMES_INFO_COUNT;
2950 break;
2951 }
2952
2953 case TASK_ABSOLUTETIME_INFO:
2954 {
2955 task_absolutetime_info_t info;
2956 register thread_t thread;
2957
2958 if (*task_info_count < TASK_ABSOLUTETIME_INFO_COUNT) {
2959 error = KERN_INVALID_ARGUMENT;
2960 break;
2961 }
2962
2963 info = (task_absolutetime_info_t)task_info_out;
2964 info->threads_user = info->threads_system = 0;
2965
2966
2967 info->total_user = task->total_user_time;
2968 info->total_system = task->total_system_time;
2969
2970 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2971 uint64_t tval;
2972 spl_t x;
2973
2974 if (thread->options & TH_OPT_IDLE_THREAD)
2975 continue;
2976
2977 x = splsched();
2978 thread_lock(thread);
2979
2980 tval = timer_grab(&thread->user_timer);
2981 info->threads_user += tval;
2982 info->total_user += tval;
2983
2984 tval = timer_grab(&thread->system_timer);
2985 if (thread->precise_user_kernel_time) {
2986 info->threads_system += tval;
2987 info->total_system += tval;
2988 } else {
2989 /* system_timer may represent either sys or user */
2990 info->threads_user += tval;
2991 info->total_user += tval;
2992 }
2993
2994 thread_unlock(thread);
2995 splx(x);
2996 }
2997
2998
2999 *task_info_count = TASK_ABSOLUTETIME_INFO_COUNT;
3000 break;
3001 }
3002
3003 case TASK_DYLD_INFO:
3004 {
3005 task_dyld_info_t info;
3006
3007 /*
3008 * We added the format field to TASK_DYLD_INFO output. For
3009 * temporary backward compatibility, accept the fact that
3010 * clients may ask for the old version - distinguished by the
3011 * size of the expected result structure.
3012 */
3013 #define TASK_LEGACY_DYLD_INFO_COUNT \
3014 offsetof(struct task_dyld_info, all_image_info_format)/sizeof(natural_t)
3015
3016 if (*task_info_count < TASK_LEGACY_DYLD_INFO_COUNT) {
3017 error = KERN_INVALID_ARGUMENT;
3018 break;
3019 }
3020
3021 info = (task_dyld_info_t)task_info_out;
3022 info->all_image_info_addr = task->all_image_info_addr;
3023 info->all_image_info_size = task->all_image_info_size;
3024
3025 /* only set format on output for those expecting it */
3026 if (*task_info_count >= TASK_DYLD_INFO_COUNT) {
3027 info->all_image_info_format = task_has_64BitAddr(task) ?
3028 TASK_DYLD_ALL_IMAGE_INFO_64 :
3029 TASK_DYLD_ALL_IMAGE_INFO_32 ;
3030 *task_info_count = TASK_DYLD_INFO_COUNT;
3031 } else {
3032 *task_info_count = TASK_LEGACY_DYLD_INFO_COUNT;
3033 }
3034 break;
3035 }
3036
3037 case TASK_EXTMOD_INFO:
3038 {
3039 task_extmod_info_t info;
3040 void *p;
3041
3042 if (*task_info_count < TASK_EXTMOD_INFO_COUNT) {
3043 error = KERN_INVALID_ARGUMENT;
3044 break;
3045 }
3046
3047 info = (task_extmod_info_t)task_info_out;
3048
3049 p = get_bsdtask_info(task);
3050 if (p) {
3051 proc_getexecutableuuid(p, info->task_uuid, sizeof(info->task_uuid));
3052 } else {
3053 bzero(info->task_uuid, sizeof(info->task_uuid));
3054 }
3055 info->extmod_statistics = task->extmod_statistics;
3056 *task_info_count = TASK_EXTMOD_INFO_COUNT;
3057
3058 break;
3059 }
3060
3061 case TASK_KERNELMEMORY_INFO:
3062 {
3063 task_kernelmemory_info_t tkm_info;
3064 ledger_amount_t credit, debit;
3065
3066 if (*task_info_count < TASK_KERNELMEMORY_INFO_COUNT) {
3067 error = KERN_INVALID_ARGUMENT;
3068 break;
3069 }
3070
3071 tkm_info = (task_kernelmemory_info_t) task_info_out;
3072 tkm_info->total_palloc = 0;
3073 tkm_info->total_pfree = 0;
3074 tkm_info->total_salloc = 0;
3075 tkm_info->total_sfree = 0;
3076
3077 if (task == kernel_task) {
3078 /*
3079 * All shared allocs/frees from other tasks count against
3080 * the kernel private memory usage. If we are looking up
3081 * info for the kernel task, gather from everywhere.
3082 */
3083 task_unlock(task);
3084
3085 /* start by accounting for all the terminated tasks against the kernel */
3086 tkm_info->total_palloc = tasks_tkm_private.alloc + tasks_tkm_shared.alloc;
3087 tkm_info->total_pfree = tasks_tkm_private.free + tasks_tkm_shared.free;
3088
3089 /* count all other task/thread shared alloc/free against the kernel */
3090 lck_mtx_lock(&tasks_threads_lock);
3091
3092 /* XXX this really shouldn't be using the function parameter 'task' as a local var! */
3093 queue_iterate(&tasks, task, task_t, tasks) {
3094 if (task == kernel_task) {
3095 if (ledger_get_entries(task->ledger,
3096 task_ledgers.tkm_private, &credit,
3097 &debit) == KERN_SUCCESS) {
3098 tkm_info->total_palloc += credit;
3099 tkm_info->total_pfree += debit;
3100 }
3101 }
3102 if (!ledger_get_entries(task->ledger,
3103 task_ledgers.tkm_shared, &credit, &debit)) {
3104 tkm_info->total_palloc += credit;
3105 tkm_info->total_pfree += debit;
3106 }
3107 }
3108 lck_mtx_unlock(&tasks_threads_lock);
3109 } else {
3110 if (!ledger_get_entries(task->ledger,
3111 task_ledgers.tkm_private, &credit, &debit)) {
3112 tkm_info->total_palloc = credit;
3113 tkm_info->total_pfree = debit;
3114 }
3115 if (!ledger_get_entries(task->ledger,
3116 task_ledgers.tkm_shared, &credit, &debit)) {
3117 tkm_info->total_salloc = credit;
3118 tkm_info->total_sfree = debit;
3119 }
3120 task_unlock(task);
3121 }
3122
3123 *task_info_count = TASK_KERNELMEMORY_INFO_COUNT;
3124 return KERN_SUCCESS;
3125 }
3126
3127 /* OBSOLETE */
3128 case TASK_SCHED_FIFO_INFO:
3129 {
3130
3131 if (*task_info_count < POLICY_FIFO_BASE_COUNT) {
3132 error = KERN_INVALID_ARGUMENT;
3133 break;
3134 }
3135
3136 error = KERN_INVALID_POLICY;
3137 break;
3138 }
3139
3140 /* OBSOLETE */
3141 case TASK_SCHED_RR_INFO:
3142 {
3143 register policy_rr_base_t rr_base;
3144 uint32_t quantum_time;
3145 uint64_t quantum_ns;
3146
3147 if (*task_info_count < POLICY_RR_BASE_COUNT) {
3148 error = KERN_INVALID_ARGUMENT;
3149 break;
3150 }
3151
3152 rr_base = (policy_rr_base_t) task_info_out;
3153
3154 if (task != kernel_task) {
3155 error = KERN_INVALID_POLICY;
3156 break;
3157 }
3158
3159 rr_base->base_priority = task->priority;
3160
3161 quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
3162 absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
3163
3164 rr_base->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
3165
3166 *task_info_count = POLICY_RR_BASE_COUNT;
3167 break;
3168 }
3169
3170 /* OBSOLETE */
3171 case TASK_SCHED_TIMESHARE_INFO:
3172 {
3173 register policy_timeshare_base_t ts_base;
3174
3175 if (*task_info_count < POLICY_TIMESHARE_BASE_COUNT) {
3176 error = KERN_INVALID_ARGUMENT;
3177 break;
3178 }
3179
3180 ts_base = (policy_timeshare_base_t) task_info_out;
3181
3182 if (task == kernel_task) {
3183 error = KERN_INVALID_POLICY;
3184 break;
3185 }
3186
3187 ts_base->base_priority = task->priority;
3188
3189 *task_info_count = POLICY_TIMESHARE_BASE_COUNT;
3190 break;
3191 }
3192
3193 case TASK_SECURITY_TOKEN:
3194 {
3195 register security_token_t *sec_token_p;
3196
3197 if (*task_info_count < TASK_SECURITY_TOKEN_COUNT) {
3198 error = KERN_INVALID_ARGUMENT;
3199 break;
3200 }
3201
3202 sec_token_p = (security_token_t *) task_info_out;
3203
3204 *sec_token_p = task->sec_token;
3205
3206 *task_info_count = TASK_SECURITY_TOKEN_COUNT;
3207 break;
3208 }
3209
3210 case TASK_AUDIT_TOKEN:
3211 {
3212 register audit_token_t *audit_token_p;
3213
3214 if (*task_info_count < TASK_AUDIT_TOKEN_COUNT) {
3215 error = KERN_INVALID_ARGUMENT;
3216 break;
3217 }
3218
3219 audit_token_p = (audit_token_t *) task_info_out;
3220
3221 *audit_token_p = task->audit_token;
3222
3223 *task_info_count = TASK_AUDIT_TOKEN_COUNT;
3224 break;
3225 }
3226
3227 case TASK_SCHED_INFO:
3228 error = KERN_INVALID_ARGUMENT;
3229 break;
3230
3231 case TASK_EVENTS_INFO:
3232 {
3233 register task_events_info_t events_info;
3234 register thread_t thread;
3235
3236 if (*task_info_count < TASK_EVENTS_INFO_COUNT) {
3237 error = KERN_INVALID_ARGUMENT;
3238 break;
3239 }
3240
3241 events_info = (task_events_info_t) task_info_out;
3242
3243
3244 events_info->faults = task->faults;
3245 events_info->pageins = task->pageins;
3246 events_info->cow_faults = task->cow_faults;
3247 events_info->messages_sent = task->messages_sent;
3248 events_info->messages_received = task->messages_received;
3249 events_info->syscalls_mach = task->syscalls_mach;
3250 events_info->syscalls_unix = task->syscalls_unix;
3251
3252 events_info->csw = task->c_switch;
3253
3254 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3255 events_info->csw += thread->c_switch;
3256 events_info->syscalls_mach += thread->syscalls_mach;
3257 events_info->syscalls_unix += thread->syscalls_unix;
3258 }
3259
3260
3261 *task_info_count = TASK_EVENTS_INFO_COUNT;
3262 break;
3263 }
3264 case TASK_AFFINITY_TAG_INFO:
3265 {
3266 if (*task_info_count < TASK_AFFINITY_TAG_INFO_COUNT) {
3267 error = KERN_INVALID_ARGUMENT;
3268 break;
3269 }
3270
3271 error = task_affinity_info(task, task_info_out, task_info_count);
3272 break;
3273 }
3274 case TASK_POWER_INFO:
3275 {
3276 if (*task_info_count < TASK_POWER_INFO_COUNT) {
3277 error = KERN_INVALID_ARGUMENT;
3278 break;
3279 }
3280
3281 task_power_info_locked(task, (task_power_info_t)task_info_out, NULL);
3282 break;
3283 }
3284
3285 case TASK_POWER_INFO_V2:
3286 {
3287 if (*task_info_count < TASK_POWER_INFO_V2_COUNT) {
3288 error = KERN_INVALID_ARGUMENT;
3289 break;
3290 }
3291 task_power_info_v2_t tpiv2 = (task_power_info_v2_t) task_info_out;
3292 task_power_info_locked(task, &tpiv2->cpu_energy, &tpiv2->gpu_energy);
3293 break;
3294 }
3295
3296 case TASK_VM_INFO:
3297 case TASK_VM_INFO_PURGEABLE:
3298 {
3299 task_vm_info_t vm_info;
3300 vm_map_t map;
3301
3302 if (*task_info_count < TASK_VM_INFO_REV0_COUNT) {
3303 error = KERN_INVALID_ARGUMENT;
3304 break;
3305 }
3306
3307 vm_info = (task_vm_info_t)task_info_out;
3308
3309 if (task == kernel_task) {
3310 map = kernel_map;
3311 /* no lock */
3312 } else {
3313 map = task->map;
3314 vm_map_lock_read(map);
3315 }
3316
3317 vm_info->virtual_size = (typeof(vm_info->virtual_size))map->size;
3318 vm_info->region_count = map->hdr.nentries;
3319 vm_info->page_size = vm_map_page_size(map);
3320
3321 vm_info->resident_size = pmap_resident_count(map->pmap);
3322 vm_info->resident_size *= PAGE_SIZE;
3323 vm_info->resident_size_peak = pmap_resident_max(map->pmap);
3324 vm_info->resident_size_peak *= PAGE_SIZE;
3325
3326 #define _VM_INFO(_name) \
3327 vm_info->_name = ((mach_vm_size_t) map->pmap->stats._name) * PAGE_SIZE
3328
3329 _VM_INFO(device);
3330 _VM_INFO(device_peak);
3331 _VM_INFO(external);
3332 _VM_INFO(external_peak);
3333 _VM_INFO(internal);
3334 _VM_INFO(internal_peak);
3335 _VM_INFO(reusable);
3336 _VM_INFO(reusable_peak);
3337 _VM_INFO(compressed);
3338 _VM_INFO(compressed_peak);
3339 _VM_INFO(compressed_lifetime);
3340
3341 vm_info->purgeable_volatile_pmap = 0;
3342 vm_info->purgeable_volatile_resident = 0;
3343 vm_info->purgeable_volatile_virtual = 0;
3344 if (task == kernel_task) {
3345 /*
3346 * We do not maintain the detailed stats for the
3347 * kernel_pmap, so just count everything as
3348 * "internal"...
3349 */
3350 vm_info->internal = vm_info->resident_size;
3351 /*
3352 * ... but since the memory held by the VM compressor
3353 * in the kernel address space ought to be attributed
3354 * to user-space tasks, we subtract it from "internal"
3355 * to give memory reporting tools a more accurate idea
3356 * of what the kernel itself is actually using, instead
3357 * of making it look like the kernel is leaking memory
3358 * when the system is under memory pressure.
3359 */
3360 vm_info->internal -= (VM_PAGE_COMPRESSOR_COUNT *
3361 PAGE_SIZE);
3362 } else {
3363 mach_vm_size_t volatile_virtual_size;
3364 mach_vm_size_t volatile_resident_size;
3365 mach_vm_size_t volatile_compressed_size;
3366 mach_vm_size_t volatile_pmap_size;
3367 mach_vm_size_t volatile_compressed_pmap_size;
3368 kern_return_t kr;
3369
3370 if (flavor == TASK_VM_INFO_PURGEABLE) {
3371 kr = vm_map_query_volatile(
3372 map,
3373 &volatile_virtual_size,
3374 &volatile_resident_size,
3375 &volatile_compressed_size,
3376 &volatile_pmap_size,
3377 &volatile_compressed_pmap_size);
3378 if (kr == KERN_SUCCESS) {
3379 vm_info->purgeable_volatile_pmap =
3380 volatile_pmap_size;
3381 if (radar_20146450) {
3382 vm_info->compressed -=
3383 volatile_compressed_pmap_size;
3384 }
3385 vm_info->purgeable_volatile_resident =
3386 volatile_resident_size;
3387 vm_info->purgeable_volatile_virtual =
3388 volatile_virtual_size;
3389 }
3390 }
3391 vm_map_unlock_read(map);
3392 }
3393
3394 if (*task_info_count >= TASK_VM_INFO_COUNT) {
3395 vm_info->phys_footprint = 0;
3396 *task_info_count = TASK_VM_INFO_COUNT;
3397 } else {
3398 *task_info_count = TASK_VM_INFO_REV0_COUNT;
3399 }
3400
3401 break;
3402 }
3403
3404 case TASK_WAIT_STATE_INFO:
3405 {
3406 /*
3407 * Deprecated flavor. Currently allowing some results until all users
3408 * stop calling it. The results may not be accurate.
3409 */
3410 task_wait_state_info_t wait_state_info;
3411 uint64_t total_sfi_ledger_val = 0;
3412
3413 if (*task_info_count < TASK_WAIT_STATE_INFO_COUNT) {
3414 error = KERN_INVALID_ARGUMENT;
3415 break;
3416 }
3417
3418 wait_state_info = (task_wait_state_info_t) task_info_out;
3419
3420 wait_state_info->total_wait_state_time = 0;
3421 bzero(wait_state_info->_reserved, sizeof(wait_state_info->_reserved));
3422
3423 #if CONFIG_SCHED_SFI
3424 int i, prev_lentry = -1;
3425 int64_t val_credit, val_debit;
3426
3427 for (i = 0; i < MAX_SFI_CLASS_ID; i++){
3428 val_credit = 0;
3429 /*
3430 * checking with prev_lentry != entry ensures adjacent classes
3431 * which share the same ledger do not add wait times twice.
3432 * Note: Use ledger() call to get data for each individual sfi class.
3433 */
3434 if (prev_lentry != task_ledgers.sfi_wait_times[i] &&
3435 KERN_SUCCESS == ledger_get_entries(task->ledger,
3436 task_ledgers.sfi_wait_times[i], &val_credit, &val_debit)) {
3437 total_sfi_ledger_val += val_credit;
3438 }
3439 prev_lentry = task_ledgers.sfi_wait_times[i];
3440 }
3441
3442 #endif /* CONFIG_SCHED_SFI */
3443 wait_state_info->total_wait_sfi_state_time = total_sfi_ledger_val;
3444 *task_info_count = TASK_WAIT_STATE_INFO_COUNT;
3445
3446 break;
3447 }
3448 case TASK_VM_INFO_PURGEABLE_ACCOUNT:
3449 {
3450 #if DEVELOPMENT || DEBUG
3451 pvm_account_info_t acnt_info;
3452
3453 if (*task_info_count < PVM_ACCOUNT_INFO_COUNT) {
3454 error = KERN_INVALID_ARGUMENT;
3455 break;
3456 }
3457
3458 if (task_info_out == NULL) {
3459 error = KERN_INVALID_ARGUMENT;
3460 break;
3461 }
3462
3463 acnt_info = (pvm_account_info_t) task_info_out;
3464
3465 error = vm_purgeable_account(task, acnt_info);
3466
3467 *task_info_count = PVM_ACCOUNT_INFO_COUNT;
3468
3469 break;
3470 #else /* DEVELOPMENT || DEBUG */
3471 error = KERN_NOT_SUPPORTED;
3472 break;
3473 #endif /* DEVELOPMENT || DEBUG */
3474 }
3475 case TASK_FLAGS_INFO:
3476 {
3477 task_flags_info_t flags_info;
3478
3479 if (*task_info_count < TASK_FLAGS_INFO_COUNT) {
3480 error = KERN_INVALID_ARGUMENT;
3481 break;
3482 }
3483
3484 flags_info = (task_flags_info_t)task_info_out;
3485
3486 /* only publish the 64-bit flag of the task */
3487 flags_info->flags = task->t_flags & TF_64B_ADDR;
3488
3489 *task_info_count = TASK_FLAGS_INFO_COUNT;
3490 break;
3491 }
3492
3493 case TASK_DEBUG_INFO_INTERNAL:
3494 {
3495 #if DEVELOPMENT || DEBUG
3496 task_debug_info_internal_t dbg_info;
3497 if (*task_info_count < TASK_DEBUG_INFO_INTERNAL_COUNT) {
3498 error = KERN_NOT_SUPPORTED;
3499 break;
3500 }
3501
3502 if (task_info_out == NULL) {
3503 error = KERN_INVALID_ARGUMENT;
3504 break;
3505 }
3506 dbg_info = (task_debug_info_internal_t) task_info_out;
3507 dbg_info->ipc_space_size = 0;
3508 if (task->itk_space){
3509 dbg_info->ipc_space_size = task->itk_space->is_table_size;
3510 }
3511
3512 error = KERN_SUCCESS;
3513 *task_info_count = TASK_DEBUG_INFO_INTERNAL_COUNT;
3514 break;
3515 #else /* DEVELOPMENT || DEBUG */
3516 error = KERN_NOT_SUPPORTED;
3517 break;
3518 #endif /* DEVELOPMENT || DEBUG */
3519 }
3520 default:
3521 error = KERN_INVALID_ARGUMENT;
3522 }
3523
3524 task_unlock(task);
3525 return (error);
3526 }
3527
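/*
 * Illustrative sketch (editorial addition): querying one of the flavors above
 * from user space.  The count argument is in/out: callers pass the capacity of
 * their buffer and get back the number of natural_t units actually filled in.
 * Minimal example, assuming <mach/mach.h>:
 *
 *	mach_task_basic_info_data_t info;
 *	mach_msg_type_number_t count = MACH_TASK_BASIC_INFO_COUNT;
 *
 *	if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO,
 *	    (task_info_t)&info, &count) == KERN_SUCCESS)
 *		printf("resident: %llu bytes\n",
 *		    (unsigned long long)info.resident_size);
 */
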
3528 /*
3529 * task_power_info
3530 *
3531 * Returns power stats for the task.
3532 * Note: Called with task locked.
3533 */
3534 void
3535 task_power_info_locked(
3536 task_t task,
3537 task_power_info_t info,
3538 gpu_energy_data_t ginfo)
3539 {
3540 thread_t thread;
3541 ledger_amount_t tmp;
3542
3543 task_lock_assert_owned(task);
3544
3545 ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
3546 (ledger_amount_t *)&info->task_interrupt_wakeups, &tmp);
3547 ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
3548 (ledger_amount_t *)&info->task_platform_idle_wakeups, &tmp);
3549
3550 info->task_timer_wakeups_bin_1 = task->task_timer_wakeups_bin_1;
3551 info->task_timer_wakeups_bin_2 = task->task_timer_wakeups_bin_2;
3552
3553 info->total_user = task->total_user_time;
3554 info->total_system = task->total_system_time;
3555
3556 if (ginfo) {
3557 ginfo->task_gpu_utilisation = task->task_gpu_ns;
3558 }
3559
3560 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3561 uint64_t tval;
3562 spl_t x;
3563
3564 if (thread->options & TH_OPT_IDLE_THREAD)
3565 continue;
3566
3567 x = splsched();
3568 thread_lock(thread);
3569
3570 info->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
3571 info->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
3572
3573 tval = timer_grab(&thread->user_timer);
3574 info->total_user += tval;
3575
3576 tval = timer_grab(&thread->system_timer);
3577 if (thread->precise_user_kernel_time) {
3578 info->total_system += tval;
3579 } else {
3580 /* system_timer may represent either sys or user */
3581 info->total_user += tval;
3582 }
3583
3584 if (ginfo) {
3585 ginfo->task_gpu_utilisation += ml_gpu_stat(thread);
3586 }
3587 thread_unlock(thread);
3588 splx(x);
3589 }
3590 }
3591
3592 /*
3593 * task_gpu_utilisation
3594 *
3595 * Returns the total gpu time used by all the threads of the task
3596 * (both dead and alive)
3597 */
3598 uint64_t
3599 task_gpu_utilisation(
3600 task_t task)
3601 {
3602 uint64_t gpu_time = 0;
3603 thread_t thread;
3604
3605 task_lock(task);
3606 gpu_time += task->task_gpu_ns;
3607
3608 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3609 spl_t x;
3610 x = splsched();
3611 thread_lock(thread);
3612 gpu_time += ml_gpu_stat(thread);
3613 thread_unlock(thread);
3614 splx(x);
3615 }
3616
3617 task_unlock(task);
3618 return gpu_time;
3619 }
3620
3621 kern_return_t
3622 task_purgable_info(
3623 task_t task,
3624 task_purgable_info_t *stats)
3625 {
3626 if (task == TASK_NULL || stats == NULL)
3627 return KERN_INVALID_ARGUMENT;
3628 /* Take task reference */
3629 task_reference(task);
3630 vm_purgeable_stats((vm_purgeable_info_t)stats, task);
3631 /* Drop task reference */
3632 task_deallocate(task);
3633 return KERN_SUCCESS;
3634 }
3635
3636 void
3637 task_vtimer_set(
3638 task_t task,
3639 integer_t which)
3640 {
3641 thread_t thread;
3642 spl_t x;
3643
3644 /* assert(task == current_task()); */ /* bogus assert 4803227 4807483 */
3645
3646 task_lock(task);
3647
3648 task->vtimers |= which;
3649
3650 switch (which) {
3651
3652 case TASK_VTIMER_USER:
3653 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3654 x = splsched();
3655 thread_lock(thread);
3656 if (thread->precise_user_kernel_time)
3657 thread->vtimer_user_save = timer_grab(&thread->user_timer);
3658 else
3659 thread->vtimer_user_save = timer_grab(&thread->system_timer);
3660 thread_unlock(thread);
3661 splx(x);
3662 }
3663 break;
3664
3665 case TASK_VTIMER_PROF:
3666 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3667 x = splsched();
3668 thread_lock(thread);
3669 thread->vtimer_prof_save = timer_grab(&thread->user_timer);
3670 thread->vtimer_prof_save += timer_grab(&thread->system_timer);
3671 thread_unlock(thread);
3672 splx(x);
3673 }
3674 break;
3675
3676 case TASK_VTIMER_RLIM:
3677 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3678 x = splsched();
3679 thread_lock(thread);
3680 thread->vtimer_rlim_save = timer_grab(&thread->user_timer);
3681 thread->vtimer_rlim_save += timer_grab(&thread->system_timer);
3682 thread_unlock(thread);
3683 splx(x);
3684 }
3685 break;
3686 }
3687
3688 task_unlock(task);
3689 }
3690
3691 void
3692 task_vtimer_clear(
3693 task_t task,
3694 integer_t which)
3695 {
3696 assert(task == current_task());
3697
3698 task_lock(task);
3699
3700 task->vtimers &= ~which;
3701
3702 task_unlock(task);
3703 }
3704
3705 void
3706 task_vtimer_update(
3707 __unused
3708 task_t task,
3709 integer_t which,
3710 uint32_t *microsecs)
3711 {
3712 thread_t thread = current_thread();
3713 uint32_t tdelt;
3714 clock_sec_t secs;
3715 uint64_t tsum;
3716
3717 assert(task == current_task());
3718
3719 assert(task->vtimers & which);
3720
3721 secs = tdelt = 0;
3722
3723 switch (which) {
3724
3725 case TASK_VTIMER_USER:
3726 if (thread->precise_user_kernel_time) {
3727 tdelt = (uint32_t)timer_delta(&thread->user_timer,
3728 &thread->vtimer_user_save);
3729 } else {
3730 tdelt = (uint32_t)timer_delta(&thread->system_timer,
3731 &thread->vtimer_user_save);
3732 }
3733 absolutetime_to_microtime(tdelt, &secs, microsecs);
3734 break;
3735
3736 case TASK_VTIMER_PROF:
3737 tsum = timer_grab(&thread->user_timer);
3738 tsum += timer_grab(&thread->system_timer);
3739 tdelt = (uint32_t)(tsum - thread->vtimer_prof_save);
3740 absolutetime_to_microtime(tdelt, &secs, microsecs);
3741 /* if the time delta is smaller than a usec, ignore */
3742 if (*microsecs != 0)
3743 thread->vtimer_prof_save = tsum;
3744 break;
3745
3746 case TASK_VTIMER_RLIM:
3747 tsum = timer_grab(&thread->user_timer);
3748 tsum += timer_grab(&thread->system_timer);
3749 tdelt = (uint32_t)(tsum - thread->vtimer_rlim_save);
3750 thread->vtimer_rlim_save = tsum;
3751 absolutetime_to_microtime(tdelt, &secs, microsecs);
3752 break;
3753 }
3754
3755 }
3756
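/*
 * Illustrative sketch (editorial addition): the intended vtimer pattern, as
 * assumed to be driven by the BSD setitimer() path.  The flag is armed once
 * per task; each thread then samples its own delta when it is about to return
 * to user mode:
 *
 *	task_vtimer_set(task, TASK_VTIMER_USER);       // arm ITIMER_VIRTUAL accounting
 *	...
 *	if (task->vtimers & TASK_VTIMER_USER) {
 *		uint32_t usecs;
 *		task_vtimer_update(task, TASK_VTIMER_USER, &usecs);
 *		// charge 'usecs' against the virtual interval timer
 *	}
 *	...
 *	task_vtimer_clear(task, TASK_VTIMER_USER);     // disarm when the timer is cleared
 */
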
3757 /*
3758 * task_assign:
3759 *
3760 * Change the assigned processor set for the task
3761 */
3762 kern_return_t
3763 task_assign(
3764 __unused task_t task,
3765 __unused processor_set_t new_pset,
3766 __unused boolean_t assign_threads)
3767 {
3768 return(KERN_FAILURE);
3769 }
3770
3771 /*
3772 * task_assign_default:
3773 *
3774 * Version of task_assign to assign to default processor set.
3775 */
3776 kern_return_t
3777 task_assign_default(
3778 task_t task,
3779 boolean_t assign_threads)
3780 {
3781 return (task_assign(task, &pset0, assign_threads));
3782 }
3783
3784 /*
3785 * task_get_assignment
3786 *
3787 * Return name of processor set that task is assigned to.
3788 */
3789 kern_return_t
3790 task_get_assignment(
3791 task_t task,
3792 processor_set_t *pset)
3793 {
3794 if (!task->active)
3795 return(KERN_FAILURE);
3796
3797 *pset = &pset0;
3798
3799 return (KERN_SUCCESS);
3800 }
3801
3802 uint64_t
3803 get_task_dispatchqueue_offset(
3804 task_t task)
3805 {
3806 return task->dispatchqueue_offset;
3807 }
3808
3809 /*
3810 * task_policy
3811 *
3812 * Set scheduling policy and parameters, both base and limit, for
3813 * the given task. Policy must be a policy which is enabled for the
3814 * processor set. Change contained threads if requested.
3815 */
3816 kern_return_t
3817 task_policy(
3818 __unused task_t task,
3819 __unused policy_t policy_id,
3820 __unused policy_base_t base,
3821 __unused mach_msg_type_number_t count,
3822 __unused boolean_t set_limit,
3823 __unused boolean_t change)
3824 {
3825 return(KERN_FAILURE);
3826 }
3827
3828 /*
3829 * task_set_policy
3830 *
3831 * Set scheduling policy and parameters, both base and limit, for
3832 * the given task. Policy can be any policy implemented by the
3833 * processor set, whether enabled or not. Change contained threads
3834 * if requested.
3835 */
3836 kern_return_t
3837 task_set_policy(
3838 __unused task_t task,
3839 __unused processor_set_t pset,
3840 __unused policy_t policy_id,
3841 __unused policy_base_t base,
3842 __unused mach_msg_type_number_t base_count,
3843 __unused policy_limit_t limit,
3844 __unused mach_msg_type_number_t limit_count,
3845 __unused boolean_t change)
3846 {
3847 return(KERN_FAILURE);
3848 }
3849
3850 kern_return_t
3851 task_set_ras_pc(
3852 __unused task_t task,
3853 __unused vm_offset_t pc,
3854 __unused vm_offset_t endpc)
3855 {
3856 return KERN_FAILURE;
3857 }
3858
3859 void
3860 task_synchronizer_destroy_all(task_t task)
3861 {
3862 /*
3863 * Destroy owned semaphores
3864 */
3865 semaphore_destroy_all(task);
3866 }
3867
3868 /*
3869 * Install default (machine-dependent) initial thread state
3870 * on the task. Subsequent thread creation will have this initial
3871 * state set on the thread by machine_thread_inherit_taskwide().
3872 * Flavors and structures are exactly the same as those to thread_set_state()
3873 */
3874 kern_return_t
3875 task_set_state(
3876 task_t task,
3877 int flavor,
3878 thread_state_t state,
3879 mach_msg_type_number_t state_count)
3880 {
3881 kern_return_t ret;
3882
3883 if (task == TASK_NULL) {
3884 return (KERN_INVALID_ARGUMENT);
3885 }
3886
3887 task_lock(task);
3888
3889 if (!task->active) {
3890 task_unlock(task);
3891 return (KERN_FAILURE);
3892 }
3893
3894 ret = machine_task_set_state(task, flavor, state, state_count);
3895
3896 task_unlock(task);
3897 return ret;
3898 }
3899
3900 /*
3901 * Examine the default (machine-dependent) initial thread state
3902 * on the task, as set by task_set_state(). Flavors and structures
3903 * are exactly the same as those passed to thread_get_state().
3904 */
3905 kern_return_t
3906 task_get_state(
3907 task_t task,
3908 int flavor,
3909 thread_state_t state,
3910 mach_msg_type_number_t *state_count)
3911 {
3912 kern_return_t ret;
3913
3914 if (task == TASK_NULL) {
3915 return (KERN_INVALID_ARGUMENT);
3916 }
3917
3918 task_lock(task);
3919
3920 if (!task->active) {
3921 task_unlock(task);
3922 return (KERN_FAILURE);
3923 }
3924
3925 ret = machine_task_get_state(task, flavor, state, state_count);
3926
3927 task_unlock(task);
3928 return ret;
3929 }
3930
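/*
 * Illustrative sketch (editorial addition): task_set_state() is the task-wide
 * analogue of thread_set_state(), useful for arming state (e.g. debug
 * registers) that should apply to threads not yet created.  Flavors are
 * machine dependent; x86_DEBUG_STATE64 is shown purely as an assumed example:
 *
 *	x86_debug_state64_t dbg = { ... };    // desired default debug state
 *
 *	kr = task_set_state(target_task, x86_DEBUG_STATE64,
 *	    (thread_state_t)&dbg, x86_DEBUG_STATE64_COUNT);
 */
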
3931 #if CONFIG_JETSAM
3932 #define HWM_USERCORE_MINSPACE 250 // free space (in MB) required *after* core file creation
3933
3934 void __attribute__((noinline))
3935 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb)
3936 {
3937 task_t task = current_task();
3938 int pid = 0;
3939 const char *procname = "unknown";
3940 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
3941
3942 #ifdef MACH_BSD
3943 pid = proc_selfpid();
3944
3945 if (pid == 1) {
3946 /*
3947 * Cannot have ReportCrash analyzing
3948 * a suspended initproc.
3949 */
3950 return;
3951 }
3952
3953 if (task->bsd_info != NULL)
3954 procname = proc_name_address(current_task()->bsd_info);
3955 #endif
3956
3957 if (hwm_user_cores) {
3958 int error;
3959 uint64_t starttime, end;
3960 clock_sec_t secs = 0;
3961 uint32_t microsecs = 0;
3962
3963 starttime = mach_absolute_time();
3964 /*
3965 * Trigger a coredump of this process. Don't proceed unless we know we won't
3966 * be filling up the disk, and ignore the core size resource limit for this
3967 * core file.
3968 */
3969 if ((error = coredump(current_task()->bsd_info, HWM_USERCORE_MINSPACE, COREDUMP_IGNORE_ULIMIT)) != 0) {
3970 printf("couldn't take coredump of %s[%d]: %d\n", procname, pid, error);
3971 }
3972 /*
3973 * coredump() leaves the task suspended.
3974 */
3975 task_resume_internal(current_task());
3976
3977 end = mach_absolute_time();
3978 absolutetime_to_microtime(end - starttime, &secs, &microsecs);
3979 printf("coredump of %s[%d] taken in %d secs %d microsecs\n",
3980 proc_name_address(current_task()->bsd_info), pid, (int)secs, microsecs);
3981 }
3982
3983 if (disable_exc_resource) {
3984 printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
3985 "supressed by a boot-arg.\n", procname, pid, max_footprint_mb);
3986 return;
3987 }
3988
3989 /*
3990 * A task that has triggered an EXC_RESOURCE should not be
3991 * jetsammed when the device is under memory pressure. Here
3992 * we set the P_MEMSTAT_TERMINATED flag so that the process
3993 * will be skipped if the memorystatus_thread wakes up.
3994 */
3995 proc_memstat_terminated(current_task()->bsd_info, TRUE);
3996
3997 printf("process %s[%d] crossed memory high watermark (%d MB); sending "
3998 "EXC_RESOURCE.\n", procname, pid, max_footprint_mb);
3999
4000 code[0] = code[1] = 0;
4001 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY);
4002 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK);
4003 EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb);
4004
4005 /*
4006 * Use the _internal_ variant so that no user-space
4007 * process can resume our task from under us.
4008 */
4009 task_suspend_internal(task);
4010 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
4011 task_resume_internal(task);
4012
4013 /*
4014 * After the EXC_RESOURCE has been handled, we must clear the
4015 * P_MEMSTAT_TERMINATED flag so that the process can again be
4016 * considered for jetsam if the memorystatus_thread wakes up.
4017 */
4018 proc_memstat_terminated(current_task()->bsd_info, FALSE); /* clear the flag */
4019 }
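/*
 * For reference, a hedged sketch of how an exception handler might pick
 * the fields encoded above back out of code[0].  The decode macros are
 * expected to come from <kern/exc_resource.h>; treat their exact names
 * as an assumption rather than a guarantee of this interface.
 *
 *	if (EXC_RESOURCE_DECODE_RESOURCE_TYPE(code[0]) == RESOURCE_TYPE_MEMORY &&
 *	    EXC_RESOURCE_DECODE_FLAVOR(code[0]) == FLAVOR_HIGH_WATERMARK) {
 *		int limit_mb = (int)EXC_RESOURCE_HWM_DECODE_LIMIT(code[0]);
 *		printf("task exceeded %d MB footprint limit\n", limit_mb);
 *	}
 */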
4020
4021 /*
4022 * Callback invoked when a task exceeds its physical footprint limit.
4023 */
4024 void
4025 task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1)
4026 {
4027 ledger_amount_t max_footprint, max_footprint_mb;
4028 ledger_amount_t footprint_after_purge;
4029 task_t task;
4030
4031 if (warning == LEDGER_WARNING_DIPPED_BELOW) {
4032 /*
4033 * Task memory limits only provide a warning on the way up.
4034 */
4035 return;
4036 }
4037
4038 task = current_task();
4039
4040 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &max_footprint);
4041 max_footprint_mb = max_footprint >> 20;
4042
4043 /*
4044 * Try to purge all "volatile" memory in that task first.
4045 */
4046 (void) task_purge_volatile_memory(task);
4047 /* Are we still over the limit? */
4048 ledger_get_balance(task->ledger,
4049 task_ledgers.phys_footprint,
4050 &footprint_after_purge);
4051 if ((!warning &&
4052 footprint_after_purge <= max_footprint) ||
4053 (warning &&
4054 footprint_after_purge <= ((max_footprint *
4055 PHYS_FOOTPRINT_WARNING_LEVEL) / 100))) {
4056 /* all better now */
4057 ledger_reset_callback_state(task->ledger,
4058 task_ledgers.phys_footprint);
4059 return;
4060 }
4061 /* still over the limit after purging... */
4062
4063 /*
4064 * If this is an actual violation (not a warning),
4065 * generate a non-fatal high watermark EXC_RESOURCE.
4066 */
4067 if ((warning == 0) && (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION)) {
4068 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND((int)max_footprint_mb);
4069 }
4070
4071 memorystatus_on_ledger_footprint_exceeded((warning == LEDGER_WARNING_ROSE_ABOVE) ? TRUE : FALSE,
4072 (int)max_footprint_mb);
4073 }
4074
4075 extern int proc_check_footprint_priv(void);
4076
4077 kern_return_t
4078 task_set_phys_footprint_limit(
4079 task_t task,
4080 int new_limit_mb,
4081 int *old_limit_mb)
4082 {
4083 kern_return_t error;
4084
4085 if ((error = proc_check_footprint_priv())) {
4086 return (KERN_NO_ACCESS);
4087 }
4088
4089 return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, FALSE);
4090 }
4091
4092 kern_return_t
4093 task_convert_phys_footprint_limit(
4094 int limit_mb,
4095 int *converted_limit_mb)
4096 {
4097 if (limit_mb == -1) {
4098 /*
4099 * No limit
4100 */
4101 if (max_task_footprint != 0) {
4102 *converted_limit_mb = (int)(max_task_footprint / 1024 / 1024); /* bytes to MB */
4103 } else {
4104 *converted_limit_mb = (int)(LEDGER_LIMIT_INFINITY >> 20);
4105 }
4106 } else {
4107 /* nothing to convert */
4108 *converted_limit_mb = limit_mb;
4109 }
4110 return (KERN_SUCCESS);
4111 }
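/*
 * Worked example, assuming a boot-arg has set max_task_footprint to
 * 500 MB: converting the "no limit" sentinel reports that global cap,
 * while a concrete request passes through unchanged.
 *
 *	int mb;
 *	(void) task_convert_phys_footprint_limit(-1, &mb);	yields mb == 500
 *	(void) task_convert_phys_footprint_limit(128, &mb);	yields mb == 128
 */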
4112
4113
4114 kern_return_t
4115 task_set_phys_footprint_limit_internal(
4116 task_t task,
4117 int new_limit_mb,
4118 int *old_limit_mb,
4119 boolean_t trigger_exception)
4120 {
4121 ledger_amount_t old;
4122
4123 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &old);
4124
4125 if (old_limit_mb) {
4126 /*
4127 * Check that limit >> 20 will not give an "unexpected" 32-bit
4128 * result. There are, however, implicit assumptions that a -1 MB limit
4129 * equates to LEDGER_LIMIT_INFINITY.
4130 */
4131 assert(((old & 0xFFF0000000000000LL) == 0) || (old == LEDGER_LIMIT_INFINITY));
4132 *old_limit_mb = (int)(old >> 20);
4133 }
4134
4135 if (new_limit_mb == -1) {
4136 /*
4137 * Caller wishes to remove the limit.
4138 */
4139 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
4140 max_task_footprint ? max_task_footprint : LEDGER_LIMIT_INFINITY,
4141 max_task_footprint ? PHYS_FOOTPRINT_WARNING_LEVEL : 0);
4142 return (KERN_SUCCESS);
4143 }
4144
4145 #ifdef CONFIG_NOMONITORS
4146 return (KERN_SUCCESS);
4147 #endif /* CONFIG_NOMONITORS */
4148
4149 task_lock(task);
4150
4151 if (trigger_exception) {
4152 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION;
4153 } else {
4154 task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION;
4155 }
4156
4157 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
4158 (ledger_amount_t)new_limit_mb << 20, PHYS_FOOTPRINT_WARNING_LEVEL);
4159
4160 if (task == current_task()) {
4161 ledger_check_new_balance(task->ledger, task_ledgers.phys_footprint);
4162 }
4163
4164 task_unlock(task);
4165
4166 return (KERN_SUCCESS);
4167 }
4168
4169 kern_return_t
4170 task_get_phys_footprint_limit(
4171 task_t task,
4172 int *limit_mb)
4173 {
4174 ledger_amount_t limit;
4175
4176 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &limit);
4177 /*
4178 * Check that limit >> 20 will not give an "unexpected" signed, 32-bit
4179 * result. There are, however, implicit assumptions that a -1 MB limit
4180 * equates to LEDGER_LIMIT_INFINITY.
4181 */
4182 assert(((limit & 0xFFF0000000000000LL) == 0) || (limit == LEDGER_LIMIT_INFINITY));
4183 *limit_mb = (int)(limit >> 20);
4184
4185 return (KERN_SUCCESS);
4186 }
4187 #else /* CONFIG_JETSAM */
4188 kern_return_t
4189 task_set_phys_footprint_limit(
4190 __unused task_t task,
4191 __unused int new_limit_mb,
4192 __unused int *old_limit_mb)
4193 {
4194 return (KERN_FAILURE);
4195 }
4196
4197 kern_return_t
4198 task_get_phys_footprint_limit(
4199 __unused task_t task,
4200 __unused int *limit_mb)
4201 {
4202 return (KERN_FAILURE);
4203 }
4204 #endif /* CONFIG_JETSAM */
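/*
 * Minimal sketch of driving the footprint-limit interfaces above (only
 * meaningful when CONFIG_JETSAM is configured; the stubs above return
 * KERN_FAILURE otherwise): lower the limit to a hypothetical 300 MB and
 * read it back.  Passing -1 as new_limit_mb removes the per-task limit
 * again, falling back to the global max_task_footprint handling in
 * task_set_phys_footprint_limit_internal().
 *
 *	int old_mb = 0, cur_mb = 0;
 *
 *	if (task_set_phys_footprint_limit(task, 300, &old_mb) == KERN_SUCCESS) {
 *		(void) task_get_phys_footprint_limit(task, &cur_mb);
 *		assert(cur_mb == 300);
 *	}
 */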
4205
4206 /*
4207 * We need to export some functions to other components that
4208 * are currently implemented in macros within the osfmk
4209 * component. Just export them as functions of the same name.
4210 */
4211 boolean_t is_kerneltask(task_t t)
4212 {
4213 if (t == kernel_task)
4214 return (TRUE);
4215
4216 return (FALSE);
4217 }
4218
4219 int
4220 check_for_tasksuspend(task_t task)
4221 {
4222
4223 if (task == TASK_NULL)
4224 return (0);
4225
4226 return (task->suspend_count > 0);
4227 }
4228
4229 #undef current_task
4230 task_t current_task(void);
4231 task_t current_task(void)
4232 {
4233 return (current_task_fast());
4234 }
4235
4236 #undef task_reference
4237 void task_reference(task_t task);
4238 void
4239 task_reference(
4240 task_t task)
4241 {
4242 if (task != TASK_NULL)
4243 task_reference_internal(task);
4244 }
4245
4246 /* defined in bsd/kern/kern_prot.c */
4247 extern int get_audit_token_pid(audit_token_t *audit_token);
4248
4249 int task_pid(task_t task)
4250 {
4251 if (task)
4252 return get_audit_token_pid(&task->audit_token);
4253 return -1;
4254 }
4255
4256
4257 /*
4258 * This routine is always called with the task lock held.
4259 * It returns a thread handle without taking a reference, since the
4260 * caller operates on it while the task lock is still held.
4261 */
4262 thread_t
4263 task_findtid(task_t task, uint64_t tid)
4264 {
4265 thread_t thread = THREAD_NULL;
4266
4267 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4268 if (thread->thread_id == tid)
4269 return(thread);
4270 }
4271 return(THREAD_NULL);
4272 }
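/*
 * Usage sketch (hypothetical caller): because task_findtid() returns an
 * unreferenced thread, the task lock must bracket both the lookup and
 * any use of the result.
 *
 *	thread_t thread;
 *
 *	task_lock(task);
 *	thread = task_findtid(task, tid);
 *	if (thread != THREAD_NULL) {
 *		... operate on thread while the task lock is still held ...
 *	}
 *	task_unlock(task);
 */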
4273
4274 /*
4275 * Control the CPU usage monitor for a task.
4276 */
4277 kern_return_t
4278 task_cpu_usage_monitor_ctl(task_t task, uint32_t *flags)
4279 {
4280 int error = KERN_SUCCESS;
4281
4282 if (*flags & CPUMON_MAKE_FATAL) {
4283 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_CPUMON;
4284 } else {
4285 error = KERN_INVALID_ARGUMENT;
4286 }
4287
4288 return error;
4289 }
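/*
 * Sketch of the one operation this control currently understands:
 * latching the CPU usage monitor to be fatal for the task.  Any other
 * flag combination is rejected with KERN_INVALID_ARGUMENT.
 *
 *	uint32_t flags = CPUMON_MAKE_FATAL;
 *	kern_return_t kr = task_cpu_usage_monitor_ctl(task, &flags);
 */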
4290
4291 /*
4292 * Control the wakeups monitor for a task.
4293 */
4294 kern_return_t
4295 task_wakeups_monitor_ctl(task_t task, uint32_t *flags, int32_t *rate_hz)
4296 {
4297 ledger_t ledger = task->ledger;
4298
4299 task_lock(task);
4300 if (*flags & WAKEMON_GET_PARAMS) {
4301 ledger_amount_t limit;
4302 uint64_t period;
4303
4304 ledger_get_limit(ledger, task_ledgers.interrupt_wakeups, &limit);
4305 ledger_get_period(ledger, task_ledgers.interrupt_wakeups, &period);
4306
4307 if (limit != LEDGER_LIMIT_INFINITY) {
4308 /*
4309 * An active limit means the wakeups monitor is enabled.
4310 */
4311 *rate_hz = (int32_t)(limit / (int64_t)(period / NSEC_PER_SEC));
4312 *flags = WAKEMON_ENABLE;
4313 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
4314 *flags |= WAKEMON_MAKE_FATAL;
4315 }
4316 } else {
4317 *flags = WAKEMON_DISABLE;
4318 *rate_hz = -1;
4319 }
4320
4321 /*
4322 * If WAKEMON_GET_PARAMS is present in flags, all other flags are ignored.
4323 */
4324 task_unlock(task);
4325 return KERN_SUCCESS;
4326 }
4327
4328 if (*flags & WAKEMON_ENABLE) {
4329 if (*flags & WAKEMON_SET_DEFAULTS) {
4330 *rate_hz = task_wakeups_monitor_rate;
4331 }
4332
4333 #ifndef CONFIG_NOMONITORS
4334 if (*flags & WAKEMON_MAKE_FATAL) {
4335 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
4336 }
4337 #endif /* CONFIG_NOMONITORS */
4338
4339 if (*rate_hz < 0) {
4340 task_unlock(task);
4341 return KERN_INVALID_ARGUMENT;
4342 }
4343
4344 #ifndef CONFIG_NOMONITORS
4345 ledger_set_limit(ledger, task_ledgers.interrupt_wakeups, *rate_hz * task_wakeups_monitor_interval,
4346 task_wakeups_monitor_ustackshots_trigger_pct);
4347 ledger_set_period(ledger, task_ledgers.interrupt_wakeups, task_wakeups_monitor_interval * NSEC_PER_SEC);
4348 ledger_enable_callback(ledger, task_ledgers.interrupt_wakeups);
4349 #endif /* CONFIG_NOMONITORS */
4350 } else if (*flags & WAKEMON_DISABLE) {
4351 /*
4352 * Caller wishes to disable wakeups monitor on the task.
4353 *
4354 * Disable telemetry if it was triggered by the wakeups monitor, and
4355 * remove the limit & callback on the wakeups ledger entry.
4356 */
4357 #if CONFIG_TELEMETRY
4358 telemetry_task_ctl_locked(current_task(), TF_WAKEMON_WARNING, 0);
4359 #endif
4360 ledger_disable_refill(ledger, task_ledgers.interrupt_wakeups);
4361 ledger_disable_callback(ledger, task_ledgers.interrupt_wakeups);
4362 }
4363
4364 task_unlock(task);
4365 return KERN_SUCCESS;
4366 }
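/*
 * Hedged example of the two common call patterns for the wakeups
 * monitor.  Enabling with WAKEMON_SET_DEFAULTS lets the routine
 * substitute the boot-time task_wakeups_monitor_rate; a subsequent
 * WAKEMON_GET_PARAMS call reads the effective rate back.
 *
 *	uint32_t flags;
 *	int32_t rate_hz = 0;
 *
 *	flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
 *	(void) task_wakeups_monitor_ctl(task, &flags, &rate_hz);
 *
 *	flags = WAKEMON_GET_PARAMS;
 *	(void) task_wakeups_monitor_ctl(task, &flags, &rate_hz);
 */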
4367
4368 void
4369 task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1)
4370 {
4371 if (warning == LEDGER_WARNING_ROSE_ABOVE) {
4372 #if CONFIG_TELEMETRY
4373 /*
4374 * This task is in danger of violating the wakeups monitor. Enable telemetry on this task
4375 * so there are micro-stackshots available if and when EXC_RESOURCE is triggered.
4376 */
4377 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 1);
4378 #endif
4379 return;
4380 }
4381
4382 #if CONFIG_TELEMETRY
4383 /*
4384 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
4385 * exceeded the limit, turn telemetry off for the task.
4386 */
4387 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 0);
4388 #endif
4389
4390 if (warning == 0) {
4391 THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE();
4392 }
4393 }
4394
4395 void __attribute__((noinline))
4396 THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE(void)
4397 {
4398 task_t task = current_task();
4399 int pid = 0;
4400 const char *procname = "unknown";
4401 uint64_t observed_wakeups_rate;
4402 uint64_t permitted_wakeups_rate;
4403 uint64_t observation_interval;
4404 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
4405 struct ledger_entry_info lei;
4406
4407 #ifdef MACH_BSD
4408 pid = proc_selfpid();
4409 if (task->bsd_info != NULL)
4410 procname = proc_name_address(current_task()->bsd_info);
4411 #endif
4412
4413 ledger_get_entry_info(task->ledger, task_ledgers.interrupt_wakeups, &lei);
4414
4415 /*
4416 * Disable the exception notification so we don't overwhelm
4417 * the listener with an endless stream of redundant exceptions.
4418 */
4419 uint32_t flags = WAKEMON_DISABLE;
4420 task_wakeups_monitor_ctl(task, &flags, NULL);
4421
4422 observed_wakeups_rate = (lei.lei_balance * (int64_t)NSEC_PER_SEC) / lei.lei_last_refill;
4423 permitted_wakeups_rate = lei.lei_limit / task_wakeups_monitor_interval;
4424 observation_interval = lei.lei_refill_period / NSEC_PER_SEC;
4425
4426 if (disable_exc_resource) {
4427 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
4428 "supressed by a boot-arg\n", procname, pid);
4429 return;
4430 }
4431 if (audio_active) {
4432 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
4433 "supressed due to audio playback\n", procname, pid);
4434 return;
4435 }
4436 printf("process %s[%d] caught causing excessive wakeups. Observed wakeups rate "
4437 "(per sec): %lld; Maximum permitted wakeups rate (per sec): %lld; Observation "
4438 "period: %lld seconds; Task lifetime number of wakeups: %lld\n",
4439 procname, pid, observed_wakeups_rate, permitted_wakeups_rate,
4440 observation_interval, lei.lei_credit);
4441
4442 code[0] = code[1] = 0;
4443 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_WAKEUPS);
4444 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_WAKEUPS_MONITOR);
4445 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_PERMITTED(code[0], task_wakeups_monitor_rate);
4446 EXC_RESOURCE_CPUMONITOR_ENCODE_OBSERVATION_INTERVAL(code[0], observation_interval);
4447 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_OBSERVED(code[1], lei.lei_balance * (int64_t)NSEC_PER_SEC / lei.lei_last_refill);
4448 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
4449
4450 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
4451 task_terminate_internal(task);
4452 }
4453 }
4454
4455 kern_return_t
4456 task_purge_volatile_memory(
4457 task_t task)
4458 {
4459 vm_map_t map;
4460 int num_object_purged;
4461
4462 if (task == TASK_NULL)
4463 return KERN_INVALID_TASK;
4464
4465 task_lock(task);
4466
4467 if (!task->active) {
4468 task_unlock(task);
4469 return KERN_INVALID_TASK;
4470 }
4471 map = task->map;
4472 if (map == VM_MAP_NULL) {
4473 task_unlock(task);
4474 return KERN_INVALID_TASK;
4475 }
4476 vm_map_reference(task->map);
4477
4478 task_unlock(task);
4479
4480 num_object_purged = vm_map_purge(map);
4481 vm_map_deallocate(map);
4482
4483 return KERN_SUCCESS;
4484 }
4485
4486 /* Placeholders for the task set/get voucher interfaces */
4487 kern_return_t
4488 task_get_mach_voucher(
4489 task_t task,
4490 mach_voucher_selector_t __unused which,
4491 ipc_voucher_t *voucher)
4492 {
4493 if (TASK_NULL == task)
4494 return KERN_INVALID_TASK;
4495
4496 *voucher = NULL;
4497 return KERN_SUCCESS;
4498 }
4499
4500 kern_return_t
4501 task_set_mach_voucher(
4502 task_t task,
4503 ipc_voucher_t __unused voucher)
4504 {
4505 if (TASK_NULL == task)
4506 return KERN_INVALID_TASK;
4507
4508 return KERN_SUCCESS;
4509 }
4510
4511 kern_return_t
4512 task_swap_mach_voucher(
4513 task_t task,
4514 ipc_voucher_t new_voucher,
4515 ipc_voucher_t *in_out_old_voucher)
4516 {
4517 if (TASK_NULL == task)
4518 return KERN_INVALID_TASK;
4519
4520 *in_out_old_voucher = new_voucher;
4521 return KERN_SUCCESS;
4522 }
4523
4524 void task_set_gpu_denied(task_t task, boolean_t denied)
4525 {
4526 task_lock(task);
4527
4528 if (denied) {
4529 task->t_flags |= TF_GPU_DENIED;
4530 } else {
4531 task->t_flags &= ~TF_GPU_DENIED;
4532 }
4533
4534 task_unlock(task);
4535 }
4536
4537 boolean_t task_is_gpu_denied(task_t task)
4538 {
4539 /* We don't need the lock to read this flag */
4540 return (task->t_flags & TF_GPU_DENIED) ? TRUE : FALSE;
4541 }
4542
4543 void task_update_logical_writes(task_t task, uint32_t io_size, int flags)
4544 {
4545 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_DATA_WRITE)) | DBG_FUNC_NONE, task_pid(task), io_size, flags, 0, 0);
4546 switch (flags) {
4547 case TASK_WRITE_IMMEDIATE:
4548 OSAddAtomic64(io_size, (SInt64 *)&(task->task_immediate_writes));
4549 break;
4550 case TASK_WRITE_DEFERRED:
4551 OSAddAtomic64(io_size, (SInt64 *)&(task->task_deferred_writes));
4552 break;
4553 case TASK_WRITE_INVALIDATED:
4554 OSAddAtomic64(io_size, (SInt64 *)&(task->task_invalidated_writes));
4555 break;
4556 case TASK_WRITE_METADATA:
4557 OSAddAtomic64(io_size, (SInt64 *)&(task->task_metadata_writes));
4558 break;
4559 }
4560 return;
4561 }
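/*
 * Illustrative call from an I/O accounting site (hypothetical sizes):
 * charge a 16 KB write that will be flushed later against the
 * deferred-write counter.  Unrecognized flag values simply fall through
 * the switch above and are ignored.
 *
 *	task_update_logical_writes(current_task(), 16 * 1024, TASK_WRITE_DEFERRED);
 */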