1 /*
2 * Copyright (c) 2000-2010, 2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_FREE_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 * File: kern/task.c
58 * Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub,
59 * David Black
60 *
61 * Task management primitives implementation.
62 */
63 /*
64 * Copyright (c) 1993 The University of Utah and
65 * the Computer Systems Laboratory (CSL). All rights reserved.
66 *
67 * Permission to use, copy, modify and distribute this software and its
68 * documentation is hereby granted, provided that both the copyright
69 * notice and this permission notice appear in all copies of the
70 * software, derivative works or modified versions, and any portions
71 * thereof, and that both notices appear in supporting documentation.
72 *
73 * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
74 * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
75 * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
76 *
77 * CSL requests users of this software to return to csl-dist@cs.utah.edu any
78 * improvements that they make and grant CSL redistribution rights.
79 *
80 */
81 /*
82 * NOTICE: This file was modified by McAfee Research in 2004 to introduce
83 * support for mandatory and extensible security protections. This notice
84 * is included in support of clause 2.2 (b) of the Apple Public License,
85 * Version 2.0.
86 * Copyright (c) 2005 SPARTA, Inc.
87 */
88
89 #include <mach/mach_types.h>
90 #include <mach/boolean.h>
91 #include <mach/host_priv.h>
92 #include <mach/machine/vm_types.h>
93 #include <mach/vm_param.h>
94 #include <mach/mach_vm.h>
95 #include <mach/semaphore.h>
96 #include <mach/task_info.h>
97 #include <mach/task_special_ports.h>
98
99 #include <ipc/ipc_importance.h>
100 #include <ipc/ipc_types.h>
101 #include <ipc/ipc_space.h>
102 #include <ipc/ipc_entry.h>
103 #include <ipc/ipc_hash.h>
104
105 #include <kern/kern_types.h>
106 #include <kern/mach_param.h>
107 #include <kern/misc_protos.h>
108 #include <kern/task.h>
109 #include <kern/thread.h>
110 #include <kern/coalition.h>
111 #include <kern/zalloc.h>
112 #include <kern/kalloc.h>
113 #include <kern/kern_cdata.h>
114 #include <kern/processor.h>
115 #include <kern/sched_prim.h> /* for thread_wakeup */
116 #include <kern/ipc_tt.h>
117 #include <kern/host.h>
118 #include <kern/clock.h>
119 #include <kern/timer.h>
120 #include <kern/assert.h>
121 #include <kern/sync_lock.h>
122 #include <kern/affinity.h>
123 #include <kern/exc_resource.h>
124 #include <kern/machine.h>
125 #include <corpses/task_corpse.h>
126 #if CONFIG_TELEMETRY
127 #include <kern/telemetry.h>
128 #endif
129
130 #include <vm/pmap.h>
131 #include <vm/vm_map.h>
132 #include <vm/vm_kern.h> /* for kernel_map, ipc_kernel_map */
133 #include <vm/vm_pageout.h>
134 #include <vm/vm_protos.h>
135 #include <vm/vm_purgeable_internal.h>
136
137 #include <sys/resource.h>
138 #include <sys/signalvar.h> /* for coredump */
139
140 /*
141 * Exported interfaces
142 */
143
144 #include <mach/task_server.h>
145 #include <mach/mach_host_server.h>
146 #include <mach/host_security_server.h>
147 #include <mach/mach_port_server.h>
148
149 #include <vm/vm_shared_region.h>
150
151 #include <libkern/OSDebug.h>
152 #include <libkern/OSAtomic.h>
153
154 #if CONFIG_ATM
155 #include <atm/atm_internal.h>
156 #endif
157
158 #include <kern/sfi.h>
159
160 #if KPERF
161 extern int kpc_force_all_ctrs(task_t, int);
162 #endif
163
164 uint32_t qos_override_mode;
165
166 task_t kernel_task;
167 zone_t task_zone;
168 lck_attr_t task_lck_attr;
169 lck_grp_t task_lck_grp;
170 lck_grp_attr_t task_lck_grp_attr;
171
172 /* Flag set by core audio when audio is playing. Used to stifle EXC_RESOURCE generation when active. */
173 int audio_active = 0;
174
175 zinfo_usage_store_t tasks_tkm_private;
176 zinfo_usage_store_t tasks_tkm_shared;
177
178 /* A container to accumulate statistics for expired tasks */
179 expired_task_statistics_t dead_task_statistics;
180 lck_spin_t dead_task_statistics_lock;
181
182 ledger_template_t task_ledger_template = NULL;
183
184 struct _task_ledger_indices task_ledgers __attribute__((used)) =
185 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
186 { 0 /* initialized at runtime */},
187 #ifdef CONFIG_BANK
188 -1, -1,
189 #endif
190 };
191
192 void init_task_ledgers(void);
193 void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1);
194 void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1);
195 void __attribute__((noinline)) THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE(void);
196 void __attribute__((noinline)) PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb);
197
198 kern_return_t task_suspend_internal(task_t);
199 kern_return_t task_resume_internal(task_t);
200 static kern_return_t task_start_halt_locked(task_t task, boolean_t should_mark_corpse);
201
202
203 void proc_init_cpumon_params(void);
204 extern kern_return_t exception_deliver(thread_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t, struct exception_action *, lck_mtx_t *);
205
206 // Warn tasks when they hit 80% of their memory limit.
207 #define PHYS_FOOTPRINT_WARNING_LEVEL 80
208
209 #define TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT 150 /* wakeups per second */
210 #define TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL 300 /* in seconds. */
211
212 /*
213 * Level (in terms of percentage of the limit) at which the wakeups monitor triggers telemetry.
214 *
215 * (i.e., when the task's wakeups rate exceeds 70% of the limit, start taking user
216 * stacktraces, aka micro-stackshots)
217 */
218 #define TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER 70
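/*
 * Worked example (illustrative, using the defaults above): the monitor
 * observes wakeups over a 300-second window against a limit of 150 wakeups
 * per second, i.e. a budget of 150 * 300 = 45,000 wakeups per window, and
 * telemetry (micro-stackshots) starts once the observed rate crosses 70%
 * of the limit, 0.70 * 150 = 105 wakeups per second.
 */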
219
220 int task_wakeups_monitor_interval; /* In seconds. Time period over which wakeups rate is observed */
221 int task_wakeups_monitor_rate; /* In hz. Maximum allowable wakeups per task before EXC_RESOURCE is sent */
222
223 int task_wakeups_monitor_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
224
225 int disable_exc_resource; /* Global override to suppress EXC_RESOURCE for resource monitor violations. */
226
227 ledger_amount_t max_task_footprint = 0; /* Per-task limit on physical memory consumption in bytes */
228 int max_task_footprint_mb = 0; /* Per-task limit on physical memory consumption in megabytes */
229
230 #if MACH_ASSERT
231 int pmap_ledgers_panic = 1;
232 #endif /* MACH_ASSERT */
233
234 int task_max = CONFIG_TASK_MAX; /* Max number of tasks */
235
236 int hwm_user_cores = 0; /* high watermark violations generate user core files */
237
238 #ifdef MACH_BSD
239 extern void proc_getexecutableuuid(void *, unsigned char *, unsigned long);
240 extern int proc_pid(struct proc *p);
241 extern int proc_selfpid(void);
242 extern char *proc_name_address(struct proc *p);
243 extern uint64_t get_dispatchqueue_offset_from_proc(void *);
244 #if CONFIG_JETSAM
245 extern void proc_memstat_terminated(struct proc* p, boolean_t set);
246 extern void memorystatus_on_ledger_footprint_exceeded(int warning, const int max_footprint_mb);
247 #endif
248 #endif
249 #if MACH_ASSERT
250 extern int pmap_ledgers_panic;
251 #endif /* MACH_ASSERT */
252
253 /* Forwards */
254
255 void task_hold_locked(
256 task_t task);
257 void task_wait_locked(
258 task_t task,
259 boolean_t until_not_runnable);
260 void task_release_locked(
261 task_t task);
262 void task_free(
263 task_t task );
264 void task_synchronizer_destroy_all(
265 task_t task);
266
267 int check_for_tasksuspend(
268 task_t task);
269
270 void
271 task_backing_store_privileged(
272 task_t task)
273 {
274 task_lock(task);
275 task->priv_flags |= VM_BACKING_STORE_PRIV;
276 task_unlock(task);
277 return;
278 }
279
280
281 void
282 task_set_64bit(
283 task_t task,
284 boolean_t is64bit)
285 {
286 #if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
287 thread_t thread;
288 #endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */
289
290 task_lock(task);
291
292 if (is64bit) {
293 if (task_has_64BitAddr(task))
294 goto out;
295 task_set_64BitAddr(task);
296 } else {
297 if ( !task_has_64BitAddr(task))
298 goto out;
299 task_clear_64BitAddr(task);
300 }
301 /* FIXME: On x86, the thread save state flavor can diverge from the
302 * task's 64-bit feature flag due to the 32-bit/64-bit register save
303 * state dichotomy. Since we can be pre-empted in this interval,
304 * certain routines may observe the thread as being in an inconsistent
305 * state with respect to its task's 64-bitness.
306 */
307
308 #if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
309 queue_iterate(&task->threads, thread, thread_t, task_threads) {
310 thread_mtx_lock(thread);
311 machine_thread_switch_addrmode(thread);
312 thread_mtx_unlock(thread);
313 }
314 #endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */
315
316 out:
317 task_unlock(task);
318 }
319
320
321 void
322 task_set_dyld_info(task_t task, mach_vm_address_t addr, mach_vm_size_t size)
323 {
324 task_lock(task);
325 task->all_image_info_addr = addr;
326 task->all_image_info_size = size;
327 task_unlock(task);
328 }
329
330 void
331 task_atm_reset(__unused task_t task) {
332
333 #if CONFIG_ATM
334 if (task->atm_context != NULL) {
335 atm_task_descriptor_destroy(task->atm_context);
336 task->atm_context = NULL;
337 }
338 #endif
339
340 }
341
342 #if TASK_REFERENCE_LEAK_DEBUG
343 #include <kern/btlog.h>
344
345 decl_simple_lock_data(static,task_ref_lock);
346 static btlog_t *task_ref_btlog;
347 #define TASK_REF_OP_INCR 0x1
348 #define TASK_REF_OP_DECR 0x2
349
350 #define TASK_REF_BTDEPTH 7
351
352 static void
353 task_ref_lock_lock(void *context)
354 {
355 simple_lock((simple_lock_t)context);
356 }
357 static void
358 task_ref_lock_unlock(void *context)
359 {
360 simple_unlock((simple_lock_t)context);
361 }
362
363 void
364 task_reference_internal(task_t task)
365 {
366 void * bt[TASK_REF_BTDEPTH];
367 int numsaved = 0;
368
369 numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
370
371 (void)hw_atomic_add(&(task)->ref_count, 1);
372 btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_INCR,
373 bt, numsaved);
374 }
375
376 uint32_t
377 task_deallocate_internal(task_t task)
378 {
379 void * bt[TASK_REF_BTDEPTH];
380 int numsaved = 0;
381
382 numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
383
384 btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_DECR,
385 bt, numsaved);
386 return hw_atomic_sub(&(task)->ref_count, 1);
387 }
388
389 #endif /* TASK_REFERENCE_LEAK_DEBUG */
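/*
 * Leak-hunting note (added for clarity): every reference taken through
 * task_reference_internal() above records a TASK_REF_OP_INCR entry in
 * task_ref_btlog, and every task_deallocate_internal() records a
 * TASK_REF_OP_DECR, both keyed by the task pointer plus a short backtrace.
 * A task whose INCR entries outnumber its DECR entries after it should
 * have been freed is the leak candidate.
 */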
390
391 void
392 task_init(void)
393 {
394
395 lck_grp_attr_setdefault(&task_lck_grp_attr);
396 lck_grp_init(&task_lck_grp, "task", &task_lck_grp_attr);
397 lck_attr_setdefault(&task_lck_attr);
398 lck_mtx_init(&tasks_threads_lock, &task_lck_grp, &task_lck_attr);
399
400 task_zone = zinit(
401 sizeof(struct task),
402 task_max * sizeof(struct task),
403 TASK_CHUNK * sizeof(struct task),
404 "tasks");
405
406 zone_change(task_zone, Z_NOENCRYPT, TRUE);
407
408 /*
409 * Configure per-task memory limit.
410 * The boot-arg is interpreted as Megabytes,
411 * and takes precedence over the device tree.
412 * Setting the boot-arg to 0 disables task limits.
413 */
414 if (!PE_parse_boot_argn("max_task_pmem", &max_task_footprint_mb,
415 sizeof (max_task_footprint_mb))) {
416 /*
417 * No limit was found in boot-args, so go look in the device tree.
418 */
419 if (!PE_get_default("kern.max_task_pmem", &max_task_footprint_mb,
420 sizeof(max_task_footprint_mb))) {
421 /*
422 * No limit was found in device tree.
423 */
424 max_task_footprint_mb = 0;
425 }
426 }
427
428 if (max_task_footprint_mb != 0) {
429 #if CONFIG_JETSAM
430 if (max_task_footprint_mb < 50) {
431 printf("Warning: max_task_pmem %d below minimum.\n",
432 max_task_footprint_mb);
433 max_task_footprint_mb = 50;
434 }
435 printf("Limiting task physical memory footprint to %d MB\n",
436 max_task_footprint_mb);
437
438 max_task_footprint = (ledger_amount_t)max_task_footprint_mb * 1024 * 1024; // Convert MB to bytes
439 #else
440 printf("Warning: max_task_footprint specified, but jetsam not configured; ignoring.\n");
441 #endif
442 }
443
444 #if MACH_ASSERT
445 PE_parse_boot_argn("pmap_ledgers_panic", &pmap_ledgers_panic,
446 sizeof (pmap_ledgers_panic));
447 #endif /* MACH_ASSERT */
448
449 if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores,
450 sizeof (hwm_user_cores))) {
451 hwm_user_cores = 0;
452 }
453
454 if (PE_parse_boot_argn("qos_override_mode", &qos_override_mode, sizeof(qos_override_mode))) {
455 printf("QOS override mode: 0x%08x\n", qos_override_mode);
456 } else {
457 qos_override_mode = QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE;
458 }
459
460 proc_init_cpumon_params();
461
462 if (!PE_parse_boot_argn("task_wakeups_monitor_rate", &task_wakeups_monitor_rate, sizeof (task_wakeups_monitor_rate))) {
463 task_wakeups_monitor_rate = TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT;
464 }
465
466 if (!PE_parse_boot_argn("task_wakeups_monitor_interval", &task_wakeups_monitor_interval, sizeof (task_wakeups_monitor_interval))) {
467 task_wakeups_monitor_interval = TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL;
468 }
469
470 if (!PE_parse_boot_argn("task_wakeups_monitor_ustackshots_trigger_pct", &task_wakeups_monitor_ustackshots_trigger_pct,
471 sizeof (task_wakeups_monitor_ustackshots_trigger_pct))) {
472 task_wakeups_monitor_ustackshots_trigger_pct = TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER;
473 }
474
475 if (!PE_parse_boot_argn("disable_exc_resource", &disable_exc_resource,
476 sizeof (disable_exc_resource))) {
477 disable_exc_resource = 0;
478 }
479
480 /*
481 * If we have coalitions, coalition_init() will call init_task_ledgers() as it
482 * sets up the ledgers for the default coalition. If we don't have coalitions,
483 * then we have to call it now.
484 */
485 #if CONFIG_COALITIONS
486 assert(task_ledger_template);
487 #else /* CONFIG_COALITIONS */
488 init_task_ledgers();
489 #endif /* CONFIG_COALITIONS */
490
491 #if TASK_REFERENCE_LEAK_DEBUG
492 simple_lock_init(&task_ref_lock, 0);
493 task_ref_btlog = btlog_create(100000,
494 TASK_REF_BTDEPTH,
495 task_ref_lock_lock,
496 task_ref_lock_unlock,
497 &task_ref_lock);
498 assert(task_ref_btlog);
499 #endif
500
501 /*
502 * Create the kernel task as the first task.
503 */
504 #ifdef __LP64__
505 if (task_create_internal(TASK_NULL, NULL, FALSE, TRUE, &kernel_task) != KERN_SUCCESS)
506 #else
507 if (task_create_internal(TASK_NULL, NULL, FALSE, FALSE, &kernel_task) != KERN_SUCCESS)
508 #endif
509 panic("task_init\n");
510
511 vm_map_deallocate(kernel_task->map);
512 kernel_task->map = kernel_map;
513 lck_spin_init(&dead_task_statistics_lock, &task_lck_grp, &task_lck_attr);
514
515 }
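/*
 * Tuning sketch (illustrative; values hypothetical): the boot-args consumed
 * by task_init() can be supplied via the nvram boot-args string, e.g.
 *
 *     max_task_pmem=1024 task_wakeups_monitor_rate=300 disable_exc_resource=1
 *
 * which would cap each task's physical footprint at 1024 MB (on jetsam
 * configurations), double the default wakeups-per-second limit, and suppress
 * EXC_RESOURCE delivery for resource-monitor violations.
 */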
516
517 /*
518 * Create a task running in the kernel address space. It may
519 * have its own map of size mem_size and may have ipc privileges.
520 */
521 kern_return_t
522 kernel_task_create(
523 __unused task_t parent_task,
524 __unused vm_offset_t map_base,
525 __unused vm_size_t map_size,
526 __unused task_t *child_task)
527 {
528 return (KERN_INVALID_ARGUMENT);
529 }
530
531 kern_return_t
532 task_create(
533 task_t parent_task,
534 __unused ledger_port_array_t ledger_ports,
535 __unused mach_msg_type_number_t num_ledger_ports,
536 __unused boolean_t inherit_memory,
537 __unused task_t *child_task) /* OUT */
538 {
539 if (parent_task == TASK_NULL)
540 return(KERN_INVALID_ARGUMENT);
541
542 /*
543 * No longer supported: too many calls assume that a task has a valid
544 * process attached.
545 */
546 return(KERN_FAILURE);
547 }
548
549 kern_return_t
550 host_security_create_task_token(
551 host_security_t host_security,
552 task_t parent_task,
553 __unused security_token_t sec_token,
554 __unused audit_token_t audit_token,
555 __unused host_priv_t host_priv,
556 __unused ledger_port_array_t ledger_ports,
557 __unused mach_msg_type_number_t num_ledger_ports,
558 __unused boolean_t inherit_memory,
559 __unused task_t *child_task) /* OUT */
560 {
561 if (parent_task == TASK_NULL)
562 return(KERN_INVALID_ARGUMENT);
563
564 if (host_security == HOST_NULL)
565 return(KERN_INVALID_SECURITY);
566
567 /*
568 * No longer supported.
569 */
570 return(KERN_FAILURE);
571 }
572
573 /*
574 * Task ledgers
575 * ------------
576 *
577 * phys_footprint
578 * Physical footprint: This is the sum of:
579 * + (internal - alternate_accounting)
580 * + (internal_compressed - alternate_accounting_compressed)
581 * + iokit_mapped
582 * + purgeable_nonvolatile
583 * + purgeable_nonvolatile_compressed
584 *
585 * internal
586 * The task's anonymous memory, which on iOS is always resident.
587 *
588 * internal_compressed
589 * Amount of this task's internal memory which is held by the compressor.
590 * Such memory is no longer actually resident for the task [i.e., resident in its pmap],
591 * and could be either decompressed back into memory, or paged out to storage, depending
592 * on our implementation.
593 *
594 * iokit_mapped
595 * IOKit mappings: The total size of all IOKit mappings in this task, regardless of
596 * clean/dirty or internal/external state.
597 *
598 * alternate_accounting
599 * The number of internal dirty pages which are part of IOKit mappings. By definition, these pages
600 * are counted in both internal *and* iokit_mapped, so we must subtract them from the total to avoid
601 * double counting.
602 */
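/*
 * Worked example (numbers hypothetical): a task with 100 MB internal,
 * 20 MB internal_compressed, 10 MB iokit_mapped, 8 MB alternate_accounting,
 * 0 MB alternate_accounting_compressed, 5 MB purgeable_nonvolatile and
 * 0 MB purgeable_nonvolatile_compressed has
 *
 *     phys_footprint = (100 - 8) + (20 - 0) + 10 + 5 + 0 = 127 MB
 */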
603 void
604 init_task_ledgers(void)
605 {
606 ledger_template_t t;
607
608 assert(task_ledger_template == NULL);
609 assert(kernel_task == TASK_NULL);
610
611 if ((t = ledger_template_create("Per-task ledger")) == NULL)
612 panic("couldn't create task ledger template");
613
614 task_ledgers.cpu_time = ledger_entry_add(t, "cpu_time", "sched", "ns");
615 task_ledgers.tkm_private = ledger_entry_add(t, "tkm_private",
616 "physmem", "bytes");
617 task_ledgers.tkm_shared = ledger_entry_add(t, "tkm_shared", "physmem",
618 "bytes");
619 task_ledgers.phys_mem = ledger_entry_add(t, "phys_mem", "physmem",
620 "bytes");
621 task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem",
622 "bytes");
623 task_ledgers.internal = ledger_entry_add(t, "internal", "physmem",
624 "bytes");
625 task_ledgers.iokit_mapped = ledger_entry_add(t, "iokit_mapped", "mappings",
626 "bytes");
627 task_ledgers.alternate_accounting = ledger_entry_add(t, "alternate_accounting", "physmem",
628 "bytes");
629 task_ledgers.alternate_accounting_compressed = ledger_entry_add(t, "alternate_accounting_compressed", "physmem",
630 "bytes");
631 task_ledgers.phys_footprint = ledger_entry_add(t, "phys_footprint", "physmem",
632 "bytes");
633 task_ledgers.internal_compressed = ledger_entry_add(t, "internal_compressed", "physmem",
634 "bytes");
635 task_ledgers.purgeable_volatile = ledger_entry_add(t, "purgeable_volatile", "physmem", "bytes");
636 task_ledgers.purgeable_nonvolatile = ledger_entry_add(t, "purgeable_nonvolatile", "physmem", "bytes");
637 task_ledgers.purgeable_volatile_compressed = ledger_entry_add(t, "purgeable_volatile_compress", "physmem", "bytes");
638 task_ledgers.purgeable_nonvolatile_compressed = ledger_entry_add(t, "purgeable_nonvolatile_compress", "physmem", "bytes");
639 task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power",
640 "count");
641 task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power",
642 "count");
643
644 #if CONFIG_SCHED_SFI
645 sfi_class_id_t class_id, ledger_alias;
646 for (class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
647 task_ledgers.sfi_wait_times[class_id] = -1;
648 }
649
650 /* don't account for UNSPECIFIED */
651 for (class_id = SFI_CLASS_UNSPECIFIED + 1; class_id < MAX_SFI_CLASS_ID; class_id++) {
652 ledger_alias = sfi_get_ledger_alias_for_class(class_id);
653 if (ledger_alias != SFI_CLASS_UNSPECIFIED) {
654 /* Check to see if alias has been registered yet */
655 if (task_ledgers.sfi_wait_times[ledger_alias] != -1) {
656 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias];
657 } else {
658 /* Otherwise, initialize it first */
659 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias] = sfi_ledger_entry_add(t, ledger_alias);
660 }
661 } else {
662 task_ledgers.sfi_wait_times[class_id] = sfi_ledger_entry_add(t, class_id);
663 }
664
665 if (task_ledgers.sfi_wait_times[class_id] < 0) {
666 panic("couldn't create entries for task ledger template for SFI class 0x%x", class_id);
667 }
668 }
669
670 assert(task_ledgers.sfi_wait_times[MAX_SFI_CLASS_ID -1] != -1);
671 #endif /* CONFIG_SCHED_SFI */
672
673 #ifdef CONFIG_BANK
674 task_ledgers.cpu_time_billed_to_me = ledger_entry_add(t, "cpu_time_billed_to_me", "sched", "ns");
675 task_ledgers.cpu_time_billed_to_others = ledger_entry_add(t, "cpu_time_billed_to_others", "sched", "ns");
676 #endif
677 if ((task_ledgers.cpu_time < 0) ||
678 (task_ledgers.tkm_private < 0) ||
679 (task_ledgers.tkm_shared < 0) ||
680 (task_ledgers.phys_mem < 0) ||
681 (task_ledgers.wired_mem < 0) ||
682 (task_ledgers.internal < 0) ||
683 (task_ledgers.iokit_mapped < 0) ||
684 (task_ledgers.alternate_accounting < 0) ||
685 (task_ledgers.alternate_accounting_compressed < 0) ||
686 (task_ledgers.phys_footprint < 0) ||
687 (task_ledgers.internal_compressed < 0) ||
688 (task_ledgers.purgeable_volatile < 0) ||
689 (task_ledgers.purgeable_nonvolatile < 0) ||
690 (task_ledgers.purgeable_volatile_compressed < 0) ||
691 (task_ledgers.purgeable_nonvolatile_compressed < 0) ||
692 (task_ledgers.platform_idle_wakeups < 0) ||
693 (task_ledgers.interrupt_wakeups < 0)
694 #ifdef CONFIG_BANK
695 || (task_ledgers.cpu_time_billed_to_me < 0) || (task_ledgers.cpu_time_billed_to_others < 0)
696 #endif
697 ) {
698 panic("couldn't create entries for task ledger template");
699 }
700
701 ledger_track_maximum(t, task_ledgers.phys_footprint, 60);
702 #if MACH_ASSERT
703 if (pmap_ledgers_panic) {
704 ledger_panic_on_negative(t, task_ledgers.phys_footprint);
705 ledger_panic_on_negative(t, task_ledgers.internal);
706 ledger_panic_on_negative(t, task_ledgers.internal_compressed);
707 ledger_panic_on_negative(t, task_ledgers.iokit_mapped);
708 ledger_panic_on_negative(t, task_ledgers.alternate_accounting);
709 ledger_panic_on_negative(t, task_ledgers.alternate_accounting_compressed);
710 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile);
711 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile);
712 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile_compressed);
713 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile_compressed);
714 }
715 #endif /* MACH_ASSERT */
716
717 #if CONFIG_JETSAM
718 ledger_set_callback(t, task_ledgers.phys_footprint, task_footprint_exceeded, NULL, NULL);
719 #endif
720
721 ledger_set_callback(t, task_ledgers.interrupt_wakeups,
722 task_wakeups_rate_exceeded, NULL, NULL);
723
724 task_ledger_template = t;
725 }
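/*
 * Usage sketch (mirrors task_create_internal() below; illustrative only):
 * once init_task_ledgers() has built task_ledger_template, each new task
 * instantiates a private ledger from it and addresses individual entries
 * through the task_ledgers indices.
 */
#if 0	/* illustrative only */
	ledger_t l = ledger_instantiate(task_ledger_template,
					LEDGER_CREATE_ACTIVE_ENTRIES);
	if (l != NULL)
		ledger_credit(l, task_ledgers.cpu_time, 1000 /* ns, hypothetical */);
#endif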
726
727 kern_return_t
728 task_create_internal(
729 task_t parent_task,
730 coalition_t *parent_coalitions __unused,
731 boolean_t inherit_memory,
732 boolean_t is_64bit,
733 task_t *child_task) /* OUT */
734 {
735 task_t new_task;
736 vm_shared_region_t shared_region;
737 ledger_t ledger = NULL;
738
739 new_task = (task_t) zalloc(task_zone);
740
741 if (new_task == TASK_NULL)
742 return(KERN_RESOURCE_SHORTAGE);
743
744 /* one ref for just being alive; one for our caller */
745 new_task->ref_count = 2;
746
747 /* allocate with active entries */
748 assert(task_ledger_template != NULL);
749 if ((ledger = ledger_instantiate(task_ledger_template,
750 LEDGER_CREATE_ACTIVE_ENTRIES)) == NULL) {
751 zfree(task_zone, new_task);
752 return(KERN_RESOURCE_SHORTAGE);
753 }
754
755 new_task->ledger = ledger;
756
757 #if defined(CONFIG_SCHED_MULTIQ)
758 new_task->sched_group = sched_group_create();
759 #endif
760
761 /* if inherit_memory is true, parent_task MUST not be NULL */
762 if (inherit_memory)
763 new_task->map = vm_map_fork(ledger, parent_task->map);
764 else
765 new_task->map = vm_map_create(pmap_create(ledger, 0, is_64bit),
766 (vm_map_offset_t)(VM_MIN_ADDRESS),
767 (vm_map_offset_t)(VM_MAX_ADDRESS), TRUE);
768
769 /* Inherit memlock limit from parent */
770 if (parent_task)
771 vm_map_set_user_wire_limit(new_task->map, (vm_size_t)parent_task->map->user_wire_limit);
772
773 lck_mtx_init(&new_task->lock, &task_lck_grp, &task_lck_attr);
774 queue_init(&new_task->threads);
775 new_task->suspend_count = 0;
776 new_task->thread_count = 0;
777 new_task->active_thread_count = 0;
778 new_task->user_stop_count = 0;
779 new_task->legacy_stop_count = 0;
780 new_task->active = TRUE;
781 new_task->halting = FALSE;
782 new_task->user_data = NULL;
783 new_task->faults = 0;
784 new_task->cow_faults = 0;
785 new_task->pageins = 0;
786 new_task->messages_sent = 0;
787 new_task->messages_received = 0;
788 new_task->syscalls_mach = 0;
789 new_task->priv_flags = 0;
790 new_task->syscalls_unix=0;
791 new_task->c_switch = new_task->p_switch = new_task->ps_switch = 0;
792 new_task->t_flags = 0;
793 new_task->importance = 0;
794
795 #if CONFIG_ATM
796 new_task->atm_context = NULL;
797 #endif
798 #if CONFIG_BANK
799 new_task->bank_context = NULL;
800 #endif
801
802 zinfo_task_init(new_task);
803
804 #ifdef MACH_BSD
805 new_task->bsd_info = NULL;
806 new_task->corpse_info = NULL;
807 #endif /* MACH_BSD */
808
809 #if CONFIG_JETSAM
810 if (max_task_footprint != 0) {
811 ledger_set_limit(ledger, task_ledgers.phys_footprint, max_task_footprint, PHYS_FOOTPRINT_WARNING_LEVEL);
812 }
813 #endif
814
815 if (task_wakeups_monitor_rate != 0) {
816 uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
817 int32_t rate; // Ignored because of WAKEMON_SET_DEFAULTS
818 task_wakeups_monitor_ctl(new_task, &flags, &rate);
819 }
820
821 #if defined(__i386__) || defined(__x86_64__)
822 new_task->i386_ldt = 0;
823 #endif
824
825 new_task->task_debug = NULL;
826
827 queue_init(&new_task->semaphore_list);
828 new_task->semaphores_owned = 0;
829
830 ipc_task_init(new_task, parent_task);
831
832 new_task->total_user_time = 0;
833 new_task->total_system_time = 0;
834
835 new_task->vtimers = 0;
836
837 new_task->shared_region = NULL;
838
839 new_task->affinity_space = NULL;
840
841 new_task->pidsuspended = FALSE;
842 new_task->frozen = FALSE;
843 new_task->changing_freeze_state = FALSE;
844 new_task->rusage_cpu_flags = 0;
845 new_task->rusage_cpu_percentage = 0;
846 new_task->rusage_cpu_interval = 0;
847 new_task->rusage_cpu_deadline = 0;
848 new_task->rusage_cpu_callt = NULL;
849 #if MACH_ASSERT
850 new_task->suspends_outstanding = 0;
851 #endif
852
853 #if HYPERVISOR
854 new_task->hv_task_target = NULL;
855 #endif /* HYPERVISOR */
856
857
858 new_task->low_mem_notified_warn = 0;
859 new_task->low_mem_notified_critical = 0;
860 new_task->low_mem_privileged_listener = 0;
861 new_task->purged_memory_warn = 0;
862 new_task->purged_memory_critical = 0;
863 new_task->mem_notify_reserved = 0;
864 #if IMPORTANCE_INHERITANCE
865 new_task->task_imp_base = NULL;
866 #endif /* IMPORTANCE_INHERITANCE */
867
868 #if defined(__x86_64__)
869 new_task->uexc_range_start = new_task->uexc_range_size = new_task->uexc_handler = 0;
870 #endif
871
872 new_task->requested_policy = default_task_requested_policy;
873 new_task->effective_policy = default_task_effective_policy;
874 new_task->pended_policy = default_task_pended_policy;
875
876 if (parent_task != TASK_NULL) {
877 new_task->sec_token = parent_task->sec_token;
878 new_task->audit_token = parent_task->audit_token;
879
880 /* inherit the parent's shared region */
881 shared_region = vm_shared_region_get(parent_task);
882 vm_shared_region_set(new_task, shared_region);
883
884 if(task_has_64BitAddr(parent_task))
885 task_set_64BitAddr(new_task);
886 new_task->all_image_info_addr = parent_task->all_image_info_addr;
887 new_task->all_image_info_size = parent_task->all_image_info_size;
888
889 #if defined(__i386__) || defined(__x86_64__)
890 if (inherit_memory && parent_task->i386_ldt)
891 new_task->i386_ldt = user_ldt_copy(parent_task->i386_ldt);
892 #endif
893 if (inherit_memory && parent_task->affinity_space)
894 task_affinity_create(parent_task, new_task);
895
896 new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task);
897
898 #if IMPORTANCE_INHERITANCE
899 ipc_importance_task_t new_task_imp = IIT_NULL;
900
901 if (task_is_marked_importance_donor(parent_task)) {
902 new_task_imp = ipc_importance_for_task(new_task, FALSE);
903 assert(IIT_NULL != new_task_imp);
904 ipc_importance_task_mark_donor(new_task_imp, TRUE);
905 }
906 /* Embedded doesn't want this to inherit */
907 if (task_is_marked_importance_receiver(parent_task)) {
908 if (IIT_NULL == new_task_imp)
909 new_task_imp = ipc_importance_for_task(new_task, FALSE);
910 assert(IIT_NULL != new_task_imp);
911 ipc_importance_task_mark_receiver(new_task_imp, TRUE);
912 }
913 if (task_is_marked_importance_denap_receiver(parent_task)) {
914 if (IIT_NULL == new_task_imp)
915 new_task_imp = ipc_importance_for_task(new_task, FALSE);
916 assert(IIT_NULL != new_task_imp);
917 ipc_importance_task_mark_denap_receiver(new_task_imp, TRUE);
918 }
919
920 if (IIT_NULL != new_task_imp) {
921 assert(new_task->task_imp_base == new_task_imp);
922 ipc_importance_task_release(new_task_imp);
923 }
924 #endif /* IMPORTANCE_INHERITANCE */
925
926 new_task->priority = BASEPRI_DEFAULT;
927 new_task->max_priority = MAXPRI_USER;
928
929 new_task->requested_policy.t_apptype = parent_task->requested_policy.t_apptype;
930
931 new_task->requested_policy.int_darwinbg = parent_task->requested_policy.int_darwinbg;
932 new_task->requested_policy.ext_darwinbg = parent_task->requested_policy.ext_darwinbg;
933 new_task->requested_policy.int_iotier = parent_task->requested_policy.int_iotier;
934 new_task->requested_policy.ext_iotier = parent_task->requested_policy.ext_iotier;
935 new_task->requested_policy.int_iopassive = parent_task->requested_policy.int_iopassive;
936 new_task->requested_policy.ext_iopassive = parent_task->requested_policy.ext_iopassive;
937 new_task->requested_policy.bg_iotier = parent_task->requested_policy.bg_iotier;
938 new_task->requested_policy.terminated = parent_task->requested_policy.terminated;
939 new_task->requested_policy.t_qos_clamp = parent_task->requested_policy.t_qos_clamp;
940
941 task_policy_create(new_task, parent_task->requested_policy.t_boosted);
942 } else {
943 new_task->sec_token = KERNEL_SECURITY_TOKEN;
944 new_task->audit_token = KERNEL_AUDIT_TOKEN;
945 #ifdef __LP64__
946 if(is_64bit)
947 task_set_64BitAddr(new_task);
948 #endif
949 new_task->all_image_info_addr = (mach_vm_address_t)0;
950 new_task->all_image_info_size = (mach_vm_size_t)0;
951
952 new_task->pset_hint = PROCESSOR_SET_NULL;
953
954 if (kernel_task == TASK_NULL) {
955 new_task->priority = BASEPRI_KERNEL;
956 new_task->max_priority = MAXPRI_KERNEL;
957 } else {
958 new_task->priority = BASEPRI_DEFAULT;
959 new_task->max_priority = MAXPRI_USER;
960 }
961 }
962
963 bzero(new_task->coalition, sizeof(new_task->coalition));
964 for (int i = 0; i < COALITION_NUM_TYPES; i++)
965 queue_chain_init(new_task->task_coalition[i]);
966
967 /* Allocate I/O Statistics */
968 new_task->task_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info));
969 assert(new_task->task_io_stats != NULL);
970 bzero(new_task->task_io_stats, sizeof(struct io_stat_info));
971
972 bzero(&(new_task->cpu_time_qos_stats), sizeof(struct _cpu_time_qos_stats));
973
974 bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics));
975 new_task->task_timer_wakeups_bin_1 = new_task->task_timer_wakeups_bin_2 = 0;
976 new_task->task_gpu_ns = 0;
977
978 #if CONFIG_COALITIONS
979
980 /* TODO: there is no graceful failure path here... */
981 if (parent_coalitions && parent_coalitions[COALITION_TYPE_RESOURCE]) {
982 coalitions_adopt_task(parent_coalitions, new_task);
983 } else if (parent_task && parent_task->coalition[COALITION_TYPE_RESOURCE]) {
984 /*
985 * all tasks at least have a resource coalition, so
986 * if the parent has one then inherit all coalitions
987 * the parent is a part of
988 */
989 coalitions_adopt_task(parent_task->coalition, new_task);
990 } else {
991 /* TODO: assert that new_task will be PID 1 (launchd) */
992 coalitions_adopt_init_task(new_task);
993 }
994
995 if (new_task->coalition[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
996 panic("created task is not a member of a resource coalition");
997 }
998 #endif /* CONFIG_COALITIONS */
999
1000 new_task->dispatchqueue_offset = 0;
1001 if (parent_task != NULL) {
1002 new_task->dispatchqueue_offset = parent_task->dispatchqueue_offset;
1003 }
1004
1005 if (vm_backing_store_low && parent_task != NULL)
1006 new_task->priv_flags |= (parent_task->priv_flags&VM_BACKING_STORE_PRIV);
1007
1008 new_task->task_volatile_objects = 0;
1009 new_task->task_nonvolatile_objects = 0;
1010 new_task->task_purgeable_disowning = FALSE;
1011 new_task->task_purgeable_disowned = FALSE;
1012
1013 ipc_task_enable(new_task);
1014
1015 lck_mtx_lock(&tasks_threads_lock);
1016 queue_enter(&tasks, new_task, task_t, tasks);
1017 tasks_count++;
1018 lck_mtx_unlock(&tasks_threads_lock);
1019
1020 *child_task = new_task;
1021 return(KERN_SUCCESS);
1022 }
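/*
 * Call sketch (taken from task_init() above): the kernel task itself is
 * created with no parent, no explicit coalitions, no inherited memory and,
 * on LP64 kernels, 64-bit addressing:
 *
 *     task_create_internal(TASK_NULL, NULL, FALSE, TRUE, &kernel_task);
 *
 * User tasks pass a parent task instead and inherit its shared region,
 * security/audit tokens and policy, per the parent_task != TASK_NULL branch
 * above.
 */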
1023
1024 int task_dropped_imp_count = 0;
1025
1026 /*
1027 * task_deallocate:
1028 *
1029 * Drop a reference on a task.
1030 */
1031 void
1032 task_deallocate(
1033 task_t task)
1034 {
1035 ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups;
1036 uint32_t refs;
1037
1038 if (task == TASK_NULL)
1039 return;
1040
1041 refs = task_deallocate_internal(task);
1042
1043 #if IMPORTANCE_INHERITANCE
1044 if (refs > 1)
1045 return;
1046
1047 if (refs == 1) {
1048 /*
1049 * If last ref potentially comes from the task's importance,
1050 * disconnect it. But more task refs may be added before
1051 * that completes, so wait for the reference to go to zero
1052 * naturally (it may happen on a recursive task_deallocate()
1053 * from the ipc_importance_disconnect_task() call).
1054 */
1055 if (IIT_NULL != task->task_imp_base)
1056 ipc_importance_disconnect_task(task);
1057 return;
1058 }
1059 #else
1060 if (refs > 0)
1061 return;
1062 #endif /* IMPORTANCE_INHERITANCE */
1063
1064 lck_mtx_lock(&tasks_threads_lock);
1065 queue_remove(&terminated_tasks, task, task_t, tasks);
1066 terminated_tasks_count--;
1067 lck_mtx_unlock(&tasks_threads_lock);
1068
1069 /*
1070 * remove the reference on atm descriptor
1071 */
1072 task_atm_reset(task);
1073
1074 #if CONFIG_BANK
1075 /*
1076 * remove the reference on bank context
1077 */
1078 if (task->bank_context != NULL) {
1079 bank_task_destroy(task->bank_context);
1080 task->bank_context = NULL;
1081 }
1082 #endif
1083
1084 if (task->task_io_stats)
1085 kfree(task->task_io_stats, sizeof(struct io_stat_info));
1086
1087 /*
1088 * Give the machine dependent code a chance
1089 * to perform cleanup before ripping apart
1090 * the task.
1091 */
1092 machine_task_terminate(task);
1093
1094 ipc_task_terminate(task);
1095
1096 if (task->affinity_space)
1097 task_affinity_deallocate(task);
1098
1099 #if MACH_ASSERT
1100 if (task->ledger != NULL &&
1101 task->map != NULL &&
1102 task->map->pmap != NULL &&
1103 task->map->pmap->ledger != NULL) {
1104 assert(task->ledger == task->map->pmap->ledger);
1105 }
1106 #endif /* MACH_ASSERT */
1107
1108 vm_purgeable_disown(task);
1109 assert(task->task_purgeable_disowned);
1110 if (task->task_volatile_objects != 0 ||
1111 task->task_nonvolatile_objects != 0) {
1112 panic("task_deallocate(%p): "
1113 "volatile_objects=%d nonvolatile_objects=%d\n",
1114 task,
1115 task->task_volatile_objects,
1116 task->task_nonvolatile_objects);
1117 }
1118
1119 vm_map_deallocate(task->map);
1120 is_release(task->itk_space);
1121
1122 ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
1123 &interrupt_wakeups, &debit);
1124 ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
1125 &platform_idle_wakeups, &debit);
1126
1127 #if defined(CONFIG_SCHED_MULTIQ)
1128 sched_group_destroy(task->sched_group);
1129 #endif
1130
1131 /* Accumulate statistics for dead tasks */
1132 lck_spin_lock(&dead_task_statistics_lock);
1133 dead_task_statistics.total_user_time += task->total_user_time;
1134 dead_task_statistics.total_system_time += task->total_system_time;
1135
1136 dead_task_statistics.task_interrupt_wakeups += interrupt_wakeups;
1137 dead_task_statistics.task_platform_idle_wakeups += platform_idle_wakeups;
1138
1139 dead_task_statistics.task_timer_wakeups_bin_1 += task->task_timer_wakeups_bin_1;
1140 dead_task_statistics.task_timer_wakeups_bin_2 += task->task_timer_wakeups_bin_2;
1141
1142 lck_spin_unlock(&dead_task_statistics_lock);
1143 lck_mtx_destroy(&task->lock, &task_lck_grp);
1144
1145 if (!ledger_get_entries(task->ledger, task_ledgers.tkm_private, &credit,
1146 &debit)) {
1147 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc);
1148 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_private.free);
1149 }
1150 if (!ledger_get_entries(task->ledger, task_ledgers.tkm_shared, &credit,
1151 &debit)) {
1152 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_shared.alloc);
1153 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_shared.free);
1154 }
1155 ledger_dereference(task->ledger);
1156 zinfo_task_free(task);
1157
1158 #if TASK_REFERENCE_LEAK_DEBUG
1159 btlog_remove_entries_for_element(task_ref_btlog, task);
1160 #endif
1161
1162 #if CONFIG_COALITIONS
1163 if (!task->coalition[COALITION_TYPE_RESOURCE])
1164 panic("deallocating task was not a member of a resource coalition");
1165 task_release_coalitions(task);
1166 #endif /* CONFIG_COALITIONS */
1167
1168 bzero(task->coalition, sizeof(task->coalition));
1169
1170 #if MACH_BSD
1171 /* clean up collected information since last reference to task is gone */
1172 if (task->corpse_info) {
1173 task_crashinfo_destroy(task->corpse_info);
1174 task->corpse_info = NULL;
1175 }
1176 #endif
1177
1178 zfree(task_zone, task);
1179 }
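/*
 * Reference pairing sketch (illustrative): every task_reference() must be
 * balanced by a task_deallocate().  With IMPORTANCE_INHERITANCE, the drop to
 * a single remaining reference is special-cased above: if that last
 * reference is held by the task's importance structure,
 * ipc_importance_disconnect_task() is called and the final teardown happens
 * on a later, possibly recursive, task_deallocate().
 */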
1180
1181 /*
1182 * task_name_deallocate:
1183 *
1184 * Drop a reference on a task name.
1185 */
1186 void
1187 task_name_deallocate(
1188 task_name_t task_name)
1189 {
1190 return(task_deallocate((task_t)task_name));
1191 }
1192
1193 /*
1194 * task_suspension_token_deallocate:
1195 *
1196 * Drop a reference on a task suspension token.
1197 */
1198 void
1199 task_suspension_token_deallocate(
1200 task_suspension_token_t token)
1201 {
1202 return(task_deallocate((task_t)token));
1203 }
1204
1205
1206 /*
1207 * task_collect_crash_info:
1208 *
1209 * Collect crash info from BSD- and Mach-based data.
1210 */
1211 kern_return_t
1212 task_collect_crash_info(task_t task)
1213 {
1214 kern_return_t kr = KERN_SUCCESS;
1215
1216 kcdata_descriptor_t crash_data = NULL;
1217 kcdata_descriptor_t crash_data_release = NULL;
1218 mach_msg_type_number_t size = CORPSEINFO_ALLOCATION_SIZE;
1219 mach_vm_offset_t crash_data_user_ptr = 0;
1220
1221 if (!corpses_enabled()) {
1222 return KERN_NOT_SUPPORTED;
1223 }
1224
1225 task_lock(task);
1226 assert(task->bsd_info != NULL);
1227 if (task->corpse_info == NULL && task->bsd_info != NULL) {
1228 task_unlock(task);
1229 /* map crash data memory in task's vm map */
1230 kr = mach_vm_allocate(task->map, &crash_data_user_ptr, size, (VM_MAKE_TAG(VM_MEMORY_CORPSEINFO) | VM_FLAGS_ANYWHERE));
1231
1232 if (kr != KERN_SUCCESS)
1233 goto out_no_lock;
1234
1235 crash_data = task_crashinfo_alloc_init((mach_vm_address_t)crash_data_user_ptr, size);
1236 if (crash_data) {
1237 task_lock(task);
1238 crash_data_release = task->corpse_info;
1239 task->corpse_info = crash_data;
1240 task_unlock(task);
1241 kr = KERN_SUCCESS;
1242 } else {
1243 /* if failed to create corpse info, free the mapping */
1244 if (KERN_SUCCESS != mach_vm_deallocate(task->map, crash_data_user_ptr, size)) {
1245 printf("mach_vm_deallocate failed to clear corpse_data for pid %d.\n", task_pid(task));
1246 }
1247 kr = KERN_FAILURE;
1248 }
1249
1250 if (crash_data_release != NULL) {
1251 task_crashinfo_destroy(crash_data_release);
1252 }
1253 } else {
1254 task_unlock(task);
1255 }
1256
1257 out_no_lock:
1258 return kr;
1259 }
1260
1261 /*
1262 * task_deliver_crash_notification:
1263 *
1264 * Makes outcall to registered host port for a corpse.
1265 */
1266 kern_return_t
1267 task_deliver_crash_notification(task_t task)
1268 {
1269 kcdata_descriptor_t crash_info = task->corpse_info;
1270 thread_t th_iter = NULL;
1271 kern_return_t kr = KERN_SUCCESS;
1272 wait_interrupt_t wsave;
1273 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
1274
1275 if (crash_info == NULL)
1276 return KERN_FAILURE;
1277
1278 code[0] = crash_info->kcd_addr_begin;
1279 code[1] = crash_info->kcd_length;
1280
1281 task_lock(task);
1282 queue_iterate(&task->threads, th_iter, thread_t, task_threads)
1283 {
1284 ipc_thread_reset(th_iter);
1285 }
1286 task_unlock(task);
1287
1288 wsave = thread_interrupt_level(THREAD_UNINT);
1289 kr = exception_triage(EXC_CORPSE_NOTIFY, code, EXCEPTION_CODE_MAX);
1290 if (kr != KERN_SUCCESS) {
1291 printf("Failed to send exception EXC_CORPSE_NOTIFY. error code: %d for pid %d\n", kr, task_pid(task));
1292 }
1293
1294 /*
1295 * crash reporting is done. Now release threads
1296 * for reaping by thread_terminate_daemon
1297 */
1298 task_lock(task);
1299 assert(task->active_thread_count == 0);
1300 queue_iterate(&task->threads, th_iter, thread_t, task_threads)
1301 {
1302 thread_mtx_lock(th_iter);
1303 assert(th_iter->inspection == TRUE);
1304 th_iter->inspection = FALSE;
1305 /* now that the corpse has been autopsied, dispose of the thread name */
1306 uthread_cleanup_name(th_iter->uthread);
1307 thread_mtx_unlock(th_iter);
1308 }
1309
1310 thread_terminate_crashed_threads();
1311 /* remove the pending corpse report flag */
1312 task_clear_corpse_pending_report(task);
1313
1314 task_unlock(task);
1315
1316 (void)thread_interrupt_level(wsave);
1317 task_terminate_internal(task);
1318
1319 return kr;
1320 }
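/*
 * Note (added for clarity): the EXC_CORPSE_NOTIFY exception raised above
 * carries the corpse's crash-info blob by reference -- code[0] is the
 * kcdata buffer's begin address and code[1] its length -- so the registered
 * handler on the host port can locate and parse the collected crash data.
 */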
1321
1322 /*
1323 * task_terminate:
1324 *
1325 * Terminate the specified task. See comments on thread_terminate
1326 * (kern/thread.c) about problems with terminating the "current task."
1327 */
1328
1329 kern_return_t
1330 task_terminate(
1331 task_t task)
1332 {
1333 if (task == TASK_NULL)
1334 return (KERN_INVALID_ARGUMENT);
1335
1336 if (task->bsd_info)
1337 return (KERN_FAILURE);
1338
1339 return (task_terminate_internal(task));
1340 }
1341
1342 #if MACH_ASSERT
1343 extern int proc_pid(struct proc *);
1344 extern void proc_name_kdp(task_t t, char *buf, int size);
1345 #endif /* MACH_ASSERT */
1346
1347 #define VM_MAP_PARTIAL_REAP 0x54 /* 0x150 */
1348 static void
1349 __unused task_partial_reap(task_t task, __unused int pid)
1350 {
1351 unsigned int reclaimed_resident = 0;
1352 unsigned int reclaimed_compressed = 0;
1353 uint64_t task_page_count;
1354
1355 task_page_count = (get_task_phys_footprint(task) / PAGE_SIZE_64);
1356
1357 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_START),
1358 pid, task_page_count, 0, 0, 0);
1359
1360 vm_map_partial_reap(task->map, &reclaimed_resident, &reclaimed_compressed);
1361
1362 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_END),
1363 pid, reclaimed_resident, reclaimed_compressed, 0, 0);
1364 }
1365
1366 kern_return_t
1367 task_mark_corpse(task_t task)
1368 {
1369 kern_return_t kr = KERN_SUCCESS;
1370 thread_t self_thread;
1371 (void) self_thread;
1372 wait_interrupt_t wsave;
1373
1374 assert(task != kernel_task);
1375 assert(task == current_task());
1376 assert(!task_is_a_corpse(task));
1377
1378 kr = task_collect_crash_info(task);
1379 if (kr != KERN_SUCCESS) {
1380 return kr;
1381 }
1382
1383 self_thread = current_thread();
1384
1385 wsave = thread_interrupt_level(THREAD_UNINT);
1386 task_lock(task);
1387
1388 task_set_corpse_pending_report(task);
1389 task_set_corpse(task);
1390
1391 kr = task_start_halt_locked(task, TRUE);
1392 assert(kr == KERN_SUCCESS);
1393 ipc_task_reset(task);
1394 ipc_task_enable(task);
1395
1396 task_unlock(task);
1397 /* terminate the ipc space */
1398 ipc_space_terminate(task->itk_space);
1399
1400 task_start_halt(task);
1401 thread_terminate_internal(self_thread);
1402 (void) thread_interrupt_level(wsave);
1403 assert(task->halting == TRUE);
1404 return kr;
1405 }
1406
1407 kern_return_t
1408 task_terminate_internal(
1409 task_t task)
1410 {
1411 thread_t thread, self;
1412 task_t self_task;
1413 boolean_t interrupt_save;
1414 int pid = 0;
1415
1416 assert(task != kernel_task);
1417
1418 self = current_thread();
1419 self_task = self->task;
1420
1421 /*
1422 * Get the task locked and make sure that we are not racing
1423 * with someone else trying to terminate us.
1424 */
1425 if (task == self_task)
1426 task_lock(task);
1427 else
1428 if (task < self_task) {
1429 task_lock(task);
1430 task_lock(self_task);
1431 }
1432 else {
1433 task_lock(self_task);
1434 task_lock(task);
1435 }
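	/*
	 * (Ordering note, added for clarity: when both task locks must be
	 * held, they are taken in ascending address order, so two threads
	 * terminating each other's tasks acquire the locks in the same
	 * order and cannot deadlock.)
	 */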
1436
1437 if (!task->active) {
1438 /*
1439 * Task is already being terminated.
1440 * Just return an error. If we are dying, this will
1441 * just get us to our AST special handler and that
1442 * will get us to finalize the termination of ourselves.
1443 */
1444 task_unlock(task);
1445 if (self_task != task)
1446 task_unlock(self_task);
1447
1448 return (KERN_FAILURE);
1449 }
1450
1451 if (task_corpse_pending_report(task)) {
1452 /*
1453 * Task is marked for reporting as corpse.
1454 * Just return an error. This will
1455 * just get us to our AST special handler and that
1456 * will get us to finish the path to death
1457 */
1458 task_unlock(task);
1459 if (self_task != task)
1460 task_unlock(self_task);
1461
1462 return (KERN_FAILURE);
1463 }
1464
1465 if (self_task != task)
1466 task_unlock(self_task);
1467
1468 /*
1469 * Make sure the current thread does not get aborted out of
1470 * the waits inside these operations.
1471 */
1472 interrupt_save = thread_interrupt_level(THREAD_UNINT);
1473
1474 /*
1475 * Indicate that we want all the threads to stop executing
1476 * at user space by holding the task (we would have held
1477 * each thread independently in thread_terminate_internal -
1478 * but this way we may be more likely to already find it
1479 * held there). Mark the task inactive, and prevent
1480 * further task operations via the task port.
1481 */
1482 task_hold_locked(task);
1483 task->active = FALSE;
1484 ipc_task_disable(task);
1485
1486 #if CONFIG_TELEMETRY
1487 /*
1488 * Notify telemetry that this task is going away.
1489 */
1490 telemetry_task_ctl_locked(task, TF_TELEMETRY, 0);
1491 #endif
1492
1493 /*
1494 * Terminate each thread in the task.
1495 */
1496 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1497 thread_terminate_internal(thread);
1498 }
1499
1500 #ifdef MACH_BSD
1501 if (task->bsd_info != NULL) {
1502 pid = proc_pid(task->bsd_info);
1503 }
1504 #endif /* MACH_BSD */
1505
1506 task_unlock(task);
1507
1508 proc_set_task_policy(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE,
1509 TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);
1510
1511 /* Early object reap phase */
1512
1513 // PR-17045188: Revisit implementation
1514 // task_partial_reap(task, pid);
1515
1516
1517 /*
1518 * Destroy all synchronizers owned by the task.
1519 */
1520 task_synchronizer_destroy_all(task);
1521
1522 /*
1523 * Destroy the IPC space, leaving just a reference for it.
1524 */
1525 ipc_space_terminate(task->itk_space);
1526
1527 #if 00
1528 /* if some ledgers go negative on tear-down again... */
1529 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1530 task_ledgers.phys_footprint);
1531 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1532 task_ledgers.internal);
1533 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1534 task_ledgers.internal_compressed);
1535 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1536 task_ledgers.iokit_mapped);
1537 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1538 task_ledgers.alternate_accounting);
1539 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1540 task_ledgers.alternate_accounting_compressed);
1541 #endif
1542
1543 /*
1544 * If the current thread is a member of the task
1545 * being terminated, then the last reference to
1546 * the task will not be dropped until the thread
1547 * is finally reaped. To avoid incurring the
1548 * expense of removing the address space regions
1549 * at reap time, we do it explicitly here.
1550 */
1551
1552 vm_map_lock(task->map);
1553 vm_map_disable_hole_optimization(task->map);
1554 vm_map_unlock(task->map);
1555
1556 vm_map_remove(task->map,
1557 task->map->min_offset,
1558 task->map->max_offset,
1559 /* no unnesting on final cleanup: */
1560 VM_MAP_REMOVE_NO_UNNESTING);
1561
1562 /* release our shared region */
1563 vm_shared_region_set(task, NULL);
1564
1565
1566 #if MACH_ASSERT
1567 /*
1568 * Identify the pmap's process, in case the pmap ledgers drift
1569 * and we have to report it.
1570 */
1571 char procname[17];
1572 if (task->bsd_info) {
1573 pid = proc_pid(task->bsd_info);
1574 proc_name_kdp(task, procname, sizeof (procname));
1575 } else {
1576 pid = 0;
1577 strlcpy(procname, "<unknown>", sizeof (procname));
1578 }
1579 pmap_set_process(task->map->pmap, pid, procname);
1580 #endif /* MACH_ASSERT */
1581
1582 lck_mtx_lock(&tasks_threads_lock);
1583 queue_remove(&tasks, task, task_t, tasks);
1584 queue_enter(&terminated_tasks, task, task_t, tasks);
1585 tasks_count--;
1586 terminated_tasks_count++;
1587 lck_mtx_unlock(&tasks_threads_lock);
1588
1589 /*
1590 * We no longer need to guard against being aborted, so restore
1591 * the previous interruptible state.
1592 */
1593 thread_interrupt_level(interrupt_save);
1594
1595 #if KPERF
1596 /* force the task to release all ctrs */
1597 if (task->t_chud & TASK_KPC_FORCED_ALL_CTRS)
1598 kpc_force_all_ctrs(task, 0);
1599 #endif
1600
1601 #if CONFIG_COALITIONS
1602 /*
1603 * Leave our coalitions. (drop activation but not reference)
1604 */
1605 coalitions_remove_task(task);
1606 #endif
1607
1608 /*
1609 * Get rid of the task active reference on itself.
1610 */
1611 task_deallocate(task);
1612
1613 return (KERN_SUCCESS);
1614 }
1615
1616 /*
1617 * task_start_halt:
1618 *
1619 * Shut the current task down (except for the current thread) in
1620 * preparation for dramatic changes to the task (probably exec).
1621 * We hold the task and mark all other threads in the task for
1622 * termination.
1623 */
1624 kern_return_t
1625 task_start_halt(task_t task)
1626 {
1627 kern_return_t kr = KERN_SUCCESS;
1628 task_lock(task);
1629 kr = task_start_halt_locked(task, FALSE);
1630 task_unlock(task);
1631 return kr;
1632 }
1633
1634 static kern_return_t
1635 task_start_halt_locked(task_t task, boolean_t should_mark_corpse)
1636 {
1637 thread_t thread, self;
1638 uint64_t dispatchqueue_offset;
1639
1640 assert(task != kernel_task);
1641
1642 self = current_thread();
1643
1644 if (task != self->task)
1645 return (KERN_INVALID_ARGUMENT);
1646
1647 if (task->halting || !task->active || !self->active) {
1648 /*
1649 * Task or current thread is already being terminated.
1650 * Hurry up and return out of the current kernel context
1651 * so that we run our AST special handler to terminate
1652 * ourselves.
1653 */
1654 return (KERN_FAILURE);
1655 }
1656
1657 task->halting = TRUE;
1658
1659 /*
1660 * Mark all the threads to keep them from starting any more
1661 * user-level execution. The thread_terminate_internal code
1662 * would do this on a thread by thread basis anyway, but this
1663 * gives us a better chance of not having to wait there.
1664 */
1665 task_hold_locked(task);
1666 dispatchqueue_offset = get_dispatchqueue_offset_from_proc(task->bsd_info);
1667
1668 /*
1669 * Terminate all the other threads in the task.
1670 */
1671 queue_iterate(&task->threads, thread, thread_t, task_threads)
1672 {
1673 if (should_mark_corpse) {
1674 thread_mtx_lock(thread);
1675 thread->inspection = TRUE;
1676 thread_mtx_unlock(thread);
1677 }
1678 if (thread != self)
1679 thread_terminate_internal(thread);
1680 }
1681 task->dispatchqueue_offset = dispatchqueue_offset;
1682
1683 task_release_locked(task);
1684
1685 return KERN_SUCCESS;
1686 }
1687
1688
1689 /*
1690 * task_complete_halt:
1691 *
1692 * Complete task halt by waiting for threads to terminate, then clean
1693 * up task resources (VM, port namespace, etc...) and then let the
1694 * current thread go in the (practically empty) task context.
1695 */
1696 void
1697 task_complete_halt(task_t task)
1698 {
1699 task_lock(task);
1700 assert(task->halting);
1701 assert(task == current_task());
1702
1703 /*
1704 * Wait for the other threads to get shut down.
1705 * When the last other thread is reaped, we'll be
1706 * woken up.
1707 */
1708 if (task->thread_count > 1) {
1709 assert_wait((event_t)&task->halting, THREAD_UNINT);
1710 task_unlock(task);
1711 thread_block(THREAD_CONTINUE_NULL);
1712 } else {
1713 task_unlock(task);
1714 }
1715
1716 /*
1717 * Give the machine dependent code a chance
1718 * to perform cleanup of task-level resources
1719 * associated with the current thread before
1720 * ripping apart the task.
1721 */
1722 machine_task_terminate(task);
1723
1724 /*
1725 * Destroy all synchronizers owned by the task.
1726 */
1727 task_synchronizer_destroy_all(task);
1728
1729 /*
1730 * Destroy the contents of the IPC space, leaving just
1731 * a reference for it.
1732 */
1733 ipc_space_clean(task->itk_space);
1734
1735 /*
1736 * Clean out the address space, as we are going to be
1737 * getting a new one.
1738 */
1739 vm_map_remove(task->map, task->map->min_offset,
1740 task->map->max_offset,
1741 /* no unnesting on final cleanup: */
1742 VM_MAP_REMOVE_NO_UNNESTING);
1743
1744 task->halting = FALSE;
1745 }
1746
1747 /*
1748 * task_hold_locked:
1749 *
1750 * Suspend execution of the specified task.
1751 * This is a recursive-style suspension of the task; a count of
1752 * suspends is maintained.
1753 *
1754 * CONDITIONS: the task is locked and active.
1755 */
1756 void
1757 task_hold_locked(
1758 register task_t task)
1759 {
1760 register thread_t thread;
1761
1762 assert(task->active);
1763
1764 if (task->suspend_count++ > 0)
1765 return;
1766
1767 /*
1768 * Iterate through all the threads and hold them.
1769 */
1770 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1771 thread_mtx_lock(thread);
1772 thread_hold(thread);
1773 thread_mtx_unlock(thread);
1774 }
1775 }
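/*
 * Pairing sketch (illustrative): task_hold_locked() and task_release_locked()
 * (below) nest, so only the outermost hold/release pair actually stops and
 * resumes the task's threads:
 *
 *     task_hold_locked(task);      suspend_count 0 -> 1, threads held
 *     task_hold_locked(task);      suspend_count 1 -> 2, threads untouched
 *     task_release_locked(task);   suspend_count 2 -> 1, threads untouched
 *     task_release_locked(task);   suspend_count 1 -> 0, threads released
 */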
1776
1777 /*
1778 * task_hold:
1779 *
1780 * Same as the internal routine above, except that it must lock
1781 * and verify that the task is active. This differs from task_suspend
1782 * in that it places a kernel hold on the task rather than just a
1783 * user-level hold. This keeps users from over-resuming and setting
1784 * it running out from under the kernel.
1785 *
1786 * CONDITIONS: the caller holds a reference on the task
1787 */
1788 kern_return_t
1789 task_hold(
1790 register task_t task)
1791 {
1792 if (task == TASK_NULL)
1793 return (KERN_INVALID_ARGUMENT);
1794
1795 task_lock(task);
1796
1797 if (!task->active) {
1798 task_unlock(task);
1799
1800 return (KERN_FAILURE);
1801 }
1802
1803 task_hold_locked(task);
1804 task_unlock(task);
1805
1806 return (KERN_SUCCESS);
1807 }
1808
1809 kern_return_t
1810 task_wait(
1811 task_t task,
1812 boolean_t until_not_runnable)
1813 {
1814 if (task == TASK_NULL)
1815 return (KERN_INVALID_ARGUMENT);
1816
1817 task_lock(task);
1818
1819 if (!task->active) {
1820 task_unlock(task);
1821
1822 return (KERN_FAILURE);
1823 }
1824
1825 task_wait_locked(task, until_not_runnable);
1826 task_unlock(task);
1827
1828 return (KERN_SUCCESS);
1829 }
1830
1831 /*
1832 * task_wait_locked:
1833 *
1834 * Wait for all threads in task to stop.
1835 *
1836 * Conditions:
1837 * Called with task locked, active, and held.
1838 */
1839 void
1840 task_wait_locked(
1841 register task_t task,
1842 boolean_t until_not_runnable)
1843 {
1844 register thread_t thread, self;
1845
1846 assert(task->active);
1847 assert(task->suspend_count > 0);
1848
1849 self = current_thread();
1850
1851 /*
1852 * Iterate through all the threads and wait for them to
1853 * stop. Do not wait for the current thread if it is within
1854 * the task.
1855 */
1856 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1857 if (thread != self)
1858 thread_wait(thread, until_not_runnable);
1859 }
1860 }
1861
1862 /*
1863 * task_release_locked:
1864 *
1865 * Release a kernel hold on a task.
1866 *
1867 * CONDITIONS: the task is locked and active
1868 */
1869 void
1870 task_release_locked(
1871 register task_t task)
1872 {
1873 register thread_t thread;
1874
1875 assert(task->active);
1876 assert(task->suspend_count > 0);
1877
1878 if (--task->suspend_count > 0)
1879 return;
1880
1881 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1882 thread_mtx_lock(thread);
1883 thread_release(thread);
1884 thread_mtx_unlock(thread);
1885 }
1886 }
1887
1888 /*
1889 * task_release:
1890 *
1891 * Same as the internal routine above, except that it must lock
1892 * and verify that the task is active.
1893 *
1894 * CONDITIONS: The caller holds a reference to the task
1895 */
1896 kern_return_t
1897 task_release(
1898 task_t task)
1899 {
1900 if (task == TASK_NULL)
1901 return (KERN_INVALID_ARGUMENT);
1902
1903 task_lock(task);
1904
1905 if (!task->active) {
1906 task_unlock(task);
1907
1908 return (KERN_FAILURE);
1909 }
1910
1911 task_release_locked(task);
1912 task_unlock(task);
1913
1914 return (KERN_SUCCESS);
1915 }
1916
1917 kern_return_t
1918 task_threads(
1919 task_t task,
1920 thread_act_array_t *threads_out,
1921 mach_msg_type_number_t *count)
1922 {
1923 mach_msg_type_number_t actual;
1924 thread_t *thread_list;
1925 thread_t thread;
1926 vm_size_t size, size_needed;
1927 void *addr;
1928 unsigned int i, j;
1929
1930 if (task == TASK_NULL)
1931 return (KERN_INVALID_ARGUMENT);
1932
1933 size = 0; addr = NULL;
1934
1935 for (;;) {
1936 task_lock(task);
1937 if (!task->active) {
1938 task_unlock(task);
1939
1940 if (size != 0)
1941 kfree(addr, size);
1942
1943 return (KERN_FAILURE);
1944 }
1945
1946 actual = task->thread_count;
1947
1948 /* do we have the memory we need? */
1949 size_needed = actual * sizeof (mach_port_t);
1950 if (size_needed <= size)
1951 break;
1952
1953 /* unlock the task and allocate more memory */
1954 task_unlock(task);
1955
1956 if (size != 0)
1957 kfree(addr, size);
1958
1959 assert(size_needed > 0);
1960 size = size_needed;
1961
1962 addr = kalloc(size);
1963 if (addr == 0)
1964 return (KERN_RESOURCE_SHORTAGE);
1965 }
1966
1967 /* OK, have memory and the task is locked & active */
1968 thread_list = (thread_t *)addr;
1969
1970 i = j = 0;
1971
1972 for (thread = (thread_t)queue_first(&task->threads); i < actual;
1973 ++i, thread = (thread_t)queue_next(&thread->task_threads)) {
1974 thread_reference_internal(thread);
1975 thread_list[j++] = thread;
1976 }
1977
1978 assert(queue_end(&task->threads, (queue_entry_t)thread));
1979
1980 actual = j;
1981 size_needed = actual * sizeof (mach_port_t);
1982
1983 /* can unlock task now that we've got the thread refs */
1984 task_unlock(task);
1985
1986 if (actual == 0) {
1987 /* no threads, so return null pointer and deallocate memory */
1988
1989 *threads_out = NULL;
1990 *count = 0;
1991
1992 if (size != 0)
1993 kfree(addr, size);
1994 }
1995 else {
1996 /* if we allocated too much, must copy */
1997
1998 if (size_needed < size) {
1999 void *newaddr;
2000
2001 newaddr = kalloc(size_needed);
2002 if (newaddr == 0) {
2003 for (i = 0; i < actual; ++i)
2004 thread_deallocate(thread_list[i]);
2005 kfree(addr, size);
2006 return (KERN_RESOURCE_SHORTAGE);
2007 }
2008
2009 bcopy(addr, newaddr, size_needed);
2010 kfree(addr, size);
2011 thread_list = (thread_t *)newaddr;
2012 }
2013
2014 *threads_out = thread_list;
2015 *count = actual;
2016
2017 /* do the conversion that Mig should handle */
2018
2019 for (i = 0; i < actual; ++i)
2020 ((ipc_port_t *) thread_list)[i] = convert_thread_to_port(thread_list[i]);
2021 }
2022
2023 return (KERN_SUCCESS);
2024 }
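/*
 * Illustrative user-space sketch of the interface above, assuming only
 * the public <mach/mach.h> declarations: task_threads() hands back one
 * send right per thread plus an out-of-line array, and the caller is
 * responsible for releasing both.
 */
#if 0 /* example only; not compiled as part of the kernel */
#include <mach/mach.h>
#include <stdio.h>

static void
list_my_threads(void)
{
	thread_act_array_t threads;
	mach_msg_type_number_t count, i;
	kern_return_t kr;

	kr = task_threads(mach_task_self(), &threads, &count);
	if (kr != KERN_SUCCESS) {
		fprintf(stderr, "task_threads: %s\n", mach_error_string(kr));
		return;
	}
	printf("%u threads\n", count);
	for (i = 0; i < count; i++) {
		/* drop the send right task_threads() gave us */
		mach_port_deallocate(mach_task_self(), threads[i]);
	}
	/* free the out-of-line array backing the port list */
	vm_deallocate(mach_task_self(), (vm_address_t)threads,
	    count * sizeof(thread_act_t));
}
#endif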
2025
2026 #define TASK_HOLD_NORMAL 0
2027 #define TASK_HOLD_PIDSUSPEND 1
2028 #define TASK_HOLD_LEGACY 2
2029 #define TASK_HOLD_LEGACY_ALL 3
2030
2031 static kern_return_t
2032 place_task_hold (
2033 register task_t task,
2034 int mode)
2035 {
2036 if (!task->active) {
2037 return (KERN_FAILURE);
2038 }
2039
2040 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2041 MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_SUSPEND) | DBG_FUNC_NONE,
2042 task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
2043 task->user_stop_count, task->user_stop_count + 1, 0);
2044
2045 #if MACH_ASSERT
2046 current_task()->suspends_outstanding++;
2047 #endif
2048
2049 if (mode == TASK_HOLD_LEGACY)
2050 task->legacy_stop_count++;
2051
2052 if (task->user_stop_count++ > 0) {
2053 /*
2054 * If the stop count was positive, the task is
2055 * already stopped and we can exit.
2056 */
2057 return (KERN_SUCCESS);
2058 }
2059
2060 /*
2061 * Put a kernel-level hold on the threads in the task (all
2062 * user-level task suspensions added together represent a
2063 * single kernel-level hold). We then wait for the threads
2064 * to stop executing user code.
2065 */
2066 task_hold_locked(task);
2067 task_wait_locked(task, FALSE);
2068
2069 return (KERN_SUCCESS);
2070 }
2071
2072 static kern_return_t
2073 release_task_hold (
2074 register task_t task,
2075 int mode)
2076 {
2077 register boolean_t release = FALSE;
2078
2079 if (!task->active) {
2080 return (KERN_FAILURE);
2081 }
2082
2083 if (mode == TASK_HOLD_PIDSUSPEND) {
2084 if (task->pidsuspended == FALSE) {
2085 return (KERN_FAILURE);
2086 }
2087 task->pidsuspended = FALSE;
2088 }
2089
2090 if (task->user_stop_count > (task->pidsuspended ? 1 : 0)) {
2091
2092 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2093 MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_RESUME) | DBG_FUNC_NONE,
2094 task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
2095 task->user_stop_count, mode, task->legacy_stop_count);
2096
2097 #if MACH_ASSERT
2098 /*
2099 * This is obviously not robust; if we suspend one task and then resume a different one,
2100 * we'll fly under the radar. This is only meant to catch the common case of a crashed
2101 * or buggy suspender.
2102 */
2103 current_task()->suspends_outstanding--;
2104 #endif
2105
2106 if (mode == TASK_HOLD_LEGACY_ALL) {
2107 if (task->legacy_stop_count >= task->user_stop_count) {
2108 task->user_stop_count = 0;
2109 release = TRUE;
2110 } else {
2111 task->user_stop_count -= task->legacy_stop_count;
2112 }
2113 task->legacy_stop_count = 0;
2114 } else {
2115 if (mode == TASK_HOLD_LEGACY && task->legacy_stop_count > 0)
2116 task->legacy_stop_count--;
2117 if (--task->user_stop_count == 0)
2118 release = TRUE;
2119 }
2120 }
2121 else {
2122 return (KERN_FAILURE);
2123 }
2124
2125 /*
2126 * Release the task if necessary.
2127 */
2128 if (release)
2129 task_release_locked(task);
2130
2131 return (KERN_SUCCESS);
2132 }
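/*
 * A minimal model (illustrative only, simplified to the TASK_HOLD_NORMAL
 * and TASK_HOLD_LEGACY paths) of the counting done by place_task_hold()
 * and release_task_hold() above: all user-level suspensions share a
 * single kernel-level hold, taken when user_stop_count goes 0 -> 1 and
 * dropped when the count falls back to its floor (1 while pidsuspended,
 * otherwise 0).  The names below are stand-ins, not kernel code.
 */
#if 0 /* illustrative model, not kernel code */
#include <stdbool.h>

struct hold_model {
	int	user_stop_count;
	int	legacy_stop_count;
	bool	pidsuspended;
	bool	kernel_hold;	/* stands in for task_hold_locked()/task_release_locked() */
};

static void
model_place_hold(struct hold_model *m, bool legacy)
{
	if (legacy)
		m->legacy_stop_count++;
	if (m->user_stop_count++ == 0)
		m->kernel_hold = true;	/* first suspension takes the real hold */
}

static void
model_release_hold(struct hold_model *m)
{
	int floor = m->pidsuspended ? 1 : 0;

	if (m->user_stop_count > floor && --m->user_stop_count == 0)
		m->kernel_hold = false;	/* last resume drops the real hold */
}
#endif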
2133
2134
2135 /*
2136 * task_suspend:
2137 *
2138 * Implement an (old-fashioned) user-level suspension on a task.
2139 *
2140 * Because the user isn't expecting to have to manage a suspension
2141 * token, we'll track it for him in the kernel in the form of a naked
2142 * send right to the task's resume port. All such send rights
2143 * account for a single suspension against the task (unlike task_suspend2()
2144 * where each caller gets a unique suspension count represented by a
2145 * unique send-once right).
2146 *
2147 * Conditions:
2148 * The caller holds a reference to the task
2149 */
2150 kern_return_t
2151 task_suspend(
2152 register task_t task)
2153 {
2154 kern_return_t kr;
2155 mach_port_t port, send, old_notify;
2156 mach_port_name_t name;
2157
2158 if (task == TASK_NULL || task == kernel_task)
2159 return (KERN_INVALID_ARGUMENT);
2160
2161 task_lock(task);
2162
2163 /*
2164 * Claim a send right on the task resume port, and request a no-senders
2165 * notification on that port (if none outstanding).
2166 */
2167 if (task->itk_resume == IP_NULL) {
2168 task->itk_resume = ipc_port_alloc_kernel();
2169 if (!IP_VALID(task->itk_resume))
2170 panic("failed to create resume port");
2171 ipc_kobject_set(task->itk_resume, (ipc_kobject_t)task, IKOT_TASK_RESUME);
2172 }
2173
2174 port = task->itk_resume;
2175 ip_lock(port);
2176 assert(ip_active(port));
2177
2178 send = ipc_port_make_send_locked(port);
2179 assert(IP_VALID(send));
2180
2181 if (port->ip_nsrequest == IP_NULL) {
2182 ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
2183 assert(old_notify == IP_NULL);
2184 /* port unlocked */
2185 } else {
2186 ip_unlock(port);
2187 }
2188
2189 /*
2190 * place a legacy hold on the task.
2191 */
2192 kr = place_task_hold(task, TASK_HOLD_LEGACY);
2193 if (kr != KERN_SUCCESS) {
2194 task_unlock(task);
2195 ipc_port_release_send(send);
2196 return kr;
2197 }
2198
2199 task_unlock(task);
2200
2201 /*
2202 * Copyout the send right into the calling task's IPC space. It won't know it is there,
2203 * but we'll look it up when calling a traditional resume. Any IPC operations that
2204 * deallocate the send right will auto-release the suspension.
2205 */
2206 if ((kr = ipc_kmsg_copyout_object(current_task()->itk_space, (ipc_object_t)send,
2207 MACH_MSG_TYPE_MOVE_SEND, &name)) != KERN_SUCCESS) {
2208 printf("warning: %s(%d) failed to copyout suspension token for pid %d with error: %d\n",
2209 proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
2210 task_pid(task), kr);
2211 return (kr);
2212 }
2213
2214 return (kr);
2215 }
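/*
 * Illustrative user-space sketch of the legacy pair above (assumes the
 * caller can obtain the task port, e.g. via task_for_pid() with the
 * required privileges): each successful task_suspend() hides a send
 * right to the resume port in the caller's IPC space, and task_resume()
 * (or any operation that deallocates that right) drops the hold.
 */
#if 0 /* example only; not compiled as part of the kernel */
#include <sys/types.h>
#include <mach/mach.h>
#include <mach/mach_traps.h>

static kern_return_t
pause_briefly(pid_t pid)
{
	mach_port_t target = MACH_PORT_NULL;
	kern_return_t kr;

	kr = task_for_pid(mach_task_self(), pid, &target);
	if (kr != KERN_SUCCESS)
		return kr;

	kr = task_suspend(target);		/* places one legacy hold */
	if (kr == KERN_SUCCESS) {
		/* ... inspect the stopped task ... */
		kr = task_resume(target);	/* releases that hold */
	}
	mach_port_deallocate(mach_task_self(), target);
	return kr;
}
#endif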
2216
2217 /*
2218 * task_resume:
2219 * Release a user hold on a task.
2220 *
2221 * Conditions:
2222 * The caller holds a reference to the task
2223 */
2224 kern_return_t
2225 task_resume(
2226 register task_t task)
2227 {
2228 kern_return_t kr;
2229 mach_port_name_t resume_port_name;
2230 ipc_entry_t resume_port_entry;
2231 ipc_space_t space = current_task()->itk_space;
2232
2233 if (task == TASK_NULL || task == kernel_task )
2234 return (KERN_INVALID_ARGUMENT);
2235
2236 /* release a legacy task hold */
2237 task_lock(task);
2238 kr = release_task_hold(task, TASK_HOLD_LEGACY);
2239 task_unlock(task);
2240
2241 is_write_lock(space);
2242 if (is_active(space) && IP_VALID(task->itk_resume) &&
2243 ipc_hash_lookup(space, (ipc_object_t)task->itk_resume, &resume_port_name, &resume_port_entry) == TRUE) {
2244 /*
2245 * We found a suspension token in the caller's IPC space. Release a send right to indicate that
2246 * we are holding one less legacy hold on the task from this caller. If the release failed,
2247 * go ahead and drop all the rights, as someone either already released our holds or the task
2248 * is gone.
2249 */
2250 if (kr == KERN_SUCCESS)
2251 ipc_right_dealloc(space, resume_port_name, resume_port_entry);
2252 else
2253 ipc_right_destroy(space, resume_port_name, resume_port_entry, FALSE, 0);
2254 /* space unlocked */
2255 } else {
2256 is_write_unlock(space);
2257 if (kr == KERN_SUCCESS)
2258 printf("warning: %s(%d) performed out-of-band resume on pid %d\n",
2259 proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
2260 task_pid(task));
2261 }
2262
2263 return kr;
2264 }
2265
2266 /*
2267 * Suspend the target task.
2268 * Making/holding a token/reference/port is the caller's responsibility.
2269 */
2270 kern_return_t
2271 task_suspend_internal(task_t task)
2272 {
2273 kern_return_t kr;
2274
2275 if (task == TASK_NULL || task == kernel_task)
2276 return (KERN_INVALID_ARGUMENT);
2277
2278 task_lock(task);
2279 kr = place_task_hold(task, TASK_HOLD_NORMAL);
2280 task_unlock(task);
2281 return (kr);
2282 }
2283
2284 /*
2285 * Suspend the target task, and return a suspension token. The token
2286 * represents a reference on the suspended task.
2287 */
2288 kern_return_t
2289 task_suspend2(
2290 register task_t task,
2291 task_suspension_token_t *suspend_token)
2292 {
2293 kern_return_t kr;
2294
2295 kr = task_suspend_internal(task);
2296 if (kr != KERN_SUCCESS) {
2297 *suspend_token = TASK_NULL;
2298 return (kr);
2299 }
2300
2301 /*
2302 * Take a reference on the target task and return that to the caller
2303 * as a "suspension token," which can be converted into an SO right to
2304 * the now-suspended task's resume port.
2305 */
2306 task_reference_internal(task);
2307 *suspend_token = task;
2308
2309 return (KERN_SUCCESS);
2310 }
2311
2312 /*
2313 * Resume the task
2314 * (reference/token/port management is caller's responsibility).
2315 */
2316 kern_return_t
2317 task_resume_internal(
2318 register task_suspension_token_t task)
2319 {
2320 kern_return_t kr;
2321
2322 if (task == TASK_NULL || task == kernel_task)
2323 return (KERN_INVALID_ARGUMENT);
2324
2325 task_lock(task);
2326 kr = release_task_hold(task, TASK_HOLD_NORMAL);
2327 task_unlock(task);
2328 return (kr);
2329 }
2330
2331 /*
2332 * Resume the task using a suspension token. Consumes the token's ref.
2333 */
2334 kern_return_t
2335 task_resume2(
2336 register task_suspension_token_t task)
2337 {
2338 kern_return_t kr;
2339
2340 kr = task_resume_internal(task);
2341 task_suspension_token_deallocate(task);
2342
2343 return (kr);
2344 }
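/*
 * Illustrative user-space sketch of the token interface above: each
 * task_suspend2() call yields its own suspension token, and only
 * task_resume2() on that token (which consumes its reference) releases
 * the corresponding hold, so unrelated suspenders cannot resume the
 * task out from under each other.
 */
#if 0 /* example only; not compiled as part of the kernel */
#include <mach/mach.h>

static kern_return_t
with_task_suspended(task_t target)
{
	task_suspension_token_t token = TASK_NULL;
	kern_return_t kr;

	kr = task_suspend2(target, &token);
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... target stays suspended while we hold the token ... */

	return task_resume2(token);	/* consumes the token's reference */
}
#endif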
2345
2346 boolean_t
2347 task_suspension_notify(mach_msg_header_t *request_header)
2348 {
2349 ipc_port_t port = (ipc_port_t) request_header->msgh_remote_port;
2350 task_t task = convert_port_to_task_suspension_token(port);
2351 mach_msg_type_number_t not_count;
2352
2353 if (task == TASK_NULL || task == kernel_task)
2354 return TRUE; /* nothing to do */
2355
2356 switch (request_header->msgh_id) {
2357
2358 case MACH_NOTIFY_SEND_ONCE:
2359 /* release the hold held by this specific send-once right */
2360 task_lock(task);
2361 release_task_hold(task, TASK_HOLD_NORMAL);
2362 task_unlock(task);
2363 break;
2364
2365 case MACH_NOTIFY_NO_SENDERS:
2366 not_count = ((mach_no_senders_notification_t *)request_header)->not_count;
2367
2368 task_lock(task);
2369 ip_lock(port);
2370 if (port->ip_mscount == not_count) {
2371
2372 /* release all the [remaining] outstanding legacy holds */
2373 assert(port->ip_nsrequest == IP_NULL);
2374 ip_unlock(port);
2375 release_task_hold(task, TASK_HOLD_LEGACY_ALL);
2376 task_unlock(task);
2377
2378 } else if (port->ip_nsrequest == IP_NULL) {
2379 ipc_port_t old_notify;
2380
2381 task_unlock(task);
2382 /* new send rights, re-arm notification at current make-send count */
2383 ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
2384 assert(old_notify == IP_NULL);
2385 /* port unlocked */
2386 } else {
2387 ip_unlock(port);
2388 task_unlock(task);
2389 }
2390 break;
2391
2392 default:
2393 break;
2394 }
2395
2396 task_suspension_token_deallocate(task); /* drop token reference */
2397 return TRUE;
2398 }
2399
2400 kern_return_t
2401 task_pidsuspend_locked(task_t task)
2402 {
2403 kern_return_t kr;
2404
2405 if (task->pidsuspended) {
2406 kr = KERN_FAILURE;
2407 goto out;
2408 }
2409
2410 task->pidsuspended = TRUE;
2411
2412 kr = place_task_hold(task, TASK_HOLD_PIDSUSPEND);
2413 if (kr != KERN_SUCCESS) {
2414 task->pidsuspended = FALSE;
2415 }
2416 out:
2417 return(kr);
2418 }
2419
2420
2421 /*
2422 * task_pidsuspend:
2423 *
2424 * Suspends a task by placing a hold on its threads.
2425 *
2426 * Conditions:
2427 * The caller holds a reference to the task
2428 */
2429 kern_return_t
2430 task_pidsuspend(
2431 register task_t task)
2432 {
2433 kern_return_t kr;
2434
2435 if (task == TASK_NULL || task == kernel_task)
2436 return (KERN_INVALID_ARGUMENT);
2437
2438 task_lock(task);
2439
2440 kr = task_pidsuspend_locked(task);
2441
2442 task_unlock(task);
2443
2444 return (kr);
2445 }
2446
2447 /* If enabled, we bring all the frozen pages back in prior to resumption; otherwise, they're faulted back in on demand */
2448 #define THAW_ON_RESUME 1
2449
2450 /*
2451 * task_pidresume:
2452 * Resumes a previously suspended task.
2453 *
2454 * Conditions:
2455 * The caller holds a reference to the task
2456 */
2457 kern_return_t
2458 task_pidresume(
2459 register task_t task)
2460 {
2461 kern_return_t kr;
2462
2463 if (task == TASK_NULL || task == kernel_task)
2464 return (KERN_INVALID_ARGUMENT);
2465
2466 task_lock(task);
2467
2468 #if (CONFIG_FREEZE && THAW_ON_RESUME)
2469
2470 while (task->changing_freeze_state) {
2471
2472 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
2473 task_unlock(task);
2474 thread_block(THREAD_CONTINUE_NULL);
2475
2476 task_lock(task);
2477 }
2478 task->changing_freeze_state = TRUE;
2479 #endif
2480
2481 kr = release_task_hold(task, TASK_HOLD_PIDSUSPEND);
2482
2483 task_unlock(task);
2484
2485 #if (CONFIG_FREEZE && THAW_ON_RESUME)
2486 if ((kr == KERN_SUCCESS) && (task->frozen == TRUE)) {
2487
2488 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
2489
2490 kr = KERN_SUCCESS;
2491 } else {
2492
2493 kr = vm_map_thaw(task->map);
2494 }
2495 }
2496 task_lock(task);
2497
2498 if (kr == KERN_SUCCESS)
2499 task->frozen = FALSE;
2500 task->changing_freeze_state = FALSE;
2501 thread_wakeup(&task->changing_freeze_state);
2502
2503 task_unlock(task);
2504 #endif
2505
2506 return (kr);
2507 }
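/*
 * task_pidsuspend()/task_pidresume() are reached through the private
 * pid_suspend()/pid_resume() system calls rather than a public Mach
 * interface.  A minimal sketch, assuming those private wrappers are
 * available to a suitably entitled caller (illustrative only):
 */
#if 0 /* example only; relies on private SPI */
#include <sys/types.h>

extern int pid_suspend(pid_t pid);	/* private wrapper over task_pidsuspend() */
extern int pid_resume(pid_t pid);	/* private wrapper over task_pidresume() */

static int
suspend_then_resume(pid_t pid)
{
	if (pid_suspend(pid) != 0)
		return -1;
	/* ... the process stays suspended (and may be frozen) here ... */
	return pid_resume(pid);
}
#endif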
2508
2509 #if CONFIG_FREEZE
2510
2511 /*
2512 * task_freeze:
2513 *
2514 * Freeze a task.
2515 *
2516 * Conditions:
2517 * The caller holds a reference to the task
2518 */
2519 extern void vm_wake_compactor_swapper();
2520 extern queue_head_t c_swapout_list_head;
2521
2522 kern_return_t
2523 task_freeze(
2524 register task_t task,
2525 uint32_t *purgeable_count,
2526 uint32_t *wired_count,
2527 uint32_t *clean_count,
2528 uint32_t *dirty_count,
2529 uint32_t dirty_budget,
2530 boolean_t *shared,
2531 boolean_t walk_only)
2532 {
2533 kern_return_t kr;
2534
2535 if (task == TASK_NULL || task == kernel_task)
2536 return (KERN_INVALID_ARGUMENT);
2537
2538 task_lock(task);
2539
2540 while (task->changing_freeze_state) {
2541
2542 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
2543 task_unlock(task);
2544 thread_block(THREAD_CONTINUE_NULL);
2545
2546 task_lock(task);
2547 }
2548 if (task->frozen) {
2549 task_unlock(task);
2550 return (KERN_FAILURE);
2551 }
2552 task->changing_freeze_state = TRUE;
2553
2554 task_unlock(task);
2555
2556 if (walk_only) {
2557 kr = vm_map_freeze_walk(task->map, purgeable_count, wired_count, clean_count, dirty_count, dirty_budget, shared);
2558 } else {
2559 kr = vm_map_freeze(task->map, purgeable_count, wired_count, clean_count, dirty_count, dirty_budget, shared);
2560 }
2561
2562 task_lock(task);
2563
2564 if (walk_only == FALSE && kr == KERN_SUCCESS)
2565 task->frozen = TRUE;
2566 task->changing_freeze_state = FALSE;
2567 thread_wakeup(&task->changing_freeze_state);
2568
2569 task_unlock(task);
2570
2571 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
2572 vm_wake_compactor_swapper();
2573 /*
2574 * We do an explicit wakeup of the swapout thread here
2575 * because the compact_and_swap routines don't have
2576 * knowledge about these kinds of "per-task packed c_segs"
2577 * and so will not be evaluating whether we need to do
2578 * a wakeup there.
2579 */
2580 thread_wakeup((event_t)&c_swapout_list_head);
2581 }
2582
2583 return (kr);
2584 }
2585
2586 /*
2587 * task_thaw:
2588 *
2589 * Thaw a currently frozen task.
2590 *
2591 * Conditions:
2592 * The caller holds a reference to the task
2593 */
2594 kern_return_t
2595 task_thaw(
2596 register task_t task)
2597 {
2598 kern_return_t kr;
2599
2600 if (task == TASK_NULL || task == kernel_task)
2601 return (KERN_INVALID_ARGUMENT);
2602
2603 task_lock(task);
2604
2605 while (task->changing_freeze_state) {
2606
2607 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
2608 task_unlock(task);
2609 thread_block(THREAD_CONTINUE_NULL);
2610
2611 task_lock(task);
2612 }
2613 if (!task->frozen) {
2614 task_unlock(task);
2615 return (KERN_FAILURE);
2616 }
2617 task->changing_freeze_state = TRUE;
2618
2619 if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE) {
2620 task_unlock(task);
2621
2622 kr = vm_map_thaw(task->map);
2623
2624 task_lock(task);
2625
2626 if (kr == KERN_SUCCESS)
2627 task->frozen = FALSE;
2628 } else {
2629 task->frozen = FALSE;
2630 kr = KERN_SUCCESS;
2631 }
2632
2633 task->changing_freeze_state = FALSE;
2634 thread_wakeup(&task->changing_freeze_state);
2635
2636 task_unlock(task);
2637
2638 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
2639 vm_wake_compactor_swapper();
2640 }
2641
2642 return (kr);
2643 }
2644
2645 #endif /* CONFIG_FREEZE */
2646
2647 kern_return_t
2648 host_security_set_task_token(
2649 host_security_t host_security,
2650 task_t task,
2651 security_token_t sec_token,
2652 audit_token_t audit_token,
2653 host_priv_t host_priv)
2654 {
2655 ipc_port_t host_port;
2656 kern_return_t kr;
2657
2658 if (task == TASK_NULL)
2659 return(KERN_INVALID_ARGUMENT);
2660
2661 if (host_security == HOST_NULL)
2662 return(KERN_INVALID_SECURITY);
2663
2664 task_lock(task);
2665 task->sec_token = sec_token;
2666 task->audit_token = audit_token;
2667
2668 task_unlock(task);
2669
2670 if (host_priv != HOST_PRIV_NULL) {
2671 kr = host_get_host_priv_port(host_priv, &host_port);
2672 } else {
2673 kr = host_get_host_port(host_priv_self(), &host_port);
2674 }
2675 assert(kr == KERN_SUCCESS);
2676 kr = task_set_special_port(task, TASK_HOST_PORT, host_port);
2677 return(kr);
2678 }
2679
2680 kern_return_t
2681 task_send_trace_memory(
2682 task_t target_task,
2683 __unused uint32_t pid,
2684 __unused uint64_t uniqueid)
2685 {
2686 kern_return_t kr = KERN_INVALID_ARGUMENT;
2687 if (target_task == TASK_NULL)
2688 return (KERN_INVALID_ARGUMENT);
2689
2690 #if CONFIG_ATM
2691 kr = atm_send_proc_inspect_notification(target_task,
2692 pid,
2693 uniqueid);
2694
2695 #endif
2696 return (kr);
2697 }
2698 /*
2699 * This routine was added, pretty much exclusively, for registering the
2700 * RPC glue vector for in-kernel short circuited tasks. Rather than
2701 * removing it completely, I have only disabled that feature (which was
2702 * the only feature at the time). It just appears that we are going to
2703 * want to add some user data to tasks in the future (e.g. bsd info,
2704 * task names, etc...), so I left it in the formal task interface.
2705 */
2706 kern_return_t
2707 task_set_info(
2708 task_t task,
2709 task_flavor_t flavor,
2710 __unused task_info_t task_info_in, /* pointer to IN array */
2711 __unused mach_msg_type_number_t task_info_count)
2712 {
2713 if (task == TASK_NULL)
2714 return(KERN_INVALID_ARGUMENT);
2715
2716 switch (flavor) {
2717
2718 #if CONFIG_ATM
2719 case TASK_TRACE_MEMORY_INFO:
2720 {
2721 if (task_info_count != TASK_TRACE_MEMORY_INFO_COUNT)
2722 return (KERN_INVALID_ARGUMENT);
2723
2724 assert(task_info_in != NULL);
2725 task_trace_memory_info_t mem_info;
2726 mem_info = (task_trace_memory_info_t) task_info_in;
2727 kern_return_t kr = atm_register_trace_memory(task,
2728 mem_info->user_memory_address,
2729 mem_info->buffer_size);
2730 return kr;
2731 break;
2732 }
2733
2734 #endif
2735 default:
2736 return (KERN_INVALID_ARGUMENT);
2737 }
2738 return (KERN_SUCCESS);
2739 }
2740
2741 int radar_20146450 = 1;
2742 kern_return_t
2743 task_info(
2744 task_t task,
2745 task_flavor_t flavor,
2746 task_info_t task_info_out,
2747 mach_msg_type_number_t *task_info_count)
2748 {
2749 kern_return_t error = KERN_SUCCESS;
2750
2751 if (task == TASK_NULL)
2752 return (KERN_INVALID_ARGUMENT);
2753
2754 task_lock(task);
2755
2756 if ((task != current_task()) && (!task->active)) {
2757 task_unlock(task);
2758 return (KERN_INVALID_ARGUMENT);
2759 }
2760
2761 switch (flavor) {
2762
2763 case TASK_BASIC_INFO_32:
2764 case TASK_BASIC2_INFO_32:
2765 {
2766 task_basic_info_32_t basic_info;
2767 vm_map_t map;
2768 clock_sec_t secs;
2769 clock_usec_t usecs;
2770
2771 if (*task_info_count < TASK_BASIC_INFO_32_COUNT) {
2772 error = KERN_INVALID_ARGUMENT;
2773 break;
2774 }
2775
2776 basic_info = (task_basic_info_32_t)task_info_out;
2777
2778 map = (task == kernel_task)? kernel_map: task->map;
2779 basic_info->virtual_size = (typeof(basic_info->virtual_size))map->size;
2780 if (flavor == TASK_BASIC2_INFO_32) {
2781 /*
2782 * The "BASIC2" flavor gets the maximum resident
2783 * size instead of the current resident size...
2784 */
2785 basic_info->resident_size = pmap_resident_max(map->pmap);
2786 } else {
2787 basic_info->resident_size = pmap_resident_count(map->pmap);
2788 }
2789 basic_info->resident_size *= PAGE_SIZE;
2790
2791 basic_info->policy = ((task != kernel_task)?
2792 POLICY_TIMESHARE: POLICY_RR);
2793 basic_info->suspend_count = task->user_stop_count;
2794
2795 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
2796 basic_info->user_time.seconds =
2797 (typeof(basic_info->user_time.seconds))secs;
2798 basic_info->user_time.microseconds = usecs;
2799
2800 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
2801 basic_info->system_time.seconds =
2802 (typeof(basic_info->system_time.seconds))secs;
2803 basic_info->system_time.microseconds = usecs;
2804
2805 *task_info_count = TASK_BASIC_INFO_32_COUNT;
2806 break;
2807 }
2808
2809 case TASK_BASIC_INFO_64:
2810 {
2811 task_basic_info_64_t basic_info;
2812 vm_map_t map;
2813 clock_sec_t secs;
2814 clock_usec_t usecs;
2815
2816 if (*task_info_count < TASK_BASIC_INFO_64_COUNT) {
2817 error = KERN_INVALID_ARGUMENT;
2818 break;
2819 }
2820
2821 basic_info = (task_basic_info_64_t)task_info_out;
2822
2823 map = (task == kernel_task)? kernel_map: task->map;
2824 basic_info->virtual_size = map->size;
2825 basic_info->resident_size =
2826 (mach_vm_size_t)(pmap_resident_count(map->pmap))
2827 * PAGE_SIZE_64;
2828
2829 basic_info->policy = ((task != kernel_task)?
2830 POLICY_TIMESHARE: POLICY_RR);
2831 basic_info->suspend_count = task->user_stop_count;
2832
2833 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
2834 basic_info->user_time.seconds =
2835 (typeof(basic_info->user_time.seconds))secs;
2836 basic_info->user_time.microseconds = usecs;
2837
2838 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
2839 basic_info->system_time.seconds =
2840 (typeof(basic_info->system_time.seconds))secs;
2841 basic_info->system_time.microseconds = usecs;
2842
2843 *task_info_count = TASK_BASIC_INFO_64_COUNT;
2844 break;
2845 }
2846
2847 case MACH_TASK_BASIC_INFO:
2848 {
2849 mach_task_basic_info_t basic_info;
2850 vm_map_t map;
2851 clock_sec_t secs;
2852 clock_usec_t usecs;
2853
2854 if (*task_info_count < MACH_TASK_BASIC_INFO_COUNT) {
2855 error = KERN_INVALID_ARGUMENT;
2856 break;
2857 }
2858
2859 basic_info = (mach_task_basic_info_t)task_info_out;
2860
2861 map = (task == kernel_task) ? kernel_map : task->map;
2862
2863 basic_info->virtual_size = map->size;
2864
2865 basic_info->resident_size =
2866 (mach_vm_size_t)(pmap_resident_count(map->pmap));
2867 basic_info->resident_size *= PAGE_SIZE_64;
2868
2869 basic_info->resident_size_max =
2870 (mach_vm_size_t)(pmap_resident_max(map->pmap));
2871 basic_info->resident_size_max *= PAGE_SIZE_64;
2872
2873 basic_info->policy = ((task != kernel_task) ?
2874 POLICY_TIMESHARE : POLICY_RR);
2875
2876 basic_info->suspend_count = task->user_stop_count;
2877
2878 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
2879 basic_info->user_time.seconds =
2880 (typeof(basic_info->user_time.seconds))secs;
2881 basic_info->user_time.microseconds = usecs;
2882
2883 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
2884 basic_info->system_time.seconds =
2885 (typeof(basic_info->system_time.seconds))secs;
2886 basic_info->system_time.microseconds = usecs;
2887
2888 *task_info_count = MACH_TASK_BASIC_INFO_COUNT;
2889 break;
2890 }
2891
2892 case TASK_THREAD_TIMES_INFO:
2893 {
2894 register task_thread_times_info_t times_info;
2895 register thread_t thread;
2896
2897 if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) {
2898 error = KERN_INVALID_ARGUMENT;
2899 break;
2900 }
2901
2902 times_info = (task_thread_times_info_t) task_info_out;
2903 times_info->user_time.seconds = 0;
2904 times_info->user_time.microseconds = 0;
2905 times_info->system_time.seconds = 0;
2906 times_info->system_time.microseconds = 0;
2907
2908
2909 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2910 time_value_t user_time, system_time;
2911
2912 if (thread->options & TH_OPT_IDLE_THREAD)
2913 continue;
2914
2915 thread_read_times(thread, &user_time, &system_time);
2916
2917 time_value_add(&times_info->user_time, &user_time);
2918 time_value_add(&times_info->system_time, &system_time);
2919 }
2920
2921 *task_info_count = TASK_THREAD_TIMES_INFO_COUNT;
2922 break;
2923 }
2924
2925 case TASK_ABSOLUTETIME_INFO:
2926 {
2927 task_absolutetime_info_t info;
2928 register thread_t thread;
2929
2930 if (*task_info_count < TASK_ABSOLUTETIME_INFO_COUNT) {
2931 error = KERN_INVALID_ARGUMENT;
2932 break;
2933 }
2934
2935 info = (task_absolutetime_info_t)task_info_out;
2936 info->threads_user = info->threads_system = 0;
2937
2938
2939 info->total_user = task->total_user_time;
2940 info->total_system = task->total_system_time;
2941
2942 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2943 uint64_t tval;
2944 spl_t x;
2945
2946 if (thread->options & TH_OPT_IDLE_THREAD)
2947 continue;
2948
2949 x = splsched();
2950 thread_lock(thread);
2951
2952 tval = timer_grab(&thread->user_timer);
2953 info->threads_user += tval;
2954 info->total_user += tval;
2955
2956 tval = timer_grab(&thread->system_timer);
2957 if (thread->precise_user_kernel_time) {
2958 info->threads_system += tval;
2959 info->total_system += tval;
2960 } else {
2961 /* system_timer may represent either sys or user */
2962 info->threads_user += tval;
2963 info->total_user += tval;
2964 }
2965
2966 thread_unlock(thread);
2967 splx(x);
2968 }
2969
2970
2971 *task_info_count = TASK_ABSOLUTETIME_INFO_COUNT;
2972 break;
2973 }
2974
2975 case TASK_DYLD_INFO:
2976 {
2977 task_dyld_info_t info;
2978
2979 /*
2980 * We added the format field to TASK_DYLD_INFO output. For
2981 * temporary backward compatibility, accept the fact that
2982 * clients may ask for the old version - distinguished by the
2983 * size of the expected result structure.
2984 */
2985 #define TASK_LEGACY_DYLD_INFO_COUNT \
2986 offsetof(struct task_dyld_info, all_image_info_format)/sizeof(natural_t)
2987
2988 if (*task_info_count < TASK_LEGACY_DYLD_INFO_COUNT) {
2989 error = KERN_INVALID_ARGUMENT;
2990 break;
2991 }
2992
2993 info = (task_dyld_info_t)task_info_out;
2994 info->all_image_info_addr = task->all_image_info_addr;
2995 info->all_image_info_size = task->all_image_info_size;
2996
2997 /* only set format on output for those expecting it */
2998 if (*task_info_count >= TASK_DYLD_INFO_COUNT) {
2999 info->all_image_info_format = task_has_64BitAddr(task) ?
3000 TASK_DYLD_ALL_IMAGE_INFO_64 :
3001 TASK_DYLD_ALL_IMAGE_INFO_32 ;
3002 *task_info_count = TASK_DYLD_INFO_COUNT;
3003 } else {
3004 *task_info_count = TASK_LEGACY_DYLD_INFO_COUNT;
3005 }
3006 break;
3007 }
3008
3009 case TASK_EXTMOD_INFO:
3010 {
3011 task_extmod_info_t info;
3012 void *p;
3013
3014 if (*task_info_count < TASK_EXTMOD_INFO_COUNT) {
3015 error = KERN_INVALID_ARGUMENT;
3016 break;
3017 }
3018
3019 info = (task_extmod_info_t)task_info_out;
3020
3021 p = get_bsdtask_info(task);
3022 if (p) {
3023 proc_getexecutableuuid(p, info->task_uuid, sizeof(info->task_uuid));
3024 } else {
3025 bzero(info->task_uuid, sizeof(info->task_uuid));
3026 }
3027 info->extmod_statistics = task->extmod_statistics;
3028 *task_info_count = TASK_EXTMOD_INFO_COUNT;
3029
3030 break;
3031 }
3032
3033 case TASK_KERNELMEMORY_INFO:
3034 {
3035 task_kernelmemory_info_t tkm_info;
3036 ledger_amount_t credit, debit;
3037
3038 if (*task_info_count < TASK_KERNELMEMORY_INFO_COUNT) {
3039 error = KERN_INVALID_ARGUMENT;
3040 break;
3041 }
3042
3043 tkm_info = (task_kernelmemory_info_t) task_info_out;
3044 tkm_info->total_palloc = 0;
3045 tkm_info->total_pfree = 0;
3046 tkm_info->total_salloc = 0;
3047 tkm_info->total_sfree = 0;
3048
3049 if (task == kernel_task) {
3050 /*
3051 * All shared allocs/frees from other tasks count against
3052 * the kernel private memory usage. If we are looking up
3053 * info for the kernel task, gather from everywhere.
3054 */
3055 task_unlock(task);
3056
3057 /* start by accounting for all the terminated tasks against the kernel */
3058 tkm_info->total_palloc = tasks_tkm_private.alloc + tasks_tkm_shared.alloc;
3059 tkm_info->total_pfree = tasks_tkm_private.free + tasks_tkm_shared.free;
3060
3061 /* count all other task/thread shared alloc/free against the kernel */
3062 lck_mtx_lock(&tasks_threads_lock);
3063
3064 /* XXX this really shouldn't be using the function parameter 'task' as a local var! */
3065 queue_iterate(&tasks, task, task_t, tasks) {
3066 if (task == kernel_task) {
3067 if (ledger_get_entries(task->ledger,
3068 task_ledgers.tkm_private, &credit,
3069 &debit) == KERN_SUCCESS) {
3070 tkm_info->total_palloc += credit;
3071 tkm_info->total_pfree += debit;
3072 }
3073 }
3074 if (!ledger_get_entries(task->ledger,
3075 task_ledgers.tkm_shared, &credit, &debit)) {
3076 tkm_info->total_palloc += credit;
3077 tkm_info->total_pfree += debit;
3078 }
3079 }
3080 lck_mtx_unlock(&tasks_threads_lock);
3081 } else {
3082 if (!ledger_get_entries(task->ledger,
3083 task_ledgers.tkm_private, &credit, &debit)) {
3084 tkm_info->total_palloc = credit;
3085 tkm_info->total_pfree = debit;
3086 }
3087 if (!ledger_get_entries(task->ledger,
3088 task_ledgers.tkm_shared, &credit, &debit)) {
3089 tkm_info->total_salloc = credit;
3090 tkm_info->total_sfree = debit;
3091 }
3092 task_unlock(task);
3093 }
3094
3095 *task_info_count = TASK_KERNELMEMORY_INFO_COUNT;
3096 return KERN_SUCCESS;
3097 }
3098
3099 /* OBSOLETE */
3100 case TASK_SCHED_FIFO_INFO:
3101 {
3102
3103 if (*task_info_count < POLICY_FIFO_BASE_COUNT) {
3104 error = KERN_INVALID_ARGUMENT;
3105 break;
3106 }
3107
3108 error = KERN_INVALID_POLICY;
3109 break;
3110 }
3111
3112 /* OBSOLETE */
3113 case TASK_SCHED_RR_INFO:
3114 {
3115 register policy_rr_base_t rr_base;
3116 uint32_t quantum_time;
3117 uint64_t quantum_ns;
3118
3119 if (*task_info_count < POLICY_RR_BASE_COUNT) {
3120 error = KERN_INVALID_ARGUMENT;
3121 break;
3122 }
3123
3124 rr_base = (policy_rr_base_t) task_info_out;
3125
3126 if (task != kernel_task) {
3127 error = KERN_INVALID_POLICY;
3128 break;
3129 }
3130
3131 rr_base->base_priority = task->priority;
3132
3133 quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
3134 absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
3135
3136 rr_base->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
3137
3138 *task_info_count = POLICY_RR_BASE_COUNT;
3139 break;
3140 }
3141
3142 /* OBSOLETE */
3143 case TASK_SCHED_TIMESHARE_INFO:
3144 {
3145 register policy_timeshare_base_t ts_base;
3146
3147 if (*task_info_count < POLICY_TIMESHARE_BASE_COUNT) {
3148 error = KERN_INVALID_ARGUMENT;
3149 break;
3150 }
3151
3152 ts_base = (policy_timeshare_base_t) task_info_out;
3153
3154 if (task == kernel_task) {
3155 error = KERN_INVALID_POLICY;
3156 break;
3157 }
3158
3159 ts_base->base_priority = task->priority;
3160
3161 *task_info_count = POLICY_TIMESHARE_BASE_COUNT;
3162 break;
3163 }
3164
3165 case TASK_SECURITY_TOKEN:
3166 {
3167 register security_token_t *sec_token_p;
3168
3169 if (*task_info_count < TASK_SECURITY_TOKEN_COUNT) {
3170 error = KERN_INVALID_ARGUMENT;
3171 break;
3172 }
3173
3174 sec_token_p = (security_token_t *) task_info_out;
3175
3176 *sec_token_p = task->sec_token;
3177
3178 *task_info_count = TASK_SECURITY_TOKEN_COUNT;
3179 break;
3180 }
3181
3182 case TASK_AUDIT_TOKEN:
3183 {
3184 register audit_token_t *audit_token_p;
3185
3186 if (*task_info_count < TASK_AUDIT_TOKEN_COUNT) {
3187 error = KERN_INVALID_ARGUMENT;
3188 break;
3189 }
3190
3191 audit_token_p = (audit_token_t *) task_info_out;
3192
3193 *audit_token_p = task->audit_token;
3194
3195 *task_info_count = TASK_AUDIT_TOKEN_COUNT;
3196 break;
3197 }
3198
3199 case TASK_SCHED_INFO:
3200 error = KERN_INVALID_ARGUMENT;
3201 break;
3202
3203 case TASK_EVENTS_INFO:
3204 {
3205 register task_events_info_t events_info;
3206 register thread_t thread;
3207
3208 if (*task_info_count < TASK_EVENTS_INFO_COUNT) {
3209 error = KERN_INVALID_ARGUMENT;
3210 break;
3211 }
3212
3213 events_info = (task_events_info_t) task_info_out;
3214
3215
3216 events_info->faults = task->faults;
3217 events_info->pageins = task->pageins;
3218 events_info->cow_faults = task->cow_faults;
3219 events_info->messages_sent = task->messages_sent;
3220 events_info->messages_received = task->messages_received;
3221 events_info->syscalls_mach = task->syscalls_mach;
3222 events_info->syscalls_unix = task->syscalls_unix;
3223
3224 events_info->csw = task->c_switch;
3225
3226 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3227 events_info->csw += thread->c_switch;
3228 events_info->syscalls_mach += thread->syscalls_mach;
3229 events_info->syscalls_unix += thread->syscalls_unix;
3230 }
3231
3232
3233 *task_info_count = TASK_EVENTS_INFO_COUNT;
3234 break;
3235 }
3236 case TASK_AFFINITY_TAG_INFO:
3237 {
3238 if (*task_info_count < TASK_AFFINITY_TAG_INFO_COUNT) {
3239 error = KERN_INVALID_ARGUMENT;
3240 break;
3241 }
3242
3243 error = task_affinity_info(task, task_info_out, task_info_count);
3244 break;
3245 }
3246 case TASK_POWER_INFO:
3247 {
3248 if (*task_info_count < TASK_POWER_INFO_COUNT) {
3249 error = KERN_INVALID_ARGUMENT;
3250 break;
3251 }
3252
3253 task_power_info_locked(task, (task_power_info_t)task_info_out, NULL);
3254 break;
3255 }
3256
3257 case TASK_POWER_INFO_V2:
3258 {
3259 if (*task_info_count < TASK_POWER_INFO_V2_COUNT) {
3260 error = KERN_INVALID_ARGUMENT;
3261 break;
3262 }
3263 task_power_info_v2_t tpiv2 = (task_power_info_v2_t) task_info_out;
3264 task_power_info_locked(task, &tpiv2->cpu_energy, &tpiv2->gpu_energy);
3265 break;
3266 }
3267
3268 case TASK_VM_INFO:
3269 case TASK_VM_INFO_PURGEABLE:
3270 {
3271 task_vm_info_t vm_info;
3272 vm_map_t map;
3273
3274 if (*task_info_count < TASK_VM_INFO_REV0_COUNT) {
3275 error = KERN_INVALID_ARGUMENT;
3276 break;
3277 }
3278
3279 vm_info = (task_vm_info_t)task_info_out;
3280
3281 if (task == kernel_task) {
3282 map = kernel_map;
3283 /* no lock */
3284 } else {
3285 map = task->map;
3286 vm_map_lock_read(map);
3287 }
3288
3289 vm_info->virtual_size = (typeof(vm_info->virtual_size))map->size;
3290 vm_info->region_count = map->hdr.nentries;
3291 vm_info->page_size = vm_map_page_size(map);
3292
3293 vm_info->resident_size = pmap_resident_count(map->pmap);
3294 vm_info->resident_size *= PAGE_SIZE;
3295 vm_info->resident_size_peak = pmap_resident_max(map->pmap);
3296 vm_info->resident_size_peak *= PAGE_SIZE;
3297
3298 #define _VM_INFO(_name) \
3299 vm_info->_name = ((mach_vm_size_t) map->pmap->stats._name) * PAGE_SIZE
3300
3301 _VM_INFO(device);
3302 _VM_INFO(device_peak);
3303 _VM_INFO(external);
3304 _VM_INFO(external_peak);
3305 _VM_INFO(internal);
3306 _VM_INFO(internal_peak);
3307 _VM_INFO(reusable);
3308 _VM_INFO(reusable_peak);
3309 _VM_INFO(compressed);
3310 _VM_INFO(compressed_peak);
3311 _VM_INFO(compressed_lifetime);
3312
3313 vm_info->purgeable_volatile_pmap = 0;
3314 vm_info->purgeable_volatile_resident = 0;
3315 vm_info->purgeable_volatile_virtual = 0;
3316 if (task == kernel_task) {
3317 /*
3318 * We do not maintain the detailed stats for the
3319 * kernel_pmap, so just count everything as
3320 * "internal"...
3321 */
3322 vm_info->internal = vm_info->resident_size;
3323 /*
3324 * ... but since the memory held by the VM compressor
3325 * in the kernel address space ought to be attributed
3326 * to user-space tasks, we subtract it from "internal"
3327 * to give memory reporting tools a more accurate idea
3328 * of what the kernel itself is actually using, instead
3329 * of making it look like the kernel is leaking memory
3330 * when the system is under memory pressure.
3331 */
3332 vm_info->internal -= (VM_PAGE_COMPRESSOR_COUNT *
3333 PAGE_SIZE);
3334 } else {
3335 mach_vm_size_t volatile_virtual_size;
3336 mach_vm_size_t volatile_resident_size;
3337 mach_vm_size_t volatile_compressed_size;
3338 mach_vm_size_t volatile_pmap_size;
3339 mach_vm_size_t volatile_compressed_pmap_size;
3340 kern_return_t kr;
3341
3342 if (flavor == TASK_VM_INFO_PURGEABLE) {
3343 kr = vm_map_query_volatile(
3344 map,
3345 &volatile_virtual_size,
3346 &volatile_resident_size,
3347 &volatile_compressed_size,
3348 &volatile_pmap_size,
3349 &volatile_compressed_pmap_size);
3350 if (kr == KERN_SUCCESS) {
3351 vm_info->purgeable_volatile_pmap =
3352 volatile_pmap_size;
3353 if (radar_20146450) {
3354 vm_info->compressed -=
3355 volatile_compressed_pmap_size;
3356 }
3357 vm_info->purgeable_volatile_resident =
3358 volatile_resident_size;
3359 vm_info->purgeable_volatile_virtual =
3360 volatile_virtual_size;
3361 }
3362 }
3363 vm_map_unlock_read(map);
3364 }
3365
3366 if (*task_info_count >= TASK_VM_INFO_COUNT) {
3367 vm_info->phys_footprint = 0;
3368 *task_info_count = TASK_VM_INFO_COUNT;
3369 } else {
3370 *task_info_count = TASK_VM_INFO_REV0_COUNT;
3371 }
3372
3373 break;
3374 }
3375
3376 case TASK_WAIT_STATE_INFO:
3377 {
3378 /*
3379 * Deprecated flavor. Currently allowing some results until all users
3380 * stop calling it. The results may not be accurate.
3381 */
3382 task_wait_state_info_t wait_state_info;
3383 uint64_t total_sfi_ledger_val = 0;
3384
3385 if (*task_info_count < TASK_WAIT_STATE_INFO_COUNT) {
3386 error = KERN_INVALID_ARGUMENT;
3387 break;
3388 }
3389
3390 wait_state_info = (task_wait_state_info_t) task_info_out;
3391
3392 wait_state_info->total_wait_state_time = 0;
3393 bzero(wait_state_info->_reserved, sizeof(wait_state_info->_reserved));
3394
3395 #if CONFIG_SCHED_SFI
3396 int i, prev_lentry = -1;
3397 int64_t val_credit, val_debit;
3398
3399 for (i = 0; i < MAX_SFI_CLASS_ID; i++){
3400 val_credit = 0;
3401 /*
3402 * checking with prev_lentry != entry ensures adjacent classes
3403 * which share the same ledger do not add wait times twice.
3404 * Note: Use ledger() call to get data for each individual sfi class.
3405 */
3406 if (prev_lentry != task_ledgers.sfi_wait_times[i] &&
3407 KERN_SUCCESS == ledger_get_entries(task->ledger,
3408 task_ledgers.sfi_wait_times[i], &val_credit, &val_debit)) {
3409 total_sfi_ledger_val += val_credit;
3410 }
3411 prev_lentry = task_ledgers.sfi_wait_times[i];
3412 }
3413
3414 #endif /* CONFIG_SCHED_SFI */
3415 wait_state_info->total_wait_sfi_state_time = total_sfi_ledger_val;
3416 *task_info_count = TASK_WAIT_STATE_INFO_COUNT;
3417
3418 break;
3419 }
3420 case TASK_VM_INFO_PURGEABLE_ACCOUNT:
3421 {
3422 #if DEVELOPMENT || DEBUG
3423 pvm_account_info_t acnt_info;
3424
3425 if (*task_info_count < PVM_ACCOUNT_INFO_COUNT) {
3426 error = KERN_INVALID_ARGUMENT;
3427 break;
3428 }
3429
3430 if (task_info_out == NULL) {
3431 error = KERN_INVALID_ARGUMENT;
3432 break;
3433 }
3434
3435 acnt_info = (pvm_account_info_t) task_info_out;
3436
3437 error = vm_purgeable_account(task, acnt_info);
3438
3439 *task_info_count = PVM_ACCOUNT_INFO_COUNT;
3440
3441 break;
3442 #else /* DEVELOPMENT || DEBUG */
3443 error = KERN_NOT_SUPPORTED;
3444 break;
3445 #endif /* DEVELOPMENT || DEBUG */
3446 }
3447 case TASK_FLAGS_INFO:
3448 {
3449 task_flags_info_t flags_info;
3450
3451 if (*task_info_count < TASK_FLAGS_INFO_COUNT) {
3452 error = KERN_INVALID_ARGUMENT;
3453 break;
3454 }
3455
3456 flags_info = (task_flags_info_t)task_info_out;
3457
3458 /* only publish the 64-bit flag of the task */
3459 flags_info->flags = task->t_flags & TF_64B_ADDR;
3460
3461 *task_info_count = TASK_FLAGS_INFO_COUNT;
3462 break;
3463 }
3464
3465 case TASK_DEBUG_INFO_INTERNAL:
3466 {
3467 #if DEVELOPMENT || DEBUG
3468 task_debug_info_internal_t dbg_info;
3469 if (*task_info_count < TASK_DEBUG_INFO_INTERNAL_COUNT) {
3470 error = KERN_NOT_SUPPORTED;
3471 break;
3472 }
3473
3474 if (task_info_out == NULL) {
3475 error = KERN_INVALID_ARGUMENT;
3476 break;
3477 }
3478 dbg_info = (task_debug_info_internal_t) task_info_out;
3479 dbg_info->ipc_space_size = 0;
3480 if (task->itk_space){
3481 dbg_info->ipc_space_size = task->itk_space->is_table_size;
3482 }
3483
3484 error = KERN_SUCCESS;
3485 *task_info_count = TASK_DEBUG_INFO_INTERNAL_COUNT;
3486 break;
3487 #else /* DEVELOPMENT || DEBUG */
3488 error = KERN_NOT_SUPPORTED;
3489 break;
3490 #endif /* DEVELOPMENT || DEBUG */
3491 }
3492 default:
3493 error = KERN_INVALID_ARGUMENT;
3494 }
3495
3496 task_unlock(task);
3497 return (error);
3498 }
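/*
 * Illustrative user-space sketch of one common flavor handled above,
 * assuming only the public <mach/mach.h> declarations: the count is
 * in/out, so the caller passes its buffer capacity and the kernel
 * reports how much it filled in.
 */
#if 0 /* example only; not compiled as part of the kernel */
#include <mach/mach.h>
#include <stdio.h>

static void
print_my_memory_usage(void)
{
	mach_task_basic_info_data_t info;
	mach_msg_type_number_t count = MACH_TASK_BASIC_INFO_COUNT;

	if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO,
	    (task_info_t)&info, &count) != KERN_SUCCESS)
		return;

	printf("resident %llu bytes (peak %llu), virtual %llu bytes\n",
	    (unsigned long long)info.resident_size,
	    (unsigned long long)info.resident_size_max,
	    (unsigned long long)info.virtual_size);
}
#endif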
3499
3500 /*
3501 * task_power_info
3502 *
3503 * Returns power stats for the task.
3504 * Note: Called with task locked.
3505 */
3506 void
3507 task_power_info_locked(
3508 task_t task,
3509 task_power_info_t info,
3510 gpu_energy_data_t ginfo)
3511 {
3512 thread_t thread;
3513 ledger_amount_t tmp;
3514
3515 task_lock_assert_owned(task);
3516
3517 ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
3518 (ledger_amount_t *)&info->task_interrupt_wakeups, &tmp);
3519 ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
3520 (ledger_amount_t *)&info->task_platform_idle_wakeups, &tmp);
3521
3522 info->task_timer_wakeups_bin_1 = task->task_timer_wakeups_bin_1;
3523 info->task_timer_wakeups_bin_2 = task->task_timer_wakeups_bin_2;
3524
3525 info->total_user = task->total_user_time;
3526 info->total_system = task->total_system_time;
3527
3528 if (ginfo) {
3529 ginfo->task_gpu_utilisation = task->task_gpu_ns;
3530 }
3531
3532 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3533 uint64_t tval;
3534 spl_t x;
3535
3536 if (thread->options & TH_OPT_IDLE_THREAD)
3537 continue;
3538
3539 x = splsched();
3540 thread_lock(thread);
3541
3542 info->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
3543 info->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
3544
3545 tval = timer_grab(&thread->user_timer);
3546 info->total_user += tval;
3547
3548 tval = timer_grab(&thread->system_timer);
3549 if (thread->precise_user_kernel_time) {
3550 info->total_system += tval;
3551 } else {
3552 /* system_timer may represent either sys or user */
3553 info->total_user += tval;
3554 }
3555
3556 if (ginfo) {
3557 ginfo->task_gpu_utilisation += ml_gpu_stat(thread);
3558 }
3559 thread_unlock(thread);
3560 splx(x);
3561 }
3562 }
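/*
 * The totals gathered above reach user space through the
 * TASK_POWER_INFO (and TASK_POWER_INFO_V2) flavors of task_info().
 * A minimal sketch of the former (illustrative only):
 */
#if 0 /* example only; not compiled as part of the kernel */
#include <mach/mach.h>
#include <stdio.h>

static void
print_my_wakeups(void)
{
	task_power_info_data_t power;
	mach_msg_type_number_t count = TASK_POWER_INFO_COUNT;

	if (task_info(mach_task_self(), TASK_POWER_INFO,
	    (task_info_t)&power, &count) != KERN_SUCCESS)
		return;

	printf("interrupt wakeups %llu, platform idle wakeups %llu\n",
	    (unsigned long long)power.task_interrupt_wakeups,
	    (unsigned long long)power.task_platform_idle_wakeups);
}
#endif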
3563
3564 /*
3565 * task_gpu_utilisation
3566 *
3567 * Returns the total gpu time used by all the threads of the task
3568 * (both dead and alive)
3569 */
3570 uint64_t
3571 task_gpu_utilisation(
3572 task_t task)
3573 {
3574 uint64_t gpu_time = 0;
3575 thread_t thread;
3576
3577 task_lock(task);
3578 gpu_time += task->task_gpu_ns;
3579
3580 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3581 spl_t x;
3582 x = splsched();
3583 thread_lock(thread);
3584 gpu_time += ml_gpu_stat(thread);
3585 thread_unlock(thread);
3586 splx(x);
3587 }
3588
3589 task_unlock(task);
3590 return gpu_time;
3591 }
3592
3593 kern_return_t
3594 task_purgable_info(
3595 task_t task,
3596 task_purgable_info_t *stats)
3597 {
3598 if (task == TASK_NULL || stats == NULL)
3599 return KERN_INVALID_ARGUMENT;
3600 /* Take task reference */
3601 task_reference(task);
3602 vm_purgeable_stats((vm_purgeable_info_t)stats, task);
3603 /* Drop task reference */
3604 task_deallocate(task);
3605 return KERN_SUCCESS;
3606 }
3607
3608 void
3609 task_vtimer_set(
3610 task_t task,
3611 integer_t which)
3612 {
3613 thread_t thread;
3614 spl_t x;
3615
3616 /* assert(task == current_task()); */ /* bogus assert 4803227 4807483 */
3617
3618 task_lock(task);
3619
3620 task->vtimers |= which;
3621
3622 switch (which) {
3623
3624 case TASK_VTIMER_USER:
3625 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3626 x = splsched();
3627 thread_lock(thread);
3628 if (thread->precise_user_kernel_time)
3629 thread->vtimer_user_save = timer_grab(&thread->user_timer);
3630 else
3631 thread->vtimer_user_save = timer_grab(&thread->system_timer);
3632 thread_unlock(thread);
3633 splx(x);
3634 }
3635 break;
3636
3637 case TASK_VTIMER_PROF:
3638 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3639 x = splsched();
3640 thread_lock(thread);
3641 thread->vtimer_prof_save = timer_grab(&thread->user_timer);
3642 thread->vtimer_prof_save += timer_grab(&thread->system_timer);
3643 thread_unlock(thread);
3644 splx(x);
3645 }
3646 break;
3647
3648 case TASK_VTIMER_RLIM:
3649 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3650 x = splsched();
3651 thread_lock(thread);
3652 thread->vtimer_rlim_save = timer_grab(&thread->user_timer);
3653 thread->vtimer_rlim_save += timer_grab(&thread->system_timer);
3654 thread_unlock(thread);
3655 splx(x);
3656 }
3657 break;
3658 }
3659
3660 task_unlock(task);
3661 }
3662
3663 void
3664 task_vtimer_clear(
3665 task_t task,
3666 integer_t which)
3667 {
3668 assert(task == current_task());
3669
3670 task_lock(task);
3671
3672 task->vtimers &= ~which;
3673
3674 task_unlock(task);
3675 }
3676
3677 void
3678 task_vtimer_update(
3679 __unused
3680 task_t task,
3681 integer_t which,
3682 uint32_t *microsecs)
3683 {
3684 thread_t thread = current_thread();
3685 uint32_t tdelt;
3686 clock_sec_t secs;
3687 uint64_t tsum;
3688
3689 assert(task == current_task());
3690
3691 assert(task->vtimers & which);
3692
3693 secs = tdelt = 0;
3694
3695 switch (which) {
3696
3697 case TASK_VTIMER_USER:
3698 if (thread->precise_user_kernel_time) {
3699 tdelt = (uint32_t)timer_delta(&thread->user_timer,
3700 &thread->vtimer_user_save);
3701 } else {
3702 tdelt = (uint32_t)timer_delta(&thread->system_timer,
3703 &thread->vtimer_user_save);
3704 }
3705 absolutetime_to_microtime(tdelt, &secs, microsecs);
3706 break;
3707
3708 case TASK_VTIMER_PROF:
3709 tsum = timer_grab(&thread->user_timer);
3710 tsum += timer_grab(&thread->system_timer);
3711 tdelt = (uint32_t)(tsum - thread->vtimer_prof_save);
3712 absolutetime_to_microtime(tdelt, &secs, microsecs);
3713 /* if the time delta is smaller than a usec, ignore */
3714 if (*microsecs != 0)
3715 thread->vtimer_prof_save = tsum;
3716 break;
3717
3718 case TASK_VTIMER_RLIM:
3719 tsum = timer_grab(&thread->user_timer);
3720 tsum += timer_grab(&thread->system_timer);
3721 tdelt = (uint32_t)(tsum - thread->vtimer_rlim_save);
3722 thread->vtimer_rlim_save = tsum;
3723 absolutetime_to_microtime(tdelt, &secs, microsecs);
3724 break;
3725 }
3726
3727 }
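/*
 * These vtimer bits are driven from the BSD layer: TASK_VTIMER_USER and
 * TASK_VTIMER_PROF back setitimer(2)'s ITIMER_VIRTUAL and ITIMER_PROF,
 * and TASK_VTIMER_RLIM backs RLIMIT_CPU accounting.  A standard
 * user-space use of the virtual timer (illustrative only):
 */
#if 0 /* example only; not compiled as part of the kernel */
#include <signal.h>
#include <sys/time.h>

static volatile sig_atomic_t vtalrm_fired;

static void
on_vtalrm(int sig)
{
	(void)sig;
	vtalrm_fired = 1;	/* delivered after ~100ms of user-mode CPU time */
}

static void
arm_virtual_timer(void)
{
	struct itimerval it = {
		.it_interval = { 0, 0 },
		.it_value    = { 0, 100000 },	/* 100000us of virtual (user) time */
	};

	signal(SIGVTALRM, on_vtalrm);
	setitimer(ITIMER_VIRTUAL, &it, NULL);
}
#endif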
3728
3729 /*
3730 * task_assign:
3731 *
3732 * Change the assigned processor set for the task
3733 */
3734 kern_return_t
3735 task_assign(
3736 __unused task_t task,
3737 __unused processor_set_t new_pset,
3738 __unused boolean_t assign_threads)
3739 {
3740 return(KERN_FAILURE);
3741 }
3742
3743 /*
3744 * task_assign_default:
3745 *
3746 * Version of task_assign to assign to default processor set.
3747 */
3748 kern_return_t
3749 task_assign_default(
3750 task_t task,
3751 boolean_t assign_threads)
3752 {
3753 return (task_assign(task, &pset0, assign_threads));
3754 }
3755
3756 /*
3757 * task_get_assignment
3758 *
3759 * Return name of processor set that task is assigned to.
3760 */
3761 kern_return_t
3762 task_get_assignment(
3763 task_t task,
3764 processor_set_t *pset)
3765 {
3766 if (!task->active)
3767 return(KERN_FAILURE);
3768
3769 *pset = &pset0;
3770
3771 return (KERN_SUCCESS);
3772 }
3773
3774 uint64_t
3775 get_task_dispatchqueue_offset(
3776 task_t task)
3777 {
3778 return task->dispatchqueue_offset;
3779 }
3780
3781 /*
3782 * task_policy
3783 *
3784 * Set scheduling policy and parameters, both base and limit, for
3785 * the given task. Policy must be a policy which is enabled for the
3786 * processor set. Change contained threads if requested.
3787 */
3788 kern_return_t
3789 task_policy(
3790 __unused task_t task,
3791 __unused policy_t policy_id,
3792 __unused policy_base_t base,
3793 __unused mach_msg_type_number_t count,
3794 __unused boolean_t set_limit,
3795 __unused boolean_t change)
3796 {
3797 return(KERN_FAILURE);
3798 }
3799
3800 /*
3801 * task_set_policy
3802 *
3803 * Set scheduling policy and parameters, both base and limit, for
3804 * the given task. Policy can be any policy implemented by the
3805 * processor set, whether enabled or not. Change contained threads
3806 * if requested.
3807 */
3808 kern_return_t
3809 task_set_policy(
3810 __unused task_t task,
3811 __unused processor_set_t pset,
3812 __unused policy_t policy_id,
3813 __unused policy_base_t base,
3814 __unused mach_msg_type_number_t base_count,
3815 __unused policy_limit_t limit,
3816 __unused mach_msg_type_number_t limit_count,
3817 __unused boolean_t change)
3818 {
3819 return(KERN_FAILURE);
3820 }
3821
3822 kern_return_t
3823 task_set_ras_pc(
3824 __unused task_t task,
3825 __unused vm_offset_t pc,
3826 __unused vm_offset_t endpc)
3827 {
3828 return KERN_FAILURE;
3829 }
3830
3831 void
3832 task_synchronizer_destroy_all(task_t task)
3833 {
3834 semaphore_t semaphore;
3835
3836 /*
3837 * Destroy owned semaphores
3838 */
3839
3840 while (!queue_empty(&task->semaphore_list)) {
3841 semaphore = (semaphore_t) queue_first(&task->semaphore_list);
3842 (void) semaphore_destroy_internal(task, semaphore);
3843 }
3844 }
3845
3846 /*
3847 * Install default (machine-dependent) initial thread state
3848 * on the task. Subsequent thread creation will have this initial
3849 * state set on the thread by machine_thread_inherit_taskwide().
3850 * Flavors and structures are exactly the same as those passed to thread_set_state().
3851 */
3852 kern_return_t
3853 task_set_state(
3854 task_t task,
3855 int flavor,
3856 thread_state_t state,
3857 mach_msg_type_number_t state_count)
3858 {
3859 kern_return_t ret;
3860
3861 if (task == TASK_NULL) {
3862 return (KERN_INVALID_ARGUMENT);
3863 }
3864
3865 task_lock(task);
3866
3867 if (!task->active) {
3868 task_unlock(task);
3869 return (KERN_FAILURE);
3870 }
3871
3872 ret = machine_task_set_state(task, flavor, state, state_count);
3873
3874 task_unlock(task);
3875 return ret;
3876 }
3877
3878 /*
3879 * Examine the default (machine-dependent) initial thread state
3880 * on the task, as set by task_set_state(). Flavors and structures
3881 * are exactly the same as those passed to thread_get_state().
3882 */
3883 kern_return_t
3884 task_get_state(
3885 task_t task,
3886 int flavor,
3887 thread_state_t state,
3888 mach_msg_type_number_t *state_count)
3889 {
3890 kern_return_t ret;
3891
3892 if (task == TASK_NULL) {
3893 return (KERN_INVALID_ARGUMENT);
3894 }
3895
3896 task_lock(task);
3897
3898 if (!task->active) {
3899 task_unlock(task);
3900 return (KERN_FAILURE);
3901 }
3902
3903 ret = machine_task_get_state(task, flavor, state, state_count);
3904
3905 task_unlock(task);
3906 return ret;
3907 }
3908
3909 #if CONFIG_JETSAM
3910 #define HWM_USERCORE_MINSPACE 250 // free space (in MB) required *after* core file creation
3911
3912 void __attribute__((noinline))
3913 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb)
3914 {
3915 task_t task = current_task();
3916 int pid = 0;
3917 const char *procname = "unknown";
3918 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
3919
3920 #ifdef MACH_BSD
3921 pid = proc_selfpid();
3922
3923 if (pid == 1) {
3924 /*
3925 * Cannot have ReportCrash analyzing
3926 * a suspended initproc.
3927 */
3928 return;
3929 }
3930
3931 if (task->bsd_info != NULL)
3932 procname = proc_name_address(current_task()->bsd_info);
3933 #endif
3934
3935 if (hwm_user_cores) {
3936 int error;
3937 uint64_t starttime, end;
3938 clock_sec_t secs = 0;
3939 uint32_t microsecs = 0;
3940
3941 starttime = mach_absolute_time();
3942 /*
3943 * Trigger a coredump of this process. Don't proceed unless we know we won't
3944 * be filling up the disk, and ignore the core size resource limit for this
3945 * core file.
3946 */
3947 if ((error = coredump(current_task()->bsd_info, HWM_USERCORE_MINSPACE, COREDUMP_IGNORE_ULIMIT)) != 0) {
3948 printf("couldn't take coredump of %s[%d]: %d\n", procname, pid, error);
3949 }
3950 /*
3951 * coredump() leaves the task suspended.
3952 */
3953 task_resume_internal(current_task());
3954
3955 end = mach_absolute_time();
3956 absolutetime_to_microtime(end - starttime, &secs, &microsecs);
3957 printf("coredump of %s[%d] taken in %d secs %d microsecs\n",
3958 proc_name_address(current_task()->bsd_info), pid, (int)secs, microsecs);
3959 }
3960
3961 if (disable_exc_resource) {
3962 printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
3963 "supressed by a boot-arg.\n", procname, pid, max_footprint_mb);
3964 return;
3965 }
3966
3967 /*
3968 * A task that has triggered an EXC_RESOURCE should not be
3969 * jetsammed when the device is under memory pressure. Here
3970 * we set the P_MEMSTAT_TERMINATED flag so that the process
3971 * will be skipped if the memorystatus_thread wakes up.
3972 */
3973 proc_memstat_terminated(current_task()->bsd_info, TRUE);
3974
3975 printf("process %s[%d] crossed memory high watermark (%d MB); sending "
3976 "EXC_RESOURCE.\n", procname, pid, max_footprint_mb);
3977
3978 code[0] = code[1] = 0;
3979 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY);
3980 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK);
3981 EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb);
3982
3983 /*
3984 * Use the _internal_ variant so that no user-space
3985 * process can resume our task from under us.
3986 */
3987 task_suspend_internal(task);
3988 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
3989 task_resume_internal(task);
3990
3991 /*
3992 * After the EXC_RESOURCE has been handled, we must clear the
3993 * P_MEMSTAT_TERMINATED flag so that the process can again be
3994 * considered for jetsam if the memorystatus_thread wakes up.
3995 */
3996 proc_memstat_terminated(current_task()->bsd_info, FALSE); /* clear the flag */
3997 }
3998
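/*
 * A minimal sketch of how an exception catcher could take the high
 * watermark code apart again, assuming the decode counterparts of the
 * encode macros used above (see <kern/exc_resource.h>):
 *
 *	if (EXC_RESOURCE_DECODE_RESOURCE_TYPE(code[0]) == RESOURCE_TYPE_MEMORY &&
 *	    EXC_RESOURCE_DECODE_FLAVOR(code[0]) == FLAVOR_HIGH_WATERMARK) {
 *		int limit_mb = EXC_RESOURCE_HWM_DECODE_LIMIT(code[0]);
 *		...
 *	}
 */
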
3999 /*
4000 * Callback invoked when a task exceeds its physical footprint limit.
4001 */
4002 void
4003 task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1)
4004 {
4005 ledger_amount_t max_footprint, max_footprint_mb;
4006 ledger_amount_t footprint_after_purge;
4007 task_t task;
4008
4009 if (warning == LEDGER_WARNING_DIPPED_BELOW) {
4010 /*
4011 * Task memory limits only provide a warning on the way up.
4012 */
4013 return;
4014 }
4015
4016 task = current_task();
4017
4018 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &max_footprint);
4019 max_footprint_mb = max_footprint >> 20;
4020
4021 /*
4022 * Try to purge all "volatile" memory in that task first.
4023 */
4024 (void) task_purge_volatile_memory(task);
4025 /* are we still over the limit? */
4026 ledger_get_balance(task->ledger,
4027 task_ledgers.phys_footprint,
4028 &footprint_after_purge);
4029 if ((!warning &&
4030 footprint_after_purge <= max_footprint) ||
4031 (warning &&
4032 footprint_after_purge <= ((max_footprint *
4033 PHYS_FOOTPRINT_WARNING_LEVEL) / 100))) {
4034 /* all better now */
4035 ledger_reset_callback_state(task->ledger,
4036 task_ledgers.phys_footprint);
4037 return;
4038 }
4039 /* still over the limit after purging... */
4040
4041 /*
4042 * If this is an actual violation (not a warning),
4043 * generate a non-fatal high watermark EXC_RESOURCE.
4044 */
4045 if ((warning == 0) && (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION)) {
4046 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND((int)max_footprint_mb);
4047 }
4048
4049 memorystatus_on_ledger_footprint_exceeded((warning == LEDGER_WARNING_ROSE_ABOVE) ? TRUE : FALSE,
4050 (int)max_footprint_mb);
4051 }
4052
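/*
 * The callback above only fires because the phys_footprint entry is
 * registered against the task ledger template during ledger setup,
 * roughly as follows (a sketch, assuming ledger_set_callback() from
 * <kern/ledger.h>, with 't' being the task ledger template):
 *
 *	ledger_set_callback(t, task_ledgers.phys_footprint,
 *	    task_footprint_exceeded, NULL, NULL);
 */
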
4053 extern int proc_check_footprint_priv(void);
4054
4055 kern_return_t
4056 task_set_phys_footprint_limit(
4057 task_t task,
4058 int new_limit_mb,
4059 int *old_limit_mb)
4060 {
4061 kern_return_t error;
4062
4063 if ((error = proc_check_footprint_priv())) {
4064 return (KERN_NO_ACCESS);
4065 }
4066
4067 return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, FALSE);
4068 }
4069
4070 kern_return_t
4071 task_convert_phys_footprint_limit(
4072 int limit_mb,
4073 int *converted_limit_mb)
4074 {
4075 if (limit_mb == -1) {
4076 /*
4077 * No limit
4078 */
4079 if (max_task_footprint != 0) {
4080 *converted_limit_mb = (int)(max_task_footprint / 1024 / 1024); /* bytes to MB */
4081 } else {
4082 *converted_limit_mb = (int)(LEDGER_LIMIT_INFINITY >> 20);
4083 }
4084 } else {
4085 /* nothing to convert */
4086 *converted_limit_mb = limit_mb;
4087 }
4088 return (KERN_SUCCESS);
4089 }
4090
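/*
 * Worked example for the conversion above: with no global
 * max_task_footprint configured, a limit_mb of -1 converts to
 * LEDGER_LIMIT_INFINITY >> 20; with an assumed max_task_footprint of
 * 500 MB it converts to 500; any non-negative limit_mb is returned
 * unchanged.
 */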
4091
4092 kern_return_t
4093 task_set_phys_footprint_limit_internal(
4094 task_t task,
4095 int new_limit_mb,
4096 int *old_limit_mb,
4097 boolean_t trigger_exception)
4098 {
4099 ledger_amount_t old;
4100
4101 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &old);
4102
4103 if (old_limit_mb) {
4104 /*
4105 * Check that limit >> 20 will not give an "unexpected" 32-bit
4106 * result. There is, however, an implicit assumption that a -1 MB limit
4107 * equates to LEDGER_LIMIT_INFINITY.
4108 */
4109 assert(((old & 0xFFF0000000000000LL) == 0) || (old == LEDGER_LIMIT_INFINITY));
4110 *old_limit_mb = (int)(old >> 20);
4111 }
4112
4113 if (new_limit_mb == -1) {
4114 /*
4115 * Caller wishes to remove the limit.
4116 */
4117 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
4118 max_task_footprint ? max_task_footprint : LEDGER_LIMIT_INFINITY,
4119 max_task_footprint ? PHYS_FOOTPRINT_WARNING_LEVEL : 0);
4120 return (KERN_SUCCESS);
4121 }
4122
4123 #ifdef CONFIG_NOMONITORS
4124 return (KERN_SUCCESS);
4125 #endif /* CONFIG_NOMONITORS */
4126
4127 task_lock(task);
4128
4129 if (trigger_exception) {
4130 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION;
4131 } else {
4132 task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION;
4133 }
4134
4135 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
4136 (ledger_amount_t)new_limit_mb << 20, PHYS_FOOTPRINT_WARNING_LEVEL);
4137
4138 if (task == current_task()) {
4139 ledger_check_new_balance(task->ledger, task_ledgers.phys_footprint);
4140 }
4141
4142 task_unlock(task);
4143
4144 return (KERN_SUCCESS);
4145 }
4146
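/*
 * The MB <-> bytes conversions above are plain shifts by 20 bits: an
 * assumed request of new_limit_mb = 512 becomes
 * (ledger_amount_t)512 << 20 == 536870912 bytes for the ledger, and on
 * the way back out, old >> 20 turns the ledger limit in bytes into
 * whole megabytes again.
 */
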
4147 kern_return_t
4148 task_get_phys_footprint_limit(
4149 task_t task,
4150 int *limit_mb)
4151 {
4152 ledger_amount_t limit;
4153
4154 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &limit);
4155 /*
4156 * Check that limit >> 20 will not give an "unexpected" signed, 32-bit
4157 * result. There is, however, an implicit assumption that a -1 MB limit
4158 * equates to LEDGER_LIMIT_INFINITY.
4159 */
4160 assert(((limit & 0xFFF0000000000000LL) == 0) || (limit == LEDGER_LIMIT_INFINITY));
4161 *limit_mb = (int)(limit >> 20);
4162
4163 return (KERN_SUCCESS);
4164 }
4165 #else /* CONFIG_JETSAM */
4166 kern_return_t
4167 task_set_phys_footprint_limit(
4168 __unused task_t task,
4169 __unused int new_limit_mb,
4170 __unused int *old_limit_mb)
4171 {
4172 return (KERN_FAILURE);
4173 }
4174
4175 kern_return_t
4176 task_get_phys_footprint_limit(
4177 __unused task_t task,
4178 __unused int *limit_mb)
4179 {
4180 return (KERN_FAILURE);
4181 }
4182 #endif /* CONFIG_JETSAM */
4183
4184 /*
4185 * Some functions needed by other components are currently
4186 * implemented as macros within the osfmk component. Export
4187 * them here as functions of the same name.
4188 */
4189 boolean_t is_kerneltask(task_t t)
4190 {
4191 if (t == kernel_task)
4192 return (TRUE);
4193
4194 return (FALSE);
4195 }
4196
4197 int
4198 check_for_tasksuspend(task_t task)
4199 {
4200
4201 if (task == TASK_NULL)
4202 return (0);
4203
4204 return (task->suspend_count > 0);
4205 }
4206
4207 #undef current_task
4208 task_t current_task(void);
4209 task_t current_task(void)
4210 {
4211 return (current_task_fast());
4212 }
4213
4214 #undef task_reference
4215 void task_reference(task_t task);
4216 void
4217 task_reference(
4218 task_t task)
4219 {
4220 if (task != TASK_NULL)
4221 task_reference_internal(task);
4222 }
4223
4224 /* defined in bsd/kern/kern_prot.c */
4225 extern int get_audit_token_pid(audit_token_t *audit_token);
4226
4227 int task_pid(task_t task)
4228 {
4229 if (task)
4230 return get_audit_token_pid(&task->audit_token);
4231 return -1;
4232 }
4233
4234
4235 /*
4236 * This routine is always called with the task lock held.
4237 * It returns a thread handle without taking a reference, since the
4238 * caller operates on it while still holding the task lock.
4239 */
4240 thread_t
4241 task_findtid(task_t task, uint64_t tid)
4242 {
4243 thread_t thread = THREAD_NULL;
4244
4245 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4246 if (thread->thread_id == tid)
4247 return(thread);
4248 }
4249 return(THREAD_NULL);
4250 }
4251
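/*
 * A minimal usage sketch (the caller is an assumption, not taken from
 * this file), honoring the locking contract described above:
 *
 *	task_lock(task);
 *	thread = task_findtid(task, tid);
 *	if (thread != THREAD_NULL) {
 *		... use the thread while the task lock is still held ...
 *	}
 *	task_unlock(task);
 */
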
4252 /*
4253 * Control the CPU usage monitor for a task.
4254 */
4255 kern_return_t
4256 task_cpu_usage_monitor_ctl(task_t task, uint32_t *flags)
4257 {
4258 int error = KERN_SUCCESS;
4259
4260 if (*flags & CPUMON_MAKE_FATAL) {
4261 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_CPUMON;
4262 } else {
4263 error = KERN_INVALID_ARGUMENT;
4264 }
4265
4266 return error;
4267 }
4268
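/*
 * A minimal usage sketch (the caller is an assumption, not taken from
 * this file): a component that wants CPU monitor violations to be
 * fatal for the task would do roughly
 *
 *	uint32_t flags = CPUMON_MAKE_FATAL;
 *	kern_return_t kr = task_cpu_usage_monitor_ctl(task, &flags);
 *
 * Any flags value without CPUMON_MAKE_FATAL set is rejected with
 * KERN_INVALID_ARGUMENT.
 */
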
4269 /*
4270 * Control the wakeups monitor for a task.
4271 */
4272 kern_return_t
4273 task_wakeups_monitor_ctl(task_t task, uint32_t *flags, int32_t *rate_hz)
4274 {
4275 ledger_t ledger = task->ledger;
4276
4277 task_lock(task);
4278 if (*flags & WAKEMON_GET_PARAMS) {
4279 ledger_amount_t limit;
4280 uint64_t period;
4281
4282 ledger_get_limit(ledger, task_ledgers.interrupt_wakeups, &limit);
4283 ledger_get_period(ledger, task_ledgers.interrupt_wakeups, &period);
4284
4285 if (limit != LEDGER_LIMIT_INFINITY) {
4286 /*
4287 * An active limit means the wakeups monitor is enabled.
4288 */
4289 *rate_hz = (int32_t)(limit / (int64_t)(period / NSEC_PER_SEC));
4290 *flags = WAKEMON_ENABLE;
4291 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
4292 *flags |= WAKEMON_MAKE_FATAL;
4293 }
4294 } else {
4295 *flags = WAKEMON_DISABLE;
4296 *rate_hz = -1;
4297 }
4298
4299 /*
4300 * If WAKEMON_GET_PARAMS is present in flags, all other flags are ignored.
4301 */
4302 task_unlock(task);
4303 return KERN_SUCCESS;
4304 }
4305
4306 if (*flags & WAKEMON_ENABLE) {
4307 if (*flags & WAKEMON_SET_DEFAULTS) {
4308 *rate_hz = task_wakeups_monitor_rate;
4309 }
4310
4311 #ifndef CONFIG_NOMONITORS
4312 if (*flags & WAKEMON_MAKE_FATAL) {
4313 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
4314 }
4315 #endif /* CONFIG_NOMONITORS */
4316
4317 if (*rate_hz < 0) {
4318 task_unlock(task);
4319 return KERN_INVALID_ARGUMENT;
4320 }
4321
4322 #ifndef CONFIG_NOMONITORS
4323 ledger_set_limit(ledger, task_ledgers.interrupt_wakeups, *rate_hz * task_wakeups_monitor_interval,
4324 task_wakeups_monitor_ustackshots_trigger_pct);
4325 ledger_set_period(ledger, task_ledgers.interrupt_wakeups, task_wakeups_monitor_interval * NSEC_PER_SEC);
4326 ledger_enable_callback(ledger, task_ledgers.interrupt_wakeups);
4327 #endif /* CONFIG_NOMONITORS */
4328 } else if (*flags & WAKEMON_DISABLE) {
4329 /*
4330 * Caller wishes to disable wakeups monitor on the task.
4331 *
4332 * Disable telemetry if it was triggered by the wakeups monitor, and
4333 * remove the limit & callback on the wakeups ledger entry.
4334 */
4335 #if CONFIG_TELEMETRY
4336 telemetry_task_ctl_locked(current_task(), TF_WAKEMON_WARNING, 0);
4337 #endif
4338 ledger_disable_refill(ledger, task_ledgers.interrupt_wakeups);
4339 ledger_disable_callback(ledger, task_ledgers.interrupt_wakeups);
4340 }
4341
4342 task_unlock(task);
4343 return KERN_SUCCESS;
4344 }
4345
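/*
 * Worked example for the enable path above, with assumed defaults of
 * 150 wakeups/sec for task_wakeups_monitor_rate and 300 seconds for
 * task_wakeups_monitor_interval: WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS
 * sets the interrupt_wakeups ledger limit to 150 * 300 = 45000 wakeups
 * over a 300 * NSEC_PER_SEC refill period, and the WAKEMON_GET_PARAMS
 * path later recovers the rate as 45000 / (period / NSEC_PER_SEC) =
 * 150 Hz.
 */
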
4346 void
4347 task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1)
4348 {
4349 if (warning == LEDGER_WARNING_ROSE_ABOVE) {
4350 #if CONFIG_TELEMETRY
4351 /*
4352 * This task is in danger of violating the wakeups monitor. Enable telemetry on this task
4353 * so there are micro-stackshots available if and when EXC_RESOURCE is triggered.
4354 */
4355 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 1);
4356 #endif
4357 return;
4358 }
4359
4360 #if CONFIG_TELEMETRY
4361 /*
4362 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
4363 * exceeded the limit, turn telemetry off for the task.
4364 */
4365 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 0);
4366 #endif
4367
4368 if (warning == 0) {
4369 THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE();
4370 }
4371 }
4372
4373 void __attribute__((noinline))
4374 THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE(void)
4375 {
4376 task_t task = current_task();
4377 int pid = 0;
4378 const char *procname = "unknown";
4379 uint64_t observed_wakeups_rate;
4380 uint64_t permitted_wakeups_rate;
4381 uint64_t observation_interval;
4382 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
4383 struct ledger_entry_info lei;
4384
4385 #ifdef MACH_BSD
4386 pid = proc_selfpid();
4387 if (task->bsd_info != NULL)
4388 procname = proc_name_address(current_task()->bsd_info);
4389 #endif
4390
4391 ledger_get_entry_info(task->ledger, task_ledgers.interrupt_wakeups, &lei);
4392
4393 /*
4394 * Disable the exception notification so we don't overwhelm
4395 * the listener with an endless stream of redundant exceptions.
4396 */
4397 uint32_t flags = WAKEMON_DISABLE;
4398 task_wakeups_monitor_ctl(task, &flags, NULL);
4399
4400 observed_wakeups_rate = (lei.lei_balance * (int64_t)NSEC_PER_SEC) / lei.lei_last_refill;
4401 permitted_wakeups_rate = lei.lei_limit / task_wakeups_monitor_interval;
4402 observation_interval = lei.lei_refill_period / NSEC_PER_SEC;
4403
4404 if (disable_exc_resource) {
4405 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
4406 "supressed by a boot-arg\n", procname, pid);
4407 return;
4408 }
4409 if (audio_active) {
4410 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
4411 "supressed due to audio playback\n", procname, pid);
4412 return;
4413 }
4414 printf("process %s[%d] caught causing excessive wakeups. Observed wakeups rate "
4415 "(per sec): %lld; Maximum permitted wakeups rate (per sec): %lld; Observation "
4416 "period: %lld seconds; Task lifetime number of wakeups: %lld\n",
4417 procname, pid, observed_wakeups_rate, permitted_wakeups_rate,
4418 observation_interval, lei.lei_credit);
4419
4420 code[0] = code[1] = 0;
4421 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_WAKEUPS);
4422 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_WAKEUPS_MONITOR);
4423 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_PERMITTED(code[0], task_wakeups_monitor_rate);
4424 EXC_RESOURCE_CPUMONITOR_ENCODE_OBSERVATION_INTERVAL(code[0], observation_interval);
4425 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_OBSERVED(code[1], lei.lei_balance * (int64_t)NSEC_PER_SEC / lei.lei_last_refill);
4426 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
4427
4428 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
4429 task_terminate_internal(task);
4430 }
4431 }
4432
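/*
 * Worked example for the rate computations above (the figures are
 * assumptions for illustration): a task whose ledger balance reached
 * 30000 wakeups over a last refill interval of 100 seconds
 * (100 * NSEC_PER_SEC nanoseconds) reports
 *
 *	observed_wakeups_rate = 30000 * NSEC_PER_SEC / (100 * NSEC_PER_SEC)
 *	                      = 300 wakeups per second,
 *
 * which is printed alongside the permitted rate,
 * lei_limit / task_wakeups_monitor_interval.
 */
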
4433 kern_return_t
4434 task_purge_volatile_memory(
4435 task_t task)
4436 {
4437 vm_map_t map;
4438 int num_object_purged;
4439
4440 if (task == TASK_NULL)
4441 return KERN_INVALID_TASK;
4442
4443 task_lock(task);
4444
4445 if (!task->active) {
4446 task_unlock(task);
4447 return KERN_INVALID_TASK;
4448 }
4449 map = task->map;
4450 if (map == VM_MAP_NULL) {
4451 task_unlock(task);
4452 return KERN_INVALID_TASK;
4453 }
4454 vm_map_reference(task->map);
4455
4456 task_unlock(task);
4457
4458 num_object_purged = vm_map_purge(map);
4459 vm_map_deallocate(map);
4460
4461 return KERN_SUCCESS;
4462 }
4463
4464 /* Placeholders for the task set/get voucher interfaces */
4465 kern_return_t
4466 task_get_mach_voucher(
4467 task_t task,
4468 mach_voucher_selector_t __unused which,
4469 ipc_voucher_t *voucher)
4470 {
4471 if (TASK_NULL == task)
4472 return KERN_INVALID_TASK;
4473
4474 *voucher = NULL;
4475 return KERN_SUCCESS;
4476 }
4477
4478 kern_return_t
4479 task_set_mach_voucher(
4480 task_t task,
4481 ipc_voucher_t __unused voucher)
4482 {
4483 if (TASK_NULL == task)
4484 return KERN_INVALID_TASK;
4485
4486 return KERN_SUCCESS;
4487 }
4488
4489 kern_return_t
4490 task_swap_mach_voucher(
4491 task_t task,
4492 ipc_voucher_t new_voucher,
4493 ipc_voucher_t *in_out_old_voucher)
4494 {
4495 if (TASK_NULL == task)
4496 return KERN_INVALID_TASK;
4497
4498 *in_out_old_voucher = new_voucher;
4499 return KERN_SUCCESS;
4500 }
4501
4502 void task_set_gpu_denied(task_t task, boolean_t denied)
4503 {
4504 task_lock(task);
4505
4506 if (denied) {
4507 task->t_flags |= TF_GPU_DENIED;
4508 } else {
4509 task->t_flags &= ~TF_GPU_DENIED;
4510 }
4511
4512 task_unlock(task);
4513 }
4514
4515 boolean_t task_is_gpu_denied(task_t task)
4516 {
4517 /* We don't need the lock to read this flag */
4518 return (task->t_flags & TF_GPU_DENIED) ? TRUE : FALSE;
4519 }