[apple/xnu.git] / osfmk / kern / task.c (xnu-3248.60.10)
1 /*
2 * Copyright (c) 2000-2010, 2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_FREE_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 * File: kern/task.c
58 * Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub,
59 * David Black
60 *
61 * Task management primitives implementation.
62 */
63 /*
64 * Copyright (c) 1993 The University of Utah and
65 * the Computer Systems Laboratory (CSL). All rights reserved.
66 *
67 * Permission to use, copy, modify and distribute this software and its
68 * documentation is hereby granted, provided that both the copyright
69 * notice and this permission notice appear in all copies of the
70 * software, derivative works or modified versions, and any portions
71 * thereof, and that both notices appear in supporting documentation.
72 *
73 * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
74 * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
75 * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
76 *
77 * CSL requests users of this software to return to csl-dist@cs.utah.edu any
78 * improvements that they make and grant CSL redistribution rights.
79 *
80 */
81 /*
82 * NOTICE: This file was modified by McAfee Research in 2004 to introduce
83 * support for mandatory and extensible security protections. This notice
84 * is included in support of clause 2.2 (b) of the Apple Public License,
85 * Version 2.0.
86 * Copyright (c) 2005 SPARTA, Inc.
87 */
88
89 #include <mach/mach_types.h>
90 #include <mach/boolean.h>
91 #include <mach/host_priv.h>
92 #include <mach/machine/vm_types.h>
93 #include <mach/vm_param.h>
94 #include <mach/mach_vm.h>
95 #include <mach/semaphore.h>
96 #include <mach/task_info.h>
97 #include <mach/task_special_ports.h>
98
99 #include <ipc/ipc_importance.h>
100 #include <ipc/ipc_types.h>
101 #include <ipc/ipc_space.h>
102 #include <ipc/ipc_entry.h>
103 #include <ipc/ipc_hash.h>
104
105 #include <kern/kern_types.h>
106 #include <kern/mach_param.h>
107 #include <kern/misc_protos.h>
108 #include <kern/task.h>
109 #include <kern/thread.h>
110 #include <kern/coalition.h>
111 #include <kern/zalloc.h>
112 #include <kern/kalloc.h>
113 #include <kern/kern_cdata.h>
114 #include <kern/processor.h>
115 #include <kern/sched_prim.h> /* for thread_wakeup */
116 #include <kern/ipc_tt.h>
117 #include <kern/host.h>
118 #include <kern/clock.h>
119 #include <kern/timer.h>
120 #include <kern/assert.h>
121 #include <kern/sync_lock.h>
122 #include <kern/affinity.h>
123 #include <kern/exc_resource.h>
124 #include <kern/machine.h>
125 #include <corpses/task_corpse.h>
126 #if CONFIG_TELEMETRY
127 #include <kern/telemetry.h>
128 #endif
129
130 #include <vm/pmap.h>
131 #include <vm/vm_map.h>
132 #include <vm/vm_kern.h> /* for kernel_map, ipc_kernel_map */
133 #include <vm/vm_pageout.h>
134 #include <vm/vm_protos.h>
135 #include <vm/vm_purgeable_internal.h>
136
137 #include <sys/resource.h>
138 #include <sys/signalvar.h> /* for coredump */
139
140 /*
141 * Exported interfaces
142 */
143
144 #include <mach/task_server.h>
145 #include <mach/mach_host_server.h>
146 #include <mach/host_security_server.h>
147 #include <mach/mach_port_server.h>
148
149 #include <vm/vm_shared_region.h>
150
151 #include <libkern/OSDebug.h>
152 #include <libkern/OSAtomic.h>
153
154 #if CONFIG_ATM
155 #include <atm/atm_internal.h>
156 #endif
157
158 #include <kern/sfi.h>
159
160 #if KPERF
161 extern int kpc_force_all_ctrs(task_t, int);
162 #endif
163
164 uint32_t qos_override_mode;
165
166 task_t kernel_task;
167 zone_t task_zone;
168 lck_attr_t task_lck_attr;
169 lck_grp_t task_lck_grp;
170 lck_grp_attr_t task_lck_grp_attr;
171
172 /* Flag set by core audio when audio is playing. Used to stifle EXC_RESOURCE generation when active. */
173 int audio_active = 0;
174
175 zinfo_usage_store_t tasks_tkm_private;
176 zinfo_usage_store_t tasks_tkm_shared;
177
178 /* A container to accumulate statistics for expired tasks */
179 expired_task_statistics_t dead_task_statistics;
180 lck_spin_t dead_task_statistics_lock;
181
182 ledger_template_t task_ledger_template = NULL;
183
184 struct _task_ledger_indices task_ledgers __attribute__((used)) =
185 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
186 { 0 /* initialized at runtime */},
187 #ifdef CONFIG_BANK
188 -1, -1,
189 #endif
190 };
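/*
 * Note: the -1 entries above are sentinels meaning "ledger index not yet
 * registered".  init_task_ledgers() replaces each of them with the index
 * returned by ledger_entry_add() (or sfi_ledger_entry_add() for the SFI
 * wait-time entries) and panics if any of them is still negative afterwards.
 */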
191
192 /* System sleep state */
193 boolean_t tasks_suspend_state;
194
195
196 void init_task_ledgers(void);
197 void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1);
198 void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1);
199 void __attribute__((noinline)) THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE(void);
200 void __attribute__((noinline)) PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb);
201
202 kern_return_t task_suspend_internal(task_t);
203 kern_return_t task_resume_internal(task_t);
204 static kern_return_t task_start_halt_locked(task_t task, boolean_t should_mark_corpse);
205
206 extern kern_return_t iokit_task_terminate(task_t task);
207
208 void proc_init_cpumon_params(void);
209 extern kern_return_t exception_deliver(thread_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t, struct exception_action *, lck_mtx_t *);
210
211 // Warn tasks when they hit 80% of their memory limit.
212 #define PHYS_FOOTPRINT_WARNING_LEVEL 80
213
214 #define TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT 150 /* wakeups per second */
215 #define TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL 300 /* in seconds. */
216
217 /*
218 * Level (in terms of percentage of the limit) at which the wakeups monitor triggers telemetry.
219 *
220 * (i.e., when the task's wakeups rate exceeds 70% of the limit, start taking user
221 * stacktraces, aka micro-stackshots)
222 */
223 #define TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER 70
224
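/*
 * Worked example with the defaults above (illustrative only): with a limit of
 * 150 wakeups/sec observed over a 300 second interval and a 70% trigger,
 * telemetry (micro-stackshots) starts once a task's wakeups rate passes
 * roughly 0.70 * 150 = 105 wakeups/sec, and EXC_RESOURCE handling kicks in
 * once the 150/sec limit itself is exceeded.
 */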
225 int task_wakeups_monitor_interval; /* In seconds. Time period over which wakeups rate is observed */
226 int task_wakeups_monitor_rate; /* In hz. Maximum allowable wakeups per task before EXC_RESOURCE is sent */
227
228 int task_wakeups_monitor_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
229
230 int disable_exc_resource; /* Global override to suppress EXC_RESOURCE for resource monitor violations. */
231
232 ledger_amount_t max_task_footprint = 0; /* Per-task limit on physical memory consumption in bytes */
233 int max_task_footprint_mb = 0; /* Per-task limit on physical memory consumption in megabytes */
234
235 #if MACH_ASSERT
236 int pmap_ledgers_panic = 1;
237 #endif /* MACH_ASSERT */
238
239 int task_max = CONFIG_TASK_MAX; /* Max number of tasks */
240
241 int hwm_user_cores = 0; /* high watermark violations generate user core files */
242
243 #ifdef MACH_BSD
244 extern void proc_getexecutableuuid(void *, unsigned char *, unsigned long);
245 extern int proc_pid(struct proc *p);
246 extern int proc_selfpid(void);
247 extern char *proc_name_address(struct proc *p);
248 extern uint64_t get_dispatchqueue_offset_from_proc(void *);
249 #if CONFIG_JETSAM
250 extern void proc_memstat_terminated(struct proc* p, boolean_t set);
251 extern void memorystatus_on_ledger_footprint_exceeded(int warning, const int max_footprint_mb);
252 #endif
253 #endif
254 #if MACH_ASSERT
255 extern int pmap_ledgers_panic;
256 #endif /* MACH_ASSERT */
257
258 /* Forwards */
259
260 void task_hold_locked(
261 task_t task);
262 void task_wait_locked(
263 task_t task,
264 boolean_t until_not_runnable);
265 void task_release_locked(
266 task_t task);
267 void task_free(
268 task_t task );
269 void task_synchronizer_destroy_all(
270 task_t task);
271
272 int check_for_tasksuspend(
273 task_t task);
274
275 void
276 task_backing_store_privileged(
277 task_t task)
278 {
279 task_lock(task);
280 task->priv_flags |= VM_BACKING_STORE_PRIV;
281 task_unlock(task);
282 return;
283 }
284
285
286 void
287 task_set_64bit(
288 task_t task,
289 boolean_t is64bit)
290 {
291 #if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
292 thread_t thread;
293 #endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */
294
295 task_lock(task);
296
297 if (is64bit) {
298 if (task_has_64BitAddr(task))
299 goto out;
300 task_set_64BitAddr(task);
301 } else {
302 if ( !task_has_64BitAddr(task))
303 goto out;
304 task_clear_64BitAddr(task);
305 }
306 /* FIXME: On x86, the thread save state flavor can diverge from the
307 * task's 64-bit feature flag due to the 32-bit/64-bit register save
308 * state dichotomy. Since we can be pre-empted in this interval,
309 * certain routines may observe the thread as being in an inconsistent
310 * state with respect to its task's 64-bitness.
311 */
312
313 #if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
314 queue_iterate(&task->threads, thread, thread_t, task_threads) {
315 thread_mtx_lock(thread);
316 machine_thread_switch_addrmode(thread);
317 thread_mtx_unlock(thread);
318 }
319 #endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */
320
321 out:
322 task_unlock(task);
323 }
324
325
326 void
327 task_set_dyld_info(task_t task, mach_vm_address_t addr, mach_vm_size_t size)
328 {
329 task_lock(task);
330 task->all_image_info_addr = addr;
331 task->all_image_info_size = size;
332 task_unlock(task);
333 }
334
335 void
336 task_atm_reset(__unused task_t task) {
337
338 #if CONFIG_ATM
339 if (task->atm_context != NULL) {
340 atm_task_descriptor_destroy(task->atm_context);
341 task->atm_context = NULL;
342 }
343 #endif
344
345 }
346
347 void
348 task_bank_reset(__unused task_t task) {
349
350 #if CONFIG_BANK
351 if (task->bank_context != NULL) {
352 bank_task_destroy(task);
353 }
354 #endif
355
356 }
357
358 /*
359 * NOTE: This should only be called when the P_LINTRANSIT
360 * flag is set (the proc_trans lock is held) on the
361 * proc associated with the task.
362 */
363 void
364 task_bank_init(__unused task_t task) {
365
366 #if CONFIG_BANK
367 if (task->bank_context != NULL) {
368 panic("Task bank init called with non null bank context for task: %p and bank_context: %p", task, task->bank_context);
369 }
370 bank_task_initialize(task);
371 #endif
372
373 }
374
375 #if TASK_REFERENCE_LEAK_DEBUG
376 #include <kern/btlog.h>
377
378 decl_simple_lock_data(static,task_ref_lock);
379 static btlog_t *task_ref_btlog;
380 #define TASK_REF_OP_INCR 0x1
381 #define TASK_REF_OP_DECR 0x2
382
383 #define TASK_REF_BTDEPTH 7
384
385 static void
386 task_ref_lock_lock(void *context)
387 {
388 simple_lock((simple_lock_t)context);
389 }
390 static void
391 task_ref_lock_unlock(void *context)
392 {
393 simple_unlock((simple_lock_t)context);
394 }
395
396 void
397 task_reference_internal(task_t task)
398 {
399 void * bt[TASK_REF_BTDEPTH];
400 int numsaved = 0;
401
402 numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
403
404 (void)hw_atomic_add(&(task)->ref_count, 1);
405 btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_INCR,
406 bt, numsaved);
407 }
408
409 uint32_t
410 task_deallocate_internal(task_t task)
411 {
412 void * bt[TASK_REF_BTDEPTH];
413 int numsaved = 0;
414
415 numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
416
417 btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_DECR,
418 bt, numsaved);
419 return hw_atomic_sub(&(task)->ref_count, 1);
420 }
421
422 #endif /* TASK_REFERENCE_LEAK_DEBUG */
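/*
 * When TASK_REFERENCE_LEAK_DEBUG is not configured, task_reference_internal()
 * and task_deallocate_internal() presumably reduce to the plain
 * hw_atomic_add()/hw_atomic_sub() forms without the btlog bookkeeping; the
 * variants above additionally record a backtrace for every reference-count
 * operation so that leaked task references can be attributed.
 */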
423
424 void
425 task_init(void)
426 {
427
428 lck_grp_attr_setdefault(&task_lck_grp_attr);
429 lck_grp_init(&task_lck_grp, "task", &task_lck_grp_attr);
430 lck_attr_setdefault(&task_lck_attr);
431 lck_mtx_init(&tasks_threads_lock, &task_lck_grp, &task_lck_attr);
432
433 task_zone = zinit(
434 sizeof(struct task),
435 task_max * sizeof(struct task),
436 TASK_CHUNK * sizeof(struct task),
437 "tasks");
438
439 zone_change(task_zone, Z_NOENCRYPT, TRUE);
440
441 /*
442 * Configure per-task memory limit.
443 * The boot-arg is interpreted as Megabytes,
444 * and takes precedence over the device tree.
445 * Setting the boot-arg to 0 disables task limits.
446 */
447 if (!PE_parse_boot_argn("max_task_pmem", &max_task_footprint_mb,
448 sizeof (max_task_footprint_mb))) {
449 /*
450 * No limit was found in boot-args, so go look in the device tree.
451 */
452 if (!PE_get_default("kern.max_task_pmem", &max_task_footprint_mb,
453 sizeof(max_task_footprint_mb))) {
454 /*
455 * No limit was found in device tree.
456 */
457 max_task_footprint_mb = 0;
458 }
459 }
460
461 if (max_task_footprint_mb != 0) {
462 #if CONFIG_JETSAM
463 if (max_task_footprint_mb < 50) {
464 printf("Warning: max_task_pmem %d below minimum.\n",
465 max_task_footprint_mb);
466 max_task_footprint_mb = 50;
467 }
468 printf("Limiting task physical memory footprint to %d MB\n",
469 max_task_footprint_mb);
470
471 max_task_footprint = (ledger_amount_t)max_task_footprint_mb * 1024 * 1024; // Convert MB to bytes
472 #else
473 printf("Warning: max_task_footprint specified, but jetsam not configured; ignoring.\n");
474 #endif
475 }
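	/*
	 * Example (hypothetical boot-args value): "max_task_pmem=512" yields
	 * max_task_footprint_mb = 512 and, with jetsam configured,
	 * max_task_footprint = 512 * 1024 * 1024 = 536870912 bytes; the
	 * phys_footprint warning callback then fires at
	 * PHYS_FOOTPRINT_WARNING_LEVEL (80%) of that limit, around 410 MB.
	 */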
476
477 #if MACH_ASSERT
478 PE_parse_boot_argn("pmap_ledgers_panic", &pmap_ledgers_panic,
479 sizeof (pmap_ledgers_panic));
480 #endif /* MACH_ASSERT */
481
482 if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores,
483 sizeof (hwm_user_cores))) {
484 hwm_user_cores = 0;
485 }
486
487 if (PE_parse_boot_argn("qos_override_mode", &qos_override_mode, sizeof(qos_override_mode))) {
488 printf("QOS override mode: 0x%08x\n", qos_override_mode);
489 } else {
490 qos_override_mode = QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE;
491 }
492
493 proc_init_cpumon_params();
494
495 if (!PE_parse_boot_argn("task_wakeups_monitor_rate", &task_wakeups_monitor_rate, sizeof (task_wakeups_monitor_rate))) {
496 task_wakeups_monitor_rate = TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT;
497 }
498
499 if (!PE_parse_boot_argn("task_wakeups_monitor_interval", &task_wakeups_monitor_interval, sizeof (task_wakeups_monitor_interval))) {
500 task_wakeups_monitor_interval = TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL;
501 }
502
503 if (!PE_parse_boot_argn("task_wakeups_monitor_ustackshots_trigger_pct", &task_wakeups_monitor_ustackshots_trigger_pct,
504 sizeof (task_wakeups_monitor_ustackshots_trigger_pct))) {
505 task_wakeups_monitor_ustackshots_trigger_pct = TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER;
506 }
507
508 if (!PE_parse_boot_argn("disable_exc_resource", &disable_exc_resource,
509 sizeof (disable_exc_resource))) {
510 disable_exc_resource = 0;
511 }
512
513 /*
514 * If we have coalitions, coalition_init() will call init_task_ledgers() as it
515 * sets up the ledgers for the default coalition. If we don't have coalitions,
516 * then we have to call it now.
517 */
518 #if CONFIG_COALITIONS
519 assert(task_ledger_template);
520 #else /* CONFIG_COALITIONS */
521 init_task_ledgers();
522 #endif /* CONFIG_COALITIONS */
523
524 #if TASK_REFERENCE_LEAK_DEBUG
525 simple_lock_init(&task_ref_lock, 0);
526 task_ref_btlog = btlog_create(100000,
527 TASK_REF_BTDEPTH,
528 task_ref_lock_lock,
529 task_ref_lock_unlock,
530 &task_ref_lock);
531 assert(task_ref_btlog);
532 #endif
533
534 /*
535 * Create the kernel task as the first task.
536 */
537 #ifdef __LP64__
538 if (task_create_internal(TASK_NULL, NULL, FALSE, TRUE, &kernel_task) != KERN_SUCCESS)
539 #else
540 if (task_create_internal(TASK_NULL, NULL, FALSE, FALSE, &kernel_task) != KERN_SUCCESS)
541 #endif
542 panic("task_init\n");
543
544 vm_map_deallocate(kernel_task->map);
545 kernel_task->map = kernel_map;
546 lck_spin_init(&dead_task_statistics_lock, &task_lck_grp, &task_lck_attr);
547
548 }
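/*
 * Note: task_create_internal() gave kernel_task a freshly created VM map;
 * task_init() drops that map above and points kernel_task at the global
 * kernel_map instead, since the kernel's address space already exists.
 */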
549
550 /*
551 * Create a task running in the kernel address space. It may
552 * have its own map of size mem_size and may have ipc privileges.
553 */
554 kern_return_t
555 kernel_task_create(
556 __unused task_t parent_task,
557 __unused vm_offset_t map_base,
558 __unused vm_size_t map_size,
559 __unused task_t *child_task)
560 {
561 return (KERN_INVALID_ARGUMENT);
562 }
563
564 kern_return_t
565 task_create(
566 task_t parent_task,
567 __unused ledger_port_array_t ledger_ports,
568 __unused mach_msg_type_number_t num_ledger_ports,
569 __unused boolean_t inherit_memory,
570 __unused task_t *child_task) /* OUT */
571 {
572 if (parent_task == TASK_NULL)
573 return(KERN_INVALID_ARGUMENT);
574
575 /*
576 * No longer supported: too many calls assume that a task has a valid
577 * process attached.
578 */
579 return(KERN_FAILURE);
580 }
581
582 kern_return_t
583 host_security_create_task_token(
584 host_security_t host_security,
585 task_t parent_task,
586 __unused security_token_t sec_token,
587 __unused audit_token_t audit_token,
588 __unused host_priv_t host_priv,
589 __unused ledger_port_array_t ledger_ports,
590 __unused mach_msg_type_number_t num_ledger_ports,
591 __unused boolean_t inherit_memory,
592 __unused task_t *child_task) /* OUT */
593 {
594 if (parent_task == TASK_NULL)
595 return(KERN_INVALID_ARGUMENT);
596
597 if (host_security == HOST_NULL)
598 return(KERN_INVALID_SECURITY);
599
600 /*
601 * No longer supported.
602 */
603 return(KERN_FAILURE);
604 }
605
606 /*
607 * Task ledgers
608 * ------------
609 *
610 * phys_footprint
611 * Physical footprint: This is the sum of:
612 * + (internal - alternate_accounting)
613 * + (internal_compressed - alternate_accounting_compressed)
614 * + iokit_mapped
615 * + purgeable_nonvolatile
616 * + purgeable_nonvolatile_compressed
617 *
618 * internal
619 * The task's anonymous memory, which on iOS is always resident.
620 *
621 * internal_compressed
622 * Amount of this task's internal memory which is held by the compressor.
623 * Such memory is no longer actually resident for the task [i.e., resident in its pmap],
624 * and could be either decompressed back into memory, or paged out to storage, depending
625 * on our implementation.
626 *
627 * iokit_mapped
628 * IOKit mappings: The total size of all IOKit mappings in this task, regardless of
629  *		clean/dirty or internal/external state.
630 *
631 * alternate_accounting
632 * The number of internal dirty pages which are part of IOKit mappings. By definition, these pages
633 * are counted in both internal *and* iokit_mapped, so we must subtract them from the total to avoid
634 * double counting.
635 */
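/*
 * Illustrative footprint arithmetic (made-up numbers): a task with
 * internal = 100 MB, internal_compressed = 20 MB, iokit_mapped = 10 MB,
 * alternate_accounting = 4 MB, alternate_accounting_compressed = 1 MB,
 * purgeable_nonvolatile = 5 MB and purgeable_nonvolatile_compressed = 2 MB
 * has phys_footprint = (100 - 4) + (20 - 1) + 10 + 5 + 2 = 132 MB.
 */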
636 void
637 init_task_ledgers(void)
638 {
639 ledger_template_t t;
640
641 assert(task_ledger_template == NULL);
642 assert(kernel_task == TASK_NULL);
643
644 if ((t = ledger_template_create("Per-task ledger")) == NULL)
645 panic("couldn't create task ledger template");
646
647 task_ledgers.cpu_time = ledger_entry_add(t, "cpu_time", "sched", "ns");
648 task_ledgers.tkm_private = ledger_entry_add(t, "tkm_private",
649 "physmem", "bytes");
650 task_ledgers.tkm_shared = ledger_entry_add(t, "tkm_shared", "physmem",
651 "bytes");
652 task_ledgers.phys_mem = ledger_entry_add(t, "phys_mem", "physmem",
653 "bytes");
654 task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem",
655 "bytes");
656 task_ledgers.internal = ledger_entry_add(t, "internal", "physmem",
657 "bytes");
658 task_ledgers.iokit_mapped = ledger_entry_add(t, "iokit_mapped", "mappings",
659 "bytes");
660 task_ledgers.alternate_accounting = ledger_entry_add(t, "alternate_accounting", "physmem",
661 "bytes");
662 task_ledgers.alternate_accounting_compressed = ledger_entry_add(t, "alternate_accounting_compressed", "physmem",
663 "bytes");
664 task_ledgers.phys_footprint = ledger_entry_add(t, "phys_footprint", "physmem",
665 "bytes");
666 task_ledgers.internal_compressed = ledger_entry_add(t, "internal_compressed", "physmem",
667 "bytes");
668 task_ledgers.purgeable_volatile = ledger_entry_add(t, "purgeable_volatile", "physmem", "bytes");
669 task_ledgers.purgeable_nonvolatile = ledger_entry_add(t, "purgeable_nonvolatile", "physmem", "bytes");
670 task_ledgers.purgeable_volatile_compressed = ledger_entry_add(t, "purgeable_volatile_compress", "physmem", "bytes");
671 task_ledgers.purgeable_nonvolatile_compressed = ledger_entry_add(t, "purgeable_nonvolatile_compress", "physmem", "bytes");
672 task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power",
673 "count");
674 task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power",
675 "count");
676
677 #if CONFIG_SCHED_SFI
678 sfi_class_id_t class_id, ledger_alias;
679 for (class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
680 task_ledgers.sfi_wait_times[class_id] = -1;
681 }
682
683 /* don't account for UNSPECIFIED */
684 for (class_id = SFI_CLASS_UNSPECIFIED + 1; class_id < MAX_SFI_CLASS_ID; class_id++) {
685 ledger_alias = sfi_get_ledger_alias_for_class(class_id);
686 if (ledger_alias != SFI_CLASS_UNSPECIFIED) {
687 /* Check to see if alias has been registered yet */
688 if (task_ledgers.sfi_wait_times[ledger_alias] != -1) {
689 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias];
690 } else {
691 /* Otherwise, initialize it first */
692 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias] = sfi_ledger_entry_add(t, ledger_alias);
693 }
694 } else {
695 task_ledgers.sfi_wait_times[class_id] = sfi_ledger_entry_add(t, class_id);
696 }
697
698 if (task_ledgers.sfi_wait_times[class_id] < 0) {
699 panic("couldn't create entries for task ledger template for SFI class 0x%x", class_id);
700 }
701 }
702
703 assert(task_ledgers.sfi_wait_times[MAX_SFI_CLASS_ID -1] != -1);
704 #endif /* CONFIG_SCHED_SFI */
705
706 #ifdef CONFIG_BANK
707 task_ledgers.cpu_time_billed_to_me = ledger_entry_add(t, "cpu_time_billed_to_me", "sched", "ns");
708 task_ledgers.cpu_time_billed_to_others = ledger_entry_add(t, "cpu_time_billed_to_others", "sched", "ns");
709 #endif
710 if ((task_ledgers.cpu_time < 0) ||
711 (task_ledgers.tkm_private < 0) ||
712 (task_ledgers.tkm_shared < 0) ||
713 (task_ledgers.phys_mem < 0) ||
714 (task_ledgers.wired_mem < 0) ||
715 (task_ledgers.internal < 0) ||
716 (task_ledgers.iokit_mapped < 0) ||
717 (task_ledgers.alternate_accounting < 0) ||
718 (task_ledgers.alternate_accounting_compressed < 0) ||
719 (task_ledgers.phys_footprint < 0) ||
720 (task_ledgers.internal_compressed < 0) ||
721 (task_ledgers.purgeable_volatile < 0) ||
722 (task_ledgers.purgeable_nonvolatile < 0) ||
723 (task_ledgers.purgeable_volatile_compressed < 0) ||
724 (task_ledgers.purgeable_nonvolatile_compressed < 0) ||
725 (task_ledgers.platform_idle_wakeups < 0) ||
726 (task_ledgers.interrupt_wakeups < 0)
727 #ifdef CONFIG_BANK
728 || (task_ledgers.cpu_time_billed_to_me < 0) || (task_ledgers.cpu_time_billed_to_others < 0)
729 #endif
730 ) {
731 panic("couldn't create entries for task ledger template");
732 }
733
734 ledger_track_maximum(t, task_ledgers.phys_footprint, 60);
735 #if MACH_ASSERT
736 if (pmap_ledgers_panic) {
737 ledger_panic_on_negative(t, task_ledgers.phys_footprint);
738 ledger_panic_on_negative(t, task_ledgers.internal);
739 ledger_panic_on_negative(t, task_ledgers.internal_compressed);
740 ledger_panic_on_negative(t, task_ledgers.iokit_mapped);
741 ledger_panic_on_negative(t, task_ledgers.alternate_accounting);
742 ledger_panic_on_negative(t, task_ledgers.alternate_accounting_compressed);
743 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile);
744 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile);
745 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile_compressed);
746 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile_compressed);
747 }
748 #endif /* MACH_ASSERT */
749
750 #if CONFIG_JETSAM
751 ledger_set_callback(t, task_ledgers.phys_footprint, task_footprint_exceeded, NULL, NULL);
752 #endif
753
754 ledger_set_callback(t, task_ledgers.interrupt_wakeups,
755 task_wakeups_rate_exceeded, NULL, NULL);
756
757 task_ledger_template = t;
758 }
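/*
 * The template built here is consumed by task_create_internal(), roughly:
 *
 *	ledger = ledger_instantiate(task_ledger_template,
 *	                            LEDGER_CREATE_ACTIVE_ENTRIES);
 *	new_task->ledger = ledger;
 *
 * so each task gets one live ledger entry per index registered above.
 */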
759
760 kern_return_t
761 task_create_internal(
762 task_t parent_task,
763 coalition_t *parent_coalitions __unused,
764 boolean_t inherit_memory,
765 boolean_t is_64bit,
766 task_t *child_task) /* OUT */
767 {
768 task_t new_task;
769 vm_shared_region_t shared_region;
770 ledger_t ledger = NULL;
771
772 new_task = (task_t) zalloc(task_zone);
773
774 if (new_task == TASK_NULL)
775 return(KERN_RESOURCE_SHORTAGE);
776
777 /* one ref for just being alive; one for our caller */
778 new_task->ref_count = 2;
779
780 /* allocate with active entries */
781 assert(task_ledger_template != NULL);
782 if ((ledger = ledger_instantiate(task_ledger_template,
783 LEDGER_CREATE_ACTIVE_ENTRIES)) == NULL) {
784 zfree(task_zone, new_task);
785 return(KERN_RESOURCE_SHORTAGE);
786 }
787
788 new_task->ledger = ledger;
789
790 #if defined(CONFIG_SCHED_MULTIQ)
791 new_task->sched_group = sched_group_create();
792 #endif
793
794 /* if inherit_memory is true, parent_task MUST not be NULL */
795 if (inherit_memory)
796 new_task->map = vm_map_fork(ledger, parent_task->map);
797 else
798 new_task->map = vm_map_create(pmap_create(ledger, 0, is_64bit),
799 (vm_map_offset_t)(VM_MIN_ADDRESS),
800 (vm_map_offset_t)(VM_MAX_ADDRESS), TRUE);
801
802 /* Inherit memlock limit from parent */
803 if (parent_task)
804 vm_map_set_user_wire_limit(new_task->map, (vm_size_t)parent_task->map->user_wire_limit);
805
806 lck_mtx_init(&new_task->lock, &task_lck_grp, &task_lck_attr);
807 queue_init(&new_task->threads);
808 new_task->suspend_count = 0;
809 new_task->thread_count = 0;
810 new_task->active_thread_count = 0;
811 new_task->user_stop_count = 0;
812 new_task->legacy_stop_count = 0;
813 new_task->active = TRUE;
814 new_task->halting = FALSE;
815 new_task->user_data = NULL;
816 new_task->faults = 0;
817 new_task->cow_faults = 0;
818 new_task->pageins = 0;
819 new_task->messages_sent = 0;
820 new_task->messages_received = 0;
821 new_task->syscalls_mach = 0;
822 new_task->priv_flags = 0;
823 new_task->syscalls_unix=0;
824 new_task->c_switch = new_task->p_switch = new_task->ps_switch = 0;
825 new_task->t_flags = 0;
826 new_task->importance = 0;
827
828 #if CONFIG_ATM
829 new_task->atm_context = NULL;
830 #endif
831 #if CONFIG_BANK
832 new_task->bank_context = NULL;
833 #endif
834
835 zinfo_task_init(new_task);
836
837 #ifdef MACH_BSD
838 new_task->bsd_info = NULL;
839 new_task->corpse_info = NULL;
840 #endif /* MACH_BSD */
841
842 #if CONFIG_JETSAM
843 if (max_task_footprint != 0) {
844 ledger_set_limit(ledger, task_ledgers.phys_footprint, max_task_footprint, PHYS_FOOTPRINT_WARNING_LEVEL);
845 }
846 #endif
847
848 if (task_wakeups_monitor_rate != 0) {
849 uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
850 int32_t rate; // Ignored because of WAKEMON_SET_DEFAULTS
851 task_wakeups_monitor_ctl(new_task, &flags, &rate);
852 }
853
854 #if defined(__i386__) || defined(__x86_64__)
855 new_task->i386_ldt = 0;
856 #endif
857
858 new_task->task_debug = NULL;
859
860 queue_init(&new_task->semaphore_list);
861 new_task->semaphores_owned = 0;
862
863 ipc_task_init(new_task, parent_task);
864
865 new_task->total_user_time = 0;
866 new_task->total_system_time = 0;
867
868 new_task->vtimers = 0;
869
870 new_task->shared_region = NULL;
871
872 new_task->affinity_space = NULL;
873
874 new_task->pidsuspended = FALSE;
875 new_task->frozen = FALSE;
876 new_task->changing_freeze_state = FALSE;
877 new_task->rusage_cpu_flags = 0;
878 new_task->rusage_cpu_percentage = 0;
879 new_task->rusage_cpu_interval = 0;
880 new_task->rusage_cpu_deadline = 0;
881 new_task->rusage_cpu_callt = NULL;
882 #if MACH_ASSERT
883 new_task->suspends_outstanding = 0;
884 #endif
885
886 #if HYPERVISOR
887 new_task->hv_task_target = NULL;
888 #endif /* HYPERVISOR */
889
890
891 new_task->low_mem_notified_warn = 0;
892 new_task->low_mem_notified_critical = 0;
893 new_task->low_mem_privileged_listener = 0;
894 new_task->purged_memory_warn = 0;
895 new_task->purged_memory_critical = 0;
896 new_task->mem_notify_reserved = 0;
897 #if IMPORTANCE_INHERITANCE
898 new_task->task_imp_base = NULL;
899 #endif /* IMPORTANCE_INHERITANCE */
900
901 #if defined(__x86_64__)
902 new_task->uexc_range_start = new_task->uexc_range_size = new_task->uexc_handler = 0;
903 #endif
904
905 new_task->requested_policy = default_task_requested_policy;
906 new_task->effective_policy = default_task_effective_policy;
907 new_task->pended_policy = default_task_pended_policy;
908
909 if (parent_task != TASK_NULL) {
910 new_task->sec_token = parent_task->sec_token;
911 new_task->audit_token = parent_task->audit_token;
912
913 /* inherit the parent's shared region */
914 shared_region = vm_shared_region_get(parent_task);
915 vm_shared_region_set(new_task, shared_region);
916
917 if(task_has_64BitAddr(parent_task))
918 task_set_64BitAddr(new_task);
919 new_task->all_image_info_addr = parent_task->all_image_info_addr;
920 new_task->all_image_info_size = parent_task->all_image_info_size;
921
922 #if defined(__i386__) || defined(__x86_64__)
923 if (inherit_memory && parent_task->i386_ldt)
924 new_task->i386_ldt = user_ldt_copy(parent_task->i386_ldt);
925 #endif
926 if (inherit_memory && parent_task->affinity_space)
927 task_affinity_create(parent_task, new_task);
928
929 new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task);
930
931 #if IMPORTANCE_INHERITANCE
932 ipc_importance_task_t new_task_imp = IIT_NULL;
933
934 if (task_is_marked_importance_donor(parent_task)) {
935 new_task_imp = ipc_importance_for_task(new_task, FALSE);
936 assert(IIT_NULL != new_task_imp);
937 ipc_importance_task_mark_donor(new_task_imp, TRUE);
938 }
939 /* Embedded doesn't want this to inherit */
940 if (task_is_marked_importance_receiver(parent_task)) {
941 if (IIT_NULL == new_task_imp)
942 new_task_imp = ipc_importance_for_task(new_task, FALSE);
943 assert(IIT_NULL != new_task_imp);
944 ipc_importance_task_mark_receiver(new_task_imp, TRUE);
945 }
946 if (task_is_marked_importance_denap_receiver(parent_task)) {
947 if (IIT_NULL == new_task_imp)
948 new_task_imp = ipc_importance_for_task(new_task, FALSE);
949 assert(IIT_NULL != new_task_imp);
950 ipc_importance_task_mark_denap_receiver(new_task_imp, TRUE);
951 }
952
953 if (IIT_NULL != new_task_imp) {
954 assert(new_task->task_imp_base == new_task_imp);
955 ipc_importance_task_release(new_task_imp);
956 }
957 #endif /* IMPORTANCE_INHERITANCE */
958
959 new_task->priority = BASEPRI_DEFAULT;
960 new_task->max_priority = MAXPRI_USER;
961
962 new_task->requested_policy.t_apptype = parent_task->requested_policy.t_apptype;
963
964 new_task->requested_policy.int_darwinbg = parent_task->requested_policy.int_darwinbg;
965 new_task->requested_policy.ext_darwinbg = parent_task->requested_policy.ext_darwinbg;
966 new_task->requested_policy.int_iotier = parent_task->requested_policy.int_iotier;
967 new_task->requested_policy.ext_iotier = parent_task->requested_policy.ext_iotier;
968 new_task->requested_policy.int_iopassive = parent_task->requested_policy.int_iopassive;
969 new_task->requested_policy.ext_iopassive = parent_task->requested_policy.ext_iopassive;
970 new_task->requested_policy.bg_iotier = parent_task->requested_policy.bg_iotier;
971 new_task->requested_policy.terminated = parent_task->requested_policy.terminated;
972 new_task->requested_policy.t_qos_clamp = parent_task->requested_policy.t_qos_clamp;
973
974 task_policy_create(new_task, parent_task->requested_policy.t_boosted);
975 } else {
976 new_task->sec_token = KERNEL_SECURITY_TOKEN;
977 new_task->audit_token = KERNEL_AUDIT_TOKEN;
978 #ifdef __LP64__
979 if(is_64bit)
980 task_set_64BitAddr(new_task);
981 #endif
982 new_task->all_image_info_addr = (mach_vm_address_t)0;
983 new_task->all_image_info_size = (mach_vm_size_t)0;
984
985 new_task->pset_hint = PROCESSOR_SET_NULL;
986
987 if (kernel_task == TASK_NULL) {
988 new_task->priority = BASEPRI_KERNEL;
989 new_task->max_priority = MAXPRI_KERNEL;
990 } else {
991 new_task->priority = BASEPRI_DEFAULT;
992 new_task->max_priority = MAXPRI_USER;
993 }
994 }
995
996 bzero(new_task->coalition, sizeof(new_task->coalition));
997 for (int i = 0; i < COALITION_NUM_TYPES; i++)
998 queue_chain_init(new_task->task_coalition[i]);
999
1000 /* Allocate I/O Statistics */
1001 new_task->task_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info));
1002 assert(new_task->task_io_stats != NULL);
1003 bzero(new_task->task_io_stats, sizeof(struct io_stat_info));
1004 new_task->task_immediate_writes = 0;
1005 new_task->task_deferred_writes = 0;
1006 new_task->task_invalidated_writes = 0;
1007 new_task->task_metadata_writes = 0;
1008
1009 bzero(&(new_task->cpu_time_qos_stats), sizeof(struct _cpu_time_qos_stats));
1010
1011 bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics));
1012 new_task->task_timer_wakeups_bin_1 = new_task->task_timer_wakeups_bin_2 = 0;
1013 new_task->task_gpu_ns = 0;
1014
1015 #if CONFIG_COALITIONS
1016
1017 /* TODO: there is no graceful failure path here... */
1018 if (parent_coalitions && parent_coalitions[COALITION_TYPE_RESOURCE]) {
1019 coalitions_adopt_task(parent_coalitions, new_task);
1020 } else if (parent_task && parent_task->coalition[COALITION_TYPE_RESOURCE]) {
1021 /*
1022 * all tasks at least have a resource coalition, so
1023 * if the parent has one then inherit all coalitions
1024 * the parent is a part of
1025 */
1026 coalitions_adopt_task(parent_task->coalition, new_task);
1027 } else {
1028 /* TODO: assert that new_task will be PID 1 (launchd) */
1029 coalitions_adopt_init_task(new_task);
1030 }
1031
1032 if (new_task->coalition[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
1033 panic("created task is not a member of a resource coalition");
1034 }
1035 #endif /* CONFIG_COALITIONS */
1036
1037 new_task->dispatchqueue_offset = 0;
1038 if (parent_task != NULL) {
1039 new_task->dispatchqueue_offset = parent_task->dispatchqueue_offset;
1040 }
1041
1042 if (vm_backing_store_low && parent_task != NULL)
1043 new_task->priv_flags |= (parent_task->priv_flags&VM_BACKING_STORE_PRIV);
1044
1045 new_task->task_volatile_objects = 0;
1046 new_task->task_nonvolatile_objects = 0;
1047 new_task->task_purgeable_disowning = FALSE;
1048 new_task->task_purgeable_disowned = FALSE;
1049
1050 queue_init(&new_task->io_user_clients);
1051
1052 ipc_task_enable(new_task);
1053
1054 lck_mtx_lock(&tasks_threads_lock);
1055 queue_enter(&tasks, new_task, task_t, tasks);
1056 tasks_count++;
1057 if (tasks_suspend_state) {
1058 task_suspend_internal(new_task);
1059 }
1060 lck_mtx_unlock(&tasks_threads_lock);
1061
1062 *child_task = new_task;
1063 return(KERN_SUCCESS);
1064 }
1065
1066 int task_dropped_imp_count = 0;
1067
1068 /*
1069 * task_deallocate:
1070 *
1071 * Drop a reference on a task.
1072 */
1073 void
1074 task_deallocate(
1075 task_t task)
1076 {
1077 ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups;
1078 uint32_t refs;
1079
1080 if (task == TASK_NULL)
1081 return;
1082
1083 refs = task_deallocate_internal(task);
1084
1085 #if IMPORTANCE_INHERITANCE
1086 if (refs > 1)
1087 return;
1088
1089 if (refs == 1) {
1090 /*
1091 * If last ref potentially comes from the task's importance,
1092 * disconnect it. But more task refs may be added before
1093 * that completes, so wait for the reference to go to zero
1094 * naturally (it may happen on a recursive task_deallocate()
1095 * from the ipc_importance_disconnect_task() call).
1096 */
1097 if (IIT_NULL != task->task_imp_base)
1098 ipc_importance_disconnect_task(task);
1099 return;
1100 }
1101 #else
1102 if (refs > 0)
1103 return;
1104 #endif /* IMPORTANCE_INHERITANCE */
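	/*
	 * Past this point the reference count has actually reached zero (any
	 * importance-related reference was disconnected above, which may
	 * re-enter task_deallocate() for the final release), so the task can
	 * be torn down.
	 */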
1105
1106 lck_mtx_lock(&tasks_threads_lock);
1107 queue_remove(&terminated_tasks, task, task_t, tasks);
1108 terminated_tasks_count--;
1109 lck_mtx_unlock(&tasks_threads_lock);
1110
1111 /*
1112 * remove the reference on atm descriptor
1113 */
1114 task_atm_reset(task);
1115
1116 /*
1117 * remove the reference on bank context
1118 */
1119 task_bank_reset(task);
1120
1121 if (task->task_io_stats)
1122 kfree(task->task_io_stats, sizeof(struct io_stat_info));
1123
1124 /*
1125 * Give the machine dependent code a chance
1126 * to perform cleanup before ripping apart
1127 * the task.
1128 */
1129 machine_task_terminate(task);
1130
1131 ipc_task_terminate(task);
1132
1133 /* let iokit know */
1134 iokit_task_terminate(task);
1135
1136 if (task->affinity_space)
1137 task_affinity_deallocate(task);
1138
1139 #if MACH_ASSERT
1140 if (task->ledger != NULL &&
1141 task->map != NULL &&
1142 task->map->pmap != NULL &&
1143 task->map->pmap->ledger != NULL) {
1144 assert(task->ledger == task->map->pmap->ledger);
1145 }
1146 #endif /* MACH_ASSERT */
1147
1148 vm_purgeable_disown(task);
1149 assert(task->task_purgeable_disowned);
1150 if (task->task_volatile_objects != 0 ||
1151 task->task_nonvolatile_objects != 0) {
1152 panic("task_deallocate(%p): "
1153 "volatile_objects=%d nonvolatile_objects=%d\n",
1154 task,
1155 task->task_volatile_objects,
1156 task->task_nonvolatile_objects);
1157 }
1158
1159 vm_map_deallocate(task->map);
1160 is_release(task->itk_space);
1161
1162 ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
1163 &interrupt_wakeups, &debit);
1164 ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
1165 &platform_idle_wakeups, &debit);
1166
1167 #if defined(CONFIG_SCHED_MULTIQ)
1168 sched_group_destroy(task->sched_group);
1169 #endif
1170
1171 /* Accumulate statistics for dead tasks */
1172 lck_spin_lock(&dead_task_statistics_lock);
1173 dead_task_statistics.total_user_time += task->total_user_time;
1174 dead_task_statistics.total_system_time += task->total_system_time;
1175
1176 dead_task_statistics.task_interrupt_wakeups += interrupt_wakeups;
1177 dead_task_statistics.task_platform_idle_wakeups += platform_idle_wakeups;
1178
1179 dead_task_statistics.task_timer_wakeups_bin_1 += task->task_timer_wakeups_bin_1;
1180 dead_task_statistics.task_timer_wakeups_bin_2 += task->task_timer_wakeups_bin_2;
1181
1182 lck_spin_unlock(&dead_task_statistics_lock);
1183 lck_mtx_destroy(&task->lock, &task_lck_grp);
1184
1185 if (!ledger_get_entries(task->ledger, task_ledgers.tkm_private, &credit,
1186 &debit)) {
1187 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc);
1188 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_private.free);
1189 }
1190 if (!ledger_get_entries(task->ledger, task_ledgers.tkm_shared, &credit,
1191 &debit)) {
1192 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_shared.alloc);
1193 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_shared.free);
1194 }
1195 ledger_dereference(task->ledger);
1196 zinfo_task_free(task);
1197
1198 #if TASK_REFERENCE_LEAK_DEBUG
1199 btlog_remove_entries_for_element(task_ref_btlog, task);
1200 #endif
1201
1202 #if CONFIG_COALITIONS
1203 if (!task->coalition[COALITION_TYPE_RESOURCE])
1204 panic("deallocating task was not a member of a resource coalition");
1205 task_release_coalitions(task);
1206 #endif /* CONFIG_COALITIONS */
1207
1208 bzero(task->coalition, sizeof(task->coalition));
1209
1210 #if MACH_BSD
1211 /* clean up collected information since last reference to task is gone */
1212 if (task->corpse_info) {
1213 task_crashinfo_destroy(task->corpse_info);
1214 task->corpse_info = NULL;
1215 }
1216 #endif
1217
1218 zfree(task_zone, task);
1219 }
1220
1221 /*
1222 * task_name_deallocate:
1223 *
1224 * Drop a reference on a task name.
1225 */
1226 void
1227 task_name_deallocate(
1228 task_name_t task_name)
1229 {
1230 return(task_deallocate((task_t)task_name));
1231 }
1232
1233 /*
1234 * task_suspension_token_deallocate:
1235 *
1236 * Drop a reference on a task suspension token.
1237 */
1238 void
1239 task_suspension_token_deallocate(
1240 task_suspension_token_t token)
1241 {
1242 return(task_deallocate((task_t)token));
1243 }
1244
1245
1246 /*
1247 * task_collect_crash_info:
1248 *
1249 * collect crash info from bsd and mach based data
1250 */
1251 kern_return_t
1252 task_collect_crash_info(task_t task)
1253 {
1254 kern_return_t kr = KERN_SUCCESS;
1255
1256 kcdata_descriptor_t crash_data = NULL;
1257 kcdata_descriptor_t crash_data_release = NULL;
1258 mach_msg_type_number_t size = CORPSEINFO_ALLOCATION_SIZE;
1259 mach_vm_offset_t crash_data_user_ptr = 0;
1260
1261 if (!corpses_enabled()) {
1262 return KERN_NOT_SUPPORTED;
1263 }
1264
1265 task_lock(task);
1266 assert(task->bsd_info != NULL);
1267 if (task->corpse_info == NULL && task->bsd_info != NULL) {
1268 task_unlock(task);
1269 /* map crash data memory in task's vm map */
1270 kr = mach_vm_allocate(task->map, &crash_data_user_ptr, size, (VM_MAKE_TAG(VM_MEMORY_CORPSEINFO) | VM_FLAGS_ANYWHERE));
1271
1272 if (kr != KERN_SUCCESS)
1273 goto out_no_lock;
1274
1275 crash_data = task_crashinfo_alloc_init((mach_vm_address_t)crash_data_user_ptr, size);
1276 if (crash_data) {
1277 task_lock(task);
1278 crash_data_release = task->corpse_info;
1279 task->corpse_info = crash_data;
1280 task_unlock(task);
1281 kr = KERN_SUCCESS;
1282 } else {
1283 /* if failed to create corpse info, free the mapping */
1284 if (KERN_SUCCESS != mach_vm_deallocate(task->map, crash_data_user_ptr, size)) {
1285 printf("mach_vm_deallocate failed to clear corpse_data for pid %d.\n", task_pid(task));
1286 }
1287 kr = KERN_FAILURE;
1288 }
1289
1290 if (crash_data_release != NULL) {
1291 task_crashinfo_destroy(crash_data_release);
1292 }
1293 } else {
1294 task_unlock(task);
1295 }
1296
1297 out_no_lock:
1298 return kr;
1299 }
1300
1301 /*
1302 * task_deliver_crash_notification:
1303 *
1304 * Makes outcall to registered host port for a corpse.
1305 */
1306 kern_return_t
1307 task_deliver_crash_notification(task_t task)
1308 {
1309 kcdata_descriptor_t crash_info = task->corpse_info;
1310 thread_t th_iter = NULL;
1311 kern_return_t kr = KERN_SUCCESS;
1312 wait_interrupt_t wsave;
1313 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
1314
1315 if (crash_info == NULL)
1316 return KERN_FAILURE;
1317
1318 code[0] = crash_info->kcd_addr_begin;
1319 code[1] = crash_info->kcd_length;
1320
1321 task_lock(task);
1322 queue_iterate(&task->threads, th_iter, thread_t, task_threads)
1323 {
1324 ipc_thread_reset(th_iter);
1325 }
1326 task_unlock(task);
1327
1328 wsave = thread_interrupt_level(THREAD_UNINT);
1329 kr = exception_triage(EXC_CORPSE_NOTIFY, code, EXCEPTION_CODE_MAX);
1330 if (kr != KERN_SUCCESS) {
1331 printf("Failed to send exception EXC_CORPSE_NOTIFY. error code: %d for pid %d\n", kr, task_pid(task));
1332 }
1333
1334 /*
1335 * crash reporting is done. Now release threads
1336 * for reaping by thread_terminate_daemon
1337 */
1338 task_lock(task);
1339 assert(task->active_thread_count == 0);
1340 queue_iterate(&task->threads, th_iter, thread_t, task_threads)
1341 {
1342 thread_mtx_lock(th_iter);
1343 assert(th_iter->inspection == TRUE);
1344 th_iter->inspection = FALSE;
1345 /* now that the corpse has been autopsied, dispose of the thread name */
1346 uthread_cleanup_name(th_iter->uthread);
1347 thread_mtx_unlock(th_iter);
1348 }
1349
1350 thread_terminate_crashed_threads();
1351 /* remove the pending corpse report flag */
1352 task_clear_corpse_pending_report(task);
1353
1354 task_unlock(task);
1355
1356 (void)thread_interrupt_level(wsave);
1357 task_terminate_internal(task);
1358
1359 return kr;
1360 }
1361
1362 /*
1363 * task_terminate:
1364 *
1365 * Terminate the specified task. See comments on thread_terminate
1366 * (kern/thread.c) about problems with terminating the "current task."
1367 */
1368
1369 kern_return_t
1370 task_terminate(
1371 task_t task)
1372 {
1373 if (task == TASK_NULL)
1374 return (KERN_INVALID_ARGUMENT);
1375
1376 if (task->bsd_info)
1377 return (KERN_FAILURE);
1378
1379 return (task_terminate_internal(task));
1380 }
1381
1382 #if MACH_ASSERT
1383 extern int proc_pid(struct proc *);
1384 extern void proc_name_kdp(task_t t, char *buf, int size);
1385 #endif /* MACH_ASSERT */
1386
1387 #define VM_MAP_PARTIAL_REAP 0x54 /* 0x150 */
1388 static void
1389 __unused task_partial_reap(task_t task, __unused int pid)
1390 {
1391 unsigned int reclaimed_resident = 0;
1392 unsigned int reclaimed_compressed = 0;
1393 uint64_t task_page_count;
1394
1395 task_page_count = (get_task_phys_footprint(task) / PAGE_SIZE_64);
1396
1397 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_START),
1398 pid, task_page_count, 0, 0, 0);
1399
1400 vm_map_partial_reap(task->map, &reclaimed_resident, &reclaimed_compressed);
1401
1402 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_END),
1403 pid, reclaimed_resident, reclaimed_compressed, 0, 0);
1404 }
1405
1406 kern_return_t
1407 task_mark_corpse(task_t task)
1408 {
1409 kern_return_t kr = KERN_SUCCESS;
1410 thread_t self_thread;
1411 (void) self_thread;
1412 wait_interrupt_t wsave;
1413
1414 assert(task != kernel_task);
1415 assert(task == current_task());
1416 assert(!task_is_a_corpse(task));
1417
1418 kr = task_collect_crash_info(task);
1419 if (kr != KERN_SUCCESS) {
1420 return kr;
1421 }
1422
1423 self_thread = current_thread();
1424
1425 wsave = thread_interrupt_level(THREAD_UNINT);
1426 task_lock(task);
1427
1428 task_set_corpse_pending_report(task);
1429 task_set_corpse(task);
1430
1431 kr = task_start_halt_locked(task, TRUE);
1432 assert(kr == KERN_SUCCESS);
1433 ipc_task_reset(task);
1434 ipc_task_enable(task);
1435
1436 task_unlock(task);
1437 /* terminate the ipc space */
1438 ipc_space_terminate(task->itk_space);
1439
1440 task_start_halt(task);
1441 thread_terminate_internal(self_thread);
1442 (void) thread_interrupt_level(wsave);
1443 assert(task->halting == TRUE);
1444 return kr;
1445 }
1446
1447 kern_return_t
1448 task_terminate_internal(
1449 task_t task)
1450 {
1451 thread_t thread, self;
1452 task_t self_task;
1453 boolean_t interrupt_save;
1454 int pid = 0;
1455
1456 assert(task != kernel_task);
1457
1458 self = current_thread();
1459 self_task = self->task;
1460
1461 /*
1462 * Get the task locked and make sure that we are not racing
1463 * with someone else trying to terminate us.
1464 */
1465 if (task == self_task)
1466 task_lock(task);
1467 else
1468 if (task < self_task) {
1469 task_lock(task);
1470 task_lock(self_task);
1471 }
1472 else {
1473 task_lock(self_task);
1474 task_lock(task);
1475 }
1476
1477 if (!task->active) {
1478 /*
1479 * Task is already being terminated.
1480 * Just return an error. If we are dying, this will
1481 * just get us to our AST special handler and that
1482 * will get us to finalize the termination of ourselves.
1483 */
1484 task_unlock(task);
1485 if (self_task != task)
1486 task_unlock(self_task);
1487
1488 return (KERN_FAILURE);
1489 }
1490
1491 if (task_corpse_pending_report(task)) {
1492 /*
1493 * Task is marked for reporting as corpse.
1494 * Just return an error. This will
1495 * just get us to our AST special handler and that
1496 * will get us to finish the path to death
1497 */
1498 task_unlock(task);
1499 if (self_task != task)
1500 task_unlock(self_task);
1501
1502 return (KERN_FAILURE);
1503 }
1504
1505 if (self_task != task)
1506 task_unlock(self_task);
1507
1508 /*
1509 * Make sure the current thread does not get aborted out of
1510 * the waits inside these operations.
1511 */
1512 interrupt_save = thread_interrupt_level(THREAD_UNINT);
1513
1514 /*
1515 * Indicate that we want all the threads to stop executing
1516 * at user space by holding the task (we would have held
1517 * each thread independently in thread_terminate_internal -
1518 * but this way we may be more likely to already find it
1519 * held there). Mark the task inactive, and prevent
1520 * further task operations via the task port.
1521 */
1522 task_hold_locked(task);
1523 task->active = FALSE;
1524 ipc_task_disable(task);
1525
1526 #if CONFIG_TELEMETRY
1527 /*
1528 * Notify telemetry that this task is going away.
1529 */
1530 telemetry_task_ctl_locked(task, TF_TELEMETRY, 0);
1531 #endif
1532
1533 /*
1534 * Terminate each thread in the task.
1535 */
1536 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1537 thread_terminate_internal(thread);
1538 }
1539
1540 #ifdef MACH_BSD
1541 if (task->bsd_info != NULL) {
1542 pid = proc_pid(task->bsd_info);
1543 }
1544 #endif /* MACH_BSD */
1545
1546 task_unlock(task);
1547
1548 proc_set_task_policy(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE,
1549 TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);
1550
1551 /* Early object reap phase */
1552
1553 // PR-17045188: Revisit implementation
1554 // task_partial_reap(task, pid);
1555
1556
1557 /*
1558 * Destroy all synchronizers owned by the task.
1559 */
1560 task_synchronizer_destroy_all(task);
1561
1562 /*
1563 * Destroy the IPC space, leaving just a reference for it.
1564 */
1565 ipc_space_terminate(task->itk_space);
1566
1567 #if 00
1568 /* if some ledgers go negative on tear-down again... */
1569 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1570 task_ledgers.phys_footprint);
1571 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1572 task_ledgers.internal);
1573 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1574 task_ledgers.internal_compressed);
1575 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1576 task_ledgers.iokit_mapped);
1577 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1578 task_ledgers.alternate_accounting);
1579 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1580 task_ledgers.alternate_accounting_compressed);
1581 #endif
1582
1583 /*
1584 * If the current thread is a member of the task
1585 * being terminated, then the last reference to
1586 * the task will not be dropped until the thread
1587 * is finally reaped. To avoid incurring the
1588 * expense of removing the address space regions
1589 * at reap time, we do it explicitly here.
1590 */
1591
1592 vm_map_lock(task->map);
1593 vm_map_disable_hole_optimization(task->map);
1594 vm_map_unlock(task->map);
1595
1596 vm_map_remove(task->map,
1597 task->map->min_offset,
1598 task->map->max_offset,
1599 /* no unnesting on final cleanup: */
1600 VM_MAP_REMOVE_NO_UNNESTING);
1601
1602 /* release our shared region */
1603 vm_shared_region_set(task, NULL);
1604
1605
1606 #if MACH_ASSERT
1607 /*
1608 * Identify the pmap's process, in case the pmap ledgers drift
1609 * and we have to report it.
1610 */
1611 char procname[17];
1612 if (task->bsd_info) {
1613 pid = proc_pid(task->bsd_info);
1614 proc_name_kdp(task, procname, sizeof (procname));
1615 } else {
1616 pid = 0;
1617 strlcpy(procname, "<unknown>", sizeof (procname));
1618 }
1619 pmap_set_process(task->map->pmap, pid, procname);
1620 #endif /* MACH_ASSERT */
1621
1622 lck_mtx_lock(&tasks_threads_lock);
1623 queue_remove(&tasks, task, task_t, tasks);
1624 queue_enter(&terminated_tasks, task, task_t, tasks);
1625 tasks_count--;
1626 terminated_tasks_count++;
1627 lck_mtx_unlock(&tasks_threads_lock);
1628
1629 /*
1630 * We no longer need to guard against being aborted, so restore
1631 * the previous interruptible state.
1632 */
1633 thread_interrupt_level(interrupt_save);
1634
1635 #if KPERF
1636 /* force the task to release all ctrs */
1637 if (task->t_chud & TASK_KPC_FORCED_ALL_CTRS)
1638 kpc_force_all_ctrs(task, 0);
1639 #endif
1640
1641 #if CONFIG_COALITIONS
1642 /*
1643 * Leave our coalitions. (drop activation but not reference)
1644 */
1645 coalitions_remove_task(task);
1646 #endif
1647
1648 /*
1649 * Get rid of the task active reference on itself.
1650 */
1651 task_deallocate(task);
1652
1653 return (KERN_SUCCESS);
1654 }
1655
1656 void
1657 tasks_system_suspend(boolean_t suspend)
1658 {
1659 task_t task;
1660
1661 lck_mtx_lock(&tasks_threads_lock);
1662 assert(tasks_suspend_state != suspend);
1663 tasks_suspend_state = suspend;
1664 queue_iterate(&tasks, task, task_t, tasks) {
1665 if (task == kernel_task) {
1666 continue;
1667 }
1668 suspend ? task_suspend_internal(task) : task_resume_internal(task);
1669 }
1670 lck_mtx_unlock(&tasks_threads_lock);
1671 }
1672
1673 /*
1674 * task_start_halt:
1675 *
1676 * Shut the current task down (except for the current thread) in
1677 * preparation for dramatic changes to the task (probably exec).
1678 * We hold the task and mark all other threads in the task for
1679 * termination.
1680 */
1681 kern_return_t
1682 task_start_halt(task_t task)
1683 {
1684 kern_return_t kr = KERN_SUCCESS;
1685 task_lock(task);
1686 kr = task_start_halt_locked(task, FALSE);
1687 task_unlock(task);
1688 return kr;
1689 }
1690
1691 static kern_return_t
1692 task_start_halt_locked(task_t task, boolean_t should_mark_corpse)
1693 {
1694 thread_t thread, self;
1695 uint64_t dispatchqueue_offset;
1696
1697 assert(task != kernel_task);
1698
1699 self = current_thread();
1700
1701 if (task != self->task)
1702 return (KERN_INVALID_ARGUMENT);
1703
1704 if (task->halting || !task->active || !self->active) {
1705 /*
1706 * Task or current thread is already being terminated.
1707 * Hurry up and return out of the current kernel context
1708 * so that we run our AST special handler to terminate
1709 * ourselves.
1710 */
1711 return (KERN_FAILURE);
1712 }
1713
1714 task->halting = TRUE;
1715
1716 /*
1717 * Mark all the threads to keep them from starting any more
1718 * user-level execution. The thread_terminate_internal code
1719 * would do this on a thread by thread basis anyway, but this
1720 * gives us a better chance of not having to wait there.
1721 */
1722 task_hold_locked(task);
1723 dispatchqueue_offset = get_dispatchqueue_offset_from_proc(task->bsd_info);
1724
1725 /*
1726 * Terminate all the other threads in the task.
1727 */
1728 queue_iterate(&task->threads, thread, thread_t, task_threads)
1729 {
1730 if (should_mark_corpse) {
1731 thread_mtx_lock(thread);
1732 thread->inspection = TRUE;
1733 thread_mtx_unlock(thread);
1734 }
1735 if (thread != self)
1736 thread_terminate_internal(thread);
1737 }
1738 task->dispatchqueue_offset = dispatchqueue_offset;
1739
1740 task_release_locked(task);
1741
1742 return KERN_SUCCESS;
1743 }
1744
1745
1746 /*
1747 * task_complete_halt:
1748 *
1749 * Complete task halt by waiting for threads to terminate, then clean
1750 * up task resources (VM, port namespace, etc...) and then let the
1751 * current thread go in the (practically empty) task context.
1752 */
1753 void
1754 task_complete_halt(task_t task)
1755 {
1756 task_lock(task);
1757 assert(task->halting);
1758 assert(task == current_task());
1759
1760 /*
1761 * Wait for the other threads to get shut down.
1762 * When the last other thread is reaped, we'll be
1763 * woken up.
1764 */
1765 if (task->thread_count > 1) {
1766 assert_wait((event_t)&task->halting, THREAD_UNINT);
1767 task_unlock(task);
1768 thread_block(THREAD_CONTINUE_NULL);
1769 } else {
1770 task_unlock(task);
1771 }
1772
1773 /*
1774 * Give the machine dependent code a chance
1775 * to perform cleanup of task-level resources
1776 * associated with the current thread before
1777 * ripping apart the task.
1778 */
1779 machine_task_terminate(task);
1780
1781 /*
1782 * Destroy all synchronizers owned by the task.
1783 */
1784 task_synchronizer_destroy_all(task);
1785
1786 /*
1787 * Destroy the contents of the IPC space, leaving just
1788 * a reference for it.
1789 */
1790 ipc_space_clean(task->itk_space);
1791
1792 /*
1793 * Clean out the address space, as we are going to be
1794 * getting a new one.
1795 */
1796 vm_map_remove(task->map, task->map->min_offset,
1797 task->map->max_offset,
1798 /* no unnesting on final cleanup: */
1799 VM_MAP_REMOVE_NO_UNNESTING);
1800
1801 task->halting = FALSE;
1802 }
1803
1804 /*
1805 * task_hold_locked:
1806 *
1807 * Suspend execution of the specified task.
1808 * This is a recursive-style suspension of the task, a count of
1809 * suspends is maintained.
1810 *
1811 * CONDITIONS: the task is locked and active.
1812 */
1813 void
1814 task_hold_locked(
1815 register task_t task)
1816 {
1817 register thread_t thread;
1818
1819 assert(task->active);
1820
1821 if (task->suspend_count++ > 0)
1822 return;
1823
1824 /*
1825 * Iterate through all the threads and hold them.
1826 */
1827 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1828 thread_mtx_lock(thread);
1829 thread_hold(thread);
1830 thread_mtx_unlock(thread);
1831 }
1832 }
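/*
 * A rough sketch of the counting behaviour: only the 0 -> 1 transition
 * of suspend_count actually holds the threads, and only the 1 -> 0
 * transition in task_release_locked() below releases them.
 *
 *	task_hold_locked(task);		// suspend_count 0 -> 1, threads held
 *	task_hold_locked(task);		// suspend_count 1 -> 2, no thread work
 *	task_release_locked(task);	// suspend_count 2 -> 1, no thread work
 *	task_release_locked(task);	// suspend_count 1 -> 0, threads released
 */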
1833
1834 /*
1835 * task_hold:
1836 *
1837 * Same as the internal routine above, except that it must lock
1838 * and verify that the task is active. This differs from task_suspend
1839 * in that it places a kernel hold on the task rather than just a
1840 * user-level hold. This keeps users from over-resuming and setting
1841 * it running out from under the kernel.
1842 *
1843 * CONDITIONS: the caller holds a reference on the task
1844 */
1845 kern_return_t
1846 task_hold(
1847 register task_t task)
1848 {
1849 if (task == TASK_NULL)
1850 return (KERN_INVALID_ARGUMENT);
1851
1852 task_lock(task);
1853
1854 if (!task->active) {
1855 task_unlock(task);
1856
1857 return (KERN_FAILURE);
1858 }
1859
1860 task_hold_locked(task);
1861 task_unlock(task);
1862
1863 return (KERN_SUCCESS);
1864 }
1865
1866 kern_return_t
1867 task_wait(
1868 task_t task,
1869 boolean_t until_not_runnable)
1870 {
1871 if (task == TASK_NULL)
1872 return (KERN_INVALID_ARGUMENT);
1873
1874 task_lock(task);
1875
1876 if (!task->active) {
1877 task_unlock(task);
1878
1879 return (KERN_FAILURE);
1880 }
1881
1882 task_wait_locked(task, until_not_runnable);
1883 task_unlock(task);
1884
1885 return (KERN_SUCCESS);
1886 }
1887
1888 /*
1889 * task_wait_locked:
1890 *
1891 * Wait for all threads in task to stop.
1892 *
1893 * Conditions:
1894 * Called with task locked, active, and held.
1895 */
1896 void
1897 task_wait_locked(
1898 register task_t task,
1899 boolean_t until_not_runnable)
1900 {
1901 register thread_t thread, self;
1902
1903 assert(task->active);
1904 assert(task->suspend_count > 0);
1905
1906 self = current_thread();
1907
1908 /*
1909 * Iterate through all the threads and wait for them to
1910 * stop. Do not wait for the current thread if it is within
1911 * the task.
1912 */
1913 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1914 if (thread != self)
1915 thread_wait(thread, until_not_runnable);
1916 }
1917 }
1918
1919 /*
1920 * task_release_locked:
1921 *
1922 * Release a kernel hold on a task.
1923 *
1924 * CONDITIONS: the task is locked and active
1925 */
1926 void
1927 task_release_locked(
1928 register task_t task)
1929 {
1930 register thread_t thread;
1931
1932 assert(task->active);
1933 assert(task->suspend_count > 0);
1934
1935 if (--task->suspend_count > 0)
1936 return;
1937
1938 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1939 thread_mtx_lock(thread);
1940 thread_release(thread);
1941 thread_mtx_unlock(thread);
1942 }
1943 }
1944
1945 /*
1946 * task_release:
1947 *
1948 * Same as the internal routine above, except that it must lock
1949 * and verify that the task is active.
1950 *
1951 * CONDITIONS: The caller holds a reference to the task
1952 */
1953 kern_return_t
1954 task_release(
1955 task_t task)
1956 {
1957 if (task == TASK_NULL)
1958 return (KERN_INVALID_ARGUMENT);
1959
1960 task_lock(task);
1961
1962 if (!task->active) {
1963 task_unlock(task);
1964
1965 return (KERN_FAILURE);
1966 }
1967
1968 task_release_locked(task);
1969 task_unlock(task);
1970
1971 return (KERN_SUCCESS);
1972 }
1973
1974 kern_return_t
1975 task_threads(
1976 task_t task,
1977 thread_act_array_t *threads_out,
1978 mach_msg_type_number_t *count)
1979 {
1980 mach_msg_type_number_t actual;
1981 thread_t *thread_list;
1982 thread_t thread;
1983 vm_size_t size, size_needed;
1984 void *addr;
1985 unsigned int i, j;
1986
1987 if (task == TASK_NULL)
1988 return (KERN_INVALID_ARGUMENT);
1989
1990 size = 0; addr = NULL;
1991
1992 for (;;) {
1993 task_lock(task);
1994 if (!task->active) {
1995 task_unlock(task);
1996
1997 if (size != 0)
1998 kfree(addr, size);
1999
2000 return (KERN_FAILURE);
2001 }
2002
2003 actual = task->thread_count;
2004
2005 /* do we have the memory we need? */
2006 size_needed = actual * sizeof (mach_port_t);
2007 if (size_needed <= size)
2008 break;
2009
2010 /* unlock the task and allocate more memory */
2011 task_unlock(task);
2012
2013 if (size != 0)
2014 kfree(addr, size);
2015
2016 assert(size_needed > 0);
2017 size = size_needed;
2018
2019 addr = kalloc(size);
2020 if (addr == 0)
2021 return (KERN_RESOURCE_SHORTAGE);
2022 }
2023
2024 /* OK, have memory and the task is locked & active */
2025 thread_list = (thread_t *)addr;
2026
2027 i = j = 0;
2028
2029 for (thread = (thread_t)queue_first(&task->threads); i < actual;
2030 ++i, thread = (thread_t)queue_next(&thread->task_threads)) {
2031 thread_reference_internal(thread);
2032 thread_list[j++] = thread;
2033 }
2034
2035 assert(queue_end(&task->threads, (queue_entry_t)thread));
2036
2037 actual = j;
2038 size_needed = actual * sizeof (mach_port_t);
2039
2040 /* can unlock task now that we've got the thread refs */
2041 task_unlock(task);
2042
2043 if (actual == 0) {
2044 /* no threads, so return null pointer and deallocate memory */
2045
2046 *threads_out = NULL;
2047 *count = 0;
2048
2049 if (size != 0)
2050 kfree(addr, size);
2051 }
2052 else {
2053 /* if we allocated too much, must copy */
2054
2055 if (size_needed < size) {
2056 void *newaddr;
2057
2058 newaddr = kalloc(size_needed);
2059 if (newaddr == 0) {
2060 for (i = 0; i < actual; ++i)
2061 thread_deallocate(thread_list[i]);
2062 kfree(addr, size);
2063 return (KERN_RESOURCE_SHORTAGE);
2064 }
2065
2066 bcopy(addr, newaddr, size_needed);
2067 kfree(addr, size);
2068 thread_list = (thread_t *)newaddr;
2069 }
2070
2071 *threads_out = thread_list;
2072 *count = actual;
2073
2074 /* do the conversion that Mig should handle */
2075
2076 for (i = 0; i < actual; ++i)
2077 ((ipc_port_t *) thread_list)[i] = convert_thread_to_port(thread_list[i]);
2078 }
2079
2080 return (KERN_SUCCESS);
2081 }
2082
2083 #define TASK_HOLD_NORMAL 0
2084 #define TASK_HOLD_PIDSUSPEND 1
2085 #define TASK_HOLD_LEGACY 2
2086 #define TASK_HOLD_LEGACY_ALL 3
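/*
 * Rough summary of the hold modes consumed by place_task_hold() and
 * release_task_hold() below:
 *
 *	TASK_HOLD_NORMAL	kernel-internal hold, placed by
 *				task_suspend_internal() and dropped by
 *				task_resume_internal().
 *	TASK_HOLD_PIDSUSPEND	hold placed by task_pidsuspend(), released
 *				only by task_pidresume().
 *	TASK_HOLD_LEGACY	old-style task_suspend() hold, tracked by a
 *				naked send right to the task's resume port.
 *	TASK_HOLD_LEGACY_ALL	drop every remaining legacy hold at once,
 *				used when the resume port has no more senders.
 */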
2087
2088 static kern_return_t
2089 place_task_hold (
2090 register task_t task,
2091 int mode)
2092 {
2093 if (!task->active) {
2094 return (KERN_FAILURE);
2095 }
2096
2097 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2098 MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_SUSPEND) | DBG_FUNC_NONE,
2099 task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
2100 task->user_stop_count, task->user_stop_count + 1, 0);
2101
2102 #if MACH_ASSERT
2103 current_task()->suspends_outstanding++;
2104 #endif
2105
2106 if (mode == TASK_HOLD_LEGACY)
2107 task->legacy_stop_count++;
2108
2109 if (task->user_stop_count++ > 0) {
2110 /*
2111 * If the stop count was positive, the task is
2112 * already stopped and we can exit.
2113 */
2114 return (KERN_SUCCESS);
2115 }
2116
2117 /*
2118 * Put a kernel-level hold on the threads in the task (all
2119 * user-level task suspensions added together represent a
2120 * single kernel-level hold). We then wait for the threads
2121 * to stop executing user code.
2122 */
2123 task_hold_locked(task);
2124 task_wait_locked(task, FALSE);
2125
2126 return (KERN_SUCCESS);
2127 }
2128
2129 static kern_return_t
2130 release_task_hold (
2131 register task_t task,
2132 int mode)
2133 {
2134 register boolean_t release = FALSE;
2135
2136 if (!task->active) {
2137 return (KERN_FAILURE);
2138 }
2139
2140 if (mode == TASK_HOLD_PIDSUSPEND) {
2141 if (task->pidsuspended == FALSE) {
2142 return (KERN_FAILURE);
2143 }
2144 task->pidsuspended = FALSE;
2145 }
2146
2147 if (task->user_stop_count > (task->pidsuspended ? 1 : 0)) {
2148
2149 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2150 MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_RESUME) | DBG_FUNC_NONE,
2151 task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
2152 task->user_stop_count, mode, task->legacy_stop_count);
2153
2154 #if MACH_ASSERT
2155 /*
2156 * This is obviously not robust; if we suspend one task and then resume a different one,
2157 * we'll fly under the radar. This is only meant to catch the common case of a crashed
2158 * or buggy suspender.
2159 */
2160 current_task()->suspends_outstanding--;
2161 #endif
2162
2163 if (mode == TASK_HOLD_LEGACY_ALL) {
2164 if (task->legacy_stop_count >= task->user_stop_count) {
2165 task->user_stop_count = 0;
2166 release = TRUE;
2167 } else {
2168 task->user_stop_count -= task->legacy_stop_count;
2169 }
2170 task->legacy_stop_count = 0;
2171 } else {
2172 if (mode == TASK_HOLD_LEGACY && task->legacy_stop_count > 0)
2173 task->legacy_stop_count--;
2174 if (--task->user_stop_count == 0)
2175 release = TRUE;
2176 }
2177 }
2178 else {
2179 return (KERN_FAILURE);
2180 }
2181
2182 /*
2183 * Release the task if necessary.
2184 */
2185 if (release)
2186 task_release_locked(task);
2187
2188 return (KERN_SUCCESS);
2189 }
2190
2191
2192 /*
2193 * task_suspend:
2194 *
2195 * Implement an (old-fashioned) user-level suspension on a task.
2196 *
2197 * Because the user isn't expecting to have to manage a suspension
2198 * token, we'll track it for him in the kernel in the form of a naked
2199 * send right to the task's resume port. All such send rights
2200 * account for a single suspension against the task (unlike task_suspend2()
2201 * where each caller gets a unique suspension count represented by a
2202 * unique send-once right).
2203 *
2204 * Conditions:
2205 * The caller holds a reference to the task
2206 */
2207 kern_return_t
2208 task_suspend(
2209 register task_t task)
2210 {
2211 kern_return_t kr;
2212 mach_port_t port, send, old_notify;
2213 mach_port_name_t name;
2214
2215 if (task == TASK_NULL || task == kernel_task)
2216 return (KERN_INVALID_ARGUMENT);
2217
2218 task_lock(task);
2219
2220 /*
2221 * Claim a send right on the task resume port, and request a no-senders
2222 * notification on that port (if none outstanding).
2223 */
2224 if (task->itk_resume == IP_NULL) {
2225 task->itk_resume = ipc_port_alloc_kernel();
2226 if (!IP_VALID(task->itk_resume))
2227 panic("failed to create resume port");
2228 ipc_kobject_set(task->itk_resume, (ipc_kobject_t)task, IKOT_TASK_RESUME);
2229 }
2230
2231 port = task->itk_resume;
2232 ip_lock(port);
2233 assert(ip_active(port));
2234
2235 send = ipc_port_make_send_locked(port);
2236 assert(IP_VALID(send));
2237
2238 if (port->ip_nsrequest == IP_NULL) {
2239 ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
2240 assert(old_notify == IP_NULL);
2241 /* port unlocked */
2242 } else {
2243 ip_unlock(port);
2244 }
2245
2246 /*
2247 * place a legacy hold on the task.
2248 */
2249 kr = place_task_hold(task, TASK_HOLD_LEGACY);
2250 if (kr != KERN_SUCCESS) {
2251 task_unlock(task);
2252 ipc_port_release_send(send);
2253 return kr;
2254 }
2255
2256 task_unlock(task);
2257
2258 /*
2259 * Copyout the send right into the calling task's IPC space. It won't know it is there,
2260 * but we'll look it up when calling a traditional resume. Any IPC operations that
2261 * deallocate the send right will auto-release the suspension.
2262 */
2263 if ((kr = ipc_kmsg_copyout_object(current_task()->itk_space, (ipc_object_t)send,
2264 MACH_MSG_TYPE_MOVE_SEND, &name)) != KERN_SUCCESS) {
2265 printf("warning: %s(%d) failed to copyout suspension token for pid %d with error: %d\n",
2266 proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
2267 task_pid(task), kr);
2268 return (kr);
2269 }
2270
2271 return (kr);
2272 }
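/*
 * Illustrative (uncompiled) sketch of the legacy pairing from a
 * user-space caller's point of view; "target" is a hypothetical task
 * port the caller already holds:
 *
 *	if (task_suspend(target) == KERN_SUCCESS) {
 *		// ... inspect the stopped task ...
 *		(void) task_resume(target);	// locates and releases the
 *						// send right stashed above
 *	}
 *
 * If the caller never resumes, the stashed send right dies with its IPC
 * space and the resulting no-senders notification drops the legacy holds
 * (see task_suspension_notify() below).
 */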
2273
2274 /*
2275 * task_resume:
2276 * Release a user hold on a task.
2277 *
2278 * Conditions:
2279 * The caller holds a reference to the task
2280 */
2281 kern_return_t
2282 task_resume(
2283 register task_t task)
2284 {
2285 kern_return_t kr;
2286 mach_port_name_t resume_port_name;
2287 ipc_entry_t resume_port_entry;
2288 ipc_space_t space = current_task()->itk_space;
2289
2290 if (task == TASK_NULL || task == kernel_task )
2291 return (KERN_INVALID_ARGUMENT);
2292
2293 /* release a legacy task hold */
2294 task_lock(task);
2295 kr = release_task_hold(task, TASK_HOLD_LEGACY);
2296 task_unlock(task);
2297
2298 is_write_lock(space);
2299 if (is_active(space) && IP_VALID(task->itk_resume) &&
2300 ipc_hash_lookup(space, (ipc_object_t)task->itk_resume, &resume_port_name, &resume_port_entry) == TRUE) {
2301 /*
2302 * We found a suspension token in the caller's IPC space. Release a send right to indicate that
2303 * we are holding one less legacy hold on the task from this caller. If the release failed,
2304 * go ahead and drop all the rights, as someone either already released our holds or the task
2305 * is gone.
2306 */
2307 if (kr == KERN_SUCCESS)
2308 ipc_right_dealloc(space, resume_port_name, resume_port_entry);
2309 else
2310 ipc_right_destroy(space, resume_port_name, resume_port_entry, FALSE, 0);
2311 /* space unlocked */
2312 } else {
2313 is_write_unlock(space);
2314 if (kr == KERN_SUCCESS)
2315 printf("warning: %s(%d) performed out-of-band resume on pid %d\n",
2316 proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
2317 task_pid(task));
2318 }
2319
2320 return kr;
2321 }
2322
2323 /*
2324 * Suspend the target task.
2325 * Making/holding a token/reference/port is the caller's responsibility.
2326 */
2327 kern_return_t
2328 task_suspend_internal(task_t task)
2329 {
2330 kern_return_t kr;
2331
2332 if (task == TASK_NULL || task == kernel_task)
2333 return (KERN_INVALID_ARGUMENT);
2334
2335 task_lock(task);
2336 kr = place_task_hold(task, TASK_HOLD_NORMAL);
2337 task_unlock(task);
2338 return (kr);
2339 }
2340
2341 /*
2342 * Suspend the target task, and return a suspension token. The token
2343 * represents a reference on the suspended task.
2344 */
2345 kern_return_t
2346 task_suspend2(
2347 register task_t task,
2348 task_suspension_token_t *suspend_token)
2349 {
2350 kern_return_t kr;
2351
2352 kr = task_suspend_internal(task);
2353 if (kr != KERN_SUCCESS) {
2354 *suspend_token = TASK_NULL;
2355 return (kr);
2356 }
2357
2358 /*
2359 * Take a reference on the target task and return that to the caller
2360 * as a "suspension token," which can be converted into an SO right to
2361 * the now-suspended task's resume port.
2362 */
2363 task_reference_internal(task);
2364 *suspend_token = task;
2365
2366 return (KERN_SUCCESS);
2367 }
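/*
 * Hedged sketch of the token flow for in-kernel callers: the token is a
 * task reference that task_resume2() below both uses and consumes.
 *
 *	task_suspension_token_t token;
 *
 *	if (task_suspend2(task, &token) == KERN_SUCCESS) {
 *		// ... task is stopped; token carries a task reference ...
 *		(void) task_resume2(token);	// drops the hold and the
 *						// token's reference
 *	}
 */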
2368
2369 /*
2370 * Resume the task
2371 * (reference/token/port management is caller's responsibility).
2372 */
2373 kern_return_t
2374 task_resume_internal(
2375 register task_suspension_token_t task)
2376 {
2377 kern_return_t kr;
2378
2379 if (task == TASK_NULL || task == kernel_task)
2380 return (KERN_INVALID_ARGUMENT);
2381
2382 task_lock(task);
2383 kr = release_task_hold(task, TASK_HOLD_NORMAL);
2384 task_unlock(task);
2385 return (kr);
2386 }
2387
2388 /*
2389 * Resume the task using a suspension token. Consumes the token's ref.
2390 */
2391 kern_return_t
2392 task_resume2(
2393 register task_suspension_token_t task)
2394 {
2395 kern_return_t kr;
2396
2397 kr = task_resume_internal(task);
2398 task_suspension_token_deallocate(task);
2399
2400 return (kr);
2401 }
2402
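/*
 * task_suspension_notify:
 *
 * Handle IPC notifications arriving on a task's resume port: a dead
 * send-once right (e.g. a task_suspend2() token converted to a
 * send-once right) drops one normal hold, while a no-senders
 * notification (every legacy task_suspend() token gone) drops all
 * remaining legacy holds, or re-arms the notification if new send
 * rights appeared in the meantime.
 */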
2403 boolean_t
2404 task_suspension_notify(mach_msg_header_t *request_header)
2405 {
2406 ipc_port_t port = (ipc_port_t) request_header->msgh_remote_port;
2407 task_t task = convert_port_to_task_suspension_token(port);
2408 mach_msg_type_number_t not_count;
2409
2410 if (task == TASK_NULL || task == kernel_task)
2411 return TRUE; /* nothing to do */
2412
2413 switch (request_header->msgh_id) {
2414
2415 case MACH_NOTIFY_SEND_ONCE:
2416 /* release the hold held by this specific send-once right */
2417 task_lock(task);
2418 release_task_hold(task, TASK_HOLD_NORMAL);
2419 task_unlock(task);
2420 break;
2421
2422 case MACH_NOTIFY_NO_SENDERS:
2423 not_count = ((mach_no_senders_notification_t *)request_header)->not_count;
2424
2425 task_lock(task);
2426 ip_lock(port);
2427 if (port->ip_mscount == not_count) {
2428
2429 /* release all the [remaining] outstanding legacy holds */
2430 assert(port->ip_nsrequest == IP_NULL);
2431 ip_unlock(port);
2432 release_task_hold(task, TASK_HOLD_LEGACY_ALL);
2433 task_unlock(task);
2434
2435 } else if (port->ip_nsrequest == IP_NULL) {
2436 ipc_port_t old_notify;
2437
2438 task_unlock(task);
2439 /* new send rights, re-arm notification at current make-send count */
2440 ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
2441 assert(old_notify == IP_NULL);
2442 /* port unlocked */
2443 } else {
2444 ip_unlock(port);
2445 task_unlock(task);
2446 }
2447 break;
2448
2449 default:
2450 break;
2451 }
2452
2453 task_suspension_token_deallocate(task); /* drop token reference */
2454 return TRUE;
2455 }
2456
2457 kern_return_t
2458 task_pidsuspend_locked(task_t task)
2459 {
2460 kern_return_t kr;
2461
2462 if (task->pidsuspended) {
2463 kr = KERN_FAILURE;
2464 goto out;
2465 }
2466
2467 task->pidsuspended = TRUE;
2468
2469 kr = place_task_hold(task, TASK_HOLD_PIDSUSPEND);
2470 if (kr != KERN_SUCCESS) {
2471 task->pidsuspended = FALSE;
2472 }
2473 out:
2474 return(kr);
2475 }
2476
2477
2478 /*
2479 * task_pidsuspend:
2480 *
2481 * Suspends a task by placing a hold on its threads.
2482 *
2483 * Conditions:
2484 * The caller holds a reference to the task
2485 */
2486 kern_return_t
2487 task_pidsuspend(
2488 register task_t task)
2489 {
2490 kern_return_t kr;
2491
2492 if (task == TASK_NULL || task == kernel_task)
2493 return (KERN_INVALID_ARGUMENT);
2494
2495 task_lock(task);
2496
2497 kr = task_pidsuspend_locked(task);
2498
2499 task_unlock(task);
2500
2501 return (kr);
2502 }
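/*
 * Minimal sketch of the expected pairing; pidsuspended is a single flag,
 * so a second task_pidsuspend() fails rather than nesting:
 *
 *	if (task_pidsuspend(task) == KERN_SUCCESS) {
 *		// ... task held with TASK_HOLD_PIDSUSPEND ...
 *		(void) task_pidresume(task);
 *	}
 */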
2503
2504 /* If enabled, we bring all the frozen pages back in prior to resumption; otherwise, they're faulted back in on demand */
2505 #define THAW_ON_RESUME 1
2506
2507 /*
2508 * task_pidresume:
2509 * Resumes a previously suspended task.
2510 *
2511 * Conditions:
2512 * The caller holds a reference to the task
2513 */
2514 kern_return_t
2515 task_pidresume(
2516 register task_t task)
2517 {
2518 kern_return_t kr;
2519
2520 if (task == TASK_NULL || task == kernel_task)
2521 return (KERN_INVALID_ARGUMENT);
2522
2523 task_lock(task);
2524
2525 #if (CONFIG_FREEZE && THAW_ON_RESUME)
2526
2527 while (task->changing_freeze_state) {
2528
2529 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
2530 task_unlock(task);
2531 thread_block(THREAD_CONTINUE_NULL);
2532
2533 task_lock(task);
2534 }
2535 task->changing_freeze_state = TRUE;
2536 #endif
2537
2538 kr = release_task_hold(task, TASK_HOLD_PIDSUSPEND);
2539
2540 task_unlock(task);
2541
2542 #if (CONFIG_FREEZE && THAW_ON_RESUME)
2543 if ((kr == KERN_SUCCESS) && (task->frozen == TRUE)) {
2544
2545 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
2546
2547 kr = KERN_SUCCESS;
2548 } else {
2549
2550 kr = vm_map_thaw(task->map);
2551 }
2552 }
2553 task_lock(task);
2554
2555 if (kr == KERN_SUCCESS)
2556 task->frozen = FALSE;
2557 task->changing_freeze_state = FALSE;
2558 thread_wakeup(&task->changing_freeze_state);
2559
2560 task_unlock(task);
2561 #endif
2562
2563 return (kr);
2564 }
2565
2566 #if CONFIG_FREEZE
2567
2568 /*
2569 * task_freeze:
2570 *
2571 * Freeze a task.
2572 *
2573 * Conditions:
2574 * The caller holds a reference to the task
2575 */
2576 extern void vm_wake_compactor_swapper();
2577 extern queue_head_t c_swapout_list_head;
2578
2579 kern_return_t
2580 task_freeze(
2581 register task_t task,
2582 uint32_t *purgeable_count,
2583 uint32_t *wired_count,
2584 uint32_t *clean_count,
2585 uint32_t *dirty_count,
2586 uint32_t dirty_budget,
2587 boolean_t *shared,
2588 boolean_t walk_only)
2589 {
2590 kern_return_t kr;
2591
2592 if (task == TASK_NULL || task == kernel_task)
2593 return (KERN_INVALID_ARGUMENT);
2594
2595 task_lock(task);
2596
2597 while (task->changing_freeze_state) {
2598
2599 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
2600 task_unlock(task);
2601 thread_block(THREAD_CONTINUE_NULL);
2602
2603 task_lock(task);
2604 }
2605 if (task->frozen) {
2606 task_unlock(task);
2607 return (KERN_FAILURE);
2608 }
2609 task->changing_freeze_state = TRUE;
2610
2611 task_unlock(task);
2612
2613 if (walk_only) {
2614 kr = vm_map_freeze_walk(task->map, purgeable_count, wired_count, clean_count, dirty_count, dirty_budget, shared);
2615 } else {
2616 kr = vm_map_freeze(task->map, purgeable_count, wired_count, clean_count, dirty_count, dirty_budget, shared);
2617 }
2618
2619 task_lock(task);
2620
2621 if (walk_only == FALSE && kr == KERN_SUCCESS)
2622 task->frozen = TRUE;
2623 task->changing_freeze_state = FALSE;
2624 thread_wakeup(&task->changing_freeze_state);
2625
2626 task_unlock(task);
2627
2628 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
2629 vm_wake_compactor_swapper();
2630 /*
2631 * We do an explicit wakeup of the swapout thread here
2632 * because the compact_and_swap routines don't have
2633 * knowledge about these kind of "per-task packed c_segs"
2634 * and so will not be evaluating whether we need to do
2635 * a wakeup there.
2636 */
2637 thread_wakeup((event_t)&c_swapout_list_head);
2638 }
2639
2640 return (kr);
2641 }
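/*
 * Rough outline of the changing_freeze_state handshake used above and in
 * task_thaw() below; it keeps the task lock from being held across the
 * potentially long vm_map_freeze()/vm_map_thaw() calls:
 *
 *	task_lock(task);
 *	while (task->changing_freeze_state)	// someone else mid-change
 *		wait and re-lock;
 *	task->changing_freeze_state = TRUE;
 *	task_unlock(task);
 *	... vm_map_freeze() or vm_map_thaw() ...
 *	task_lock(task);
 *	update task->frozen;
 *	task->changing_freeze_state = FALSE;
 *	thread_wakeup(&task->changing_freeze_state);
 *	task_unlock(task);
 */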
2642
2643 /*
2644 * task_thaw:
2645 *
2646 * Thaw a currently frozen task.
2647 *
2648 * Conditions:
2649 * The caller holds a reference to the task
2650 */
2651 kern_return_t
2652 task_thaw(
2653 register task_t task)
2654 {
2655 kern_return_t kr;
2656
2657 if (task == TASK_NULL || task == kernel_task)
2658 return (KERN_INVALID_ARGUMENT);
2659
2660 task_lock(task);
2661
2662 while (task->changing_freeze_state) {
2663
2664 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
2665 task_unlock(task);
2666 thread_block(THREAD_CONTINUE_NULL);
2667
2668 task_lock(task);
2669 }
2670 if (!task->frozen) {
2671 task_unlock(task);
2672 return (KERN_FAILURE);
2673 }
2674 task->changing_freeze_state = TRUE;
2675
2676 if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE) {
2677 task_unlock(task);
2678
2679 kr = vm_map_thaw(task->map);
2680
2681 task_lock(task);
2682
2683 if (kr == KERN_SUCCESS)
2684 task->frozen = FALSE;
2685 } else {
2686 task->frozen = FALSE;
2687 kr = KERN_SUCCESS;
2688 }
2689
2690 task->changing_freeze_state = FALSE;
2691 thread_wakeup(&task->changing_freeze_state);
2692
2693 task_unlock(task);
2694
2695 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
2696 vm_wake_compactor_swapper();
2697 }
2698
2699 return (kr);
2700 }
2701
2702 #endif /* CONFIG_FREEZE */
2703
2704 kern_return_t
2705 host_security_set_task_token(
2706 host_security_t host_security,
2707 task_t task,
2708 security_token_t sec_token,
2709 audit_token_t audit_token,
2710 host_priv_t host_priv)
2711 {
2712 ipc_port_t host_port;
2713 kern_return_t kr;
2714
2715 if (task == TASK_NULL)
2716 return(KERN_INVALID_ARGUMENT);
2717
2718 if (host_security == HOST_NULL)
2719 return(KERN_INVALID_SECURITY);
2720
2721 task_lock(task);
2722 task->sec_token = sec_token;
2723 task->audit_token = audit_token;
2724
2725 task_unlock(task);
2726
2727 if (host_priv != HOST_PRIV_NULL) {
2728 kr = host_get_host_priv_port(host_priv, &host_port);
2729 } else {
2730 kr = host_get_host_port(host_priv_self(), &host_port);
2731 }
2732 assert(kr == KERN_SUCCESS);
2733 kr = task_set_special_port(task, TASK_HOST_PORT, host_port);
2734 return(kr);
2735 }
2736
2737 kern_return_t
2738 task_send_trace_memory(
2739 task_t target_task,
2740 __unused uint32_t pid,
2741 __unused uint64_t uniqueid)
2742 {
2743 kern_return_t kr = KERN_INVALID_ARGUMENT;
2744 if (target_task == TASK_NULL)
2745 return (KERN_INVALID_ARGUMENT);
2746
2747 #if CONFIG_ATM
2748 kr = atm_send_proc_inspect_notification(target_task,
2749 pid,
2750 uniqueid);
2751
2752 #endif
2753 return (kr);
2754 }
2755 /*
2756 * This routine was added, pretty much exclusively, for registering the
2757 * RPC glue vector for in-kernel short circuited tasks. Rather than
2758 * removing it completely, I have only disabled that feature (which was
2759 * the only feature at the time). It just appears that we are going to
2760 * want to add some user data to tasks in the future (e.g. bsd info,
2761 * task names, etc...), so I left it in the formal task interface.
2762 */
2763 kern_return_t
2764 task_set_info(
2765 task_t task,
2766 task_flavor_t flavor,
2767 __unused task_info_t task_info_in, /* pointer to IN array */
2768 __unused mach_msg_type_number_t task_info_count)
2769 {
2770 if (task == TASK_NULL)
2771 return(KERN_INVALID_ARGUMENT);
2772
2773 switch (flavor) {
2774
2775 #if CONFIG_ATM
2776 case TASK_TRACE_MEMORY_INFO:
2777 {
2778 if (task_info_count != TASK_TRACE_MEMORY_INFO_COUNT)
2779 return (KERN_INVALID_ARGUMENT);
2780
2781 assert(task_info_in != NULL);
2782 task_trace_memory_info_t mem_info;
2783 mem_info = (task_trace_memory_info_t) task_info_in;
2784 kern_return_t kr = atm_register_trace_memory(task,
2785 mem_info->user_memory_address,
2786 mem_info->buffer_size);
2787 return kr;
2788 break;
2789 }
2790
2791 #endif
2792 default:
2793 return (KERN_INVALID_ARGUMENT);
2794 }
2795 return (KERN_SUCCESS);
2796 }
2797
2798 int radar_20146450 = 1;
2799 kern_return_t
2800 task_info(
2801 task_t task,
2802 task_flavor_t flavor,
2803 task_info_t task_info_out,
2804 mach_msg_type_number_t *task_info_count)
2805 {
2806 kern_return_t error = KERN_SUCCESS;
2807
2808 if (task == TASK_NULL)
2809 return (KERN_INVALID_ARGUMENT);
2810
2811 task_lock(task);
2812
2813 if ((task != current_task()) && (!task->active)) {
2814 task_unlock(task);
2815 return (KERN_INVALID_ARGUMENT);
2816 }
2817
2818 switch (flavor) {
2819
2820 case TASK_BASIC_INFO_32:
2821 case TASK_BASIC2_INFO_32:
2822 {
2823 task_basic_info_32_t basic_info;
2824 vm_map_t map;
2825 clock_sec_t secs;
2826 clock_usec_t usecs;
2827
2828 if (*task_info_count < TASK_BASIC_INFO_32_COUNT) {
2829 error = KERN_INVALID_ARGUMENT;
2830 break;
2831 }
2832
2833 basic_info = (task_basic_info_32_t)task_info_out;
2834
2835 map = (task == kernel_task)? kernel_map: task->map;
2836 basic_info->virtual_size = (typeof(basic_info->virtual_size))map->size;
2837 if (flavor == TASK_BASIC2_INFO_32) {
2838 /*
2839 * The "BASIC2" flavor gets the maximum resident
2840 * size instead of the current resident size...
2841 */
2842 basic_info->resident_size = pmap_resident_max(map->pmap);
2843 } else {
2844 basic_info->resident_size = pmap_resident_count(map->pmap);
2845 }
2846 basic_info->resident_size *= PAGE_SIZE;
2847
2848 basic_info->policy = ((task != kernel_task)?
2849 POLICY_TIMESHARE: POLICY_RR);
2850 basic_info->suspend_count = task->user_stop_count;
2851
2852 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
2853 basic_info->user_time.seconds =
2854 (typeof(basic_info->user_time.seconds))secs;
2855 basic_info->user_time.microseconds = usecs;
2856
2857 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
2858 basic_info->system_time.seconds =
2859 (typeof(basic_info->system_time.seconds))secs;
2860 basic_info->system_time.microseconds = usecs;
2861
2862 *task_info_count = TASK_BASIC_INFO_32_COUNT;
2863 break;
2864 }
2865
2866 case TASK_BASIC_INFO_64:
2867 {
2868 task_basic_info_64_t basic_info;
2869 vm_map_t map;
2870 clock_sec_t secs;
2871 clock_usec_t usecs;
2872
2873 if (*task_info_count < TASK_BASIC_INFO_64_COUNT) {
2874 error = KERN_INVALID_ARGUMENT;
2875 break;
2876 }
2877
2878 basic_info = (task_basic_info_64_t)task_info_out;
2879
2880 map = (task == kernel_task)? kernel_map: task->map;
2881 basic_info->virtual_size = map->size;
2882 basic_info->resident_size =
2883 (mach_vm_size_t)(pmap_resident_count(map->pmap))
2884 * PAGE_SIZE_64;
2885
2886 basic_info->policy = ((task != kernel_task)?
2887 POLICY_TIMESHARE: POLICY_RR);
2888 basic_info->suspend_count = task->user_stop_count;
2889
2890 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
2891 basic_info->user_time.seconds =
2892 (typeof(basic_info->user_time.seconds))secs;
2893 basic_info->user_time.microseconds = usecs;
2894
2895 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
2896 basic_info->system_time.seconds =
2897 (typeof(basic_info->system_time.seconds))secs;
2898 basic_info->system_time.microseconds = usecs;
2899
2900 *task_info_count = TASK_BASIC_INFO_64_COUNT;
2901 break;
2902 }
2903
2904 case MACH_TASK_BASIC_INFO:
2905 {
2906 mach_task_basic_info_t basic_info;
2907 vm_map_t map;
2908 clock_sec_t secs;
2909 clock_usec_t usecs;
2910
2911 if (*task_info_count < MACH_TASK_BASIC_INFO_COUNT) {
2912 error = KERN_INVALID_ARGUMENT;
2913 break;
2914 }
2915
2916 basic_info = (mach_task_basic_info_t)task_info_out;
2917
2918 map = (task == kernel_task) ? kernel_map : task->map;
2919
2920 basic_info->virtual_size = map->size;
2921
2922 basic_info->resident_size =
2923 (mach_vm_size_t)(pmap_resident_count(map->pmap));
2924 basic_info->resident_size *= PAGE_SIZE_64;
2925
2926 basic_info->resident_size_max =
2927 (mach_vm_size_t)(pmap_resident_max(map->pmap));
2928 basic_info->resident_size_max *= PAGE_SIZE_64;
2929
2930 basic_info->policy = ((task != kernel_task) ?
2931 POLICY_TIMESHARE : POLICY_RR);
2932
2933 basic_info->suspend_count = task->user_stop_count;
2934
2935 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
2936 basic_info->user_time.seconds =
2937 (typeof(basic_info->user_time.seconds))secs;
2938 basic_info->user_time.microseconds = usecs;
2939
2940 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
2941 basic_info->system_time.seconds =
2942 (typeof(basic_info->system_time.seconds))secs;
2943 basic_info->system_time.microseconds = usecs;
2944
2945 *task_info_count = MACH_TASK_BASIC_INFO_COUNT;
2946 break;
2947 }
2948
2949 case TASK_THREAD_TIMES_INFO:
2950 {
2951 register task_thread_times_info_t times_info;
2952 register thread_t thread;
2953
2954 if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) {
2955 error = KERN_INVALID_ARGUMENT;
2956 break;
2957 }
2958
2959 times_info = (task_thread_times_info_t) task_info_out;
2960 times_info->user_time.seconds = 0;
2961 times_info->user_time.microseconds = 0;
2962 times_info->system_time.seconds = 0;
2963 times_info->system_time.microseconds = 0;
2964
2965
2966 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2967 time_value_t user_time, system_time;
2968
2969 if (thread->options & TH_OPT_IDLE_THREAD)
2970 continue;
2971
2972 thread_read_times(thread, &user_time, &system_time);
2973
2974 time_value_add(&times_info->user_time, &user_time);
2975 time_value_add(&times_info->system_time, &system_time);
2976 }
2977
2978 *task_info_count = TASK_THREAD_TIMES_INFO_COUNT;
2979 break;
2980 }
2981
2982 case TASK_ABSOLUTETIME_INFO:
2983 {
2984 task_absolutetime_info_t info;
2985 register thread_t thread;
2986
2987 if (*task_info_count < TASK_ABSOLUTETIME_INFO_COUNT) {
2988 error = KERN_INVALID_ARGUMENT;
2989 break;
2990 }
2991
2992 info = (task_absolutetime_info_t)task_info_out;
2993 info->threads_user = info->threads_system = 0;
2994
2995
2996 info->total_user = task->total_user_time;
2997 info->total_system = task->total_system_time;
2998
2999 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3000 uint64_t tval;
3001 spl_t x;
3002
3003 if (thread->options & TH_OPT_IDLE_THREAD)
3004 continue;
3005
3006 x = splsched();
3007 thread_lock(thread);
3008
3009 tval = timer_grab(&thread->user_timer);
3010 info->threads_user += tval;
3011 info->total_user += tval;
3012
3013 tval = timer_grab(&thread->system_timer);
3014 if (thread->precise_user_kernel_time) {
3015 info->threads_system += tval;
3016 info->total_system += tval;
3017 } else {
3018 /* system_timer may represent either sys or user */
3019 info->threads_user += tval;
3020 info->total_user += tval;
3021 }
3022
3023 thread_unlock(thread);
3024 splx(x);
3025 }
3026
3027
3028 *task_info_count = TASK_ABSOLUTETIME_INFO_COUNT;
3029 break;
3030 }
3031
3032 case TASK_DYLD_INFO:
3033 {
3034 task_dyld_info_t info;
3035
3036 /*
3037 * We added the format field to TASK_DYLD_INFO output. For
3038 * temporary backward compatibility, accept the fact that
3039 * clients may ask for the old version - distinguished by the
3040 * size of the expected result structure.
3041 */
3042 #define TASK_LEGACY_DYLD_INFO_COUNT \
3043 offsetof(struct task_dyld_info, all_image_info_format)/sizeof(natural_t)
3044
3045 if (*task_info_count < TASK_LEGACY_DYLD_INFO_COUNT) {
3046 error = KERN_INVALID_ARGUMENT;
3047 break;
3048 }
3049
3050 info = (task_dyld_info_t)task_info_out;
3051 info->all_image_info_addr = task->all_image_info_addr;
3052 info->all_image_info_size = task->all_image_info_size;
3053
3054 /* only set format on output for those expecting it */
3055 if (*task_info_count >= TASK_DYLD_INFO_COUNT) {
3056 info->all_image_info_format = task_has_64BitAddr(task) ?
3057 TASK_DYLD_ALL_IMAGE_INFO_64 :
3058 TASK_DYLD_ALL_IMAGE_INFO_32 ;
3059 *task_info_count = TASK_DYLD_INFO_COUNT;
3060 } else {
3061 *task_info_count = TASK_LEGACY_DYLD_INFO_COUNT;
3062 }
3063 break;
3064 }
3065
3066 case TASK_EXTMOD_INFO:
3067 {
3068 task_extmod_info_t info;
3069 void *p;
3070
3071 if (*task_info_count < TASK_EXTMOD_INFO_COUNT) {
3072 error = KERN_INVALID_ARGUMENT;
3073 break;
3074 }
3075
3076 info = (task_extmod_info_t)task_info_out;
3077
3078 p = get_bsdtask_info(task);
3079 if (p) {
3080 proc_getexecutableuuid(p, info->task_uuid, sizeof(info->task_uuid));
3081 } else {
3082 bzero(info->task_uuid, sizeof(info->task_uuid));
3083 }
3084 info->extmod_statistics = task->extmod_statistics;
3085 *task_info_count = TASK_EXTMOD_INFO_COUNT;
3086
3087 break;
3088 }
3089
3090 case TASK_KERNELMEMORY_INFO:
3091 {
3092 task_kernelmemory_info_t tkm_info;
3093 ledger_amount_t credit, debit;
3094
3095 if (*task_info_count < TASK_KERNELMEMORY_INFO_COUNT) {
3096 error = KERN_INVALID_ARGUMENT;
3097 break;
3098 }
3099
3100 tkm_info = (task_kernelmemory_info_t) task_info_out;
3101 tkm_info->total_palloc = 0;
3102 tkm_info->total_pfree = 0;
3103 tkm_info->total_salloc = 0;
3104 tkm_info->total_sfree = 0;
3105
3106 if (task == kernel_task) {
3107 /*
3108 * All shared allocs/frees from other tasks count against
3109 * the kernel private memory usage. If we are looking up
3110 * info for the kernel task, gather from everywhere.
3111 */
3112 task_unlock(task);
3113
3114 /* start by accounting for all the terminated tasks against the kernel */
3115 tkm_info->total_palloc = tasks_tkm_private.alloc + tasks_tkm_shared.alloc;
3116 tkm_info->total_pfree = tasks_tkm_private.free + tasks_tkm_shared.free;
3117
3118 /* count all other task/thread shared alloc/free against the kernel */
3119 lck_mtx_lock(&tasks_threads_lock);
3120
3121 /* XXX this really shouldn't be using the function parameter 'task' as a local var! */
3122 queue_iterate(&tasks, task, task_t, tasks) {
3123 if (task == kernel_task) {
3124 if (ledger_get_entries(task->ledger,
3125 task_ledgers.tkm_private, &credit,
3126 &debit) == KERN_SUCCESS) {
3127 tkm_info->total_palloc += credit;
3128 tkm_info->total_pfree += debit;
3129 }
3130 }
3131 if (!ledger_get_entries(task->ledger,
3132 task_ledgers.tkm_shared, &credit, &debit)) {
3133 tkm_info->total_palloc += credit;
3134 tkm_info->total_pfree += debit;
3135 }
3136 }
3137 lck_mtx_unlock(&tasks_threads_lock);
3138 } else {
3139 if (!ledger_get_entries(task->ledger,
3140 task_ledgers.tkm_private, &credit, &debit)) {
3141 tkm_info->total_palloc = credit;
3142 tkm_info->total_pfree = debit;
3143 }
3144 if (!ledger_get_entries(task->ledger,
3145 task_ledgers.tkm_shared, &credit, &debit)) {
3146 tkm_info->total_salloc = credit;
3147 tkm_info->total_sfree = debit;
3148 }
3149 task_unlock(task);
3150 }
3151
3152 *task_info_count = TASK_KERNELMEMORY_INFO_COUNT;
3153 return KERN_SUCCESS;
3154 }
3155
3156 /* OBSOLETE */
3157 case TASK_SCHED_FIFO_INFO:
3158 {
3159
3160 if (*task_info_count < POLICY_FIFO_BASE_COUNT) {
3161 error = KERN_INVALID_ARGUMENT;
3162 break;
3163 }
3164
3165 error = KERN_INVALID_POLICY;
3166 break;
3167 }
3168
3169 /* OBSOLETE */
3170 case TASK_SCHED_RR_INFO:
3171 {
3172 register policy_rr_base_t rr_base;
3173 uint32_t quantum_time;
3174 uint64_t quantum_ns;
3175
3176 if (*task_info_count < POLICY_RR_BASE_COUNT) {
3177 error = KERN_INVALID_ARGUMENT;
3178 break;
3179 }
3180
3181 rr_base = (policy_rr_base_t) task_info_out;
3182
3183 if (task != kernel_task) {
3184 error = KERN_INVALID_POLICY;
3185 break;
3186 }
3187
3188 rr_base->base_priority = task->priority;
3189
3190 quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
3191 absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
3192
3193 rr_base->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
3194
3195 *task_info_count = POLICY_RR_BASE_COUNT;
3196 break;
3197 }
3198
3199 /* OBSOLETE */
3200 case TASK_SCHED_TIMESHARE_INFO:
3201 {
3202 register policy_timeshare_base_t ts_base;
3203
3204 if (*task_info_count < POLICY_TIMESHARE_BASE_COUNT) {
3205 error = KERN_INVALID_ARGUMENT;
3206 break;
3207 }
3208
3209 ts_base = (policy_timeshare_base_t) task_info_out;
3210
3211 if (task == kernel_task) {
3212 error = KERN_INVALID_POLICY;
3213 break;
3214 }
3215
3216 ts_base->base_priority = task->priority;
3217
3218 *task_info_count = POLICY_TIMESHARE_BASE_COUNT;
3219 break;
3220 }
3221
3222 case TASK_SECURITY_TOKEN:
3223 {
3224 register security_token_t *sec_token_p;
3225
3226 if (*task_info_count < TASK_SECURITY_TOKEN_COUNT) {
3227 error = KERN_INVALID_ARGUMENT;
3228 break;
3229 }
3230
3231 sec_token_p = (security_token_t *) task_info_out;
3232
3233 *sec_token_p = task->sec_token;
3234
3235 *task_info_count = TASK_SECURITY_TOKEN_COUNT;
3236 break;
3237 }
3238
3239 case TASK_AUDIT_TOKEN:
3240 {
3241 register audit_token_t *audit_token_p;
3242
3243 if (*task_info_count < TASK_AUDIT_TOKEN_COUNT) {
3244 error = KERN_INVALID_ARGUMENT;
3245 break;
3246 }
3247
3248 audit_token_p = (audit_token_t *) task_info_out;
3249
3250 *audit_token_p = task->audit_token;
3251
3252 *task_info_count = TASK_AUDIT_TOKEN_COUNT;
3253 break;
3254 }
3255
3256 case TASK_SCHED_INFO:
3257 error = KERN_INVALID_ARGUMENT;
3258 break;
3259
3260 case TASK_EVENTS_INFO:
3261 {
3262 register task_events_info_t events_info;
3263 register thread_t thread;
3264
3265 if (*task_info_count < TASK_EVENTS_INFO_COUNT) {
3266 error = KERN_INVALID_ARGUMENT;
3267 break;
3268 }
3269
3270 events_info = (task_events_info_t) task_info_out;
3271
3272
3273 events_info->faults = task->faults;
3274 events_info->pageins = task->pageins;
3275 events_info->cow_faults = task->cow_faults;
3276 events_info->messages_sent = task->messages_sent;
3277 events_info->messages_received = task->messages_received;
3278 events_info->syscalls_mach = task->syscalls_mach;
3279 events_info->syscalls_unix = task->syscalls_unix;
3280
3281 events_info->csw = task->c_switch;
3282
3283 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3284 events_info->csw += thread->c_switch;
3285 events_info->syscalls_mach += thread->syscalls_mach;
3286 events_info->syscalls_unix += thread->syscalls_unix;
3287 }
3288
3289
3290 *task_info_count = TASK_EVENTS_INFO_COUNT;
3291 break;
3292 }
3293 case TASK_AFFINITY_TAG_INFO:
3294 {
3295 if (*task_info_count < TASK_AFFINITY_TAG_INFO_COUNT) {
3296 error = KERN_INVALID_ARGUMENT;
3297 break;
3298 }
3299
3300 error = task_affinity_info(task, task_info_out, task_info_count);
3301 break;
3302 }
3303 case TASK_POWER_INFO:
3304 {
3305 if (*task_info_count < TASK_POWER_INFO_COUNT) {
3306 error = KERN_INVALID_ARGUMENT;
3307 break;
3308 }
3309
3310 task_power_info_locked(task, (task_power_info_t)task_info_out, NULL);
3311 break;
3312 }
3313
3314 case TASK_POWER_INFO_V2:
3315 {
3316 if (*task_info_count < TASK_POWER_INFO_V2_COUNT) {
3317 error = KERN_INVALID_ARGUMENT;
3318 break;
3319 }
3320 task_power_info_v2_t tpiv2 = (task_power_info_v2_t) task_info_out;
3321 task_power_info_locked(task, &tpiv2->cpu_energy, &tpiv2->gpu_energy);
3322 break;
3323 }
3324
3325 case TASK_VM_INFO:
3326 case TASK_VM_INFO_PURGEABLE:
3327 {
3328 task_vm_info_t vm_info;
3329 vm_map_t map;
3330
3331 if (*task_info_count < TASK_VM_INFO_REV0_COUNT) {
3332 error = KERN_INVALID_ARGUMENT;
3333 break;
3334 }
3335
3336 vm_info = (task_vm_info_t)task_info_out;
3337
3338 if (task == kernel_task) {
3339 map = kernel_map;
3340 /* no lock */
3341 } else {
3342 map = task->map;
3343 vm_map_lock_read(map);
3344 }
3345
3346 vm_info->virtual_size = (typeof(vm_info->virtual_size))map->size;
3347 vm_info->region_count = map->hdr.nentries;
3348 vm_info->page_size = vm_map_page_size(map);
3349
3350 vm_info->resident_size = pmap_resident_count(map->pmap);
3351 vm_info->resident_size *= PAGE_SIZE;
3352 vm_info->resident_size_peak = pmap_resident_max(map->pmap);
3353 vm_info->resident_size_peak *= PAGE_SIZE;
3354
3355 #define _VM_INFO(_name) \
3356 vm_info->_name = ((mach_vm_size_t) map->pmap->stats._name) * PAGE_SIZE
3357
3358 _VM_INFO(device);
3359 _VM_INFO(device_peak);
3360 _VM_INFO(external);
3361 _VM_INFO(external_peak);
3362 _VM_INFO(internal);
3363 _VM_INFO(internal_peak);
3364 _VM_INFO(reusable);
3365 _VM_INFO(reusable_peak);
3366 _VM_INFO(compressed);
3367 _VM_INFO(compressed_peak);
3368 _VM_INFO(compressed_lifetime);
3369
3370 vm_info->purgeable_volatile_pmap = 0;
3371 vm_info->purgeable_volatile_resident = 0;
3372 vm_info->purgeable_volatile_virtual = 0;
3373 if (task == kernel_task) {
3374 /*
3375 * We do not maintain the detailed stats for the
3376 * kernel_pmap, so just count everything as
3377 * "internal"...
3378 */
3379 vm_info->internal = vm_info->resident_size;
3380 /*
3381 * ... but since the memory held by the VM compressor
3382 * in the kernel address space ought to be attributed
3383 * to user-space tasks, we subtract it from "internal"
3384 * to give memory reporting tools a more accurate idea
3385 * of what the kernel itself is actually using, instead
3386 * of making it look like the kernel is leaking memory
3387 * when the system is under memory pressure.
3388 */
3389 vm_info->internal -= (VM_PAGE_COMPRESSOR_COUNT *
3390 PAGE_SIZE);
3391 } else {
3392 mach_vm_size_t volatile_virtual_size;
3393 mach_vm_size_t volatile_resident_size;
3394 mach_vm_size_t volatile_compressed_size;
3395 mach_vm_size_t volatile_pmap_size;
3396 mach_vm_size_t volatile_compressed_pmap_size;
3397 kern_return_t kr;
3398
3399 if (flavor == TASK_VM_INFO_PURGEABLE) {
3400 kr = vm_map_query_volatile(
3401 map,
3402 &volatile_virtual_size,
3403 &volatile_resident_size,
3404 &volatile_compressed_size,
3405 &volatile_pmap_size,
3406 &volatile_compressed_pmap_size);
3407 if (kr == KERN_SUCCESS) {
3408 vm_info->purgeable_volatile_pmap =
3409 volatile_pmap_size;
3410 if (radar_20146450) {
3411 vm_info->compressed -=
3412 volatile_compressed_pmap_size;
3413 }
3414 vm_info->purgeable_volatile_resident =
3415 volatile_resident_size;
3416 vm_info->purgeable_volatile_virtual =
3417 volatile_virtual_size;
3418 }
3419 }
3420 vm_map_unlock_read(map);
3421 }
3422
3423 if (*task_info_count >= TASK_VM_INFO_COUNT) {
3424 vm_info->phys_footprint = 0;
3425 *task_info_count = TASK_VM_INFO_COUNT;
3426 } else {
3427 *task_info_count = TASK_VM_INFO_REV0_COUNT;
3428 }
3429
3430 break;
3431 }
3432
3433 case TASK_WAIT_STATE_INFO:
3434 {
3435 /*
3436 * Deprecated flavor. Currently allowing some results until all users
3437 * stop calling it. The results may not be accurate.
3438 */
3439 task_wait_state_info_t wait_state_info;
3440 uint64_t total_sfi_ledger_val = 0;
3441
3442 if (*task_info_count < TASK_WAIT_STATE_INFO_COUNT) {
3443 error = KERN_INVALID_ARGUMENT;
3444 break;
3445 }
3446
3447 wait_state_info = (task_wait_state_info_t) task_info_out;
3448
3449 wait_state_info->total_wait_state_time = 0;
3450 bzero(wait_state_info->_reserved, sizeof(wait_state_info->_reserved));
3451
3452 #if CONFIG_SCHED_SFI
3453 int i, prev_lentry = -1;
3454 int64_t val_credit, val_debit;
3455
3456 for (i = 0; i < MAX_SFI_CLASS_ID; i++){
3457 val_credit = 0;
3458 /*
3459 * checking with prev_lentry != entry ensures adjacent classes
3460 * which share the same ledger do not add wait times twice.
3461 * Note: Use ledger() call to get data for each individual sfi class.
3462 */
3463 if (prev_lentry != task_ledgers.sfi_wait_times[i] &&
3464 KERN_SUCCESS == ledger_get_entries(task->ledger,
3465 task_ledgers.sfi_wait_times[i], &val_credit, &val_debit)) {
3466 total_sfi_ledger_val += val_credit;
3467 }
3468 prev_lentry = task_ledgers.sfi_wait_times[i];
3469 }
3470
3471 #endif /* CONFIG_SCHED_SFI */
3472 wait_state_info->total_wait_sfi_state_time = total_sfi_ledger_val;
3473 *task_info_count = TASK_WAIT_STATE_INFO_COUNT;
3474
3475 break;
3476 }
3477 case TASK_VM_INFO_PURGEABLE_ACCOUNT:
3478 {
3479 #if DEVELOPMENT || DEBUG
3480 pvm_account_info_t acnt_info;
3481
3482 if (*task_info_count < PVM_ACCOUNT_INFO_COUNT) {
3483 error = KERN_INVALID_ARGUMENT;
3484 break;
3485 }
3486
3487 if (task_info_out == NULL) {
3488 error = KERN_INVALID_ARGUMENT;
3489 break;
3490 }
3491
3492 acnt_info = (pvm_account_info_t) task_info_out;
3493
3494 error = vm_purgeable_account(task, acnt_info);
3495
3496 *task_info_count = PVM_ACCOUNT_INFO_COUNT;
3497
3498 break;
3499 #else /* DEVELOPMENT || DEBUG */
3500 error = KERN_NOT_SUPPORTED;
3501 break;
3502 #endif /* DEVELOPMENT || DEBUG */
3503 }
3504 case TASK_FLAGS_INFO:
3505 {
3506 task_flags_info_t flags_info;
3507
3508 if (*task_info_count < TASK_FLAGS_INFO_COUNT) {
3509 error = KERN_INVALID_ARGUMENT;
3510 break;
3511 }
3512
3513 flags_info = (task_flags_info_t)task_info_out;
3514
3515 /* only publish the 64-bit flag of the task */
3516 flags_info->flags = task->t_flags & TF_64B_ADDR;
3517
3518 *task_info_count = TASK_FLAGS_INFO_COUNT;
3519 break;
3520 }
3521
3522 case TASK_DEBUG_INFO_INTERNAL:
3523 {
3524 #if DEVELOPMENT || DEBUG
3525 task_debug_info_internal_t dbg_info;
3526 if (*task_info_count < TASK_DEBUG_INFO_INTERNAL_COUNT) {
3527 error = KERN_NOT_SUPPORTED;
3528 break;
3529 }
3530
3531 if (task_info_out == NULL) {
3532 error = KERN_INVALID_ARGUMENT;
3533 break;
3534 }
3535 dbg_info = (task_debug_info_internal_t) task_info_out;
3536 dbg_info->ipc_space_size = 0;
3537 if (task->itk_space){
3538 dbg_info->ipc_space_size = task->itk_space->is_table_size;
3539 }
3540
3541 error = KERN_SUCCESS;
3542 *task_info_count = TASK_DEBUG_INFO_INTERNAL_COUNT;
3543 break;
3544 #else /* DEVELOPMENT || DEBUG */
3545 error = KERN_NOT_SUPPORTED;
3546 break;
3547 #endif /* DEVELOPMENT || DEBUG */
3548 }
3549 default:
3550 error = KERN_INVALID_ARGUMENT;
3551 }
3552
3553 task_unlock(task);
3554 return (error);
3555 }
3556
3557 /*
3558 * task_power_info
3559 *
3560 * Returns power stats for the task.
3561 * Note: Called with task locked.
3562 */
3563 void
3564 task_power_info_locked(
3565 task_t task,
3566 task_power_info_t info,
3567 gpu_energy_data_t ginfo)
3568 {
3569 thread_t thread;
3570 ledger_amount_t tmp;
3571
3572 task_lock_assert_owned(task);
3573
3574 ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
3575 (ledger_amount_t *)&info->task_interrupt_wakeups, &tmp);
3576 ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
3577 (ledger_amount_t *)&info->task_platform_idle_wakeups, &tmp);
3578
3579 info->task_timer_wakeups_bin_1 = task->task_timer_wakeups_bin_1;
3580 info->task_timer_wakeups_bin_2 = task->task_timer_wakeups_bin_2;
3581
3582 info->total_user = task->total_user_time;
3583 info->total_system = task->total_system_time;
3584
3585 if (ginfo) {
3586 ginfo->task_gpu_utilisation = task->task_gpu_ns;
3587 }
3588
3589 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3590 uint64_t tval;
3591 spl_t x;
3592
3593 if (thread->options & TH_OPT_IDLE_THREAD)
3594 continue;
3595
3596 x = splsched();
3597 thread_lock(thread);
3598
3599 info->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
3600 info->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
3601
3602 tval = timer_grab(&thread->user_timer);
3603 info->total_user += tval;
3604
3605 tval = timer_grab(&thread->system_timer);
3606 if (thread->precise_user_kernel_time) {
3607 info->total_system += tval;
3608 } else {
3609 /* system_timer may represent either sys or user */
3610 info->total_user += tval;
3611 }
3612
3613 if (ginfo) {
3614 ginfo->task_gpu_utilisation += ml_gpu_stat(thread);
3615 }
3616 thread_unlock(thread);
3617 splx(x);
3618 }
3619 }
3620
3621 /*
3622 * task_gpu_utilisation
3623 *
3624 * Returns the total gpu time used by all the threads of the task
3625 * (both dead and alive)
3626 */
3627 uint64_t
3628 task_gpu_utilisation(
3629 task_t task)
3630 {
3631 uint64_t gpu_time = 0;
3632 thread_t thread;
3633
3634 task_lock(task);
3635 gpu_time += task->task_gpu_ns;
3636
3637 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3638 spl_t x;
3639 x = splsched();
3640 thread_lock(thread);
3641 gpu_time += ml_gpu_stat(thread);
3642 thread_unlock(thread);
3643 splx(x);
3644 }
3645
3646 task_unlock(task);
3647 return gpu_time;
3648 }
3649
3650 kern_return_t
3651 task_purgable_info(
3652 task_t task,
3653 task_purgable_info_t *stats)
3654 {
3655 if (task == TASK_NULL || stats == NULL)
3656 return KERN_INVALID_ARGUMENT;
3657 /* Take task reference */
3658 task_reference(task);
3659 vm_purgeable_stats((vm_purgeable_info_t)stats, task);
3660 /* Drop task reference */
3661 task_deallocate(task);
3662 return KERN_SUCCESS;
3663 }
3664
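/*
 * task_vtimer_set:
 *
 * Enable a vtimer on the task and snapshot each thread's current timer
 * values as the baseline that task_vtimer_update() later measures
 * deltas against.
 */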
3665 void
3666 task_vtimer_set(
3667 task_t task,
3668 integer_t which)
3669 {
3670 thread_t thread;
3671 spl_t x;
3672
3673 /* assert(task == current_task()); */ /* bogus assert 4803227 4807483 */
3674
3675 task_lock(task);
3676
3677 task->vtimers |= which;
3678
3679 switch (which) {
3680
3681 case TASK_VTIMER_USER:
3682 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3683 x = splsched();
3684 thread_lock(thread);
3685 if (thread->precise_user_kernel_time)
3686 thread->vtimer_user_save = timer_grab(&thread->user_timer);
3687 else
3688 thread->vtimer_user_save = timer_grab(&thread->system_timer);
3689 thread_unlock(thread);
3690 splx(x);
3691 }
3692 break;
3693
3694 case TASK_VTIMER_PROF:
3695 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3696 x = splsched();
3697 thread_lock(thread);
3698 thread->vtimer_prof_save = timer_grab(&thread->user_timer);
3699 thread->vtimer_prof_save += timer_grab(&thread->system_timer);
3700 thread_unlock(thread);
3701 splx(x);
3702 }
3703 break;
3704
3705 case TASK_VTIMER_RLIM:
3706 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3707 x = splsched();
3708 thread_lock(thread);
3709 thread->vtimer_rlim_save = timer_grab(&thread->user_timer);
3710 thread->vtimer_rlim_save += timer_grab(&thread->system_timer);
3711 thread_unlock(thread);
3712 splx(x);
3713 }
3714 break;
3715 }
3716
3717 task_unlock(task);
3718 }
3719
3720 void
3721 task_vtimer_clear(
3722 task_t task,
3723 integer_t which)
3724 {
3725 assert(task == current_task());
3726
3727 task_lock(task);
3728
3729 task->vtimers &= ~which;
3730
3731 task_unlock(task);
3732 }
3733
3734 void
3735 task_vtimer_update(
3736 __unused
3737 task_t task,
3738 integer_t which,
3739 uint32_t *microsecs)
3740 {
3741 thread_t thread = current_thread();
3742 uint32_t tdelt;
3743 clock_sec_t secs;
3744 uint64_t tsum;
3745
3746 assert(task == current_task());
3747
3748 assert(task->vtimers & which);
3749
3750 secs = tdelt = 0;
3751
3752 switch (which) {
3753
3754 case TASK_VTIMER_USER:
3755 if (thread->precise_user_kernel_time) {
3756 tdelt = (uint32_t)timer_delta(&thread->user_timer,
3757 &thread->vtimer_user_save);
3758 } else {
3759 tdelt = (uint32_t)timer_delta(&thread->system_timer,
3760 &thread->vtimer_user_save);
3761 }
3762 absolutetime_to_microtime(tdelt, &secs, microsecs);
3763 break;
3764
3765 case TASK_VTIMER_PROF:
3766 tsum = timer_grab(&thread->user_timer);
3767 tsum += timer_grab(&thread->system_timer);
3768 tdelt = (uint32_t)(tsum - thread->vtimer_prof_save);
3769 absolutetime_to_microtime(tdelt, &secs, microsecs);
3770 /* if the time delta is smaller than a usec, ignore */
3771 if (*microsecs != 0)
3772 thread->vtimer_prof_save = tsum;
3773 break;
3774
3775 case TASK_VTIMER_RLIM:
3776 tsum = timer_grab(&thread->user_timer);
3777 tsum += timer_grab(&thread->system_timer);
3778 tdelt = (uint32_t)(tsum - thread->vtimer_rlim_save);
3779 thread->vtimer_rlim_save = tsum;
3780 absolutetime_to_microtime(tdelt, &secs, microsecs);
3781 break;
3782 }
3783
3784 }
3785
3786 /*
3787 * task_assign:
3788 *
3789 * Change the assigned processor set for the task
3790 */
3791 kern_return_t
3792 task_assign(
3793 __unused task_t task,
3794 __unused processor_set_t new_pset,
3795 __unused boolean_t assign_threads)
3796 {
3797 return(KERN_FAILURE);
3798 }
3799
3800 /*
3801 * task_assign_default:
3802 *
3803 * Version of task_assign to assign to default processor set.
3804 */
3805 kern_return_t
3806 task_assign_default(
3807 task_t task,
3808 boolean_t assign_threads)
3809 {
3810 return (task_assign(task, &pset0, assign_threads));
3811 }
3812
3813 /*
3814 * task_get_assignment
3815 *
3816 * Return name of processor set that task is assigned to.
3817 */
3818 kern_return_t
3819 task_get_assignment(
3820 task_t task,
3821 processor_set_t *pset)
3822 {
3823 if (!task || !task->active)
3824 return KERN_FAILURE;
3825
3826 *pset = &pset0;
3827
3828 return KERN_SUCCESS;
3829 }
3830
3831 uint64_t
3832 get_task_dispatchqueue_offset(
3833 task_t task)
3834 {
3835 return task->dispatchqueue_offset;
3836 }
3837
3838 /*
3839 * task_policy
3840 *
3841 * Set scheduling policy and parameters, both base and limit, for
3842 * the given task. Policy must be a policy which is enabled for the
3843 * processor set. Change contained threads if requested.
3844 */
3845 kern_return_t
3846 task_policy(
3847 __unused task_t task,
3848 __unused policy_t policy_id,
3849 __unused policy_base_t base,
3850 __unused mach_msg_type_number_t count,
3851 __unused boolean_t set_limit,
3852 __unused boolean_t change)
3853 {
3854 return(KERN_FAILURE);
3855 }
3856
3857 /*
3858 * task_set_policy
3859 *
3860 * Set scheduling policy and parameters, both base and limit, for
3861 * the given task. Policy can be any policy implemented by the
3862 * processor set, whether enabled or not. Change contained threads
3863 * if requested.
3864 */
3865 kern_return_t
3866 task_set_policy(
3867 __unused task_t task,
3868 __unused processor_set_t pset,
3869 __unused policy_t policy_id,
3870 __unused policy_base_t base,
3871 __unused mach_msg_type_number_t base_count,
3872 __unused policy_limit_t limit,
3873 __unused mach_msg_type_number_t limit_count,
3874 __unused boolean_t change)
3875 {
3876 return(KERN_FAILURE);
3877 }
3878
3879 kern_return_t
3880 task_set_ras_pc(
3881 __unused task_t task,
3882 __unused vm_offset_t pc,
3883 __unused vm_offset_t endpc)
3884 {
3885 return KERN_FAILURE;
3886 }
3887
3888 void
3889 task_synchronizer_destroy_all(task_t task)
3890 {
3891 /*
3892 * Destroy owned semaphores
3893 */
3894 semaphore_destroy_all(task);
3895 }
3896
3897 /*
3898 * Install default (machine-dependent) initial thread state
3899 * on the task. Subsequent thread creation will have this initial
3900 * state set on the thread by machine_thread_inherit_taskwide().
3901 * Flavors and structures are exactly the same as those passed to thread_set_state()
3902 */
3903 kern_return_t
3904 task_set_state(
3905 task_t task,
3906 int flavor,
3907 thread_state_t state,
3908 mach_msg_type_number_t state_count)
3909 {
3910 kern_return_t ret;
3911
3912 if (task == TASK_NULL) {
3913 return (KERN_INVALID_ARGUMENT);
3914 }
3915
3916 task_lock(task);
3917
3918 if (!task->active) {
3919 task_unlock(task);
3920 return (KERN_FAILURE);
3921 }
3922
3923 ret = machine_task_set_state(task, flavor, state, state_count);
3924
3925 task_unlock(task);
3926 return ret;
3927 }
3928
3929 /*
3930 * Examine the default (machine-dependent) initial thread state
3931 * on the task, as set by task_set_state(). Flavors and structures
3932 * are exactly the same as those passed to thread_get_state().
3933 */
3934 kern_return_t
3935 task_get_state(
3936 task_t task,
3937 int flavor,
3938 thread_state_t state,
3939 mach_msg_type_number_t *state_count)
3940 {
3941 kern_return_t ret;
3942
3943 if (task == TASK_NULL) {
3944 return (KERN_INVALID_ARGUMENT);
3945 }
3946
3947 task_lock(task);
3948
3949 if (!task->active) {
3950 task_unlock(task);
3951 return (KERN_FAILURE);
3952 }
3953
3954 ret = machine_task_get_state(task, flavor, state, state_count);
3955
3956 task_unlock(task);
3957 return ret;
3958 }
3959
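/*
 * Illustrative sketch, not part of the original file: a hypothetical
 * in-kernel caller could preset a machine-dependent state template on a
 * task so that threads created afterwards inherit it, then read it back.
 * The flavor, structure and count are the same ones thread_set_state() and
 * thread_get_state() accept, e.g. x86_DEBUG_STATE64 on x86_64:
 *
 *      x86_debug_state64_t dbg;
 *      mach_msg_type_number_t count = x86_DEBUG_STATE64_COUNT;
 *      kern_return_t kr;
 *
 *      bzero(&dbg, sizeof(dbg));
 *      kr = task_set_state(task, x86_DEBUG_STATE64,
 *          (thread_state_t)&dbg, x86_DEBUG_STATE64_COUNT);
 *      if (kr == KERN_SUCCESS)
 *              kr = task_get_state(task, x86_DEBUG_STATE64,
 *                  (thread_state_t)&dbg, &count);
 */
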
3960 #if CONFIG_JETSAM
3961 #define HWM_USERCORE_MINSPACE 250 // free space (in MB) required *after* core file creation
3962
3963 void __attribute__((noinline))
3964 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb)
3965 {
3966 task_t task = current_task();
3967 int pid = 0;
3968 const char *procname = "unknown";
3969 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
3970
3971 #ifdef MACH_BSD
3972 pid = proc_selfpid();
3973
3974 if (pid == 1) {
3975 /*
3976 * Cannot have ReportCrash analyzing
3977 * a suspended initproc.
3978 */
3979 return;
3980 }
3981
3982 if (task->bsd_info != NULL)
3983 procname = proc_name_address(current_task()->bsd_info);
3984 #endif
3985
3986 if (hwm_user_cores) {
3987 int error;
3988 uint64_t starttime, end;
3989 clock_sec_t secs = 0;
3990 uint32_t microsecs = 0;
3991
3992 starttime = mach_absolute_time();
3993 /*
3994 * Trigger a coredump of this process. Don't proceed unless we know we won't
3995 * be filling up the disk; and ignore the core size resource limit for this
3996 * core file.
3997 */
3998 if ((error = coredump(current_task()->bsd_info, HWM_USERCORE_MINSPACE, COREDUMP_IGNORE_ULIMIT)) != 0) {
3999 printf("couldn't take coredump of %s[%d]: %d\n", procname, pid, error);
4000 }
4001 /*
4002 * coredump() leaves the task suspended.
4003 */
4004 task_resume_internal(current_task());
4005
4006 end = mach_absolute_time();
4007 absolutetime_to_microtime(end - starttime, &secs, &microsecs);
4008 printf("coredump of %s[%d] taken in %d secs %d microsecs\n",
4009 proc_name_address(current_task()->bsd_info), pid, (int)secs, microsecs);
4010 }
4011
4012 if (disable_exc_resource) {
4013 printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
4014 "suppressed by a boot-arg.\n", procname, pid, max_footprint_mb);
4015 return;
4016 }
4017
4018 /*
4019 * A task that has triggered an EXC_RESOURCE should not be
4020 * jetsammed when the device is under memory pressure. Here
4021 * we set the P_MEMSTAT_TERMINATED flag so that the process
4022 * will be skipped if the memorystatus_thread wakes up.
4023 */
4024 proc_memstat_terminated(current_task()->bsd_info, TRUE);
4025
4026 printf("process %s[%d] crossed memory high watermark (%d MB); sending "
4027 "EXC_RESOURCE.\n", procname, pid, max_footprint_mb);
4028
4029 code[0] = code[1] = 0;
4030 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY);
4031 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK);
4032 EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb);
4033
4034 /*
4035 * Use the _internal_ variant so that no user-space
4036 * process can resume our task from under us.
4037 */
4038 task_suspend_internal(task);
4039 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
4040 task_resume_internal(task);
4041
4042 /*
4043 * After the EXC_RESOURCE has been handled, we must clear the
4044 * P_MEMSTAT_TERMINATED flag so that the process can again be
4045 * considered for jetsam if the memorystatus_thread wakes up.
4046 */
4047 proc_memstat_terminated(current_task()->bsd_info, FALSE); /* clear the flag */
4048 }
4049
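/*
 * Illustrative sketch, not part of the original file: an EXC_RESOURCE
 * listener could recover the fields packed into code[0] above with the
 * decode macros assumed to mirror the encoders in <kern/exc_resource.h>:
 *
 *      if (EXC_RESOURCE_DECODE_RESOURCE_TYPE(code[0]) == RESOURCE_TYPE_MEMORY &&
 *          EXC_RESOURCE_DECODE_FLAVOR(code[0]) == FLAVOR_HIGH_WATERMARK) {
 *              int limit_mb = EXC_RESOURCE_HWM_DECODE_LIMIT(code[0]);
 *              ...
 *      }
 *
 * where limit_mb recovers the max_footprint_mb encoded by the sender.
 */
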
4050 /*
4051 * Callback invoked when a task exceeds its physical footprint limit.
4052 */
4053 void
4054 task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1)
4055 {
4056 ledger_amount_t max_footprint, max_footprint_mb;
4057 ledger_amount_t footprint_after_purge;
4058 task_t task;
4059
4060 if (warning == LEDGER_WARNING_DIPPED_BELOW) {
4061 /*
4062 * Task memory limits only provide a warning on the way up.
4063 */
4064 return;
4065 }
4066
4067 task = current_task();
4068
4069 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &max_footprint);
4070 max_footprint_mb = max_footprint >> 20;
4071
4072 /*
4073 * Try to purge all "volatile" memory in that task first.
4074 */
4075 (void) task_purge_volatile_memory(task);
4076 /* are we still over the limit ? */
4077 ledger_get_balance(task->ledger,
4078 task_ledgers.phys_footprint,
4079 &footprint_after_purge);
4080 if ((!warning &&
4081 footprint_after_purge <= max_footprint) ||
4082 (warning &&
4083 footprint_after_purge <= ((max_footprint *
4084 PHYS_FOOTPRINT_WARNING_LEVEL) / 100))) {
4085 /* all better now */
4086 ledger_reset_callback_state(task->ledger,
4087 task_ledgers.phys_footprint);
4088 return;
4089 }
4090 /* still over the limit after purging... */
4091
4092 /*
4093 * If this is an actual violation (not a warning),
4094 * generate a non-fatal high watermark EXC_RESOURCE.
4095 */
4096 if ((warning == 0) && (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION)) {
4097 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND((int)max_footprint_mb);
4098 }
4099
4100 memorystatus_on_ledger_footprint_exceeded((warning == LEDGER_WARNING_ROSE_ABOVE) ? TRUE : FALSE,
4101 (int)max_footprint_mb);
4102 }
4103
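/*
 * Worked example of the post-purge check above (values illustrative;
 * PHYS_FOOTPRINT_WARNING_LEVEL assumed to be 80 here): with a 500 MB limit,
 * a warning-level callback is treated as cleared once the footprint after
 * purging drops to (500 * 80) / 100 = 400 MB or below, while an actual
 * violation (warning == 0) only clears once the footprint is back at or
 * below the full 500 MB limit.
 */
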
4104 extern int proc_check_footprint_priv(void);
4105
4106 kern_return_t
4107 task_set_phys_footprint_limit(
4108 task_t task,
4109 int new_limit_mb,
4110 int *old_limit_mb)
4111 {
4112 kern_return_t error;
4113
4114 if ((error = proc_check_footprint_priv())) {
4115 return (KERN_NO_ACCESS);
4116 }
4117
4118 return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, FALSE);
4119 }
4120
4121 kern_return_t
4122 task_convert_phys_footprint_limit(
4123 int limit_mb,
4124 int *converted_limit_mb)
4125 {
4126 if (limit_mb == -1) {
4127 /*
4128 * No limit
4129 */
4130 if (max_task_footprint != 0) {
4131 *converted_limit_mb = (int)(max_task_footprint / 1024 / 1024); /* bytes to MB */
4132 } else {
4133 *converted_limit_mb = (int)(LEDGER_LIMIT_INFINITY >> 20);
4134 }
4135 } else {
4136 /* nothing to convert */
4137 *converted_limit_mb = limit_mb;
4138 }
4139 return (KERN_SUCCESS);
4140 }
4141
4142
4143 kern_return_t
4144 task_set_phys_footprint_limit_internal(
4145 task_t task,
4146 int new_limit_mb,
4147 int *old_limit_mb,
4148 boolean_t trigger_exception)
4149 {
4150 ledger_amount_t old;
4151
4152 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &old);
4153
4154 if (old_limit_mb) {
4155 /*
4156 * Check that limit >> 20 will not give an "unexpected" 32-bit
4157 * result. There are, however, implicit assumptions that -1 mb limit
4158 * equates to LEDGER_LIMIT_INFINITY.
4159 */
4160 assert(((old & 0xFFF0000000000000LL) == 0) || (old == LEDGER_LIMIT_INFINITY));
4161 *old_limit_mb = (int)(old >> 20);
4162 }
4163
4164 if (new_limit_mb == -1) {
4165 /*
4166 * Caller wishes to remove the limit.
4167 */
4168 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
4169 max_task_footprint ? max_task_footprint : LEDGER_LIMIT_INFINITY,
4170 max_task_footprint ? PHYS_FOOTPRINT_WARNING_LEVEL : 0);
4171 return (KERN_SUCCESS);
4172 }
4173
4174 #ifdef CONFIG_NOMONITORS
4175 return (KERN_SUCCESS);
4176 #endif /* CONFIG_NOMONITORS */
4177
4178 task_lock(task);
4179
4180 if (trigger_exception) {
4181 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION;
4182 } else {
4183 task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION;
4184 }
4185
4186 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
4187 (ledger_amount_t)new_limit_mb << 20, PHYS_FOOTPRINT_WARNING_LEVEL);
4188
4189 if (task == current_task()) {
4190 ledger_check_new_balance(task->ledger, task_ledgers.phys_footprint);
4191 }
4192
4193 task_unlock(task);
4194
4195 return (KERN_SUCCESS);
4196 }
4197
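/*
 * Illustrative sketch, not part of the original file: an in-kernel policy
 * path could move a task to a 500 MB footprint limit without the
 * EXC_RESOURCE trigger and later restore the previous value (limits are in
 * MB; the routine above converts them to ledger units with "<< 20"):
 *
 *      int old_limit_mb;
 *
 *      task_set_phys_footprint_limit_internal(task, 500, &old_limit_mb, FALSE);
 *      ...
 *      task_set_phys_footprint_limit_internal(task, old_limit_mb, NULL, FALSE);
 */
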
4198 kern_return_t
4199 task_get_phys_footprint_limit(
4200 task_t task,
4201 int *limit_mb)
4202 {
4203 ledger_amount_t limit;
4204
4205 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &limit);
4206 /*
4207 * Check that limit >> 20 will not give an "unexpected" signed, 32-bit
4208 * result. There are, however, implicit assumptions that -1 mb limit
4209 * equates to LEDGER_LIMIT_INFINITY.
4210 */
4211 assert(((limit & 0xFFF0000000000000LL) == 0) || (limit == LEDGER_LIMIT_INFINITY));
4212 *limit_mb = (int)(limit >> 20);
4213
4214 return (KERN_SUCCESS);
4215 }
4216 #else /* CONFIG_JETSAM */
4217 kern_return_t
4218 task_set_phys_footprint_limit(
4219 __unused task_t task,
4220 __unused int new_limit_mb,
4221 __unused int *old_limit_mb)
4222 {
4223 return (KERN_FAILURE);
4224 }
4225
4226 kern_return_t
4227 task_get_phys_footprint_limit(
4228 __unused task_t task,
4229 __unused int *limit_mb)
4230 {
4231 return (KERN_FAILURE);
4232 }
4233 #endif /* CONFIG_JETSAM */
4234
4235 /*
4236 * We need to export some functions to other components that
4237 * are currently implemented in macros within the osfmk
4238 * component. Just export them as functions of the same name.
4239 */
4240 boolean_t is_kerneltask(task_t t)
4241 {
4242 if (t == kernel_task)
4243 return (TRUE);
4244
4245 return (FALSE);
4246 }
4247
4248 int
4249 check_for_tasksuspend(task_t task)
4250 {
4251
4252 if (task == TASK_NULL)
4253 return (0);
4254
4255 return (task->suspend_count > 0);
4256 }
4257
4258 #undef current_task
4259 task_t current_task(void);
4260 task_t current_task(void)
4261 {
4262 return (current_task_fast());
4263 }
4264
4265 #undef task_reference
4266 void task_reference(task_t task);
4267 void
4268 task_reference(
4269 task_t task)
4270 {
4271 if (task != TASK_NULL)
4272 task_reference_internal(task);
4273 }
4274
4275 /* defined in bsd/kern/kern_prot.c */
4276 extern int get_audit_token_pid(audit_token_t *audit_token);
4277
4278 int task_pid(task_t task)
4279 {
4280 if (task)
4281 return get_audit_token_pid(&task->audit_token);
4282 return -1;
4283 }
4284
4285
4286 /*
4287 * This routine is always called with the task lock held.
4288 * It returns a thread handle without taking a reference, since the
4289 * caller operates on the thread while still holding the task lock.
4290 */
4291 thread_t
4292 task_findtid(task_t task, uint64_t tid)
4293 {
4294 thread_t thread = THREAD_NULL;
4295
4296 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4297 if (thread->thread_id == tid)
4298 return(thread);
4299 }
4300 return(THREAD_NULL);
4301 }
4302
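/*
 * Illustrative sketch of the expected calling convention (hypothetical
 * caller): the task lock protects both the iteration and the returned,
 * unreferenced thread pointer, so the thread must only be used while the
 * lock is still held.
 *
 *      task_lock(task);
 *      thread_t thread = task_findtid(task, tid);
 *      if (thread != THREAD_NULL) {
 *              ... operate on thread under the task lock ...
 *      }
 *      task_unlock(task);
 */
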
4303 /*
4304 * Control the CPU usage monitor for a task.
4305 */
4306 kern_return_t
4307 task_cpu_usage_monitor_ctl(task_t task, uint32_t *flags)
4308 {
4309 int error = KERN_SUCCESS;
4310
4311 if (*flags & CPUMON_MAKE_FATAL) {
4312 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_CPUMON;
4313 } else {
4314 error = KERN_INVALID_ARGUMENT;
4315 }
4316
4317 return error;
4318 }
4319
4320 /*
4321 * Control the wakeups monitor for a task.
4322 */
4323 kern_return_t
4324 task_wakeups_monitor_ctl(task_t task, uint32_t *flags, int32_t *rate_hz)
4325 {
4326 ledger_t ledger = task->ledger;
4327
4328 task_lock(task);
4329 if (*flags & WAKEMON_GET_PARAMS) {
4330 ledger_amount_t limit;
4331 uint64_t period;
4332
4333 ledger_get_limit(ledger, task_ledgers.interrupt_wakeups, &limit);
4334 ledger_get_period(ledger, task_ledgers.interrupt_wakeups, &period);
4335
4336 if (limit != LEDGER_LIMIT_INFINITY) {
4337 /*
4338 * An active limit means the wakeups monitor is enabled.
4339 */
4340 *rate_hz = (int32_t)(limit / (int64_t)(period / NSEC_PER_SEC));
4341 *flags = WAKEMON_ENABLE;
4342 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
4343 *flags |= WAKEMON_MAKE_FATAL;
4344 }
4345 } else {
4346 *flags = WAKEMON_DISABLE;
4347 *rate_hz = -1;
4348 }
4349
4350 /*
4351 * If WAKEMON_GET_PARAMS is present in flags, all other flags are ignored.
4352 */
4353 task_unlock(task);
4354 return KERN_SUCCESS;
4355 }
4356
4357 if (*flags & WAKEMON_ENABLE) {
4358 if (*flags & WAKEMON_SET_DEFAULTS) {
4359 *rate_hz = task_wakeups_monitor_rate;
4360 }
4361
4362 #ifndef CONFIG_NOMONITORS
4363 if (*flags & WAKEMON_MAKE_FATAL) {
4364 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
4365 }
4366 #endif /* CONFIG_NOMONITORS */
4367
4368 if (*rate_hz < 0) {
4369 task_unlock(task);
4370 return KERN_INVALID_ARGUMENT;
4371 }
4372
4373 #ifndef CONFIG_NOMONITORS
4374 ledger_set_limit(ledger, task_ledgers.interrupt_wakeups, *rate_hz * task_wakeups_monitor_interval,
4375 task_wakeups_monitor_ustackshots_trigger_pct);
4376 ledger_set_period(ledger, task_ledgers.interrupt_wakeups, task_wakeups_monitor_interval * NSEC_PER_SEC);
4377 ledger_enable_callback(ledger, task_ledgers.interrupt_wakeups);
4378 #endif /* CONFIG_NOMONITORS */
4379 } else if (*flags & WAKEMON_DISABLE) {
4380 /*
4381 * Caller wishes to disable wakeups monitor on the task.
4382 *
4383 * Disable telemetry if it was triggered by the wakeups monitor, and
4384 * remove the limit & callback on the wakeups ledger entry.
4385 */
4386 #if CONFIG_TELEMETRY
4387 telemetry_task_ctl_locked(task, TF_WAKEMON_WARNING, 0);
4388 #endif
4389 ledger_disable_refill(ledger, task_ledgers.interrupt_wakeups);
4390 ledger_disable_callback(ledger, task_ledgers.interrupt_wakeups);
4391 }
4392
4393 task_unlock(task);
4394 return KERN_SUCCESS;
4395 }
4396
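/*
 * Illustrative sketch, not part of the original file: a hypothetical caller
 * could enable the wakeups monitor at the default rate and make overruns
 * fatal.  The routine takes the task lock itself, so it is called unlocked;
 * WAKEMON_SET_DEFAULTS overwrites rate_hz with task_wakeups_monitor_rate
 * before the ledger limit of rate_hz * task_wakeups_monitor_interval is
 * installed.
 *
 *      uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS | WAKEMON_MAKE_FATAL;
 *      int32_t rate_hz = 0;
 *      kern_return_t kr = task_wakeups_monitor_ctl(task, &flags, &rate_hz);
 */
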
4397 void
4398 task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1)
4399 {
4400 if (warning == LEDGER_WARNING_ROSE_ABOVE) {
4401 #if CONFIG_TELEMETRY
4402 /*
4403 * This task is in danger of violating the wakeups monitor. Enable telemetry on this task
4404 * so there are micro-stackshots available if and when EXC_RESOURCE is triggered.
4405 */
4406 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 1);
4407 #endif
4408 return;
4409 }
4410
4411 #if CONFIG_TELEMETRY
4412 /*
4413 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
4414 * exceeded the limit, turn telemetry off for the task.
4415 */
4416 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 0);
4417 #endif
4418
4419 if (warning == 0) {
4420 THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE();
4421 }
4422 }
4423
4424 void __attribute__((noinline))
4425 THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE(void)
4426 {
4427 task_t task = current_task();
4428 int pid = 0;
4429 const char *procname = "unknown";
4430 uint64_t observed_wakeups_rate;
4431 uint64_t permitted_wakeups_rate;
4432 uint64_t observation_interval;
4433 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
4434 struct ledger_entry_info lei;
4435
4436 #ifdef MACH_BSD
4437 pid = proc_selfpid();
4438 if (task->bsd_info != NULL)
4439 procname = proc_name_address(current_task()->bsd_info);
4440 #endif
4441
4442 ledger_get_entry_info(task->ledger, task_ledgers.interrupt_wakeups, &lei);
4443
4444 /*
4445 * Disable the exception notification so we don't overwhelm
4446 * the listener with an endless stream of redundant exceptions.
4447 */
4448 uint32_t flags = WAKEMON_DISABLE;
4449 task_wakeups_monitor_ctl(task, &flags, NULL);
4450
4451 observed_wakeups_rate = (lei.lei_balance * (int64_t)NSEC_PER_SEC) / lei.lei_last_refill;
4452 permitted_wakeups_rate = lei.lei_limit / task_wakeups_monitor_interval;
4453 observation_interval = lei.lei_refill_period / NSEC_PER_SEC;
4454
4455 if (disable_exc_resource) {
4456 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
4457 "suppressed by a boot-arg\n", procname, pid);
4458 return;
4459 }
4460 if (audio_active) {
4461 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
4462 "suppressed due to audio playback\n", procname, pid);
4463 return;
4464 }
4465 printf("process %s[%d] caught causing excessive wakeups. Observed wakeups rate "
4466 "(per sec): %lld; Maximum permitted wakeups rate (per sec): %lld; Observation "
4467 "period: %lld seconds; Task lifetime number of wakeups: %lld\n",
4468 procname, pid, observed_wakeups_rate, permitted_wakeups_rate,
4469 observation_interval, lei.lei_credit);
4470
4471 code[0] = code[1] = 0;
4472 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_WAKEUPS);
4473 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_WAKEUPS_MONITOR);
4474 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_PERMITTED(code[0], task_wakeups_monitor_rate);
4475 EXC_RESOURCE_CPUMONITOR_ENCODE_OBSERVATION_INTERVAL(code[0], observation_interval);
4476 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_OBSERVED(code[1], lei.lei_balance * (int64_t)NSEC_PER_SEC / lei.lei_last_refill);
4477 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
4478
4479 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
4480 task_terminate_internal(task);
4481 }
4482 }
4483
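/*
 * Worked example of the rates reported above (illustrative numbers): with
 * lei_balance == 4500 wakeups accumulated over lei_last_refill ==
 * 30 * NSEC_PER_SEC, the observed rate is 4500 / 30 = 150 wakeups/sec;
 * with lei_limit == 45000 and task_wakeups_monitor_interval == 300 seconds,
 * the permitted rate is 45000 / 300 = 150 wakeups/sec.
 */
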
4484 kern_return_t
4485 task_purge_volatile_memory(
4486 task_t task)
4487 {
4488 vm_map_t map;
4489 int num_object_purged;
4490
4491 if (task == TASK_NULL)
4492 return KERN_INVALID_TASK;
4493
4494 task_lock(task);
4495
4496 if (!task->active) {
4497 task_unlock(task);
4498 return KERN_INVALID_TASK;
4499 }
4500 map = task->map;
4501 if (map == VM_MAP_NULL) {
4502 task_unlock(task);
4503 return KERN_INVALID_TASK;
4504 }
4505 vm_map_reference(task->map);
4506
4507 task_unlock(task);
4508
4509 num_object_purged = vm_map_purge(map);
4510 vm_map_deallocate(map);
4511
4512 return KERN_SUCCESS;
4513 }
4514
4515 /* Placeholders for the task set/get voucher interfaces */
4516 kern_return_t
4517 task_get_mach_voucher(
4518 task_t task,
4519 mach_voucher_selector_t __unused which,
4520 ipc_voucher_t *voucher)
4521 {
4522 if (TASK_NULL == task)
4523 return KERN_INVALID_TASK;
4524
4525 *voucher = NULL;
4526 return KERN_SUCCESS;
4527 }
4528
4529 kern_return_t
4530 task_set_mach_voucher(
4531 task_t task,
4532 ipc_voucher_t __unused voucher)
4533 {
4534 if (TASK_NULL == task)
4535 return KERN_INVALID_TASK;
4536
4537 return KERN_SUCCESS;
4538 }
4539
4540 kern_return_t
4541 task_swap_mach_voucher(
4542 task_t task,
4543 ipc_voucher_t new_voucher,
4544 ipc_voucher_t *in_out_old_voucher)
4545 {
4546 if (TASK_NULL == task)
4547 return KERN_INVALID_TASK;
4548
4549 *in_out_old_voucher = new_voucher;
4550 return KERN_SUCCESS;
4551 }
4552
4553 void task_set_gpu_denied(task_t task, boolean_t denied)
4554 {
4555 task_lock(task);
4556
4557 if (denied) {
4558 task->t_flags |= TF_GPU_DENIED;
4559 } else {
4560 task->t_flags &= ~TF_GPU_DENIED;
4561 }
4562
4563 task_unlock(task);
4564 }
4565
4566 boolean_t task_is_gpu_denied(task_t task)
4567 {
4568 /* We don't need the lock to read this flag */
4569 return (task->t_flags & TF_GPU_DENIED) ? TRUE : FALSE;
4570 }
4571
4572 void task_update_logical_writes(task_t task, uint32_t io_size, int flags)
4573 {
4574 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_DATA_WRITE)) | DBG_FUNC_NONE, task_pid(task), io_size, flags, 0, 0);
4575 switch(flags) {
4576 case TASK_WRITE_IMMEDIATE:
4577 OSAddAtomic64(io_size, (SInt64 *)&(task->task_immediate_writes));
4578 break;
4579 case TASK_WRITE_DEFERRED:
4580 OSAddAtomic64(io_size, (SInt64 *)&(task->task_deferred_writes));
4581 break;
4582 case TASK_WRITE_INVALIDATED:
4583 OSAddAtomic64(io_size, (SInt64 *)&(task->task_invalidated_writes));
4584 break;
4585 case TASK_WRITE_METADATA:
4586 OSAddAtomic64(io_size, (SInt64 *)&(task->task_metadata_writes));
4587 break;
4588 }
4589 return;
4590 }
4591
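/*
 * Illustrative sketch, not part of the original file: a hypothetical I/O
 * accounting call site could charge a deferred logical write against the
 * current task (io_size is assumed to be a byte count; the flag selects
 * which counter is bumped above):
 *
 *      task_update_logical_writes(current_task(), (uint32_t)write_size,
 *          TASK_WRITE_DEFERRED);
 */
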
4592 queue_head_t *
4593 task_io_user_clients(task_t task)
4594 {
4595 return (&task->io_user_clients);
4596 }