1 /*
2 * Copyright (c) 2000-2010, 2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_FREE_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 * File: kern/task.c
58 * Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub,
59 * David Black
60 *
61 * Task management primitives implementation.
62 */
63 /*
64 * Copyright (c) 1993 The University of Utah and
65 * the Computer Systems Laboratory (CSL). All rights reserved.
66 *
67 * Permission to use, copy, modify and distribute this software and its
68 * documentation is hereby granted, provided that both the copyright
69 * notice and this permission notice appear in all copies of the
70 * software, derivative works or modified versions, and any portions
71 * thereof, and that both notices appear in supporting documentation.
72 *
73 * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
74 * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
75 * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
76 *
77 * CSL requests users of this software to return to csl-dist@cs.utah.edu any
78 * improvements that they make and grant CSL redistribution rights.
79 *
80 */
81 /*
82 * NOTICE: This file was modified by McAfee Research in 2004 to introduce
83 * support for mandatory and extensible security protections. This notice
84 * is included in support of clause 2.2 (b) of the Apple Public License,
85 * Version 2.0.
86 * Copyright (c) 2005 SPARTA, Inc.
87 */
88
89 #include <mach/mach_types.h>
90 #include <mach/boolean.h>
91 #include <mach/host_priv.h>
92 #include <mach/machine/vm_types.h>
93 #include <mach/vm_param.h>
94 #include <mach/mach_vm.h>
95 #include <mach/semaphore.h>
96 #include <mach/task_info.h>
97 #include <mach/task_special_ports.h>
98
99 #include <ipc/ipc_importance.h>
100 #include <ipc/ipc_types.h>
101 #include <ipc/ipc_space.h>
102 #include <ipc/ipc_entry.h>
103 #include <ipc/ipc_hash.h>
104
105 #include <kern/kern_types.h>
106 #include <kern/mach_param.h>
107 #include <kern/misc_protos.h>
108 #include <kern/task.h>
109 #include <kern/thread.h>
110 #include <kern/coalition.h>
111 #include <kern/zalloc.h>
112 #include <kern/kalloc.h>
113 #include <kern/kern_cdata.h>
114 #include <kern/processor.h>
115 #include <kern/sched_prim.h> /* for thread_wakeup */
116 #include <kern/ipc_tt.h>
117 #include <kern/host.h>
118 #include <kern/clock.h>
119 #include <kern/timer.h>
120 #include <kern/assert.h>
121 #include <kern/sync_lock.h>
122 #include <kern/affinity.h>
123 #include <kern/exc_resource.h>
124 #include <kern/machine.h>
125 #include <corpses/task_corpse.h>
126 #if CONFIG_TELEMETRY
127 #include <kern/telemetry.h>
128 #endif
129
130 #include <vm/pmap.h>
131 #include <vm/vm_map.h>
132 #include <vm/vm_kern.h> /* for kernel_map, ipc_kernel_map */
133 #include <vm/vm_pageout.h>
134 #include <vm/vm_protos.h>
135 #include <vm/vm_purgeable_internal.h>
136
137 #include <sys/resource.h>
138 #include <sys/signalvar.h> /* for coredump */
139
140 /*
141 * Exported interfaces
142 */
143
144 #include <mach/task_server.h>
145 #include <mach/mach_host_server.h>
146 #include <mach/host_security_server.h>
147 #include <mach/mach_port_server.h>
148
149 #include <vm/vm_shared_region.h>
150
151 #include <libkern/OSDebug.h>
152 #include <libkern/OSAtomic.h>
153
154 #if CONFIG_ATM
155 #include <atm/atm_internal.h>
156 #endif
157
158 #include <kern/sfi.h>
159
160 #if KPERF
161 extern int kpc_force_all_ctrs(task_t, int);
162 #endif
163
164 uint32_t qos_override_mode;
165
166 task_t kernel_task;
167 zone_t task_zone;
168 lck_attr_t task_lck_attr;
169 lck_grp_t task_lck_grp;
170 lck_grp_attr_t task_lck_grp_attr;
171
172 /* Flag set by core audio when audio is playing. Used to stifle EXC_RESOURCE generation when active. */
173 int audio_active = 0;
174
175 zinfo_usage_store_t tasks_tkm_private;
176 zinfo_usage_store_t tasks_tkm_shared;
177
178 /* A container to accumulate statistics for expired tasks */
179 expired_task_statistics_t dead_task_statistics;
180 lck_spin_t dead_task_statistics_lock;
181
182 ledger_template_t task_ledger_template = NULL;
183
184 struct _task_ledger_indices task_ledgers __attribute__((used)) =
185 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
186 { 0 /* initialized at runtime */},
187 #ifdef CONFIG_BANK
188 -1, -1,
189 #endif
190 };
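/*
 * Every entry index above starts out as -1, meaning "not registered yet";
 * init_task_ledgers() fills in the real indices and panics if any entry
 * cannot be created.
 */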
191
192 /* System sleep state */
193 boolean_t tasks_suspend_state;
194
195
196 void init_task_ledgers(void);
197 void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1);
198 void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1);
199 void __attribute__((noinline)) THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE(void);
200 void __attribute__((noinline)) PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb);
201
202 kern_return_t task_suspend_internal(task_t);
203 kern_return_t task_resume_internal(task_t);
204 static kern_return_t task_start_halt_locked(task_t task, boolean_t should_mark_corpse);
205
206
207 void proc_init_cpumon_params(void);
208 extern kern_return_t exception_deliver(thread_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t, struct exception_action *, lck_mtx_t *);
209
210 // Warn tasks when they hit 80% of their memory limit.
211 #define PHYS_FOOTPRINT_WARNING_LEVEL 80
212
213 #define TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT 150 /* wakeups per second */
214 #define TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL 300 /* in seconds. */
215
216 /*
217 * Level (in terms of percentage of the limit) at which the wakeups monitor triggers telemetry.
218 *
219 * (i.e., when the task's wakeups rate exceeds 70% of the limit, start taking user
220 * stacktraces, aka micro-stackshots)
221 */
222 #define TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER 70
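/*
 * Illustrative arithmetic with the defaults above (numbers are only an
 * example of how the knobs combine): a limit of 150 wakeups/sec observed
 * over a 300 second interval is a budget of 150 * 300 = 45,000 wakeups
 * per window, and telemetry (micro-stackshots) begins once the observed
 * rate crosses 70% of the limit, i.e. a sustained 105 wakeups/sec.
 */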
223
224 int task_wakeups_monitor_interval; /* In seconds. Time period over which wakeups rate is observed */
225 int task_wakeups_monitor_rate; /* In wakeups per second. Maximum allowable wakeups per task before EXC_RESOURCE is sent */
226
227 int task_wakeups_monitor_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
228
229 int disable_exc_resource; /* Global override to suppress EXC_RESOURCE for resource monitor violations. */
230
231 ledger_amount_t max_task_footprint = 0; /* Per-task limit on physical memory consumption in bytes */
232 int max_task_footprint_mb = 0; /* Per-task limit on physical memory consumption in megabytes */
233
234 #if MACH_ASSERT
235 int pmap_ledgers_panic = 1;
236 #endif /* MACH_ASSERT */
237
238 int task_max = CONFIG_TASK_MAX; /* Max number of tasks */
239
240 int hwm_user_cores = 0; /* high watermark violations generate user core files */
241
242 #ifdef MACH_BSD
243 extern void proc_getexecutableuuid(void *, unsigned char *, unsigned long);
244 extern int proc_pid(struct proc *p);
245 extern int proc_selfpid(void);
246 extern char *proc_name_address(struct proc *p);
247 extern uint64_t get_dispatchqueue_offset_from_proc(void *);
248 #if CONFIG_JETSAM
249 extern void proc_memstat_terminated(struct proc* p, boolean_t set);
250 extern void memorystatus_on_ledger_footprint_exceeded(int warning, const int max_footprint_mb);
251 #endif
252 #endif
253 #if MACH_ASSERT
254 extern int pmap_ledgers_panic;
255 #endif /* MACH_ASSERT */
256
257 /* Forwards */
258
259 void task_hold_locked(
260 task_t task);
261 void task_wait_locked(
262 task_t task,
263 boolean_t until_not_runnable);
264 void task_release_locked(
265 task_t task);
266 void task_free(
267 task_t task );
268 void task_synchronizer_destroy_all(
269 task_t task);
270
271 int check_for_tasksuspend(
272 task_t task);
273
274 void
275 task_backing_store_privileged(
276 task_t task)
277 {
278 task_lock(task);
279 task->priv_flags |= VM_BACKING_STORE_PRIV;
280 task_unlock(task);
281 return;
282 }
283
284
285 void
286 task_set_64bit(
287 task_t task,
288 boolean_t is64bit)
289 {
290 #if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
291 thread_t thread;
292 #endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */
293
294 task_lock(task);
295
296 if (is64bit) {
297 if (task_has_64BitAddr(task))
298 goto out;
299 task_set_64BitAddr(task);
300 } else {
301 if ( !task_has_64BitAddr(task))
302 goto out;
303 task_clear_64BitAddr(task);
304 }
305 /* FIXME: On x86, the thread save state flavor can diverge from the
306 * task's 64-bit feature flag due to the 32-bit/64-bit register save
307 * state dichotomy. Since we can be pre-empted in this interval,
308 * certain routines may observe the thread as being in an inconsistent
309 * state with respect to its task's 64-bitness.
310 */
311
312 #if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
313 queue_iterate(&task->threads, thread, thread_t, task_threads) {
314 thread_mtx_lock(thread);
315 machine_thread_switch_addrmode(thread);
316 thread_mtx_unlock(thread);
317 }
318 #endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */
319
320 out:
321 task_unlock(task);
322 }
323
324
325 void
326 task_set_dyld_info(task_t task, mach_vm_address_t addr, mach_vm_size_t size)
327 {
328 task_lock(task);
329 task->all_image_info_addr = addr;
330 task->all_image_info_size = size;
331 task_unlock(task);
332 }
333
334 void
335 task_atm_reset(__unused task_t task) {
336
337 #if CONFIG_ATM
338 if (task->atm_context != NULL) {
339 atm_task_descriptor_destroy(task->atm_context);
340 task->atm_context = NULL;
341 }
342 #endif
343
344 }
345
346 void
347 task_bank_reset(__unused task_t task) {
348
349 #if CONFIG_BANK
350 if (task->bank_context != NULL) {
351 bank_task_destroy(task);
352 }
353 #endif
354
355 }
356
357 /*
358 * NOTE: This should only be called when the P_LINTRANSIT
359 * flag is set (the proc_trans lock is held) on the
360 * proc associated with the task.
361 */
362 void
363 task_bank_init(__unused task_t task) {
364
365 #if CONFIG_BANK
366 if (task->bank_context != NULL) {
367 panic("Task bank init called with non null bank context for task: %p and bank_context: %p", task, task->bank_context);
368 }
369 bank_task_initialize(task);
370 #endif
371
372 }
373
374 #if TASK_REFERENCE_LEAK_DEBUG
375 #include <kern/btlog.h>
376
377 decl_simple_lock_data(static,task_ref_lock);
378 static btlog_t *task_ref_btlog;
379 #define TASK_REF_OP_INCR 0x1
380 #define TASK_REF_OP_DECR 0x2
381
382 #define TASK_REF_BTDEPTH 7
383
384 static void
385 task_ref_lock_lock(void *context)
386 {
387 simple_lock((simple_lock_t)context);
388 }
389 static void
390 task_ref_lock_unlock(void *context)
391 {
392 simple_unlock((simple_lock_t)context);
393 }
394
395 void
396 task_reference_internal(task_t task)
397 {
398 void * bt[TASK_REF_BTDEPTH];
399 int numsaved = 0;
400
401 numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
402
403 (void)hw_atomic_add(&(task)->ref_count, 1);
404 btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_INCR,
405 bt, numsaved);
406 }
407
408 uint32_t
409 task_deallocate_internal(task_t task)
410 {
411 void * bt[TASK_REF_BTDEPTH];
412 int numsaved = 0;
413
414 numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
415
416 btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_DECR,
417 bt, numsaved);
418 return hw_atomic_sub(&(task)->ref_count, 1);
419 }
420
421 #endif /* TASK_REFERENCE_LEAK_DEBUG */
422
423 void
424 task_init(void)
425 {
426
427 lck_grp_attr_setdefault(&task_lck_grp_attr);
428 lck_grp_init(&task_lck_grp, "task", &task_lck_grp_attr);
429 lck_attr_setdefault(&task_lck_attr);
430 lck_mtx_init(&tasks_threads_lock, &task_lck_grp, &task_lck_attr);
431
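/*
 * Back all task structures with a dedicated zone: at most task_max
 * tasks, grown TASK_CHUNK tasks at a time; Z_NOENCRYPT marks the zone's
 * memory as not requiring encryption (e.g. in hibernation images).
 */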
432 task_zone = zinit(
433 sizeof(struct task),
434 task_max * sizeof(struct task),
435 TASK_CHUNK * sizeof(struct task),
436 "tasks");
437
438 zone_change(task_zone, Z_NOENCRYPT, TRUE);
439
440 /*
441 * Configure per-task memory limit.
442 * The boot-arg is interpreted as Megabytes,
443 * and takes precedence over the device tree.
444 * Setting the boot-arg to 0 disables task limits.
445 */
446 if (!PE_parse_boot_argn("max_task_pmem", &max_task_footprint_mb,
447 sizeof (max_task_footprint_mb))) {
448 /*
449 * No limit was found in boot-args, so go look in the device tree.
450 */
451 if (!PE_get_default("kern.max_task_pmem", &max_task_footprint_mb,
452 sizeof(max_task_footprint_mb))) {
453 /*
454 * No limit was found in device tree.
455 */
456 max_task_footprint_mb = 0;
457 }
458 }
459
460 if (max_task_footprint_mb != 0) {
461 #if CONFIG_JETSAM
462 if (max_task_footprint_mb < 50) {
463 printf("Warning: max_task_pmem %d below minimum.\n",
464 max_task_footprint_mb);
465 max_task_footprint_mb = 50;
466 }
467 printf("Limiting task physical memory footprint to %d MB\n",
468 max_task_footprint_mb);
469
470 max_task_footprint = (ledger_amount_t)max_task_footprint_mb * 1024 * 1024; // Convert MB to bytes
471 #else
472 printf("Warning: max_task_footprint specified, but jetsam not configured; ignoring.\n");
473 #endif
474 }
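/*
 * Example (the value is illustrative only): booting with
 * "max_task_pmem=650" yields a 650 MB per-task footprint limit; values
 * below 50 are clamped up to 50 MB, 0 (or no boot-arg/device-tree entry)
 * leaves tasks unlimited, and without CONFIG_JETSAM any setting is ignored.
 */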
475
476 #if MACH_ASSERT
477 PE_parse_boot_argn("pmap_ledgers_panic", &pmap_ledgers_panic,
478 sizeof (pmap_ledgers_panic));
479 #endif /* MACH_ASSERT */
480
481 if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores,
482 sizeof (hwm_user_cores))) {
483 hwm_user_cores = 0;
484 }
485
486 if (PE_parse_boot_argn("qos_override_mode", &qos_override_mode, sizeof(qos_override_mode))) {
487 printf("QOS override mode: 0x%08x\n", qos_override_mode);
488 } else {
489 qos_override_mode = QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE;
490 }
491
492 proc_init_cpumon_params();
493
494 if (!PE_parse_boot_argn("task_wakeups_monitor_rate", &task_wakeups_monitor_rate, sizeof (task_wakeups_monitor_rate))) {
495 task_wakeups_monitor_rate = TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT;
496 }
497
498 if (!PE_parse_boot_argn("task_wakeups_monitor_interval", &task_wakeups_monitor_interval, sizeof (task_wakeups_monitor_interval))) {
499 task_wakeups_monitor_interval = TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL;
500 }
501
502 if (!PE_parse_boot_argn("task_wakeups_monitor_ustackshots_trigger_pct", &task_wakeups_monitor_ustackshots_trigger_pct,
503 sizeof (task_wakeups_monitor_ustackshots_trigger_pct))) {
504 task_wakeups_monitor_ustackshots_trigger_pct = TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER;
505 }
506
507 if (!PE_parse_boot_argn("disable_exc_resource", &disable_exc_resource,
508 sizeof (disable_exc_resource))) {
509 disable_exc_resource = 0;
510 }
511
512 /*
513 * If we have coalitions, coalition_init() will call init_task_ledgers() as it
514 * sets up the ledgers for the default coalition. If we don't have coalitions,
515 * then we have to call it now.
516 */
517 #if CONFIG_COALITIONS
518 assert(task_ledger_template);
519 #else /* CONFIG_COALITIONS */
520 init_task_ledgers();
521 #endif /* CONFIG_COALITIONS */
522
523 #if TASK_REFERENCE_LEAK_DEBUG
524 simple_lock_init(&task_ref_lock, 0);
525 task_ref_btlog = btlog_create(100000,
526 TASK_REF_BTDEPTH,
527 task_ref_lock_lock,
528 task_ref_lock_unlock,
529 &task_ref_lock);
530 assert(task_ref_btlog);
531 #endif
532
533 /*
534 * Create the kernel task as the first task.
535 */
536 #ifdef __LP64__
537 if (task_create_internal(TASK_NULL, NULL, FALSE, TRUE, &kernel_task) != KERN_SUCCESS)
538 #else
539 if (task_create_internal(TASK_NULL, NULL, FALSE, FALSE, &kernel_task) != KERN_SUCCESS)
540 #endif
541 panic("task_init\n");
542
543 vm_map_deallocate(kernel_task->map);
544 kernel_task->map = kernel_map;
545 lck_spin_init(&dead_task_statistics_lock, &task_lck_grp, &task_lck_attr);
546
547 }
548
549 /*
550 * Create a task running in the kernel address space. It may
551 * have its own map of size mem_size and may have ipc privileges.
552 */
553 kern_return_t
554 kernel_task_create(
555 __unused task_t parent_task,
556 __unused vm_offset_t map_base,
557 __unused vm_size_t map_size,
558 __unused task_t *child_task)
559 {
560 return (KERN_INVALID_ARGUMENT);
561 }
562
563 kern_return_t
564 task_create(
565 task_t parent_task,
566 __unused ledger_port_array_t ledger_ports,
567 __unused mach_msg_type_number_t num_ledger_ports,
568 __unused boolean_t inherit_memory,
569 __unused task_t *child_task) /* OUT */
570 {
571 if (parent_task == TASK_NULL)
572 return(KERN_INVALID_ARGUMENT);
573
574 /*
575 * No longer supported: too many calls assume that a task has a valid
576 * process attached.
577 */
578 return(KERN_FAILURE);
579 }
580
581 kern_return_t
582 host_security_create_task_token(
583 host_security_t host_security,
584 task_t parent_task,
585 __unused security_token_t sec_token,
586 __unused audit_token_t audit_token,
587 __unused host_priv_t host_priv,
588 __unused ledger_port_array_t ledger_ports,
589 __unused mach_msg_type_number_t num_ledger_ports,
590 __unused boolean_t inherit_memory,
591 __unused task_t *child_task) /* OUT */
592 {
593 if (parent_task == TASK_NULL)
594 return(KERN_INVALID_ARGUMENT);
595
596 if (host_security == HOST_NULL)
597 return(KERN_INVALID_SECURITY);
598
599 /*
600 * No longer supported.
601 */
602 return(KERN_FAILURE);
603 }
604
605 /*
606 * Task ledgers
607 * ------------
608 *
609 * phys_footprint
610 * Physical footprint: This is the sum of:
611 * + (internal - alternate_accounting)
612 * + (internal_compressed - alternate_accounting_compressed)
613 * + iokit_mapped
614 * + purgeable_nonvolatile
615 * + purgeable_nonvolatile_compressed
616 *
617 * internal
618 * The task's anonymous memory, which on iOS is always resident.
619 *
620 * internal_compressed
621 * Amount of this task's internal memory which is held by the compressor.
622 * Such memory is no longer actually resident for the task [i.e., resident in its pmap],
623 * and could be either decompressed back into memory, or paged out to storage, depending
624 * on our implementation.
625 *
626 * iokit_mapped
627 * IOKit mappings: The total size of all IOKit mappings in this task, regardless of
628 * clean/dirty or internal/external state.
629 *
630 * alternate_accounting
631 * The number of internal dirty pages which are part of IOKit mappings. By definition, these pages
632 * are counted in both internal *and* iokit_mapped, so we must subtract them from the total to avoid
633 * double counting.
634 */
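/*
 * Worked example (illustrative numbers only): a task with 200 MB of
 * resident internal (anonymous) memory, a further 40 MB of internal
 * memory held by the compressor, 30 MB of IOKit mappings, 10 MB of
 * alternate_accounting, no alternate_accounting_compressed, and 15 MB
 * of nonvolatile purgeable memory (none of it compressed) has
 *
 *	phys_footprint = (200 - 10) + (40 - 0) + 30 + 15 + 0 = 275 MB
 */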
635 void
636 init_task_ledgers(void)
637 {
638 ledger_template_t t;
639
640 assert(task_ledger_template == NULL);
641 assert(kernel_task == TASK_NULL);
642
643 if ((t = ledger_template_create("Per-task ledger")) == NULL)
644 panic("couldn't create task ledger template");
645
646 task_ledgers.cpu_time = ledger_entry_add(t, "cpu_time", "sched", "ns");
647 task_ledgers.tkm_private = ledger_entry_add(t, "tkm_private",
648 "physmem", "bytes");
649 task_ledgers.tkm_shared = ledger_entry_add(t, "tkm_shared", "physmem",
650 "bytes");
651 task_ledgers.phys_mem = ledger_entry_add(t, "phys_mem", "physmem",
652 "bytes");
653 task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem",
654 "bytes");
655 task_ledgers.internal = ledger_entry_add(t, "internal", "physmem",
656 "bytes");
657 task_ledgers.iokit_mapped = ledger_entry_add(t, "iokit_mapped", "mappings",
658 "bytes");
659 task_ledgers.alternate_accounting = ledger_entry_add(t, "alternate_accounting", "physmem",
660 "bytes");
661 task_ledgers.alternate_accounting_compressed = ledger_entry_add(t, "alternate_accounting_compressed", "physmem",
662 "bytes");
663 task_ledgers.phys_footprint = ledger_entry_add(t, "phys_footprint", "physmem",
664 "bytes");
665 task_ledgers.internal_compressed = ledger_entry_add(t, "internal_compressed", "physmem",
666 "bytes");
667 task_ledgers.purgeable_volatile = ledger_entry_add(t, "purgeable_volatile", "physmem", "bytes");
668 task_ledgers.purgeable_nonvolatile = ledger_entry_add(t, "purgeable_nonvolatile", "physmem", "bytes");
669 task_ledgers.purgeable_volatile_compressed = ledger_entry_add(t, "purgeable_volatile_compress", "physmem", "bytes");
670 task_ledgers.purgeable_nonvolatile_compressed = ledger_entry_add(t, "purgeable_nonvolatile_compress", "physmem", "bytes");
671 task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power",
672 "count");
673 task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power",
674 "count");
675
676 #if CONFIG_SCHED_SFI
677 sfi_class_id_t class_id, ledger_alias;
678 for (class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
679 task_ledgers.sfi_wait_times[class_id] = -1;
680 }
681
682 /* don't account for UNSPECIFIED */
683 for (class_id = SFI_CLASS_UNSPECIFIED + 1; class_id < MAX_SFI_CLASS_ID; class_id++) {
684 ledger_alias = sfi_get_ledger_alias_for_class(class_id);
685 if (ledger_alias != SFI_CLASS_UNSPECIFIED) {
686 /* Check to see if alias has been registered yet */
687 if (task_ledgers.sfi_wait_times[ledger_alias] != -1) {
688 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias];
689 } else {
690 /* Otherwise, initialize it first */
691 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias] = sfi_ledger_entry_add(t, ledger_alias);
692 }
693 } else {
694 task_ledgers.sfi_wait_times[class_id] = sfi_ledger_entry_add(t, class_id);
695 }
696
697 if (task_ledgers.sfi_wait_times[class_id] < 0) {
698 panic("couldn't create entries for task ledger template for SFI class 0x%x", class_id);
699 }
700 }
701
702 assert(task_ledgers.sfi_wait_times[MAX_SFI_CLASS_ID -1] != -1);
703 #endif /* CONFIG_SCHED_SFI */
704
705 #ifdef CONFIG_BANK
706 task_ledgers.cpu_time_billed_to_me = ledger_entry_add(t, "cpu_time_billed_to_me", "sched", "ns");
707 task_ledgers.cpu_time_billed_to_others = ledger_entry_add(t, "cpu_time_billed_to_others", "sched", "ns");
708 #endif
709 if ((task_ledgers.cpu_time < 0) ||
710 (task_ledgers.tkm_private < 0) ||
711 (task_ledgers.tkm_shared < 0) ||
712 (task_ledgers.phys_mem < 0) ||
713 (task_ledgers.wired_mem < 0) ||
714 (task_ledgers.internal < 0) ||
715 (task_ledgers.iokit_mapped < 0) ||
716 (task_ledgers.alternate_accounting < 0) ||
717 (task_ledgers.alternate_accounting_compressed < 0) ||
718 (task_ledgers.phys_footprint < 0) ||
719 (task_ledgers.internal_compressed < 0) ||
720 (task_ledgers.purgeable_volatile < 0) ||
721 (task_ledgers.purgeable_nonvolatile < 0) ||
722 (task_ledgers.purgeable_volatile_compressed < 0) ||
723 (task_ledgers.purgeable_nonvolatile_compressed < 0) ||
724 (task_ledgers.platform_idle_wakeups < 0) ||
725 (task_ledgers.interrupt_wakeups < 0)
726 #ifdef CONFIG_BANK
727 || (task_ledgers.cpu_time_billed_to_me < 0) || (task_ledgers.cpu_time_billed_to_others < 0)
728 #endif
729 ) {
730 panic("couldn't create entries for task ledger template");
731 }
732
733 ledger_track_maximum(t, task_ledgers.phys_footprint, 60);
734 #if MACH_ASSERT
735 if (pmap_ledgers_panic) {
736 ledger_panic_on_negative(t, task_ledgers.phys_footprint);
737 ledger_panic_on_negative(t, task_ledgers.internal);
738 ledger_panic_on_negative(t, task_ledgers.internal_compressed);
739 ledger_panic_on_negative(t, task_ledgers.iokit_mapped);
740 ledger_panic_on_negative(t, task_ledgers.alternate_accounting);
741 ledger_panic_on_negative(t, task_ledgers.alternate_accounting_compressed);
742 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile);
743 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile);
744 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile_compressed);
745 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile_compressed);
746 }
747 #endif /* MACH_ASSERT */
748
749 #if CONFIG_JETSAM
750 ledger_set_callback(t, task_ledgers.phys_footprint, task_footprint_exceeded, NULL, NULL);
751 #endif
752
753 ledger_set_callback(t, task_ledgers.interrupt_wakeups,
754 task_wakeups_rate_exceeded, NULL, NULL);
755
756 task_ledger_template = t;
757 }
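/*
 * The finished template is what task_create_internal() below instantiates
 * (with LEDGER_CREATE_ACTIVE_ENTRIES) to give every new task its own
 * ledger, one entry per index registered above.
 */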
758
759 kern_return_t
760 task_create_internal(
761 task_t parent_task,
762 coalition_t *parent_coalitions __unused,
763 boolean_t inherit_memory,
764 boolean_t is_64bit,
765 task_t *child_task) /* OUT */
766 {
767 task_t new_task;
768 vm_shared_region_t shared_region;
769 ledger_t ledger = NULL;
770
771 new_task = (task_t) zalloc(task_zone);
772
773 if (new_task == TASK_NULL)
774 return(KERN_RESOURCE_SHORTAGE);
775
776 /* one ref for just being alive; one for our caller */
777 new_task->ref_count = 2;
778
779 /* allocate with active entries */
780 assert(task_ledger_template != NULL);
781 if ((ledger = ledger_instantiate(task_ledger_template,
782 LEDGER_CREATE_ACTIVE_ENTRIES)) == NULL) {
783 zfree(task_zone, new_task);
784 return(KERN_RESOURCE_SHORTAGE);
785 }
786
787 new_task->ledger = ledger;
788
789 #if defined(CONFIG_SCHED_MULTIQ)
790 new_task->sched_group = sched_group_create();
791 #endif
792
793 /* if inherit_memory is true, parent_task MUST not be NULL */
794 if (inherit_memory)
795 new_task->map = vm_map_fork(ledger, parent_task->map);
796 else
797 new_task->map = vm_map_create(pmap_create(ledger, 0, is_64bit),
798 (vm_map_offset_t)(VM_MIN_ADDRESS),
799 (vm_map_offset_t)(VM_MAX_ADDRESS), TRUE);
800
801 /* Inherit memlock limit from parent */
802 if (parent_task)
803 vm_map_set_user_wire_limit(new_task->map, (vm_size_t)parent_task->map->user_wire_limit);
804
805 lck_mtx_init(&new_task->lock, &task_lck_grp, &task_lck_attr);
806 queue_init(&new_task->threads);
807 new_task->suspend_count = 0;
808 new_task->thread_count = 0;
809 new_task->active_thread_count = 0;
810 new_task->user_stop_count = 0;
811 new_task->legacy_stop_count = 0;
812 new_task->active = TRUE;
813 new_task->halting = FALSE;
814 new_task->user_data = NULL;
815 new_task->faults = 0;
816 new_task->cow_faults = 0;
817 new_task->pageins = 0;
818 new_task->messages_sent = 0;
819 new_task->messages_received = 0;
820 new_task->syscalls_mach = 0;
821 new_task->priv_flags = 0;
822 new_task->syscalls_unix=0;
823 new_task->c_switch = new_task->p_switch = new_task->ps_switch = 0;
824 new_task->t_flags = 0;
825 new_task->importance = 0;
826
827 #if CONFIG_ATM
828 new_task->atm_context = NULL;
829 #endif
830 #if CONFIG_BANK
831 new_task->bank_context = NULL;
832 #endif
833
834 zinfo_task_init(new_task);
835
836 #ifdef MACH_BSD
837 new_task->bsd_info = NULL;
838 new_task->corpse_info = NULL;
839 #endif /* MACH_BSD */
840
841 #if CONFIG_JETSAM
842 if (max_task_footprint != 0) {
843 ledger_set_limit(ledger, task_ledgers.phys_footprint, max_task_footprint, PHYS_FOOTPRINT_WARNING_LEVEL);
844 }
845 #endif
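/*
 * Example with an assumed limit: if max_task_footprint were 500 MB, the
 * PHYS_FOOTPRINT_WARNING_LEVEL of 80 (percent) means the warning callback
 * (task_footprint_exceeded) fires when the phys_footprint ledger crosses
 * 400 MB, with the hard limit at 500 MB.
 */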
846
847 if (task_wakeups_monitor_rate != 0) {
848 uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
849 int32_t rate; // Ignored because of WAKEMON_SET_DEFAULTS
850 task_wakeups_monitor_ctl(new_task, &flags, &rate);
851 }
852
853 #if defined(__i386__) || defined(__x86_64__)
854 new_task->i386_ldt = 0;
855 #endif
856
857 new_task->task_debug = NULL;
858
859 queue_init(&new_task->semaphore_list);
860 new_task->semaphores_owned = 0;
861
862 ipc_task_init(new_task, parent_task);
863
864 new_task->total_user_time = 0;
865 new_task->total_system_time = 0;
866
867 new_task->vtimers = 0;
868
869 new_task->shared_region = NULL;
870
871 new_task->affinity_space = NULL;
872
873 new_task->pidsuspended = FALSE;
874 new_task->frozen = FALSE;
875 new_task->changing_freeze_state = FALSE;
876 new_task->rusage_cpu_flags = 0;
877 new_task->rusage_cpu_percentage = 0;
878 new_task->rusage_cpu_interval = 0;
879 new_task->rusage_cpu_deadline = 0;
880 new_task->rusage_cpu_callt = NULL;
881 #if MACH_ASSERT
882 new_task->suspends_outstanding = 0;
883 #endif
884
885 #if HYPERVISOR
886 new_task->hv_task_target = NULL;
887 #endif /* HYPERVISOR */
888
889
890 new_task->low_mem_notified_warn = 0;
891 new_task->low_mem_notified_critical = 0;
892 new_task->low_mem_privileged_listener = 0;
893 new_task->purged_memory_warn = 0;
894 new_task->purged_memory_critical = 0;
895 new_task->mem_notify_reserved = 0;
896 #if IMPORTANCE_INHERITANCE
897 new_task->task_imp_base = NULL;
898 #endif /* IMPORTANCE_INHERITANCE */
899
900 #if defined(__x86_64__)
901 new_task->uexc_range_start = new_task->uexc_range_size = new_task->uexc_handler = 0;
902 #endif
903
904 new_task->requested_policy = default_task_requested_policy;
905 new_task->effective_policy = default_task_effective_policy;
906 new_task->pended_policy = default_task_pended_policy;
907
908 if (parent_task != TASK_NULL) {
909 new_task->sec_token = parent_task->sec_token;
910 new_task->audit_token = parent_task->audit_token;
911
912 /* inherit the parent's shared region */
913 shared_region = vm_shared_region_get(parent_task);
914 vm_shared_region_set(new_task, shared_region);
915
916 if(task_has_64BitAddr(parent_task))
917 task_set_64BitAddr(new_task);
918 new_task->all_image_info_addr = parent_task->all_image_info_addr;
919 new_task->all_image_info_size = parent_task->all_image_info_size;
920
921 #if defined(__i386__) || defined(__x86_64__)
922 if (inherit_memory && parent_task->i386_ldt)
923 new_task->i386_ldt = user_ldt_copy(parent_task->i386_ldt);
924 #endif
925 if (inherit_memory && parent_task->affinity_space)
926 task_affinity_create(parent_task, new_task);
927
928 new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task);
929
930 #if IMPORTANCE_INHERITANCE
931 ipc_importance_task_t new_task_imp = IIT_NULL;
932
933 if (task_is_marked_importance_donor(parent_task)) {
934 new_task_imp = ipc_importance_for_task(new_task, FALSE);
935 assert(IIT_NULL != new_task_imp);
936 ipc_importance_task_mark_donor(new_task_imp, TRUE);
937 }
938 /* Embedded doesn't want this to inherit */
939 if (task_is_marked_importance_receiver(parent_task)) {
940 if (IIT_NULL == new_task_imp)
941 new_task_imp = ipc_importance_for_task(new_task, FALSE);
942 assert(IIT_NULL != new_task_imp);
943 ipc_importance_task_mark_receiver(new_task_imp, TRUE);
944 }
945 if (task_is_marked_importance_denap_receiver(parent_task)) {
946 if (IIT_NULL == new_task_imp)
947 new_task_imp = ipc_importance_for_task(new_task, FALSE);
948 assert(IIT_NULL != new_task_imp);
949 ipc_importance_task_mark_denap_receiver(new_task_imp, TRUE);
950 }
951
952 if (IIT_NULL != new_task_imp) {
953 assert(new_task->task_imp_base == new_task_imp);
954 ipc_importance_task_release(new_task_imp);
955 }
956 #endif /* IMPORTANCE_INHERITANCE */
957
958 new_task->priority = BASEPRI_DEFAULT;
959 new_task->max_priority = MAXPRI_USER;
960
961 new_task->requested_policy.t_apptype = parent_task->requested_policy.t_apptype;
962
963 new_task->requested_policy.int_darwinbg = parent_task->requested_policy.int_darwinbg;
964 new_task->requested_policy.ext_darwinbg = parent_task->requested_policy.ext_darwinbg;
965 new_task->requested_policy.int_iotier = parent_task->requested_policy.int_iotier;
966 new_task->requested_policy.ext_iotier = parent_task->requested_policy.ext_iotier;
967 new_task->requested_policy.int_iopassive = parent_task->requested_policy.int_iopassive;
968 new_task->requested_policy.ext_iopassive = parent_task->requested_policy.ext_iopassive;
969 new_task->requested_policy.bg_iotier = parent_task->requested_policy.bg_iotier;
970 new_task->requested_policy.terminated = parent_task->requested_policy.terminated;
971 new_task->requested_policy.t_qos_clamp = parent_task->requested_policy.t_qos_clamp;
972
973 task_policy_create(new_task, parent_task->requested_policy.t_boosted);
974 } else {
975 new_task->sec_token = KERNEL_SECURITY_TOKEN;
976 new_task->audit_token = KERNEL_AUDIT_TOKEN;
977 #ifdef __LP64__
978 if(is_64bit)
979 task_set_64BitAddr(new_task);
980 #endif
981 new_task->all_image_info_addr = (mach_vm_address_t)0;
982 new_task->all_image_info_size = (mach_vm_size_t)0;
983
984 new_task->pset_hint = PROCESSOR_SET_NULL;
985
986 if (kernel_task == TASK_NULL) {
987 new_task->priority = BASEPRI_KERNEL;
988 new_task->max_priority = MAXPRI_KERNEL;
989 } else {
990 new_task->priority = BASEPRI_DEFAULT;
991 new_task->max_priority = MAXPRI_USER;
992 }
993 }
994
995 bzero(new_task->coalition, sizeof(new_task->coalition));
996 for (int i = 0; i < COALITION_NUM_TYPES; i++)
997 queue_chain_init(new_task->task_coalition[i]);
998
999 /* Allocate I/O Statistics */
1000 new_task->task_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info));
1001 assert(new_task->task_io_stats != NULL);
1002 bzero(new_task->task_io_stats, sizeof(struct io_stat_info));
1003 new_task->task_immediate_writes = 0;
1004 new_task->task_deferred_writes = 0;
1005 new_task->task_invalidated_writes = 0;
1006 new_task->task_metadata_writes = 0;
1007
1008 bzero(&(new_task->cpu_time_qos_stats), sizeof(struct _cpu_time_qos_stats));
1009
1010 bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics));
1011 new_task->task_timer_wakeups_bin_1 = new_task->task_timer_wakeups_bin_2 = 0;
1012 new_task->task_gpu_ns = 0;
1013
1014 #if CONFIG_COALITIONS
1015
1016 /* TODO: there is no graceful failure path here... */
1017 if (parent_coalitions && parent_coalitions[COALITION_TYPE_RESOURCE]) {
1018 coalitions_adopt_task(parent_coalitions, new_task);
1019 } else if (parent_task && parent_task->coalition[COALITION_TYPE_RESOURCE]) {
1020 /*
1021 * all tasks at least have a resource coalition, so
1022 * if the parent has one then inherit all coalitions
1023 * the parent is a part of
1024 */
1025 coalitions_adopt_task(parent_task->coalition, new_task);
1026 } else {
1027 /* TODO: assert that new_task will be PID 1 (launchd) */
1028 coalitions_adopt_init_task(new_task);
1029 }
1030
1031 if (new_task->coalition[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
1032 panic("created task is not a member of a resource coalition");
1033 }
1034 #endif /* CONFIG_COALITIONS */
1035
1036 new_task->dispatchqueue_offset = 0;
1037 if (parent_task != NULL) {
1038 new_task->dispatchqueue_offset = parent_task->dispatchqueue_offset;
1039 }
1040
1041 if (vm_backing_store_low && parent_task != NULL)
1042 new_task->priv_flags |= (parent_task->priv_flags&VM_BACKING_STORE_PRIV);
1043
1044 new_task->task_volatile_objects = 0;
1045 new_task->task_nonvolatile_objects = 0;
1046 new_task->task_purgeable_disowning = FALSE;
1047 new_task->task_purgeable_disowned = FALSE;
1048
1049 ipc_task_enable(new_task);
1050
1051 lck_mtx_lock(&tasks_threads_lock);
1052 queue_enter(&tasks, new_task, task_t, tasks);
1053 tasks_count++;
1054 if (tasks_suspend_state) {
1055 task_suspend_internal(new_task);
1056 }
1057 lck_mtx_unlock(&tasks_threads_lock);
1058
1059 *child_task = new_task;
1060 return(KERN_SUCCESS);
1061 }
1062
1063 int task_dropped_imp_count = 0;
1064
1065 /*
1066 * task_deallocate:
1067 *
1068 * Drop a reference on a task.
1069 */
1070 void
1071 task_deallocate(
1072 task_t task)
1073 {
1074 ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups;
1075 uint32_t refs;
1076
1077 if (task == TASK_NULL)
1078 return;
1079
1080 refs = task_deallocate_internal(task);
1081
1082 #if IMPORTANCE_INHERITANCE
1083 if (refs > 1)
1084 return;
1085
1086 if (refs == 1) {
1087 /*
1088 * If last ref potentially comes from the task's importance,
1089 * disconnect it. But more task refs may be added before
1090 * that completes, so wait for the reference to go to zero
1091 * naturally (it may happen on a recursive task_deallocate()
1092 * from the ipc_importance_disconnect_task() call).
1093 */
1094 if (IIT_NULL != task->task_imp_base)
1095 ipc_importance_disconnect_task(task);
1096 return;
1097 }
1098 #else
1099 if (refs > 0)
1100 return;
1101 #endif /* IMPORTANCE_INHERITANCE */
1102
1103 lck_mtx_lock(&tasks_threads_lock);
1104 queue_remove(&terminated_tasks, task, task_t, tasks);
1105 terminated_tasks_count--;
1106 lck_mtx_unlock(&tasks_threads_lock);
1107
1108 /*
1109 * remove the reference on atm descriptor
1110 */
1111 task_atm_reset(task);
1112
1113 /*
1114 * remove the reference on bank context
1115 */
1116 task_bank_reset(task);
1117
1118 if (task->task_io_stats)
1119 kfree(task->task_io_stats, sizeof(struct io_stat_info));
1120
1121 /*
1122 * Give the machine dependent code a chance
1123 * to perform cleanup before ripping apart
1124 * the task.
1125 */
1126 machine_task_terminate(task);
1127
1128 ipc_task_terminate(task);
1129
1130 if (task->affinity_space)
1131 task_affinity_deallocate(task);
1132
1133 #if MACH_ASSERT
1134 if (task->ledger != NULL &&
1135 task->map != NULL &&
1136 task->map->pmap != NULL &&
1137 task->map->pmap->ledger != NULL) {
1138 assert(task->ledger == task->map->pmap->ledger);
1139 }
1140 #endif /* MACH_ASSERT */
1141
1142 vm_purgeable_disown(task);
1143 assert(task->task_purgeable_disowned);
1144 if (task->task_volatile_objects != 0 ||
1145 task->task_nonvolatile_objects != 0) {
1146 panic("task_deallocate(%p): "
1147 "volatile_objects=%d nonvolatile_objects=%d\n",
1148 task,
1149 task->task_volatile_objects,
1150 task->task_nonvolatile_objects);
1151 }
1152
1153 vm_map_deallocate(task->map);
1154 is_release(task->itk_space);
1155
1156 ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
1157 &interrupt_wakeups, &debit);
1158 ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
1159 &platform_idle_wakeups, &debit);
1160
1161 #if defined(CONFIG_SCHED_MULTIQ)
1162 sched_group_destroy(task->sched_group);
1163 #endif
1164
1165 /* Accumulate statistics for dead tasks */
1166 lck_spin_lock(&dead_task_statistics_lock);
1167 dead_task_statistics.total_user_time += task->total_user_time;
1168 dead_task_statistics.total_system_time += task->total_system_time;
1169
1170 dead_task_statistics.task_interrupt_wakeups += interrupt_wakeups;
1171 dead_task_statistics.task_platform_idle_wakeups += platform_idle_wakeups;
1172
1173 dead_task_statistics.task_timer_wakeups_bin_1 += task->task_timer_wakeups_bin_1;
1174 dead_task_statistics.task_timer_wakeups_bin_2 += task->task_timer_wakeups_bin_2;
1175
1176 lck_spin_unlock(&dead_task_statistics_lock);
1177 lck_mtx_destroy(&task->lock, &task_lck_grp);
1178
1179 if (!ledger_get_entries(task->ledger, task_ledgers.tkm_private, &credit,
1180 &debit)) {
1181 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc);
1182 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_private.free);
1183 }
1184 if (!ledger_get_entries(task->ledger, task_ledgers.tkm_shared, &credit,
1185 &debit)) {
1186 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_shared.alloc);
1187 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_shared.free);
1188 }
1189 ledger_dereference(task->ledger);
1190 zinfo_task_free(task);
1191
1192 #if TASK_REFERENCE_LEAK_DEBUG
1193 btlog_remove_entries_for_element(task_ref_btlog, task);
1194 #endif
1195
1196 #if CONFIG_COALITIONS
1197 if (!task->coalition[COALITION_TYPE_RESOURCE])
1198 panic("deallocating task was not a member of a resource coalition");
1199 task_release_coalitions(task);
1200 #endif /* CONFIG_COALITIONS */
1201
1202 bzero(task->coalition, sizeof(task->coalition));
1203
1204 #if MACH_BSD
1205 /* clean up collected information since last reference to task is gone */
1206 if (task->corpse_info) {
1207 task_crashinfo_destroy(task->corpse_info);
1208 task->corpse_info = NULL;
1209 }
1210 #endif
1211
1212 zfree(task_zone, task);
1213 }
1214
1215 /*
1216 * task_name_deallocate:
1217 *
1218 * Drop a reference on a task name.
1219 */
1220 void
1221 task_name_deallocate(
1222 task_name_t task_name)
1223 {
1224 return(task_deallocate((task_t)task_name));
1225 }
1226
1227 /*
1228 * task_suspension_token_deallocate:
1229 *
1230 * Drop a reference on a task suspension token.
1231 */
1232 void
1233 task_suspension_token_deallocate(
1234 task_suspension_token_t token)
1235 {
1236 return(task_deallocate((task_t)token));
1237 }
1238
1239
1240 /*
1241 * task_collect_crash_info:
1242 *
1243 * collect crash info from bsd and mach based data
1244 */
1245 kern_return_t
1246 task_collect_crash_info(task_t task)
1247 {
1248 kern_return_t kr = KERN_SUCCESS;
1249
1250 kcdata_descriptor_t crash_data = NULL;
1251 kcdata_descriptor_t crash_data_release = NULL;
1252 mach_msg_type_number_t size = CORPSEINFO_ALLOCATION_SIZE;
1253 mach_vm_offset_t crash_data_user_ptr = 0;
1254
1255 if (!corpses_enabled()) {
1256 return KERN_NOT_SUPPORTED;
1257 }
1258
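/*
 * The task lock is dropped around the allocation below, since
 * mach_vm_allocate() may block; any corpse_info installed in the
 * meantime is swapped out and destroyed once the new one is in place.
 */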
1259 task_lock(task);
1260 assert(task->bsd_info != NULL);
1261 if (task->corpse_info == NULL && task->bsd_info != NULL) {
1262 task_unlock(task);
1263 /* map crash data memory in task's vm map */
1264 kr = mach_vm_allocate(task->map, &crash_data_user_ptr, size, (VM_MAKE_TAG(VM_MEMORY_CORPSEINFO) | VM_FLAGS_ANYWHERE));
1265
1266 if (kr != KERN_SUCCESS)
1267 goto out_no_lock;
1268
1269 crash_data = task_crashinfo_alloc_init((mach_vm_address_t)crash_data_user_ptr, size);
1270 if (crash_data) {
1271 task_lock(task);
1272 crash_data_release = task->corpse_info;
1273 task->corpse_info = crash_data;
1274 task_unlock(task);
1275 kr = KERN_SUCCESS;
1276 } else {
1277 /* if failed to create corpse info, free the mapping */
1278 if (KERN_SUCCESS != mach_vm_deallocate(task->map, crash_data_user_ptr, size)) {
1279 printf("mach_vm_deallocate failed to clear corpse_data for pid %d.\n", task_pid(task));
1280 }
1281 kr = KERN_FAILURE;
1282 }
1283
1284 if (crash_data_release != NULL) {
1285 task_crashinfo_destroy(crash_data_release);
1286 }
1287 } else {
1288 task_unlock(task);
1289 }
1290
1291 out_no_lock:
1292 return kr;
1293 }
1294
1295 /*
1296 * task_deliver_crash_notification:
1297 *
1298 * Makes outcall to registered host port for a corpse.
1299 */
1300 kern_return_t
1301 task_deliver_crash_notification(task_t task)
1302 {
1303 kcdata_descriptor_t crash_info = task->corpse_info;
1304 thread_t th_iter = NULL;
1305 kern_return_t kr = KERN_SUCCESS;
1306 wait_interrupt_t wsave;
1307 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
1308
1309 if (crash_info == NULL)
1310 return KERN_FAILURE;
1311
1312 code[0] = crash_info->kcd_addr_begin;
1313 code[1] = crash_info->kcd_length;
1314
1315 task_lock(task);
1316 queue_iterate(&task->threads, th_iter, thread_t, task_threads)
1317 {
1318 ipc_thread_reset(th_iter);
1319 }
1320 task_unlock(task);
1321
1322 wsave = thread_interrupt_level(THREAD_UNINT);
1323 kr = exception_triage(EXC_CORPSE_NOTIFY, code, EXCEPTION_CODE_MAX);
1324 if (kr != KERN_SUCCESS) {
1325 printf("Failed to send exception EXC_CORPSE_NOTIFY. error code: %d for pid %d\n", kr, task_pid(task));
1326 }
1327
1328 /*
1329 * crash reporting is done. Now release threads
1330 * for reaping by thread_terminate_daemon
1331 */
1332 task_lock(task);
1333 assert(task->active_thread_count == 0);
1334 queue_iterate(&task->threads, th_iter, thread_t, task_threads)
1335 {
1336 thread_mtx_lock(th_iter);
1337 assert(th_iter->inspection == TRUE);
1338 th_iter->inspection = FALSE;
1339 /* now that the corpse has been autopsied, dispose of the thread name */
1340 uthread_cleanup_name(th_iter->uthread);
1341 thread_mtx_unlock(th_iter);
1342 }
1343
1344 thread_terminate_crashed_threads();
1345 /* remove the pending corpse report flag */
1346 task_clear_corpse_pending_report(task);
1347
1348 task_unlock(task);
1349
1350 (void)thread_interrupt_level(wsave);
1351 task_terminate_internal(task);
1352
1353 return kr;
1354 }
1355
1356 /*
1357 * task_terminate:
1358 *
1359 * Terminate the specified task. See comments on thread_terminate
1360 * (kern/thread.c) about problems with terminating the "current task."
1361 */
1362
1363 kern_return_t
1364 task_terminate(
1365 task_t task)
1366 {
1367 if (task == TASK_NULL)
1368 return (KERN_INVALID_ARGUMENT);
1369
1370 if (task->bsd_info)
1371 return (KERN_FAILURE);
1372
1373 return (task_terminate_internal(task));
1374 }
1375
1376 #if MACH_ASSERT
1377 extern int proc_pid(struct proc *);
1378 extern void proc_name_kdp(task_t t, char *buf, int size);
1379 #endif /* MACH_ASSERT */
1380
1381 #define VM_MAP_PARTIAL_REAP 0x54 /* 0x150 */
1382 static void
1383 __unused task_partial_reap(task_t task, __unused int pid)
1384 {
1385 unsigned int reclaimed_resident = 0;
1386 unsigned int reclaimed_compressed = 0;
1387 uint64_t task_page_count;
1388
1389 task_page_count = (get_task_phys_footprint(task) / PAGE_SIZE_64);
1390
1391 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_START),
1392 pid, task_page_count, 0, 0, 0);
1393
1394 vm_map_partial_reap(task->map, &reclaimed_resident, &reclaimed_compressed);
1395
1396 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_END),
1397 pid, reclaimed_resident, reclaimed_compressed, 0, 0);
1398 }
1399
1400 kern_return_t
1401 task_mark_corpse(task_t task)
1402 {
1403 kern_return_t kr = KERN_SUCCESS;
1404 thread_t self_thread;
1405 (void) self_thread;
1406 wait_interrupt_t wsave;
1407
1408 assert(task != kernel_task);
1409 assert(task == current_task());
1410 assert(!task_is_a_corpse(task));
1411
1412 kr = task_collect_crash_info(task);
1413 if (kr != KERN_SUCCESS) {
1414 return kr;
1415 }
1416
1417 self_thread = current_thread();
1418
1419 wsave = thread_interrupt_level(THREAD_UNINT);
1420 task_lock(task);
1421
1422 task_set_corpse_pending_report(task);
1423 task_set_corpse(task);
1424
1425 kr = task_start_halt_locked(task, TRUE);
1426 assert(kr == KERN_SUCCESS);
1427 ipc_task_reset(task);
1428 ipc_task_enable(task);
1429
1430 task_unlock(task);
1431 /* terminate the ipc space */
1432 ipc_space_terminate(task->itk_space);
1433
1434 task_start_halt(task);
1435 thread_terminate_internal(self_thread);
1436 (void) thread_interrupt_level(wsave);
1437 assert(task->halting == TRUE);
1438 return kr;
1439 }
1440
1441 kern_return_t
1442 task_terminate_internal(
1443 task_t task)
1444 {
1445 thread_t thread, self;
1446 task_t self_task;
1447 boolean_t interrupt_save;
1448 int pid = 0;
1449
1450 assert(task != kernel_task);
1451
1452 self = current_thread();
1453 self_task = self->task;
1454
1455 /*
1456 * Get the task locked and make sure that we are not racing
1457 * with someone else trying to terminate us.
1458 */
1459 if (task == self_task)
1460 task_lock(task);
1461 else
1462 if (task < self_task) {
1463 task_lock(task);
1464 task_lock(self_task);
1465 }
1466 else {
1467 task_lock(self_task);
1468 task_lock(task);
1469 }
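/*
 * Note: when both the target task and the caller's task must be locked,
 * they are taken in ascending address order above, so two threads
 * terminating each other's tasks cannot deadlock.
 */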
1470
1471 if (!task->active) {
1472 /*
1473 * Task is already being terminated.
1474 * Just return an error. If we are dying, this will
1475 * just get us to our AST special handler and that
1476 * will get us to finalize the termination of ourselves.
1477 */
1478 task_unlock(task);
1479 if (self_task != task)
1480 task_unlock(self_task);
1481
1482 return (KERN_FAILURE);
1483 }
1484
1485 if (task_corpse_pending_report(task)) {
1486 /*
1487 * Task is marked for reporting as corpse.
1488 * Just return an error. This will route us to our
1489 * AST special handler, which will finish the path
1490 * to death.
1491 */
1492 task_unlock(task);
1493 if (self_task != task)
1494 task_unlock(self_task);
1495
1496 return (KERN_FAILURE);
1497 }
1498
1499 if (self_task != task)
1500 task_unlock(self_task);
1501
1502 /*
1503 * Make sure the current thread does not get aborted out of
1504 * the waits inside these operations.
1505 */
1506 interrupt_save = thread_interrupt_level(THREAD_UNINT);
1507
1508 /*
1509 * Indicate that we want all the threads to stop executing
1510 * at user space by holding the task (we would have held
1511 * each thread independently in thread_terminate_internal -
1512 * but this way we may be more likely to already find it
1513 * held there). Mark the task inactive, and prevent
1514 * further task operations via the task port.
1515 */
1516 task_hold_locked(task);
1517 task->active = FALSE;
1518 ipc_task_disable(task);
1519
1520 #if CONFIG_TELEMETRY
1521 /*
1522 * Notify telemetry that this task is going away.
1523 */
1524 telemetry_task_ctl_locked(task, TF_TELEMETRY, 0);
1525 #endif
1526
1527 /*
1528 * Terminate each thread in the task.
1529 */
1530 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1531 thread_terminate_internal(thread);
1532 }
1533
1534 #ifdef MACH_BSD
1535 if (task->bsd_info != NULL) {
1536 pid = proc_pid(task->bsd_info);
1537 }
1538 #endif /* MACH_BSD */
1539
1540 task_unlock(task);
1541
1542 proc_set_task_policy(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE,
1543 TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);
1544
1545 /* Early object reap phase */
1546
1547 // PR-17045188: Revisit implementation
1548 // task_partial_reap(task, pid);
1549
1550
1551 /*
1552 * Destroy all synchronizers owned by the task.
1553 */
1554 task_synchronizer_destroy_all(task);
1555
1556 /*
1557 * Destroy the IPC space, leaving just a reference for it.
1558 */
1559 ipc_space_terminate(task->itk_space);
1560
1561 #if 00
1562 /* if some ledgers go negative on tear-down again... */
1563 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1564 task_ledgers.phys_footprint);
1565 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1566 task_ledgers.internal);
1567 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1568 task_ledgers.internal_compressed);
1569 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1570 task_ledgers.iokit_mapped);
1571 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1572 task_ledgers.alternate_accounting);
1573 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1574 task_ledgers.alternate_accounting_compressed);
1575 #endif
1576
1577 /*
1578 * If the current thread is a member of the task
1579 * being terminated, then the last reference to
1580 * the task will not be dropped until the thread
1581 * is finally reaped. To avoid incurring the
1582 * expense of removing the address space regions
1583 * at reap time, we do it explicitly here.
1584 */
1585
1586 vm_map_lock(task->map);
1587 vm_map_disable_hole_optimization(task->map);
1588 vm_map_unlock(task->map);
1589
1590 vm_map_remove(task->map,
1591 task->map->min_offset,
1592 task->map->max_offset,
1593 /* no unnesting on final cleanup: */
1594 VM_MAP_REMOVE_NO_UNNESTING);
1595
1596 /* release our shared region */
1597 vm_shared_region_set(task, NULL);
1598
1599
1600 #if MACH_ASSERT
1601 /*
1602 * Identify the pmap's process, in case the pmap ledgers drift
1603 * and we have to report it.
1604 */
1605 char procname[17];
1606 if (task->bsd_info) {
1607 pid = proc_pid(task->bsd_info);
1608 proc_name_kdp(task, procname, sizeof (procname));
1609 } else {
1610 pid = 0;
1611 strlcpy(procname, "<unknown>", sizeof (procname));
1612 }
1613 pmap_set_process(task->map->pmap, pid, procname);
1614 #endif /* MACH_ASSERT */
1615
1616 lck_mtx_lock(&tasks_threads_lock);
1617 queue_remove(&tasks, task, task_t, tasks);
1618 queue_enter(&terminated_tasks, task, task_t, tasks);
1619 tasks_count--;
1620 terminated_tasks_count++;
1621 lck_mtx_unlock(&tasks_threads_lock);
1622
1623 /*
1624 * We no longer need to guard against being aborted, so restore
1625 * the previous interruptible state.
1626 */
1627 thread_interrupt_level(interrupt_save);
1628
1629 #if KPERF
1630 /* force the task to release all ctrs */
1631 if (task->t_chud & TASK_KPC_FORCED_ALL_CTRS)
1632 kpc_force_all_ctrs(task, 0);
1633 #endif
1634
1635 #if CONFIG_COALITIONS
1636 /*
1637 * Leave our coalitions. (drop activation but not reference)
1638 */
1639 coalitions_remove_task(task);
1640 #endif
1641
1642 /*
1643 * Get rid of the task active reference on itself.
1644 */
1645 task_deallocate(task);
1646
1647 return (KERN_SUCCESS);
1648 }
1649
1650 void
1651 tasks_system_suspend(boolean_t suspend)
1652 {
1653 task_t task;
1654
1655 lck_mtx_lock(&tasks_threads_lock);
1656 assert(tasks_suspend_state != suspend);
1657 tasks_suspend_state = suspend;
1658 queue_iterate(&tasks, task, task_t, tasks) {
1659 if (task == kernel_task) {
1660 continue;
1661 }
1662 suspend ? task_suspend_internal(task) : task_resume_internal(task);
1663 }
1664 lck_mtx_unlock(&tasks_threads_lock);
1665 }
1666
1667 /*
1668 * task_start_halt:
1669 *
1670 * Shut the current task down (except for the current thread) in
1671 * preparation for dramatic changes to the task (probably exec).
1672 * We hold the task and mark all other threads in the task for
1673 * termination.
1674 */
1675 kern_return_t
1676 task_start_halt(task_t task)
1677 {
1678 kern_return_t kr = KERN_SUCCESS;
1679 task_lock(task);
1680 kr = task_start_halt_locked(task, FALSE);
1681 task_unlock(task);
1682 return kr;
1683 }
1684
1685 static kern_return_t
1686 task_start_halt_locked(task_t task, boolean_t should_mark_corpse)
1687 {
1688 thread_t thread, self;
1689 uint64_t dispatchqueue_offset;
1690
1691 assert(task != kernel_task);
1692
1693 self = current_thread();
1694
1695 if (task != self->task)
1696 return (KERN_INVALID_ARGUMENT);
1697
1698 if (task->halting || !task->active || !self->active) {
1699 /*
1700 * Task or current thread is already being terminated.
1701 * Hurry up and return out of the current kernel context
1702 * so that we run our AST special handler to terminate
1703 * ourselves.
1704 */
1705 return (KERN_FAILURE);
1706 }
1707
1708 task->halting = TRUE;
1709
1710 /*
1711 * Mark all the threads to keep them from starting any more
1712 * user-level execution. The thread_terminate_internal code
1713 * would do this on a thread by thread basis anyway, but this
1714 * gives us a better chance of not having to wait there.
1715 */
1716 task_hold_locked(task);
1717 dispatchqueue_offset = get_dispatchqueue_offset_from_proc(task->bsd_info);
1718
1719 /*
1720 * Terminate all the other threads in the task.
1721 */
1722 queue_iterate(&task->threads, thread, thread_t, task_threads)
1723 {
1724 if (should_mark_corpse) {
1725 thread_mtx_lock(thread);
1726 thread->inspection = TRUE;
1727 thread_mtx_unlock(thread);
1728 }
1729 if (thread != self)
1730 thread_terminate_internal(thread);
1731 }
1732 task->dispatchqueue_offset = dispatchqueue_offset;
1733
1734 task_release_locked(task);
1735
1736 return KERN_SUCCESS;
1737 }
1738
1739
1740 /*
1741 * task_complete_halt:
1742 *
1743 * Complete task halt by waiting for threads to terminate, then clean
1744 * up task resources (VM, port namespace, etc...) and then let the
1745 * current thread go in the (practically empty) task context.
1746 */
1747 void
1748 task_complete_halt(task_t task)
1749 {
1750 task_lock(task);
1751 assert(task->halting);
1752 assert(task == current_task());
1753
1754 /*
1755 * Wait for the other threads to get shut down.
1756 * When the last other thread is reaped, we'll be
1757 * woken up.
1758 */
1759 if (task->thread_count > 1) {
1760 assert_wait((event_t)&task->halting, THREAD_UNINT);
1761 task_unlock(task);
1762 thread_block(THREAD_CONTINUE_NULL);
1763 } else {
1764 task_unlock(task);
1765 }
1766
1767 /*
1768 * Give the machine dependent code a chance
1769 * to perform cleanup of task-level resources
1770 * associated with the current thread before
1771 * ripping apart the task.
1772 */
1773 machine_task_terminate(task);
1774
1775 /*
1776 * Destroy all synchronizers owned by the task.
1777 */
1778 task_synchronizer_destroy_all(task);
1779
1780 /*
1781 * Destroy the contents of the IPC space, leaving just
1782 * a reference for it.
1783 */
1784 ipc_space_clean(task->itk_space);
1785
1786 /*
1787 * Clean out the address space, as we are going to be
1788 * getting a new one.
1789 */
1790 vm_map_remove(task->map, task->map->min_offset,
1791 task->map->max_offset,
1792 /* no unnesting on final cleanup: */
1793 VM_MAP_REMOVE_NO_UNNESTING);
1794
1795 task->halting = FALSE;
1796 }
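/*
 * The two routines above are designed to be used as a pair.  A minimal
 * sketch of the expected calling pattern (hypothetical caller, e.g. an
 * exec-style path; not copied from any particular call site):
 *
 *	if (task_start_halt(task) == KERN_SUCCESS) {
 *		// every other thread is now marked for termination
 *		task_complete_halt(task);
 *		// the task is now practically empty and can be repopulated
 *	}
 */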
1797
1798 /*
1799 * task_hold_locked:
1800 *
1801 * Suspend execution of the specified task.
1802 * This is a recursive-style suspension of the task; a count of
1803 * suspends is maintained.
1804 *
1805 * CONDITIONS: the task is locked and active.
1806 */
1807 void
1808 task_hold_locked(
1809 register task_t task)
1810 {
1811 register thread_t thread;
1812
1813 assert(task->active);
1814
1815 if (task->suspend_count++ > 0)
1816 return;
1817
1818 /*
1819 * Iterate through all the threads and hold them.
1820 */
1821 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1822 thread_mtx_lock(thread);
1823 thread_hold(thread);
1824 thread_mtx_unlock(thread);
1825 }
1826 }
1827
1828 /*
1829 * task_hold:
1830 *
1831 * Same as the internal routine above, except that it must lock
1832 * and verify that the task is active. This differs from task_suspend
1833 * in that it places a kernel hold on the task rather than just a
1834 * user-level hold. This keeps users from over-resuming and setting
1835 * it running out from under the kernel.
1836 *
1837 * CONDITIONS: the caller holds a reference on the task
1838 */
1839 kern_return_t
1840 task_hold(
1841 register task_t task)
1842 {
1843 if (task == TASK_NULL)
1844 return (KERN_INVALID_ARGUMENT);
1845
1846 task_lock(task);
1847
1848 if (!task->active) {
1849 task_unlock(task);
1850
1851 return (KERN_FAILURE);
1852 }
1853
1854 task_hold_locked(task);
1855 task_unlock(task);
1856
1857 return (KERN_SUCCESS);
1858 }
1859
1860 kern_return_t
1861 task_wait(
1862 task_t task,
1863 boolean_t until_not_runnable)
1864 {
1865 if (task == TASK_NULL)
1866 return (KERN_INVALID_ARGUMENT);
1867
1868 task_lock(task);
1869
1870 if (!task->active) {
1871 task_unlock(task);
1872
1873 return (KERN_FAILURE);
1874 }
1875
1876 task_wait_locked(task, until_not_runnable);
1877 task_unlock(task);
1878
1879 return (KERN_SUCCESS);
1880 }
1881
1882 /*
1883 * task_wait_locked:
1884 *
1885 * Wait for all threads in task to stop.
1886 *
1887 * Conditions:
1888 * Called with task locked, active, and held.
1889 */
1890 void
1891 task_wait_locked(
1892 register task_t task,
1893 boolean_t until_not_runnable)
1894 {
1895 register thread_t thread, self;
1896
1897 assert(task->active);
1898 assert(task->suspend_count > 0);
1899
1900 self = current_thread();
1901
1902 /*
1903 * Iterate through all the threads and wait for them to
1904 * stop. Do not wait for the current thread if it is within
1905 * the task.
1906 */
1907 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1908 if (thread != self)
1909 thread_wait(thread, until_not_runnable);
1910 }
1911 }
1912
1913 /*
1914 * task_release_locked:
1915 *
1916 * Release a kernel hold on a task.
1917 *
1918 * CONDITIONS: the task is locked and active
1919 */
1920 void
1921 task_release_locked(
1922 register task_t task)
1923 {
1924 register thread_t thread;
1925
1926 assert(task->active);
1927 assert(task->suspend_count > 0);
1928
1929 if (--task->suspend_count > 0)
1930 return;
1931
1932 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1933 thread_mtx_lock(thread);
1934 thread_release(thread);
1935 thread_mtx_unlock(thread);
1936 }
1937 }
1938
1939 /*
1940 * task_release:
1941 *
1942 * Same as the internal routine above, except that it must lock
1943 * and verify that the task is active.
1944 *
1945 * CONDITIONS: The caller holds a reference to the task
1946 */
1947 kern_return_t
1948 task_release(
1949 task_t task)
1950 {
1951 if (task == TASK_NULL)
1952 return (KERN_INVALID_ARGUMENT);
1953
1954 task_lock(task);
1955
1956 if (!task->active) {
1957 task_unlock(task);
1958
1959 return (KERN_FAILURE);
1960 }
1961
1962 task_release_locked(task);
1963 task_unlock(task);
1964
1965 return (KERN_SUCCESS);
1966 }
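/*
 * A minimal sketch of how the kernel-level hold primitives above pair
 * up (hypothetical caller; per the conditions documented on each
 * routine, the caller must already hold a reference on the task):
 *
 *	if (task_hold(task) == KERN_SUCCESS) {
 *		task_wait(task, FALSE);		// wait for threads to stop running
 *		// ... inspect or manipulate the stopped task ...
 *		task_release(task);
 *	}
 */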
1967
1968 kern_return_t
1969 task_threads(
1970 task_t task,
1971 thread_act_array_t *threads_out,
1972 mach_msg_type_number_t *count)
1973 {
1974 mach_msg_type_number_t actual;
1975 thread_t *thread_list;
1976 thread_t thread;
1977 vm_size_t size, size_needed;
1978 void *addr;
1979 unsigned int i, j;
1980
1981 if (task == TASK_NULL)
1982 return (KERN_INVALID_ARGUMENT);
1983
1984 size = 0; addr = NULL;
1985
1986 for (;;) {
1987 task_lock(task);
1988 if (!task->active) {
1989 task_unlock(task);
1990
1991 if (size != 0)
1992 kfree(addr, size);
1993
1994 return (KERN_FAILURE);
1995 }
1996
1997 actual = task->thread_count;
1998
1999 /* do we have the memory we need? */
2000 size_needed = actual * sizeof (mach_port_t);
2001 if (size_needed <= size)
2002 break;
2003
2004 /* unlock the task and allocate more memory */
2005 task_unlock(task);
2006
2007 if (size != 0)
2008 kfree(addr, size);
2009
2010 assert(size_needed > 0);
2011 size = size_needed;
2012
2013 addr = kalloc(size);
2014 if (addr == 0)
2015 return (KERN_RESOURCE_SHORTAGE);
2016 }
2017
2018 /* OK, have memory and the task is locked & active */
2019 thread_list = (thread_t *)addr;
2020
2021 i = j = 0;
2022
2023 for (thread = (thread_t)queue_first(&task->threads); i < actual;
2024 ++i, thread = (thread_t)queue_next(&thread->task_threads)) {
2025 thread_reference_internal(thread);
2026 thread_list[j++] = thread;
2027 }
2028
2029 assert(queue_end(&task->threads, (queue_entry_t)thread));
2030
2031 actual = j;
2032 size_needed = actual * sizeof (mach_port_t);
2033
2034 /* can unlock task now that we've got the thread refs */
2035 task_unlock(task);
2036
2037 if (actual == 0) {
2038 /* no threads, so return null pointer and deallocate memory */
2039
2040 *threads_out = NULL;
2041 *count = 0;
2042
2043 if (size != 0)
2044 kfree(addr, size);
2045 }
2046 else {
2047 /* if we allocated too much, must copy */
2048
2049 if (size_needed < size) {
2050 void *newaddr;
2051
2052 newaddr = kalloc(size_needed);
2053 if (newaddr == 0) {
2054 for (i = 0; i < actual; ++i)
2055 thread_deallocate(thread_list[i]);
2056 kfree(addr, size);
2057 return (KERN_RESOURCE_SHORTAGE);
2058 }
2059
2060 bcopy(addr, newaddr, size_needed);
2061 kfree(addr, size);
2062 thread_list = (thread_t *)newaddr;
2063 }
2064
2065 *threads_out = thread_list;
2066 *count = actual;
2067
2068 /* do the conversion that Mig should handle */
2069
2070 for (i = 0; i < actual; ++i)
2071 ((ipc_port_t *) thread_list)[i] = convert_thread_to_port(thread_list[i]);
2072 }
2073
2074 return (KERN_SUCCESS);
2075 }
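/*
 * A minimal sketch of the user-space side of task_threads() (hypothetical
 * caller).  The MIG wrapper returns an out-of-line array of thread send
 * rights, so the caller is expected to release both the rights and the
 * array:
 *
 *	thread_act_array_t threads;
 *	mach_msg_type_number_t count;
 *
 *	if (task_threads(mach_task_self(), &threads, &count) == KERN_SUCCESS) {
 *		for (mach_msg_type_number_t i = 0; i < count; i++)
 *			mach_port_deallocate(mach_task_self(), threads[i]);
 *		vm_deallocate(mach_task_self(), (vm_address_t)threads,
 *		    count * sizeof(threads[0]));
 *	}
 */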
2076
2077 #define TASK_HOLD_NORMAL 0
2078 #define TASK_HOLD_PIDSUSPEND 1
2079 #define TASK_HOLD_LEGACY 2
2080 #define TASK_HOLD_LEGACY_ALL 3
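/*
 * Hold modes for place_task_hold()/release_task_hold() below:
 * TASK_HOLD_NORMAL backs task_suspend_internal()/task_resume_internal()
 * (and therefore task_suspend2()/task_resume2()), TASK_HOLD_PIDSUSPEND
 * backs task_pidsuspend()/task_pidresume(), TASK_HOLD_LEGACY backs the
 * old-style task_suspend()/task_resume(), and TASK_HOLD_LEGACY_ALL drops
 * every remaining legacy hold when the resume port loses its last sender.
 */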
2081
2082 static kern_return_t
2083 place_task_hold (
2084 register task_t task,
2085 int mode)
2086 {
2087 if (!task->active) {
2088 return (KERN_FAILURE);
2089 }
2090
2091 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2092 MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_SUSPEND) | DBG_FUNC_NONE,
2093 task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
2094 task->user_stop_count, task->user_stop_count + 1, 0);
2095
2096 #if MACH_ASSERT
2097 current_task()->suspends_outstanding++;
2098 #endif
2099
2100 if (mode == TASK_HOLD_LEGACY)
2101 task->legacy_stop_count++;
2102
2103 if (task->user_stop_count++ > 0) {
2104 /*
2105 * If the stop count was positive, the task is
2106 * already stopped and we can exit.
2107 */
2108 return (KERN_SUCCESS);
2109 }
2110
2111 /*
2112 * Put a kernel-level hold on the threads in the task (all
2113 * user-level task suspensions added together represent a
2114 * single kernel-level hold). We then wait for the threads
2115 * to stop executing user code.
2116 */
2117 task_hold_locked(task);
2118 task_wait_locked(task, FALSE);
2119
2120 return (KERN_SUCCESS);
2121 }
2122
2123 static kern_return_t
2124 release_task_hold (
2125 register task_t task,
2126 int mode)
2127 {
2128 register boolean_t release = FALSE;
2129
2130 if (!task->active) {
2131 return (KERN_FAILURE);
2132 }
2133
2134 if (mode == TASK_HOLD_PIDSUSPEND) {
2135 if (task->pidsuspended == FALSE) {
2136 return (KERN_FAILURE);
2137 }
2138 task->pidsuspended = FALSE;
2139 }
2140
2141 if (task->user_stop_count > (task->pidsuspended ? 1 : 0)) {
2142
2143 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2144 MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_RESUME) | DBG_FUNC_NONE,
2145 task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
2146 task->user_stop_count, mode, task->legacy_stop_count);
2147
2148 #if MACH_ASSERT
2149 /*
2150 * This is obviously not robust; if we suspend one task and then resume a different one,
2151 * we'll fly under the radar. This is only meant to catch the common case of a crashed
2152 * or buggy suspender.
2153 */
2154 current_task()->suspends_outstanding--;
2155 #endif
2156
2157 if (mode == TASK_HOLD_LEGACY_ALL) {
2158 if (task->legacy_stop_count >= task->user_stop_count) {
2159 task->user_stop_count = 0;
2160 release = TRUE;
2161 } else {
2162 task->user_stop_count -= task->legacy_stop_count;
2163 }
2164 task->legacy_stop_count = 0;
2165 } else {
2166 if (mode == TASK_HOLD_LEGACY && task->legacy_stop_count > 0)
2167 task->legacy_stop_count--;
2168 if (--task->user_stop_count == 0)
2169 release = TRUE;
2170 }
2171 }
2172 else {
2173 return (KERN_FAILURE);
2174 }
2175
2176 /*
2177 * Release the task if necessary.
2178 */
2179 if (release)
2180 task_release_locked(task);
2181
2182 return (KERN_SUCCESS);
2183 }
2184
2185
2186 /*
2187 * task_suspend:
2188 *
2189 * Implement an (old-fashioned) user-level suspension on a task.
2190 *
2191 * Because the user isn't expecting to have to manage a suspension
2192 * token, we'll track it for him in the kernel in the form of a naked
2193 * send right to the task's resume port. All such send rights
2194 * account for a single suspension against the task (unlike task_suspend2()
2195 * where each caller gets a unique suspension count represented by a
2196 * unique send-once right).
2197 *
2198 * Conditions:
2199 * The caller holds a reference to the task
2200 */
2201 kern_return_t
2202 task_suspend(
2203 register task_t task)
2204 {
2205 kern_return_t kr;
2206 mach_port_t port, send, old_notify;
2207 mach_port_name_t name;
2208
2209 if (task == TASK_NULL || task == kernel_task)
2210 return (KERN_INVALID_ARGUMENT);
2211
2212 task_lock(task);
2213
2214 /*
2215 * Claim a send right on the task resume port, and request a no-senders
2216 * notification on that port (if none outstanding).
2217 */
2218 if (task->itk_resume == IP_NULL) {
2219 task->itk_resume = ipc_port_alloc_kernel();
2220 if (!IP_VALID(task->itk_resume))
2221 panic("failed to create resume port");
2222 ipc_kobject_set(task->itk_resume, (ipc_kobject_t)task, IKOT_TASK_RESUME);
2223 }
2224
2225 port = task->itk_resume;
2226 ip_lock(port);
2227 assert(ip_active(port));
2228
2229 send = ipc_port_make_send_locked(port);
2230 assert(IP_VALID(send));
2231
2232 if (port->ip_nsrequest == IP_NULL) {
2233 ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
2234 assert(old_notify == IP_NULL);
2235 /* port unlocked */
2236 } else {
2237 ip_unlock(port);
2238 }
2239
2240 /*
2241 * place a legacy hold on the task.
2242 */
2243 kr = place_task_hold(task, TASK_HOLD_LEGACY);
2244 if (kr != KERN_SUCCESS) {
2245 task_unlock(task);
2246 ipc_port_release_send(send);
2247 return kr;
2248 }
2249
2250 task_unlock(task);
2251
2252 /*
2253 * Copyout the send right into the calling task's IPC space. It won't know it is there,
2254 * but we'll look it up when calling a traditional resume. Any IPC operations that
2255 * deallocate the send right will auto-release the suspension.
2256 */
2257 if ((kr = ipc_kmsg_copyout_object(current_task()->itk_space, (ipc_object_t)send,
2258 MACH_MSG_TYPE_MOVE_SEND, &name)) != KERN_SUCCESS) {
2259 printf("warning: %s(%d) failed to copyout suspension token for pid %d with error: %d\n",
2260 proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
2261 task_pid(task), kr);
2262 return (kr);
2263 }
2264
2265 return (kr);
2266 }
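/*
 * A minimal sketch of the user-space side of the old-fashioned interface
 * (hypothetical caller).  The kernel tracks the suspension token on the
 * caller's behalf, so the calls pair up directly:
 *
 *	task_t target = ...;	// a task port obtained elsewhere
 *
 *	if (task_suspend(target) == KERN_SUCCESS) {
 *		// target's threads are stopped
 *		task_resume(target);
 *	}
 */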
2267
2268 /*
2269 * task_resume:
2270 * Release a user hold on a task.
2271 *
2272 * Conditions:
2273 * The caller holds a reference to the task
2274 */
2275 kern_return_t
2276 task_resume(
2277 register task_t task)
2278 {
2279 kern_return_t kr;
2280 mach_port_name_t resume_port_name;
2281 ipc_entry_t resume_port_entry;
2282 ipc_space_t space = current_task()->itk_space;
2283
2284 if (task == TASK_NULL || task == kernel_task )
2285 return (KERN_INVALID_ARGUMENT);
2286
2287 /* release a legacy task hold */
2288 task_lock(task);
2289 kr = release_task_hold(task, TASK_HOLD_LEGACY);
2290 task_unlock(task);
2291
2292 is_write_lock(space);
2293 if (is_active(space) && IP_VALID(task->itk_resume) &&
2294 ipc_hash_lookup(space, (ipc_object_t)task->itk_resume, &resume_port_name, &resume_port_entry) == TRUE) {
2295 /*
2296 * We found a suspension token in the caller's IPC space. Release a send right to indicate that
2297 * we are holding one less legacy hold on the task from this caller. If the release failed,
2298 * go ahead and drop all the rights, as someone either already released our holds or the task
2299 * is gone.
2300 */
2301 if (kr == KERN_SUCCESS)
2302 ipc_right_dealloc(space, resume_port_name, resume_port_entry);
2303 else
2304 ipc_right_destroy(space, resume_port_name, resume_port_entry, FALSE, 0);
2305 /* space unlocked */
2306 } else {
2307 is_write_unlock(space);
2308 if (kr == KERN_SUCCESS)
2309 printf("warning: %s(%d) performed out-of-band resume on pid %d\n",
2310 proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
2311 task_pid(task));
2312 }
2313
2314 return kr;
2315 }
2316
2317 /*
2318 * Suspend the target task.
2319 * Making/holding a token/reference/port is the caller's responsibility.
2320 */
2321 kern_return_t
2322 task_suspend_internal(task_t task)
2323 {
2324 kern_return_t kr;
2325
2326 if (task == TASK_NULL || task == kernel_task)
2327 return (KERN_INVALID_ARGUMENT);
2328
2329 task_lock(task);
2330 kr = place_task_hold(task, TASK_HOLD_NORMAL);
2331 task_unlock(task);
2332 return (kr);
2333 }
2334
2335 /*
2336 * Suspend the target task, and return a suspension token. The token
2337 * represents a reference on the suspended task.
2338 */
2339 kern_return_t
2340 task_suspend2(
2341 register task_t task,
2342 task_suspension_token_t *suspend_token)
2343 {
2344 kern_return_t kr;
2345
2346 kr = task_suspend_internal(task);
2347 if (kr != KERN_SUCCESS) {
2348 *suspend_token = TASK_NULL;
2349 return (kr);
2350 }
2351
2352 /*
2353 * Take a reference on the target task and return that to the caller
2354 * as a "suspension token," which can be converted into an SO right to
2355 * the now-suspended task's resume port.
2356 */
2357 task_reference_internal(task);
2358 *suspend_token = task;
2359
2360 return (KERN_SUCCESS);
2361 }
2362
2363 /*
2364 * Resume the task
2365 * (reference/token/port management is caller's responsibility).
2366 */
2367 kern_return_t
2368 task_resume_internal(
2369 register task_suspension_token_t task)
2370 {
2371 kern_return_t kr;
2372
2373 if (task == TASK_NULL || task == kernel_task)
2374 return (KERN_INVALID_ARGUMENT);
2375
2376 task_lock(task);
2377 kr = release_task_hold(task, TASK_HOLD_NORMAL);
2378 task_unlock(task);
2379 return (kr);
2380 }
2381
2382 /*
2383 * Resume the task using a suspension token. Consumes the token's ref.
2384 */
2385 kern_return_t
2386 task_resume2(
2387 register task_suspension_token_t task)
2388 {
2389 kern_return_t kr;
2390
2391 kr = task_resume_internal(task);
2392 task_suspension_token_deallocate(task);
2393
2394 return (kr);
2395 }
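/*
 * A minimal sketch of the token-based user-space interface (hypothetical
 * caller; `target` as in the sketch after task_suspend() above).  Each
 * successful task_suspend2() yields its own suspension token, which is
 * handed back via task_resume2():
 *
 *	task_suspension_token_t token;
 *
 *	if (task_suspend2(target, &token) == KERN_SUCCESS) {
 *		// target remains suspended until this token is consumed
 *		task_resume2(token);
 *	}
 */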
2396
2397 boolean_t
2398 task_suspension_notify(mach_msg_header_t *request_header)
2399 {
2400 ipc_port_t port = (ipc_port_t) request_header->msgh_remote_port;
2401 task_t task = convert_port_to_task_suspension_token(port);
2402 mach_msg_type_number_t not_count;
2403
2404 if (task == TASK_NULL || task == kernel_task)
2405 return TRUE; /* nothing to do */
2406
2407 switch (request_header->msgh_id) {
2408
2409 case MACH_NOTIFY_SEND_ONCE:
2410 /* release the hold held by this specific send-once right */
2411 task_lock(task);
2412 release_task_hold(task, TASK_HOLD_NORMAL);
2413 task_unlock(task);
2414 break;
2415
2416 case MACH_NOTIFY_NO_SENDERS:
2417 not_count = ((mach_no_senders_notification_t *)request_header)->not_count;
2418
2419 task_lock(task);
2420 ip_lock(port);
2421 if (port->ip_mscount == not_count) {
2422
2423 /* release all the [remaining] outstanding legacy holds */
2424 assert(port->ip_nsrequest == IP_NULL);
2425 ip_unlock(port);
2426 release_task_hold(task, TASK_HOLD_LEGACY_ALL);
2427 task_unlock(task);
2428
2429 } else if (port->ip_nsrequest == IP_NULL) {
2430 ipc_port_t old_notify;
2431
2432 task_unlock(task);
2433 /* new send rights, re-arm notification at current make-send count */
2434 ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
2435 assert(old_notify == IP_NULL);
2436 /* port unlocked */
2437 } else {
2438 ip_unlock(port);
2439 task_unlock(task);
2440 }
2441 break;
2442
2443 default:
2444 break;
2445 }
2446
2447 task_suspension_token_deallocate(task); /* drop token reference */
2448 return TRUE;
2449 }
2450
2451 kern_return_t
2452 task_pidsuspend_locked(task_t task)
2453 {
2454 kern_return_t kr;
2455
2456 if (task->pidsuspended) {
2457 kr = KERN_FAILURE;
2458 goto out;
2459 }
2460
2461 task->pidsuspended = TRUE;
2462
2463 kr = place_task_hold(task, TASK_HOLD_PIDSUSPEND);
2464 if (kr != KERN_SUCCESS) {
2465 task->pidsuspended = FALSE;
2466 }
2467 out:
2468 return(kr);
2469 }
2470
2471
2472 /*
2473 * task_pidsuspend:
2474 *
2475 * Suspends a task by placing a hold on its threads.
2476 *
2477 * Conditions:
2478 * The caller holds a reference to the task
2479 */
2480 kern_return_t
2481 task_pidsuspend(
2482 register task_t task)
2483 {
2484 kern_return_t kr;
2485
2486 if (task == TASK_NULL || task == kernel_task)
2487 return (KERN_INVALID_ARGUMENT);
2488
2489 task_lock(task);
2490
2491 kr = task_pidsuspend_locked(task);
2492
2493 task_unlock(task);
2494
2495 return (kr);
2496 }
2497
2498 /* If enabled, we bring all the frozen pages back in prior to resumption; otherwise, they're faulted back in on demand */
2499 #define THAW_ON_RESUME 1
2500
2501 /*
2502 * task_pidresume:
2503 * Resumes a previously suspended task.
2504 *
2505 * Conditions:
2506 * The caller holds a reference to the task
2507 */
2508 kern_return_t
2509 task_pidresume(
2510 register task_t task)
2511 {
2512 kern_return_t kr;
2513
2514 if (task == TASK_NULL || task == kernel_task)
2515 return (KERN_INVALID_ARGUMENT);
2516
2517 task_lock(task);
2518
2519 #if (CONFIG_FREEZE && THAW_ON_RESUME)
2520
2521 while (task->changing_freeze_state) {
2522
2523 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
2524 task_unlock(task);
2525 thread_block(THREAD_CONTINUE_NULL);
2526
2527 task_lock(task);
2528 }
2529 task->changing_freeze_state = TRUE;
2530 #endif
2531
2532 kr = release_task_hold(task, TASK_HOLD_PIDSUSPEND);
2533
2534 task_unlock(task);
2535
2536 #if (CONFIG_FREEZE && THAW_ON_RESUME)
2537 if ((kr == KERN_SUCCESS) && (task->frozen == TRUE)) {
2538
2539 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
2540
2541 kr = KERN_SUCCESS;
2542 } else {
2543
2544 kr = vm_map_thaw(task->map);
2545 }
2546 }
2547 task_lock(task);
2548
2549 if (kr == KERN_SUCCESS)
2550 task->frozen = FALSE;
2551 task->changing_freeze_state = FALSE;
2552 thread_wakeup(&task->changing_freeze_state);
2553
2554 task_unlock(task);
2555 #endif
2556
2557 return (kr);
2558 }
2559
2560 #if CONFIG_FREEZE
2561
2562 /*
2563 * task_freeze:
2564 *
2565 * Freeze a task.
2566 *
2567 * Conditions:
2568 * The caller holds a reference to the task
2569 */
2570 extern void vm_wake_compactor_swapper();
2571 extern queue_head_t c_swapout_list_head;
2572
2573 kern_return_t
2574 task_freeze(
2575 register task_t task,
2576 uint32_t *purgeable_count,
2577 uint32_t *wired_count,
2578 uint32_t *clean_count,
2579 uint32_t *dirty_count,
2580 uint32_t dirty_budget,
2581 boolean_t *shared,
2582 boolean_t walk_only)
2583 {
2584 kern_return_t kr;
2585
2586 if (task == TASK_NULL || task == kernel_task)
2587 return (KERN_INVALID_ARGUMENT);
2588
2589 task_lock(task);
2590
2591 while (task->changing_freeze_state) {
2592
2593 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
2594 task_unlock(task);
2595 thread_block(THREAD_CONTINUE_NULL);
2596
2597 task_lock(task);
2598 }
2599 if (task->frozen) {
2600 task_unlock(task);
2601 return (KERN_FAILURE);
2602 }
2603 task->changing_freeze_state = TRUE;
2604
2605 task_unlock(task);
2606
2607 if (walk_only) {
2608 kr = vm_map_freeze_walk(task->map, purgeable_count, wired_count, clean_count, dirty_count, dirty_budget, shared);
2609 } else {
2610 kr = vm_map_freeze(task->map, purgeable_count, wired_count, clean_count, dirty_count, dirty_budget, shared);
2611 }
2612
2613 task_lock(task);
2614
2615 if (walk_only == FALSE && kr == KERN_SUCCESS)
2616 task->frozen = TRUE;
2617 task->changing_freeze_state = FALSE;
2618 thread_wakeup(&task->changing_freeze_state);
2619
2620 task_unlock(task);
2621
2622 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
2623 vm_wake_compactor_swapper();
2624 /*
2625 * We do an explicit wakeup of the swapout thread here
2626 * because the compact_and_swap routines don't have
2627 * knowledge about these kinds of "per-task packed c_segs"
2628 * and so will not be evaluating whether we need to do
2629 * a wakeup there.
2630 */
2631 thread_wakeup((event_t)&c_swapout_list_head);
2632 }
2633
2634 return (kr);
2635 }
2636
2637 /*
2638 * task_thaw:
2639 *
2640 * Thaw a currently frozen task.
2641 *
2642 * Conditions:
2643 * The caller holds a reference to the task
2644 */
2645 kern_return_t
2646 task_thaw(
2647 register task_t task)
2648 {
2649 kern_return_t kr;
2650
2651 if (task == TASK_NULL || task == kernel_task)
2652 return (KERN_INVALID_ARGUMENT);
2653
2654 task_lock(task);
2655
2656 while (task->changing_freeze_state) {
2657
2658 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
2659 task_unlock(task);
2660 thread_block(THREAD_CONTINUE_NULL);
2661
2662 task_lock(task);
2663 }
2664 if (!task->frozen) {
2665 task_unlock(task);
2666 return (KERN_FAILURE);
2667 }
2668 task->changing_freeze_state = TRUE;
2669
2670 if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE) {
2671 task_unlock(task);
2672
2673 kr = vm_map_thaw(task->map);
2674
2675 task_lock(task);
2676
2677 if (kr == KERN_SUCCESS)
2678 task->frozen = FALSE;
2679 } else {
2680 task->frozen = FALSE;
2681 kr = KERN_SUCCESS;
2682 }
2683
2684 task->changing_freeze_state = FALSE;
2685 thread_wakeup(&task->changing_freeze_state);
2686
2687 task_unlock(task);
2688
2689 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
2690 vm_wake_compactor_swapper();
2691 }
2692
2693 return (kr);
2694 }
2695
2696 #endif /* CONFIG_FREEZE */
2697
2698 kern_return_t
2699 host_security_set_task_token(
2700 host_security_t host_security,
2701 task_t task,
2702 security_token_t sec_token,
2703 audit_token_t audit_token,
2704 host_priv_t host_priv)
2705 {
2706 ipc_port_t host_port;
2707 kern_return_t kr;
2708
2709 if (task == TASK_NULL)
2710 return(KERN_INVALID_ARGUMENT);
2711
2712 if (host_security == HOST_NULL)
2713 return(KERN_INVALID_SECURITY);
2714
2715 task_lock(task);
2716 task->sec_token = sec_token;
2717 task->audit_token = audit_token;
2718
2719 task_unlock(task);
2720
2721 if (host_priv != HOST_PRIV_NULL) {
2722 kr = host_get_host_priv_port(host_priv, &host_port);
2723 } else {
2724 kr = host_get_host_port(host_priv_self(), &host_port);
2725 }
2726 assert(kr == KERN_SUCCESS);
2727 kr = task_set_special_port(task, TASK_HOST_PORT, host_port);
2728 return(kr);
2729 }
2730
2731 kern_return_t
2732 task_send_trace_memory(
2733 task_t target_task,
2734 __unused uint32_t pid,
2735 __unused uint64_t uniqueid)
2736 {
2737 kern_return_t kr = KERN_INVALID_ARGUMENT;
2738 if (target_task == TASK_NULL)
2739 return (KERN_INVALID_ARGUMENT);
2740
2741 #if CONFIG_ATM
2742 kr = atm_send_proc_inspect_notification(target_task,
2743 pid,
2744 uniqueid);
2745
2746 #endif
2747 return (kr);
2748 }
2749 /*
2750 * This routine was added, pretty much exclusively, for registering the
2751 * RPC glue vector for in-kernel short circuited tasks. Rather than
2752 * removing it completely, I have only disabled that feature (which was
2753 * the only feature at the time). It just appears that we are going to
2754 * want to add some user data to tasks in the future (i.e. bsd info,
2755 * task names, etc...), so I left it in the formal task interface.
2756 */
2757 kern_return_t
2758 task_set_info(
2759 task_t task,
2760 task_flavor_t flavor,
2761 __unused task_info_t task_info_in, /* pointer to IN array */
2762 __unused mach_msg_type_number_t task_info_count)
2763 {
2764 if (task == TASK_NULL)
2765 return(KERN_INVALID_ARGUMENT);
2766
2767 switch (flavor) {
2768
2769 #if CONFIG_ATM
2770 case TASK_TRACE_MEMORY_INFO:
2771 {
2772 if (task_info_count != TASK_TRACE_MEMORY_INFO_COUNT)
2773 return (KERN_INVALID_ARGUMENT);
2774
2775 assert(task_info_in != NULL);
2776 task_trace_memory_info_t mem_info;
2777 mem_info = (task_trace_memory_info_t) task_info_in;
2778 kern_return_t kr = atm_register_trace_memory(task,
2779 mem_info->user_memory_address,
2780 mem_info->buffer_size);
2781 return kr;
2782 break;
2783 }
2784
2785 #endif
2786 default:
2787 return (KERN_INVALID_ARGUMENT);
2788 }
2789 return (KERN_SUCCESS);
2790 }
2791
2792 int radar_20146450 = 1;
2793 kern_return_t
2794 task_info(
2795 task_t task,
2796 task_flavor_t flavor,
2797 task_info_t task_info_out,
2798 mach_msg_type_number_t *task_info_count)
2799 {
2800 kern_return_t error = KERN_SUCCESS;
2801
2802 if (task == TASK_NULL)
2803 return (KERN_INVALID_ARGUMENT);
2804
2805 task_lock(task);
2806
2807 if ((task != current_task()) && (!task->active)) {
2808 task_unlock(task);
2809 return (KERN_INVALID_ARGUMENT);
2810 }
2811
2812 switch (flavor) {
2813
2814 case TASK_BASIC_INFO_32:
2815 case TASK_BASIC2_INFO_32:
2816 {
2817 task_basic_info_32_t basic_info;
2818 vm_map_t map;
2819 clock_sec_t secs;
2820 clock_usec_t usecs;
2821
2822 if (*task_info_count < TASK_BASIC_INFO_32_COUNT) {
2823 error = KERN_INVALID_ARGUMENT;
2824 break;
2825 }
2826
2827 basic_info = (task_basic_info_32_t)task_info_out;
2828
2829 map = (task == kernel_task)? kernel_map: task->map;
2830 basic_info->virtual_size = (typeof(basic_info->virtual_size))map->size;
2831 if (flavor == TASK_BASIC2_INFO_32) {
2832 /*
2833 * The "BASIC2" flavor gets the maximum resident
2834 * size instead of the current resident size...
2835 */
2836 basic_info->resident_size = pmap_resident_max(map->pmap);
2837 } else {
2838 basic_info->resident_size = pmap_resident_count(map->pmap);
2839 }
2840 basic_info->resident_size *= PAGE_SIZE;
2841
2842 basic_info->policy = ((task != kernel_task)?
2843 POLICY_TIMESHARE: POLICY_RR);
2844 basic_info->suspend_count = task->user_stop_count;
2845
2846 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
2847 basic_info->user_time.seconds =
2848 (typeof(basic_info->user_time.seconds))secs;
2849 basic_info->user_time.microseconds = usecs;
2850
2851 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
2852 basic_info->system_time.seconds =
2853 (typeof(basic_info->system_time.seconds))secs;
2854 basic_info->system_time.microseconds = usecs;
2855
2856 *task_info_count = TASK_BASIC_INFO_32_COUNT;
2857 break;
2858 }
2859
2860 case TASK_BASIC_INFO_64:
2861 {
2862 task_basic_info_64_t basic_info;
2863 vm_map_t map;
2864 clock_sec_t secs;
2865 clock_usec_t usecs;
2866
2867 if (*task_info_count < TASK_BASIC_INFO_64_COUNT) {
2868 error = KERN_INVALID_ARGUMENT;
2869 break;
2870 }
2871
2872 basic_info = (task_basic_info_64_t)task_info_out;
2873
2874 map = (task == kernel_task)? kernel_map: task->map;
2875 basic_info->virtual_size = map->size;
2876 basic_info->resident_size =
2877 (mach_vm_size_t)(pmap_resident_count(map->pmap))
2878 * PAGE_SIZE_64;
2879
2880 basic_info->policy = ((task != kernel_task)?
2881 POLICY_TIMESHARE: POLICY_RR);
2882 basic_info->suspend_count = task->user_stop_count;
2883
2884 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
2885 basic_info->user_time.seconds =
2886 (typeof(basic_info->user_time.seconds))secs;
2887 basic_info->user_time.microseconds = usecs;
2888
2889 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
2890 basic_info->system_time.seconds =
2891 (typeof(basic_info->system_time.seconds))secs;
2892 basic_info->system_time.microseconds = usecs;
2893
2894 *task_info_count = TASK_BASIC_INFO_64_COUNT;
2895 break;
2896 }
2897
2898 case MACH_TASK_BASIC_INFO:
2899 {
2900 mach_task_basic_info_t basic_info;
2901 vm_map_t map;
2902 clock_sec_t secs;
2903 clock_usec_t usecs;
2904
2905 if (*task_info_count < MACH_TASK_BASIC_INFO_COUNT) {
2906 error = KERN_INVALID_ARGUMENT;
2907 break;
2908 }
2909
2910 basic_info = (mach_task_basic_info_t)task_info_out;
2911
2912 map = (task == kernel_task) ? kernel_map : task->map;
2913
2914 basic_info->virtual_size = map->size;
2915
2916 basic_info->resident_size =
2917 (mach_vm_size_t)(pmap_resident_count(map->pmap));
2918 basic_info->resident_size *= PAGE_SIZE_64;
2919
2920 basic_info->resident_size_max =
2921 (mach_vm_size_t)(pmap_resident_max(map->pmap));
2922 basic_info->resident_size_max *= PAGE_SIZE_64;
2923
2924 basic_info->policy = ((task != kernel_task) ?
2925 POLICY_TIMESHARE : POLICY_RR);
2926
2927 basic_info->suspend_count = task->user_stop_count;
2928
2929 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
2930 basic_info->user_time.seconds =
2931 (typeof(basic_info->user_time.seconds))secs;
2932 basic_info->user_time.microseconds = usecs;
2933
2934 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
2935 basic_info->system_time.seconds =
2936 (typeof(basic_info->system_time.seconds))secs;
2937 basic_info->system_time.microseconds = usecs;
2938
2939 *task_info_count = MACH_TASK_BASIC_INFO_COUNT;
2940 break;
2941 }
2942
2943 case TASK_THREAD_TIMES_INFO:
2944 {
2945 register task_thread_times_info_t times_info;
2946 register thread_t thread;
2947
2948 if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) {
2949 error = KERN_INVALID_ARGUMENT;
2950 break;
2951 }
2952
2953 times_info = (task_thread_times_info_t) task_info_out;
2954 times_info->user_time.seconds = 0;
2955 times_info->user_time.microseconds = 0;
2956 times_info->system_time.seconds = 0;
2957 times_info->system_time.microseconds = 0;
2958
2959
2960 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2961 time_value_t user_time, system_time;
2962
2963 if (thread->options & TH_OPT_IDLE_THREAD)
2964 continue;
2965
2966 thread_read_times(thread, &user_time, &system_time);
2967
2968 time_value_add(&times_info->user_time, &user_time);
2969 time_value_add(&times_info->system_time, &system_time);
2970 }
2971
2972 *task_info_count = TASK_THREAD_TIMES_INFO_COUNT;
2973 break;
2974 }
2975
2976 case TASK_ABSOLUTETIME_INFO:
2977 {
2978 task_absolutetime_info_t info;
2979 register thread_t thread;
2980
2981 if (*task_info_count < TASK_ABSOLUTETIME_INFO_COUNT) {
2982 error = KERN_INVALID_ARGUMENT;
2983 break;
2984 }
2985
2986 info = (task_absolutetime_info_t)task_info_out;
2987 info->threads_user = info->threads_system = 0;
2988
2989
2990 info->total_user = task->total_user_time;
2991 info->total_system = task->total_system_time;
2992
2993 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2994 uint64_t tval;
2995 spl_t x;
2996
2997 if (thread->options & TH_OPT_IDLE_THREAD)
2998 continue;
2999
3000 x = splsched();
3001 thread_lock(thread);
3002
3003 tval = timer_grab(&thread->user_timer);
3004 info->threads_user += tval;
3005 info->total_user += tval;
3006
3007 tval = timer_grab(&thread->system_timer);
3008 if (thread->precise_user_kernel_time) {
3009 info->threads_system += tval;
3010 info->total_system += tval;
3011 } else {
3012 /* system_timer may represent either sys or user */
3013 info->threads_user += tval;
3014 info->total_user += tval;
3015 }
3016
3017 thread_unlock(thread);
3018 splx(x);
3019 }
3020
3021
3022 *task_info_count = TASK_ABSOLUTETIME_INFO_COUNT;
3023 break;
3024 }
3025
3026 case TASK_DYLD_INFO:
3027 {
3028 task_dyld_info_t info;
3029
3030 /*
3031 * We added the format field to TASK_DYLD_INFO output. For
3032 * temporary backward compatibility, accept the fact that
3033 * clients may ask for the old version - distinguished by the
3034 * size of the expected result structure.
3035 */
3036 #define TASK_LEGACY_DYLD_INFO_COUNT \
3037 offsetof(struct task_dyld_info, all_image_info_format)/sizeof(natural_t)
3038
3039 if (*task_info_count < TASK_LEGACY_DYLD_INFO_COUNT) {
3040 error = KERN_INVALID_ARGUMENT;
3041 break;
3042 }
3043
3044 info = (task_dyld_info_t)task_info_out;
3045 info->all_image_info_addr = task->all_image_info_addr;
3046 info->all_image_info_size = task->all_image_info_size;
3047
3048 /* only set format on output for those expecting it */
3049 if (*task_info_count >= TASK_DYLD_INFO_COUNT) {
3050 info->all_image_info_format = task_has_64BitAddr(task) ?
3051 TASK_DYLD_ALL_IMAGE_INFO_64 :
3052 TASK_DYLD_ALL_IMAGE_INFO_32 ;
3053 *task_info_count = TASK_DYLD_INFO_COUNT;
3054 } else {
3055 *task_info_count = TASK_LEGACY_DYLD_INFO_COUNT;
3056 }
3057 break;
3058 }
3059
3060 case TASK_EXTMOD_INFO:
3061 {
3062 task_extmod_info_t info;
3063 void *p;
3064
3065 if (*task_info_count < TASK_EXTMOD_INFO_COUNT) {
3066 error = KERN_INVALID_ARGUMENT;
3067 break;
3068 }
3069
3070 info = (task_extmod_info_t)task_info_out;
3071
3072 p = get_bsdtask_info(task);
3073 if (p) {
3074 proc_getexecutableuuid(p, info->task_uuid, sizeof(info->task_uuid));
3075 } else {
3076 bzero(info->task_uuid, sizeof(info->task_uuid));
3077 }
3078 info->extmod_statistics = task->extmod_statistics;
3079 *task_info_count = TASK_EXTMOD_INFO_COUNT;
3080
3081 break;
3082 }
3083
3084 case TASK_KERNELMEMORY_INFO:
3085 {
3086 task_kernelmemory_info_t tkm_info;
3087 ledger_amount_t credit, debit;
3088
3089 if (*task_info_count < TASK_KERNELMEMORY_INFO_COUNT) {
3090 error = KERN_INVALID_ARGUMENT;
3091 break;
3092 }
3093
3094 tkm_info = (task_kernelmemory_info_t) task_info_out;
3095 tkm_info->total_palloc = 0;
3096 tkm_info->total_pfree = 0;
3097 tkm_info->total_salloc = 0;
3098 tkm_info->total_sfree = 0;
3099
3100 if (task == kernel_task) {
3101 /*
3102 * All shared allocs/frees from other tasks count against
3103 * the kernel private memory usage. If we are looking up
3104 * info for the kernel task, gather from everywhere.
3105 */
3106 task_unlock(task);
3107
3108 /* start by accounting for all the terminated tasks against the kernel */
3109 tkm_info->total_palloc = tasks_tkm_private.alloc + tasks_tkm_shared.alloc;
3110 tkm_info->total_pfree = tasks_tkm_private.free + tasks_tkm_shared.free;
3111
3112 /* count all other task/thread shared alloc/free against the kernel */
3113 lck_mtx_lock(&tasks_threads_lock);
3114
3115 /* XXX this really shouldn't be using the function parameter 'task' as a local var! */
3116 queue_iterate(&tasks, task, task_t, tasks) {
3117 if (task == kernel_task) {
3118 if (ledger_get_entries(task->ledger,
3119 task_ledgers.tkm_private, &credit,
3120 &debit) == KERN_SUCCESS) {
3121 tkm_info->total_palloc += credit;
3122 tkm_info->total_pfree += debit;
3123 }
3124 }
3125 if (!ledger_get_entries(task->ledger,
3126 task_ledgers.tkm_shared, &credit, &debit)) {
3127 tkm_info->total_palloc += credit;
3128 tkm_info->total_pfree += debit;
3129 }
3130 }
3131 lck_mtx_unlock(&tasks_threads_lock);
3132 } else {
3133 if (!ledger_get_entries(task->ledger,
3134 task_ledgers.tkm_private, &credit, &debit)) {
3135 tkm_info->total_palloc = credit;
3136 tkm_info->total_pfree = debit;
3137 }
3138 if (!ledger_get_entries(task->ledger,
3139 task_ledgers.tkm_shared, &credit, &debit)) {
3140 tkm_info->total_salloc = credit;
3141 tkm_info->total_sfree = debit;
3142 }
3143 task_unlock(task);
3144 }
3145
3146 *task_info_count = TASK_KERNELMEMORY_INFO_COUNT;
3147 return KERN_SUCCESS;
3148 }
3149
3150 /* OBSOLETE */
3151 case TASK_SCHED_FIFO_INFO:
3152 {
3153
3154 if (*task_info_count < POLICY_FIFO_BASE_COUNT) {
3155 error = KERN_INVALID_ARGUMENT;
3156 break;
3157 }
3158
3159 error = KERN_INVALID_POLICY;
3160 break;
3161 }
3162
3163 /* OBSOLETE */
3164 case TASK_SCHED_RR_INFO:
3165 {
3166 register policy_rr_base_t rr_base;
3167 uint32_t quantum_time;
3168 uint64_t quantum_ns;
3169
3170 if (*task_info_count < POLICY_RR_BASE_COUNT) {
3171 error = KERN_INVALID_ARGUMENT;
3172 break;
3173 }
3174
3175 rr_base = (policy_rr_base_t) task_info_out;
3176
3177 if (task != kernel_task) {
3178 error = KERN_INVALID_POLICY;
3179 break;
3180 }
3181
3182 rr_base->base_priority = task->priority;
3183
3184 quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
3185 absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
3186
3187 rr_base->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
3188
3189 *task_info_count = POLICY_RR_BASE_COUNT;
3190 break;
3191 }
3192
3193 /* OBSOLETE */
3194 case TASK_SCHED_TIMESHARE_INFO:
3195 {
3196 register policy_timeshare_base_t ts_base;
3197
3198 if (*task_info_count < POLICY_TIMESHARE_BASE_COUNT) {
3199 error = KERN_INVALID_ARGUMENT;
3200 break;
3201 }
3202
3203 ts_base = (policy_timeshare_base_t) task_info_out;
3204
3205 if (task == kernel_task) {
3206 error = KERN_INVALID_POLICY;
3207 break;
3208 }
3209
3210 ts_base->base_priority = task->priority;
3211
3212 *task_info_count = POLICY_TIMESHARE_BASE_COUNT;
3213 break;
3214 }
3215
3216 case TASK_SECURITY_TOKEN:
3217 {
3218 register security_token_t *sec_token_p;
3219
3220 if (*task_info_count < TASK_SECURITY_TOKEN_COUNT) {
3221 error = KERN_INVALID_ARGUMENT;
3222 break;
3223 }
3224
3225 sec_token_p = (security_token_t *) task_info_out;
3226
3227 *sec_token_p = task->sec_token;
3228
3229 *task_info_count = TASK_SECURITY_TOKEN_COUNT;
3230 break;
3231 }
3232
3233 case TASK_AUDIT_TOKEN:
3234 {
3235 register audit_token_t *audit_token_p;
3236
3237 if (*task_info_count < TASK_AUDIT_TOKEN_COUNT) {
3238 error = KERN_INVALID_ARGUMENT;
3239 break;
3240 }
3241
3242 audit_token_p = (audit_token_t *) task_info_out;
3243
3244 *audit_token_p = task->audit_token;
3245
3246 *task_info_count = TASK_AUDIT_TOKEN_COUNT;
3247 break;
3248 }
3249
3250 case TASK_SCHED_INFO:
3251 error = KERN_INVALID_ARGUMENT;
3252 break;
3253
3254 case TASK_EVENTS_INFO:
3255 {
3256 register task_events_info_t events_info;
3257 register thread_t thread;
3258
3259 if (*task_info_count < TASK_EVENTS_INFO_COUNT) {
3260 error = KERN_INVALID_ARGUMENT;
3261 break;
3262 }
3263
3264 events_info = (task_events_info_t) task_info_out;
3265
3266
3267 events_info->faults = task->faults;
3268 events_info->pageins = task->pageins;
3269 events_info->cow_faults = task->cow_faults;
3270 events_info->messages_sent = task->messages_sent;
3271 events_info->messages_received = task->messages_received;
3272 events_info->syscalls_mach = task->syscalls_mach;
3273 events_info->syscalls_unix = task->syscalls_unix;
3274
3275 events_info->csw = task->c_switch;
3276
3277 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3278 events_info->csw += thread->c_switch;
3279 events_info->syscalls_mach += thread->syscalls_mach;
3280 events_info->syscalls_unix += thread->syscalls_unix;
3281 }
3282
3283
3284 *task_info_count = TASK_EVENTS_INFO_COUNT;
3285 break;
3286 }
3287 case TASK_AFFINITY_TAG_INFO:
3288 {
3289 if (*task_info_count < TASK_AFFINITY_TAG_INFO_COUNT) {
3290 error = KERN_INVALID_ARGUMENT;
3291 break;
3292 }
3293
3294 error = task_affinity_info(task, task_info_out, task_info_count);
3295 break;
3296 }
3297 case TASK_POWER_INFO:
3298 {
3299 if (*task_info_count < TASK_POWER_INFO_COUNT) {
3300 error = KERN_INVALID_ARGUMENT;
3301 break;
3302 }
3303
3304 task_power_info_locked(task, (task_power_info_t)task_info_out, NULL);
3305 break;
3306 }
3307
3308 case TASK_POWER_INFO_V2:
3309 {
3310 if (*task_info_count < TASK_POWER_INFO_V2_COUNT) {
3311 error = KERN_INVALID_ARGUMENT;
3312 break;
3313 }
3314 task_power_info_v2_t tpiv2 = (task_power_info_v2_t) task_info_out;
3315 task_power_info_locked(task, &tpiv2->cpu_energy, &tpiv2->gpu_energy);
3316 break;
3317 }
3318
3319 case TASK_VM_INFO:
3320 case TASK_VM_INFO_PURGEABLE:
3321 {
3322 task_vm_info_t vm_info;
3323 vm_map_t map;
3324
3325 if (*task_info_count < TASK_VM_INFO_REV0_COUNT) {
3326 error = KERN_INVALID_ARGUMENT;
3327 break;
3328 }
3329
3330 vm_info = (task_vm_info_t)task_info_out;
3331
3332 if (task == kernel_task) {
3333 map = kernel_map;
3334 /* no lock */
3335 } else {
3336 map = task->map;
3337 vm_map_lock_read(map);
3338 }
3339
3340 vm_info->virtual_size = (typeof(vm_info->virtual_size))map->size;
3341 vm_info->region_count = map->hdr.nentries;
3342 vm_info->page_size = vm_map_page_size(map);
3343
3344 vm_info->resident_size = pmap_resident_count(map->pmap);
3345 vm_info->resident_size *= PAGE_SIZE;
3346 vm_info->resident_size_peak = pmap_resident_max(map->pmap);
3347 vm_info->resident_size_peak *= PAGE_SIZE;
3348
3349 #define _VM_INFO(_name) \
3350 vm_info->_name = ((mach_vm_size_t) map->pmap->stats._name) * PAGE_SIZE
3351
3352 _VM_INFO(device);
3353 _VM_INFO(device_peak);
3354 _VM_INFO(external);
3355 _VM_INFO(external_peak);
3356 _VM_INFO(internal);
3357 _VM_INFO(internal_peak);
3358 _VM_INFO(reusable);
3359 _VM_INFO(reusable_peak);
3360 _VM_INFO(compressed);
3361 _VM_INFO(compressed_peak);
3362 _VM_INFO(compressed_lifetime);
3363
3364 vm_info->purgeable_volatile_pmap = 0;
3365 vm_info->purgeable_volatile_resident = 0;
3366 vm_info->purgeable_volatile_virtual = 0;
3367 if (task == kernel_task) {
3368 /*
3369 * We do not maintain the detailed stats for the
3370 * kernel_pmap, so just count everything as
3371 * "internal"...
3372 */
3373 vm_info->internal = vm_info->resident_size;
3374 /*
3375 * ... but since the memory held by the VM compressor
3376 * in the kernel address space ought to be attributed
3377 * to user-space tasks, we subtract it from "internal"
3378 * to give memory reporting tools a more accurate idea
3379 * of what the kernel itself is actually using, instead
3380 * of making it look like the kernel is leaking memory
3381 * when the system is under memory pressure.
3382 */
3383 vm_info->internal -= (VM_PAGE_COMPRESSOR_COUNT *
3384 PAGE_SIZE);
3385 } else {
3386 mach_vm_size_t volatile_virtual_size;
3387 mach_vm_size_t volatile_resident_size;
3388 mach_vm_size_t volatile_compressed_size;
3389 mach_vm_size_t volatile_pmap_size;
3390 mach_vm_size_t volatile_compressed_pmap_size;
3391 kern_return_t kr;
3392
3393 if (flavor == TASK_VM_INFO_PURGEABLE) {
3394 kr = vm_map_query_volatile(
3395 map,
3396 &volatile_virtual_size,
3397 &volatile_resident_size,
3398 &volatile_compressed_size,
3399 &volatile_pmap_size,
3400 &volatile_compressed_pmap_size);
3401 if (kr == KERN_SUCCESS) {
3402 vm_info->purgeable_volatile_pmap =
3403 volatile_pmap_size;
3404 if (radar_20146450) {
3405 vm_info->compressed -=
3406 volatile_compressed_pmap_size;
3407 }
3408 vm_info->purgeable_volatile_resident =
3409 volatile_resident_size;
3410 vm_info->purgeable_volatile_virtual =
3411 volatile_virtual_size;
3412 }
3413 }
3414 vm_map_unlock_read(map);
3415 }
3416
3417 if (*task_info_count >= TASK_VM_INFO_COUNT) {
3418 vm_info->phys_footprint = 0;
3419 *task_info_count = TASK_VM_INFO_COUNT;
3420 } else {
3421 *task_info_count = TASK_VM_INFO_REV0_COUNT;
3422 }
3423
3424 break;
3425 }
3426
3427 case TASK_WAIT_STATE_INFO:
3428 {
3429 /*
3430 * Deprecated flavor. Currently allowing some results until all users
3431 * stop calling it. The results may not be accurate.
3432 */
3433 task_wait_state_info_t wait_state_info;
3434 uint64_t total_sfi_ledger_val = 0;
3435
3436 if (*task_info_count < TASK_WAIT_STATE_INFO_COUNT) {
3437 error = KERN_INVALID_ARGUMENT;
3438 break;
3439 }
3440
3441 wait_state_info = (task_wait_state_info_t) task_info_out;
3442
3443 wait_state_info->total_wait_state_time = 0;
3444 bzero(wait_state_info->_reserved, sizeof(wait_state_info->_reserved));
3445
3446 #if CONFIG_SCHED_SFI
3447 int i, prev_lentry = -1;
3448 int64_t val_credit, val_debit;
3449
3450 for (i = 0; i < MAX_SFI_CLASS_ID; i++){
3451 val_credit = 0;
3452 /*
3453 * checking with prev_lentry != entry ensures adjacent classes
3454 * which share the same ledger do not add wait times twice.
3455 * Note: Use ledger() call to get data for each individual sfi class.
3456 */
3457 if (prev_lentry != task_ledgers.sfi_wait_times[i] &&
3458 KERN_SUCCESS == ledger_get_entries(task->ledger,
3459 task_ledgers.sfi_wait_times[i], &val_credit, &val_debit)) {
3460 total_sfi_ledger_val += val_credit;
3461 }
3462 prev_lentry = task_ledgers.sfi_wait_times[i];
3463 }
3464
3465 #endif /* CONFIG_SCHED_SFI */
3466 wait_state_info->total_wait_sfi_state_time = total_sfi_ledger_val;
3467 *task_info_count = TASK_WAIT_STATE_INFO_COUNT;
3468
3469 break;
3470 }
3471 case TASK_VM_INFO_PURGEABLE_ACCOUNT:
3472 {
3473 #if DEVELOPMENT || DEBUG
3474 pvm_account_info_t acnt_info;
3475
3476 if (*task_info_count < PVM_ACCOUNT_INFO_COUNT) {
3477 error = KERN_INVALID_ARGUMENT;
3478 break;
3479 }
3480
3481 if (task_info_out == NULL) {
3482 error = KERN_INVALID_ARGUMENT;
3483 break;
3484 }
3485
3486 acnt_info = (pvm_account_info_t) task_info_out;
3487
3488 error = vm_purgeable_account(task, acnt_info);
3489
3490 *task_info_count = PVM_ACCOUNT_INFO_COUNT;
3491
3492 break;
3493 #else /* DEVELOPMENT || DEBUG */
3494 error = KERN_NOT_SUPPORTED;
3495 break;
3496 #endif /* DEVELOPMENT || DEBUG */
3497 }
3498 case TASK_FLAGS_INFO:
3499 {
3500 task_flags_info_t flags_info;
3501
3502 if (*task_info_count < TASK_FLAGS_INFO_COUNT) {
3503 error = KERN_INVALID_ARGUMENT;
3504 break;
3505 }
3506
3507 flags_info = (task_flags_info_t)task_info_out;
3508
3509 /* only publish the 64-bit flag of the task */
3510 flags_info->flags = task->t_flags & TF_64B_ADDR;
3511
3512 *task_info_count = TASK_FLAGS_INFO_COUNT;
3513 break;
3514 }
3515
3516 case TASK_DEBUG_INFO_INTERNAL:
3517 {
3518 #if DEVELOPMENT || DEBUG
3519 task_debug_info_internal_t dbg_info;
3520 if (*task_info_count < TASK_DEBUG_INFO_INTERNAL_COUNT) {
3521 error = KERN_NOT_SUPPORTED;
3522 break;
3523 }
3524
3525 if (task_info_out == NULL) {
3526 error = KERN_INVALID_ARGUMENT;
3527 break;
3528 }
3529 dbg_info = (task_debug_info_internal_t) task_info_out;
3530 dbg_info->ipc_space_size = 0;
3531 if (task->itk_space){
3532 dbg_info->ipc_space_size = task->itk_space->is_table_size;
3533 }
3534
3535 error = KERN_SUCCESS;
3536 *task_info_count = TASK_DEBUG_INFO_INTERNAL_COUNT;
3537 break;
3538 #else /* DEVELOPMENT || DEBUG */
3539 error = KERN_NOT_SUPPORTED;
3540 break;
3541 #endif /* DEVELOPMENT || DEBUG */
3542 }
3543 default:
3544 error = KERN_INVALID_ARGUMENT;
3545 }
3546
3547 task_unlock(task);
3548 return (error);
3549 }
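/*
 * A minimal user-space sketch of one of the flavors handled above,
 * MACH_TASK_BASIC_INFO, queried for the calling task (hypothetical
 * caller):
 *
 *	mach_task_basic_info_data_t info;
 *	mach_msg_type_number_t count = MACH_TASK_BASIC_INFO_COUNT;
 *
 *	if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO,
 *	    (task_info_t)&info, &count) == KERN_SUCCESS) {
 *		// info.resident_size and info.virtual_size are in bytes
 *	}
 */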
3550
3551 /*
3552 * task_power_info
3553 *
3554 * Returns power stats for the task.
3555 * Note: Called with task locked.
3556 */
3557 void
3558 task_power_info_locked(
3559 task_t task,
3560 task_power_info_t info,
3561 gpu_energy_data_t ginfo)
3562 {
3563 thread_t thread;
3564 ledger_amount_t tmp;
3565
3566 task_lock_assert_owned(task);
3567
3568 ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
3569 (ledger_amount_t *)&info->task_interrupt_wakeups, &tmp);
3570 ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
3571 (ledger_amount_t *)&info->task_platform_idle_wakeups, &tmp);
3572
3573 info->task_timer_wakeups_bin_1 = task->task_timer_wakeups_bin_1;
3574 info->task_timer_wakeups_bin_2 = task->task_timer_wakeups_bin_2;
3575
3576 info->total_user = task->total_user_time;
3577 info->total_system = task->total_system_time;
3578
3579 if (ginfo) {
3580 ginfo->task_gpu_utilisation = task->task_gpu_ns;
3581 }
3582
3583 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3584 uint64_t tval;
3585 spl_t x;
3586
3587 if (thread->options & TH_OPT_IDLE_THREAD)
3588 continue;
3589
3590 x = splsched();
3591 thread_lock(thread);
3592
3593 info->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
3594 info->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
3595
3596 tval = timer_grab(&thread->user_timer);
3597 info->total_user += tval;
3598
3599 tval = timer_grab(&thread->system_timer);
3600 if (thread->precise_user_kernel_time) {
3601 info->total_system += tval;
3602 } else {
3603 /* system_timer may represent either sys or user */
3604 info->total_user += tval;
3605 }
3606
3607 if (ginfo) {
3608 ginfo->task_gpu_utilisation += ml_gpu_stat(thread);
3609 }
3610 thread_unlock(thread);
3611 splx(x);
3612 }
3613 }
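/*
 * The routine above supplies the data returned by the TASK_POWER_INFO
 * and TASK_POWER_INFO_V2 flavors of task_info().  A minimal user-space
 * sketch for the former (hypothetical caller, assuming the standard
 * task_power_info layout from <mach/task_info.h>):
 *
 *	task_power_info_data_t pinfo;
 *	mach_msg_type_number_t pcount = TASK_POWER_INFO_COUNT;
 *
 *	if (task_info(mach_task_self(), TASK_POWER_INFO,
 *	    (task_info_t)&pinfo, &pcount) == KERN_SUCCESS) {
 *		// pinfo.task_interrupt_wakeups, pinfo.total_user, ...
 *	}
 */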
3614
3615 /*
3616 * task_gpu_utilisation
3617 *
3618 * Returns the total gpu time used by all the threads of the task
3619 * (both dead and alive)
3620 */
3621 uint64_t
3622 task_gpu_utilisation(
3623 task_t task)
3624 {
3625 uint64_t gpu_time = 0;
3626 thread_t thread;
3627
3628 task_lock(task);
3629 gpu_time += task->task_gpu_ns;
3630
3631 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3632 spl_t x;
3633 x = splsched();
3634 thread_lock(thread);
3635 gpu_time += ml_gpu_stat(thread);
3636 thread_unlock(thread);
3637 splx(x);
3638 }
3639
3640 task_unlock(task);
3641 return gpu_time;
3642 }
3643
3644 kern_return_t
3645 task_purgable_info(
3646 task_t task,
3647 task_purgable_info_t *stats)
3648 {
3649 if (task == TASK_NULL || stats == NULL)
3650 return KERN_INVALID_ARGUMENT;
3651 /* Take task reference */
3652 task_reference(task);
3653 vm_purgeable_stats((vm_purgeable_info_t)stats, task);
3654 /* Drop task reference */
3655 task_deallocate(task);
3656 return KERN_SUCCESS;
3657 }
3658
3659 void
3660 task_vtimer_set(
3661 task_t task,
3662 integer_t which)
3663 {
3664 thread_t thread;
3665 spl_t x;
3666
3667 /* assert(task == current_task()); */ /* bogus assert 4803227 4807483 */
3668
3669 task_lock(task);
3670
3671 task->vtimers |= which;
3672
3673 switch (which) {
3674
3675 case TASK_VTIMER_USER:
3676 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3677 x = splsched();
3678 thread_lock(thread);
3679 if (thread->precise_user_kernel_time)
3680 thread->vtimer_user_save = timer_grab(&thread->user_timer);
3681 else
3682 thread->vtimer_user_save = timer_grab(&thread->system_timer);
3683 thread_unlock(thread);
3684 splx(x);
3685 }
3686 break;
3687
3688 case TASK_VTIMER_PROF:
3689 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3690 x = splsched();
3691 thread_lock(thread);
3692 thread->vtimer_prof_save = timer_grab(&thread->user_timer);
3693 thread->vtimer_prof_save += timer_grab(&thread->system_timer);
3694 thread_unlock(thread);
3695 splx(x);
3696 }
3697 break;
3698
3699 case TASK_VTIMER_RLIM:
3700 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3701 x = splsched();
3702 thread_lock(thread);
3703 thread->vtimer_rlim_save = timer_grab(&thread->user_timer);
3704 thread->vtimer_rlim_save += timer_grab(&thread->system_timer);
3705 thread_unlock(thread);
3706 splx(x);
3707 }
3708 break;
3709 }
3710
3711 task_unlock(task);
3712 }
3713
3714 void
3715 task_vtimer_clear(
3716 task_t task,
3717 integer_t which)
3718 {
3719 assert(task == current_task());
3720
3721 task_lock(task);
3722
3723 task->vtimers &= ~which;
3724
3725 task_unlock(task);
3726 }
3727
3728 void
3729 task_vtimer_update(
3730 __unused
3731 task_t task,
3732 integer_t which,
3733 uint32_t *microsecs)
3734 {
3735 thread_t thread = current_thread();
3736 uint32_t tdelt;
3737 clock_sec_t secs;
3738 uint64_t tsum;
3739
3740 assert(task == current_task());
3741
3742 assert(task->vtimers & which);
3743
3744 secs = tdelt = 0;
3745
3746 switch (which) {
3747
3748 case TASK_VTIMER_USER:
3749 if (thread->precise_user_kernel_time) {
3750 tdelt = (uint32_t)timer_delta(&thread->user_timer,
3751 &thread->vtimer_user_save);
3752 } else {
3753 tdelt = (uint32_t)timer_delta(&thread->system_timer,
3754 &thread->vtimer_user_save);
3755 }
3756 absolutetime_to_microtime(tdelt, &secs, microsecs);
3757 break;
3758
3759 case TASK_VTIMER_PROF:
3760 tsum = timer_grab(&thread->user_timer);
3761 tsum += timer_grab(&thread->system_timer);
3762 tdelt = (uint32_t)(tsum - thread->vtimer_prof_save);
3763 absolutetime_to_microtime(tdelt, &secs, microsecs);
3764 /* if the time delta is smaller than a usec, ignore */
3765 if (*microsecs != 0)
3766 thread->vtimer_prof_save = tsum;
3767 break;
3768
3769 case TASK_VTIMER_RLIM:
3770 tsum = timer_grab(&thread->user_timer);
3771 tsum += timer_grab(&thread->system_timer);
3772 tdelt = (uint32_t)(tsum - thread->vtimer_rlim_save);
3773 thread->vtimer_rlim_save = tsum;
3774 absolutetime_to_microtime(tdelt, &secs, microsecs);
3775 break;
3776 }
3777
3778 }
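/*
 * The vtimer routines above work as a pair: task_vtimer_set() records a
 * per-thread baseline for the requested timer class, and
 * task_vtimer_update() later reports (via *microsecs) how much time has
 * elapsed since that baseline, advancing the PROF and RLIM baselines as
 * it goes.
 */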
3779
3780 /*
3781 * task_assign:
3782 *
3783 * Change the assigned processor set for the task
3784 */
3785 kern_return_t
3786 task_assign(
3787 __unused task_t task,
3788 __unused processor_set_t new_pset,
3789 __unused boolean_t assign_threads)
3790 {
3791 return(KERN_FAILURE);
3792 }
3793
3794 /*
3795 * task_assign_default:
3796 *
3797 * Version of task_assign to assign to default processor set.
3798 */
3799 kern_return_t
3800 task_assign_default(
3801 task_t task,
3802 boolean_t assign_threads)
3803 {
3804 return (task_assign(task, &pset0, assign_threads));
3805 }
3806
3807 /*
3808 * task_get_assignment
3809 *
3810 * Return name of processor set that task is assigned to.
3811 */
3812 kern_return_t
3813 task_get_assignment(
3814 task_t task,
3815 processor_set_t *pset)
3816 {
3817 if (!task->active)
3818 return(KERN_FAILURE);
3819
3820 *pset = &pset0;
3821
3822 return (KERN_SUCCESS);
3823 }
3824
3825 uint64_t
3826 get_task_dispatchqueue_offset(
3827 task_t task)
3828 {
3829 return task->dispatchqueue_offset;
3830 }
3831
3832 /*
3833 * task_policy
3834 *
3835 * Set scheduling policy and parameters, both base and limit, for
3836 * the given task. Policy must be a policy which is enabled for the
3837 * processor set. Change contained threads if requested.
3838 */
3839 kern_return_t
3840 task_policy(
3841 __unused task_t task,
3842 __unused policy_t policy_id,
3843 __unused policy_base_t base,
3844 __unused mach_msg_type_number_t count,
3845 __unused boolean_t set_limit,
3846 __unused boolean_t change)
3847 {
3848 return(KERN_FAILURE);
3849 }
3850
3851 /*
3852 * task_set_policy
3853 *
3854 * Set scheduling policy and parameters, both base and limit, for
3855 * the given task. Policy can be any policy implemented by the
3856 * processor set, whether enabled or not. Change contained threads
3857 * if requested.
3858 */
3859 kern_return_t
3860 task_set_policy(
3861 __unused task_t task,
3862 __unused processor_set_t pset,
3863 __unused policy_t policy_id,
3864 __unused policy_base_t base,
3865 __unused mach_msg_type_number_t base_count,
3866 __unused policy_limit_t limit,
3867 __unused mach_msg_type_number_t limit_count,
3868 __unused boolean_t change)
3869 {
3870 return(KERN_FAILURE);
3871 }
3872
3873 kern_return_t
3874 task_set_ras_pc(
3875 __unused task_t task,
3876 __unused vm_offset_t pc,
3877 __unused vm_offset_t endpc)
3878 {
3879 return KERN_FAILURE;
3880 }
3881
3882 void
3883 task_synchronizer_destroy_all(task_t task)
3884 {
3885 /*
3886 * Destroy owned semaphores
3887 */
3888 semaphore_destroy_all(task);
3889 }
3890
3891 /*
3892 * Install default (machine-dependent) initial thread state
3893 * on the task. Subsequent thread creation will have this initial
3894 * state set on the thread by machine_thread_inherit_taskwide().
3895 * Flavors and structures are exactly the same as those passed to thread_set_state().
3896 */
3897 kern_return_t
3898 task_set_state(
3899 task_t task,
3900 int flavor,
3901 thread_state_t state,
3902 mach_msg_type_number_t state_count)
3903 {
3904 kern_return_t ret;
3905
3906 if (task == TASK_NULL) {
3907 return (KERN_INVALID_ARGUMENT);
3908 }
3909
3910 task_lock(task);
3911
3912 if (!task->active) {
3913 task_unlock(task);
3914 return (KERN_FAILURE);
3915 }
3916
3917 ret = machine_task_set_state(task, flavor, state, state_count);
3918
3919 task_unlock(task);
3920 return ret;
3921 }
3922
3923 /*
3924 * Examine the default (machine-dependent) initial thread state
3925 * on the task, as set by task_set_state(). Flavors and structures
3926 * are exactly the same as those passed to thread_get_state().
3927 */
3928 kern_return_t
3929 task_get_state(
3930 task_t task,
3931 int flavor,
3932 thread_state_t state,
3933 mach_msg_type_number_t *state_count)
3934 {
3935 kern_return_t ret;
3936
3937 if (task == TASK_NULL) {
3938 return (KERN_INVALID_ARGUMENT);
3939 }
3940
3941 task_lock(task);
3942
3943 if (!task->active) {
3944 task_unlock(task);
3945 return (KERN_FAILURE);
3946 }
3947
3948 ret = machine_task_get_state(task, flavor, state, state_count);
3949
3950 task_unlock(task);
3951 return ret;
3952 }
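
/*
 * Illustrative sketch (not part of this file): a user-space caller holding
 * the task's control port could seed the default register state that new
 * threads in that task will inherit.  The flavor, structure and field names
 * below assume the x86_64 user-visible thread-state layout; any flavor
 * accepted by thread_set_state() on the target architecture would work the
 * same way.
 *
 *	#include <mach/mach.h>
 *
 *	kern_return_t
 *	seed_default_thread_state(task_t target)
 *	{
 *		x86_thread_state64_t regs;
 *		mach_msg_type_number_t count = x86_THREAD_STATE64_COUNT;
 *		kern_return_t kr;
 *
 *		kr = task_get_state(target, x86_THREAD_STATE64,
 *		    (thread_state_t)&regs, &count);
 *		if (kr != KERN_SUCCESS)
 *			return kr;
 *
 *		-- tweak the defaults, e.g. set the x86 trace flag:
 *		regs.__rflags |= 0x100;
 *
 *		return task_set_state(target, x86_THREAD_STATE64,
 *		    (thread_state_t)&regs, count);
 *	}
 */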
3953
3954 #if CONFIG_JETSAM
3955 #define HWM_USERCORE_MINSPACE 250 // free space (in MB) required *after* core file creation
3956
3957 void __attribute__((noinline))
3958 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb)
3959 {
3960 task_t task = current_task();
3961 int pid = 0;
3962 const char *procname = "unknown";
3963 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
3964
3965 #ifdef MACH_BSD
3966 pid = proc_selfpid();
3967
3968 if (pid == 1) {
3969 /*
3970 * Cannot have ReportCrash analyzing
3971 * a suspended initproc.
3972 */
3973 return;
3974 }
3975
3976 if (task->bsd_info != NULL)
3977 procname = proc_name_address(current_task()->bsd_info);
3978 #endif
3979
3980 if (hwm_user_cores) {
3981 int error;
3982 uint64_t starttime, end;
3983 clock_sec_t secs = 0;
3984 uint32_t microsecs = 0;
3985
3986 starttime = mach_absolute_time();
3987 /*
3988 * Trigger a coredump of this process. Don't proceed unless we know we won't
3989 * be filling up the disk; and ignore the core size resource limit for this
3990 * core file.
3991 */
3992 if ((error = coredump(current_task()->bsd_info, HWM_USERCORE_MINSPACE, COREDUMP_IGNORE_ULIMIT)) != 0) {
3993 printf("couldn't take coredump of %s[%d]: %d\n", procname, pid, error);
3994 }
3995 /*
3996 * coredump() leaves the task suspended.
3997 */
3998 task_resume_internal(current_task());
3999
4000 end = mach_absolute_time();
4001 absolutetime_to_microtime(end - starttime, &secs, &microsecs);
4002 printf("coredump of %s[%d] taken in %d secs %d microsecs\n",
4003 proc_name_address(current_task()->bsd_info), pid, (int)secs, microsecs);
4004 }
4005
4006 if (disable_exc_resource) {
4007 printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
4008 "supressed by a boot-arg.\n", procname, pid, max_footprint_mb);
4009 return;
4010 }
4011
4012 /*
4013 * A task that has triggered an EXC_RESOURCE should not be
4014 * jetsammed when the device is under memory pressure. Here
4015 * we set the P_MEMSTAT_TERMINATED flag so that the process
4016 * will be skipped if the memorystatus_thread wakes up.
4017 */
4018 proc_memstat_terminated(current_task()->bsd_info, TRUE);
4019
4020 printf("process %s[%d] crossed memory high watermark (%d MB); sending "
4021 "EXC_RESOURCE.\n", procname, pid, max_footprint_mb);
4022
4023 code[0] = code[1] = 0;
4024 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY);
4025 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK);
4026 EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb);
4027
4028 /*
4029 * Use the _internal_ variant so that no user-space
4030 * process can resume our task from under us.
4031 */
4032 task_suspend_internal(task);
4033 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
4034 task_resume_internal(task);
4035
4036 /*
4037 * After the EXC_RESOURCE has been handled, we must clear the
4038 * P_MEMSTAT_TERMINATED flag so that the process can again be
4039 * considered for jetsam if the memorystatus_thread wakes up.
4040 */
4041 proc_memstat_terminated(current_task()->bsd_info, FALSE); /* clear the flag */
4042 }
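
/*
 * Receiver-side sketch (illustrative only): a user-space exception handler
 * registered for EXC_RESOURCE can recover the fields encoded above from
 * code[0] with the matching decode macros.  This assumes the definitions
 * from <kern/exc_resource.h> are visible to the handler.
 *
 *	int type   = EXC_RESOURCE_DECODE_RESOURCE_TYPE(code[0]);
 *	int flavor = EXC_RESOURCE_DECODE_FLAVOR(code[0]);
 *
 *	if (type == RESOURCE_TYPE_MEMORY && flavor == FLAVOR_HIGH_WATERMARK) {
 *		int limit_mb = EXC_RESOURCE_HWM_DECODE_LIMIT(code[0]);
 *		-- the task crossed its limit_mb high watermark
 *	}
 */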
4043
4044 /*
4045 * Callback invoked when a task exceeds its physical footprint limit.
4046 */
4047 void
4048 task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1)
4049 {
4050 ledger_amount_t max_footprint, max_footprint_mb;
4051 ledger_amount_t footprint_after_purge;
4052 task_t task;
4053
4054 if (warning == LEDGER_WARNING_DIPPED_BELOW) {
4055 /*
4056 * Task memory limits only provide a warning on the way up.
4057 */
4058 return;
4059 }
4060
4061 task = current_task();
4062
4063 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &max_footprint);
4064 max_footprint_mb = max_footprint >> 20;
4065
4066 /*
4067 * Try to purge all "volatile" memory in that task first.
4068 */
4069 (void) task_purge_volatile_memory(task);
4070 /* are we still over the limit ? */
4071 ledger_get_balance(task->ledger,
4072 task_ledgers.phys_footprint,
4073 &footprint_after_purge);
4074 if ((!warning &&
4075 footprint_after_purge <= max_footprint) ||
4076 (warning &&
4077 footprint_after_purge <= ((max_footprint *
4078 PHYS_FOOTPRINT_WARNING_LEVEL) / 100))) {
4079 /* all better now */
4080 ledger_reset_callback_state(task->ledger,
4081 task_ledgers.phys_footprint);
4082 return;
4083 }
4084 /* still over the limit after purging... */
4085
4086 /*
4087 * If this is an actual violation (not a warning),
4088 * generate a non-fatal high watermark EXC_RESOURCE.
4089 */
4090 if ((warning == 0) && (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION)) {
4091 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND((int)max_footprint_mb);
4092 }
4093
4094 memorystatus_on_ledger_footprint_exceeded((warning == LEDGER_WARNING_ROSE_ABOVE) ? TRUE : FALSE,
4095 (int)max_footprint_mb);
4096 }
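
/*
 * Worked example of the thresholds above (a PHYS_FOOTPRINT_WARNING_LEVEL of
 * 90 is hypothetical, used only for illustration): with a 600 MB ledger
 * limit, a LEDGER_WARNING_ROSE_ABOVE callback is considered resolved once
 * purging brings the footprint to 600 * 90 / 100 = 540 MB or below, while
 * an actual limit violation (warning == 0) is resolved only at or below
 * 600 MB; otherwise the EXC_RESOURCE / jetsam path above is taken.
 */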
4097
4098 extern int proc_check_footprint_priv(void);
4099
4100 kern_return_t
4101 task_set_phys_footprint_limit(
4102 task_t task,
4103 int new_limit_mb,
4104 int *old_limit_mb)
4105 {
4106 kern_return_t error;
4107
4108 if ((error = proc_check_footprint_priv())) {
4109 return (KERN_NO_ACCESS);
4110 }
4111
4112 return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, FALSE);
4113 }
4114
4115 kern_return_t
4116 task_convert_phys_footprint_limit(
4117 int limit_mb,
4118 int *converted_limit_mb)
4119 {
4120 if (limit_mb == -1) {
4121 /*
4122 * No limit
4123 */
4124 if (max_task_footprint != 0) {
4125 *converted_limit_mb = (int)(max_task_footprint / 1024 / 1024); /* bytes to MB */
4126 } else {
4127 *converted_limit_mb = (int)(LEDGER_LIMIT_INFINITY >> 20);
4128 }
4129 } else {
4130 /* nothing to convert */
4131 *converted_limit_mb = limit_mb;
4132 }
4133 return (KERN_SUCCESS);
4134 }
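
/*
 * Worked example of the MB <-> bytes conversions used here ("MB" in these
 * interfaces means 2^20 bytes, hence the shifts by 20):
 *
 *	install a 512 MB limit:	(ledger_amount_t)512 << 20 == 536870912 bytes
 *	read it back:		536870912 >> 20            == 512 MB
 *	"no limit" (-1):	maps to max_task_footprint when a system-wide
 *				default exists, or to LEDGER_LIMIT_INFINITY >> 20
 */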
4135
4136
4137 kern_return_t
4138 task_set_phys_footprint_limit_internal(
4139 task_t task,
4140 int new_limit_mb,
4141 int *old_limit_mb,
4142 boolean_t trigger_exception)
4143 {
4144 ledger_amount_t old;
4145
4146 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &old);
4147
4148 if (old_limit_mb) {
4149 /*
4150 * Check that limit >> 20 will not give an "unexpected" 32-bit
4151 * result. There are, however, implicit assumptions that -1 mb limit
4152 * equates to LEDGER_LIMIT_INFINITY.
4153 */
4154 assert(((old & 0xFFF0000000000000LL) == 0) || (old == LEDGER_LIMIT_INFINITY));
4155 *old_limit_mb = (int)(old >> 20);
4156 }
4157
4158 if (new_limit_mb == -1) {
4159 /*
4160 * Caller wishes to remove the limit.
4161 */
4162 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
4163 max_task_footprint ? max_task_footprint : LEDGER_LIMIT_INFINITY,
4164 max_task_footprint ? PHYS_FOOTPRINT_WARNING_LEVEL : 0);
4165 return (KERN_SUCCESS);
4166 }
4167
4168 #ifdef CONFIG_NOMONITORS
4169 return (KERN_SUCCESS);
4170 #endif /* CONFIG_NOMONITORS */
4171
4172 task_lock(task);
4173
4174 if (trigger_exception) {
4175 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION;
4176 } else {
4177 task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION;
4178 }
4179
4180 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
4181 (ledger_amount_t)new_limit_mb << 20, PHYS_FOOTPRINT_WARNING_LEVEL);
4182
4183 if (task == current_task()) {
4184 ledger_check_new_balance(task->ledger, task_ledgers.phys_footprint);
4185 }
4186
4187 task_unlock(task);
4188
4189 return (KERN_SUCCESS);
4190 }
4191
4192 kern_return_t
4193 task_get_phys_footprint_limit(
4194 task_t task,
4195 int *limit_mb)
4196 {
4197 ledger_amount_t limit;
4198
4199 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &limit);
4200 /*
4201 * Check that limit >> 20 will not give an "unexpected" signed, 32-bit
4202 * result. There are, however, implicit assumptions that -1 mb limit
4203 * equates to LEDGER_LIMIT_INFINITY.
4204 */
4205 assert(((limit & 0xFFF0000000000000LL) == 0) || (limit == LEDGER_LIMIT_INFINITY));
4206 *limit_mb = (int)(limit >> 20);
4207
4208 return (KERN_SUCCESS);
4209 }
4210 #else /* CONFIG_JETSAM */
4211 kern_return_t
4212 task_set_phys_footprint_limit(
4213 __unused task_t task,
4214 __unused int new_limit_mb,
4215 __unused int *old_limit_mb)
4216 {
4217 return (KERN_FAILURE);
4218 }
4219
4220 kern_return_t
4221 task_get_phys_footprint_limit(
4222 __unused task_t task,
4223 __unused int *limit_mb)
4224 {
4225 return (KERN_FAILURE);
4226 }
4227 #endif /* CONFIG_JETSAM */
4228
4229 /*
4230 * We need to export some functions to other components that
4231 * are currently implemented in macros within the osfmk
4232 * component. Just export them as functions of the same name.
4233 */
4234 boolean_t is_kerneltask(task_t t)
4235 {
4236 if (t == kernel_task)
4237 return (TRUE);
4238
4239 return (FALSE);
4240 }
4241
4242 int
4243 check_for_tasksuspend(task_t task)
4244 {
4245
4246 if (task == TASK_NULL)
4247 return (0);
4248
4249 return (task->suspend_count > 0);
4250 }
4251
4252 #undef current_task
4253 task_t current_task(void);
4254 task_t current_task(void)
4255 {
4256 return (current_task_fast());
4257 }
4258
4259 #undef task_reference
4260 void task_reference(task_t task);
4261 void
4262 task_reference(
4263 task_t task)
4264 {
4265 if (task != TASK_NULL)
4266 task_reference_internal(task);
4267 }
4268
4269 /* defined in bsd/kern/kern_prot.c */
4270 extern int get_audit_token_pid(audit_token_t *audit_token);
4271
4272 int task_pid(task_t task)
4273 {
4274 if (task)
4275 return get_audit_token_pid(&task->audit_token);
4276 return -1;
4277 }
4278
4279
4280 /*
4281 * This routine is always called with the task lock held.
4282 * It returns a thread handle without taking a reference, because the
4283 * caller only operates on the thread while holding the task lock.
4284 */
4285 thread_t
4286 task_findtid(task_t task, uint64_t tid)
4287 {
4288 thread_t thread = THREAD_NULL;
4289
4290 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4291 if (thread->thread_id == tid)
4292 return(thread);
4293 }
4294 return(THREAD_NULL);
4295 }
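
/*
 * Illustrative caller pattern (not from this file): because the returned
 * thread carries no reference, the lookup and every use of the result must
 * stay under the task lock.
 *
 *	task_lock(task);
 *	thread_t th = task_findtid(task, tid);
 *	if (th != THREAD_NULL) {
 *		-- operate on th here, while the task lock is still held
 *	}
 *	task_unlock(task);
 */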
4296
4297 /*
4298 * Control the CPU usage monitor for a task.
4299 */
4300 kern_return_t
4301 task_cpu_usage_monitor_ctl(task_t task, uint32_t *flags)
4302 {
4303 int error = KERN_SUCCESS;
4304
4305 if (*flags & CPUMON_MAKE_FATAL) {
4306 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_CPUMON;
4307 } else {
4308 error = KERN_INVALID_ARGUMENT;
4309 }
4310
4311 return error;
4312 }
4313
4314 /*
4315 * Control the wakeups monitor for a task.
4316 */
4317 kern_return_t
4318 task_wakeups_monitor_ctl(task_t task, uint32_t *flags, int32_t *rate_hz)
4319 {
4320 ledger_t ledger = task->ledger;
4321
4322 task_lock(task);
4323 if (*flags & WAKEMON_GET_PARAMS) {
4324 ledger_amount_t limit;
4325 uint64_t period;
4326
4327 ledger_get_limit(ledger, task_ledgers.interrupt_wakeups, &limit);
4328 ledger_get_period(ledger, task_ledgers.interrupt_wakeups, &period);
4329
4330 if (limit != LEDGER_LIMIT_INFINITY) {
4331 /*
4332 * An active limit means the wakeups monitor is enabled.
4333 */
4334 *rate_hz = (int32_t)(limit / (int64_t)(period / NSEC_PER_SEC));
4335 *flags = WAKEMON_ENABLE;
4336 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
4337 *flags |= WAKEMON_MAKE_FATAL;
4338 }
4339 } else {
4340 *flags = WAKEMON_DISABLE;
4341 *rate_hz = -1;
4342 }
4343
4344 /*
4345 * If WAKEMON_GET_PARAMS is present in flags, all other flags are ignored.
4346 */
4347 task_unlock(task);
4348 return KERN_SUCCESS;
4349 }
4350
4351 if (*flags & WAKEMON_ENABLE) {
4352 if (*flags & WAKEMON_SET_DEFAULTS) {
4353 *rate_hz = task_wakeups_monitor_rate;
4354 }
4355
4356 #ifndef CONFIG_NOMONITORS
4357 if (*flags & WAKEMON_MAKE_FATAL) {
4358 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
4359 }
4360 #endif /* CONFIG_NOMONITORS */
4361
4362 if (*rate_hz < 0) {
4363 task_unlock(task);
4364 return KERN_INVALID_ARGUMENT;
4365 }
4366
4367 #ifndef CONFIG_NOMONITORS
4368 ledger_set_limit(ledger, task_ledgers.interrupt_wakeups, *rate_hz * task_wakeups_monitor_interval,
4369 task_wakeups_monitor_ustackshots_trigger_pct);
4370 ledger_set_period(ledger, task_ledgers.interrupt_wakeups, task_wakeups_monitor_interval * NSEC_PER_SEC);
4371 ledger_enable_callback(ledger, task_ledgers.interrupt_wakeups);
4372 #endif /* CONFIG_NOMONITORS */
4373 } else if (*flags & WAKEMON_DISABLE) {
4374 /*
4375 * Caller wishes to disable wakeups monitor on the task.
4376 *
4377 * Disable telemetry if it was triggered by the wakeups monitor, and
4378 * remove the limit & callback on the wakeups ledger entry.
4379 */
4380 #if CONFIG_TELEMETRY
4381 telemetry_task_ctl_locked(task, TF_WAKEMON_WARNING, 0);
4382 #endif
4383 ledger_disable_refill(ledger, task_ledgers.interrupt_wakeups);
4384 ledger_disable_callback(ledger, task_ledgers.interrupt_wakeups);
4385 }
4386
4387 task_unlock(task);
4388 return KERN_SUCCESS;
4389 }
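
/*
 * Usage sketch (illustrative): enabling the wakeups monitor with the system
 * defaults.  The ledger limit installed above is rate_hz wakeups per second
 * times task_wakeups_monitor_interval seconds, refilled every
 * task_wakeups_monitor_interval * NSEC_PER_SEC nanoseconds, which is why
 * WAKEMON_GET_PARAMS can recover the rate as limit / (period / NSEC_PER_SEC).
 *
 *	uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
 *	int32_t rate_hz = 0;	-- overwritten with task_wakeups_monitor_rate
 *	kern_return_t kr = task_wakeups_monitor_ctl(task, &flags, &rate_hz);
 */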
4390
4391 void
4392 task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1)
4393 {
4394 if (warning == LEDGER_WARNING_ROSE_ABOVE) {
4395 #if CONFIG_TELEMETRY
4396 /*
4397 * This task is in danger of violating the wakeups monitor. Enable telemetry on this task
4398 * so there are micro-stackshots available if and when EXC_RESOURCE is triggered.
4399 */
4400 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 1);
4401 #endif
4402 return;
4403 }
4404
4405 #if CONFIG_TELEMETRY
4406 /*
4407 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
4408 * exceeded the limit, turn telemetry off for the task.
4409 */
4410 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 0);
4411 #endif
4412
4413 if (warning == 0) {
4414 THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE();
4415 }
4416 }
4417
4418 void __attribute__((noinline))
4419 THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE(void)
4420 {
4421 task_t task = current_task();
4422 int pid = 0;
4423 const char *procname = "unknown";
4424 uint64_t observed_wakeups_rate;
4425 uint64_t permitted_wakeups_rate;
4426 uint64_t observation_interval;
4427 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
4428 struct ledger_entry_info lei;
4429
4430 #ifdef MACH_BSD
4431 pid = proc_selfpid();
4432 if (task->bsd_info != NULL)
4433 procname = proc_name_address(current_task()->bsd_info);
4434 #endif
4435
4436 ledger_get_entry_info(task->ledger, task_ledgers.interrupt_wakeups, &lei);
4437
4438 /*
4439 * Disable the exception notification so we don't overwhelm
4440 * the listener with an endless stream of redundant exceptions.
4441 */
4442 uint32_t flags = WAKEMON_DISABLE;
4443 task_wakeups_monitor_ctl(task, &flags, NULL);
4444
4445 observed_wakeups_rate = (lei.lei_balance * (int64_t)NSEC_PER_SEC) / lei.lei_last_refill;
4446 permitted_wakeups_rate = lei.lei_limit / task_wakeups_monitor_interval;
4447 observation_interval = lei.lei_refill_period / NSEC_PER_SEC;
4448
4449 if (disable_exc_resource) {
4450 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
4451 "supressed by a boot-arg\n", procname, pid);
4452 return;
4453 }
4454 if (audio_active) {
4455 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
4456 "supressed due to audio playback\n", procname, pid);
4457 return;
4458 }
4459 printf("process %s[%d] caught causing excessive wakeups. Observed wakeups rate "
4460 "(per sec): %lld; Maximum permitted wakeups rate (per sec): %lld; Observation "
4461 "period: %lld seconds; Task lifetime number of wakeups: %lld\n",
4462 procname, pid, observed_wakeups_rate, permitted_wakeups_rate,
4463 observation_interval, lei.lei_credit);
4464
4465 code[0] = code[1] = 0;
4466 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_WAKEUPS);
4467 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_WAKEUPS_MONITOR);
4468 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_PERMITTED(code[0], task_wakeups_monitor_rate);
4469 EXC_RESOURCE_CPUMONITOR_ENCODE_OBSERVATION_INTERVAL(code[0], observation_interval);
4470 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_OBSERVED(code[1], lei.lei_balance * (int64_t)NSEC_PER_SEC / lei.lei_last_refill);
4471 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
4472
4473 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
4474 task_terminate_internal(task);
4475 }
4476 }
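
/*
 * Worked example of the rate arithmetic above (all values hypothetical):
 * with lei_balance = 90000 wakeups accumulated over lei_last_refill =
 * 300 * NSEC_PER_SEC nanoseconds, the observed rate is
 * 90000 * NSEC_PER_SEC / (300 * NSEC_PER_SEC) = 300 wakeups/sec; with
 * lei_limit = 45000 and task_wakeups_monitor_interval = 300 seconds, the
 * permitted rate is 45000 / 300 = 150 wakeups/sec, so the limit was
 * exceeded and EXC_RESOURCE is raised.
 */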
4477
4478 kern_return_t
4479 task_purge_volatile_memory(
4480 task_t task)
4481 {
4482 vm_map_t map;
4483 int num_object_purged;
4484
4485 if (task == TASK_NULL)
4486 return KERN_INVALID_TASK;
4487
4488 task_lock(task);
4489
4490 if (!task->active) {
4491 task_unlock(task);
4492 return KERN_INVALID_TASK;
4493 }
4494 map = task->map;
4495 if (map == VM_MAP_NULL) {
4496 task_unlock(task);
4497 return KERN_INVALID_TASK;
4498 }
4499 vm_map_reference(task->map);
4500
4501 task_unlock(task);
4502
4503 num_object_purged = vm_map_purge(map);
4504 vm_map_deallocate(map);
4505
4506 return KERN_SUCCESS;
4507 }
4508
4509 /* Placeholders for the task set/get voucher interfaces */
4510 kern_return_t
4511 task_get_mach_voucher(
4512 task_t task,
4513 mach_voucher_selector_t __unused which,
4514 ipc_voucher_t *voucher)
4515 {
4516 if (TASK_NULL == task)
4517 return KERN_INVALID_TASK;
4518
4519 *voucher = NULL;
4520 return KERN_SUCCESS;
4521 }
4522
4523 kern_return_t
4524 task_set_mach_voucher(
4525 task_t task,
4526 ipc_voucher_t __unused voucher)
4527 {
4528 if (TASK_NULL == task)
4529 return KERN_INVALID_TASK;
4530
4531 return KERN_SUCCESS;
4532 }
4533
4534 kern_return_t
4535 task_swap_mach_voucher(
4536 task_t task,
4537 ipc_voucher_t new_voucher,
4538 ipc_voucher_t *in_out_old_voucher)
4539 {
4540 if (TASK_NULL == task)
4541 return KERN_INVALID_TASK;
4542
4543 *in_out_old_voucher = new_voucher;
4544 return KERN_SUCCESS;
4545 }
4546
4547 void task_set_gpu_denied(task_t task, boolean_t denied)
4548 {
4549 task_lock(task);
4550
4551 if (denied) {
4552 task->t_flags |= TF_GPU_DENIED;
4553 } else {
4554 task->t_flags &= ~TF_GPU_DENIED;
4555 }
4556
4557 task_unlock(task);
4558 }
4559
4560 boolean_t task_is_gpu_denied(task_t task)
4561 {
4562 /* We don't need the lock to read this flag */
4563 return (task->t_flags & TF_GPU_DENIED) ? TRUE : FALSE;
4564 }
4565
4566 void task_update_logical_writes(task_t task, uint32_t io_size, int flags)
4567 {
4568 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_DATA_WRITE)) | DBG_FUNC_NONE, task_pid(task), io_size, flags, 0, 0);
4569 switch (flags) {
4570 case TASK_WRITE_IMMEDIATE:
4571 OSAddAtomic64(io_size, (SInt64 *)&(task->task_immediate_writes));
4572 break;
4573 case TASK_WRITE_DEFERRED:
4574 OSAddAtomic64(io_size, (SInt64 *)&(task->task_deferred_writes));
4575 break;
4576 case TASK_WRITE_INVALIDATED:
4577 OSAddAtomic64(io_size, (SInt64 *)&(task->task_invalidated_writes));
4578 break;
4579 case TASK_WRITE_METADATA:
4580 OSAddAtomic64(io_size, (SInt64 *)&(task->task_metadata_writes));
4581 break;
4582 }
4583 return;
4584 }