osfmk/kern/task.c

   1 /*
   2  * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * @OSF_FREE_COPYRIGHT@
  30  */
  31 /*
  32  * Mach Operating System
  33  * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
  34  * All Rights Reserved.
  35  *
  36  * Permission to use, copy, modify and distribute this software and its
  37  * documentation is hereby granted, provided that both the copyright
  38  * notice and this permission notice appear in all copies of the
  39  * software, derivative works or modified versions, and any portions
  40  * thereof, and that both notices appear in supporting documentation.
  41  *
  42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
  44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  45  *
  46  * Carnegie Mellon requests users of this software to return to
  47  *
  48  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  49  *  School of Computer Science
  50  *  Carnegie Mellon University
  51  *  Pittsburgh PA 15213-3890
  52  *
  53  * any improvements or extensions that they make and grant Carnegie Mellon
  54  * the rights to redistribute these changes.
  55  */
  56 /*
  57  *      File:   kern/task.c
  58  *      Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub,
  59  *              David Black
  60  *
  61  *      Task management primitives implementation.
  62  */
  63 /*
  64  * Copyright (c) 1993 The University of Utah and
  65  * the Computer Systems Laboratory (CSL).  All rights reserved.
  66  *
  67  * Permission to use, copy, modify and distribute this software and its
  68  * documentation is hereby granted, provided that both the copyright
  69  * notice and this permission notice appear in all copies of the
  70  * software, derivative works or modified versions, and any portions
  71  * thereof, and that both notices appear in supporting documentation.
  72  *
  73  * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
  74  * IS" CONDITION.  THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
  75  * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  76  *
  77  * CSL requests users of this software to return to csl-dist@cs.utah.edu any
  78  * improvements that they make and grant CSL redistribution rights.
  79  *
  80  */
  81 /*
  82  * NOTICE: This file was modified by McAfee Research in 2004 to introduce
  83  * support for mandatory and extensible security protections.  This notice
  84  * is included in support of clause 2.2 (b) of the Apple Public License,
  85  * Version 2.0.
  86  * Copyright (c) 2005 SPARTA, Inc.
  87  */
  88
  89 #include <mach/mach_types.h>
  90 #include <mach/boolean.h>
  91 #include <mach/host_priv.h>
  92 #include <mach/machine/vm_types.h>
  93 #include <mach/vm_param.h>
  94 #include <mach/mach_vm.h>
  95 #include <mach/semaphore.h>
  96 #include <mach/task_info.h>
  97 #include <mach/task_inspect.h>
  98 #include <mach/task_special_ports.h>
  99 #include <mach/sdt.h>
 100
 101 #include <ipc/ipc_importance.h>
 102 #include <ipc/ipc_types.h>
 103 #include <ipc/ipc_space.h>
 104 #include <ipc/ipc_entry.h>
 105 #include <ipc/ipc_hash.h>
 106
 107 #include <kern/kern_types.h>
 108 #include <kern/mach_param.h>
 109 #include <kern/misc_protos.h>
 110 #include <kern/task.h>
 111 #include <kern/thread.h>
 112 #include <kern/coalition.h>
 113 #include <kern/zalloc.h>
 114 #include <kern/kalloc.h>
 115 #include <kern/kern_cdata.h>
 116 #include <kern/processor.h>
 117 #include <kern/sched_prim.h>    /* for thread_wakeup */
 118 #include <kern/ipc_tt.h>
 119 #include <kern/host.h>
 120 #include <kern/clock.h>
 121 #include <kern/timer.h>
 122 #include <kern/assert.h>
 123 #include <kern/sync_lock.h>
 124 #include <kern/affinity.h>
 125 #include <kern/exc_resource.h>
 126 #include <kern/machine.h>
 127 #include <kern/policy_internal.h>
 128
 129 #include <corpses/task_corpse.h>
 130 #if CONFIG_TELEMETRY
 131 #include <kern/telemetry.h>
 132 #endif
 133
 134 #if MONOTONIC
 135 #include <kern/monotonic.h>
 136 #include <machine/monotonic.h>
 137 #endif /* MONOTONIC */
 138
 139 #include <os/log.h>
 140
 141 #include <vm/pmap.h>
 142 #include <vm/vm_map.h>
 143 #include <vm/vm_kern.h>         /* for kernel_map, ipc_kernel_map */
 144 #include <vm/vm_pageout.h>
 145 #include <vm/vm_protos.h>
 146 #include <vm/vm_purgeable_internal.h>
 147
 148 #include <sys/resource.h>
 149 #include <sys/signalvar.h> /* for coredump */
 150
 151 /*
 152  * Exported interfaces
 153  */
 154
 155 #include <mach/task_server.h>
 156 #include <mach/mach_host_server.h>
 157 #include <mach/host_security_server.h>
 158 #include <mach/mach_port_server.h>
 159
 160 #include <vm/vm_shared_region.h>
 161
 162 #include <libkern/OSDebug.h>
 163 #include <libkern/OSAtomic.h>
 164
 165 #if CONFIG_ATM
 166 #include <atm/atm_internal.h>
 167 #endif
 168
 169 #include <kern/sfi.h>           /* picks up ledger.h */
 170
 171 #if CONFIG_MACF
 172 #include <security/mac_mach_internal.h>
 173 #endif
 174
 175 #if KPERF
 176 extern int kpc_force_all_ctrs(task_t, int);
 177 #endif
 178
 179 task_t                  kernel_task;
 180 zone_t                  task_zone;
 181 lck_attr_t      task_lck_attr;
 182 lck_grp_t       task_lck_grp;
 183 lck_grp_attr_t  task_lck_grp_attr;
 184
 185 extern int exc_via_corpse_forking;
 186 extern int corpse_for_fatal_memkill;
 187
 188 /* Flag set by core audio when audio is playing. Used to stifle EXC_RESOURCE generation when active. */
 189 int audio_active = 0;
 190
 191 zinfo_usage_store_t tasks_tkm_private;
 192 zinfo_usage_store_t tasks_tkm_shared;
 193
 194 /* A container to accumulate statistics for expired tasks */
 195 expired_task_statistics_t               dead_task_statistics;
 196 lck_spin_t              dead_task_statistics_lock;
 197
 198 ledger_template_t task_ledger_template = NULL;
 199
 200 struct _task_ledger_indices task_ledgers __attribute__((used)) =
 201         {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
 202 #if !CONFIG_EMBEDDED
 203          { 0 /* initialized at runtime */},
 204 #endif /* !CONFIG_EMBEDDED */
 205          -1, -1,
 206          -1, -1,
 207          -1, -1,
 208         };
 209
 210 /* System sleep state */
 211 boolean_t tasks_suspend_state;
 212
 213
 214 void init_task_ledgers(void);
 215 void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1);
 216 void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1);
 217 void task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1);
 218 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void);
 219 void __attribute__((noinline)) PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, boolean_t is_fatal);
 220 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor);
 221
 222 kern_return_t task_suspend_internal(task_t);
 223 kern_return_t task_resume_internal(task_t);
 224 static kern_return_t task_start_halt_locked(task_t task, boolean_t should_mark_corpse);
 225
 226 extern kern_return_t iokit_task_terminate(task_t task);
 227
 228 extern kern_return_t exception_deliver(thread_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t, struct exception_action *, lck_mtx_t *);
 229 extern void bsd_copythreadname(void *dst_uth, void *src_uth);
 230 extern kern_return_t thread_resume(thread_t thread);
 231
 232 // Warn tasks when they hit 80% of their memory limit.
 233 #define PHYS_FOOTPRINT_WARNING_LEVEL 80
 234
 235 #define TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT              150 /* wakeups per second */
 236 #define TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL   300 /* in seconds. */
 237
 238 /*
 239  * Level (in terms of percentage of the limit) at which the wakeups monitor triggers telemetry.
 240  *
 241  * (ie when the task's wakeups rate exceeds 70% of the limit, start taking user
 242  *  stacktraces, aka micro-stackshots)
 243  */
 244 #define TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER        70
 245
 246 int task_wakeups_monitor_interval; /* In seconds. Time period over which wakeups rate is observed */
 247 int task_wakeups_monitor_rate;     /* In hz. Maximum allowable wakeups per task before EXC_RESOURCE is sent */
 248
 249 int task_wakeups_monitor_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
 250
 251 int disable_exc_resource; /* Global override to suppress EXC_RESOURCE for resource monitor violations. */
 252
 253 ledger_amount_t max_task_footprint = 0;  /* Per-task limit on physical memory consumption in bytes     */
 254 int max_task_footprint_warning_level = 0;  /* Per-task limit warning percentage */
 255 int max_task_footprint_mb = 0;  /* Per-task limit on physical memory consumption in megabytes */
 256
 257 /* I/O Monitor Limits */
 258 #define IOMON_DEFAULT_LIMIT                     (20480ull)      /* MB of logical/physical I/O */
 259 #define IOMON_DEFAULT_INTERVAL                  (86400ull)      /* in seconds */
 260
 261 uint64_t task_iomon_limit_mb;           /* Per-task I/O monitor limit in MBs */
 262 uint64_t task_iomon_interval_secs;      /* Per-task I/O monitor interval in secs */
 263
 264 #define IO_TELEMETRY_DEFAULT_LIMIT              (10ll * 1024ll * 1024ll)
 265 int64_t io_telemetry_limit;                     /* Threshold to take a microstackshot (0 indicates I/O telemetry is turned off) */
 266 int64_t global_logical_writes_count = 0;        /* Global count for logical writes */
 267 static boolean_t global_update_logical_writes(int64_t);
 268
 269 #if MACH_ASSERT
 270 int pmap_ledgers_panic = 1;
 271 #endif /* MACH_ASSERT */
 272
 273 int task_max = CONFIG_TASK_MAX; /* Max number of tasks */
 274
 275 #if CONFIG_COREDUMP
 276 int hwm_user_cores = 0; /* high watermark violations generate user core files */
 277 #endif
 278
 279 #ifdef MACH_BSD
 280 extern void     proc_getexecutableuuid(void *, unsigned char *, unsigned long);
 281 extern int      proc_pid(struct proc *p);
 282 extern int      proc_selfpid(void);
 283 extern char     *proc_name_address(struct proc *p);
 284 extern uint64_t get_dispatchqueue_offset_from_proc(void *);
 285 extern int kevent_proc_copy_uptrs(void *proc, uint64_t *buf, int bufsize);
 286
 287 #if CONFIG_MEMORYSTATUS
 288 extern void     proc_memstat_terminated(struct proc* p, boolean_t set);
 289 extern void     memorystatus_on_ledger_footprint_exceeded(int warning, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal);
 290 extern void     memorystatus_log_exception(const int max_footprint_mb, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal);
 291 extern boolean_t memorystatus_allowed_vm_map_fork(__unused task_t task);
 292 #endif /* CONFIG_MEMORYSTATUS */
 293
 294 #endif /* MACH_BSD */
 295
 296 /* Forwards */
 297
 298 static void task_hold_locked(task_t task);
 299 static void task_wait_locked(task_t task, boolean_t until_not_runnable);
 300 static void task_release_locked(task_t task);
 301
 302 static void task_synchronizer_destroy_all(task_t task);
 303
 304 void
 305 task_backing_store_privileged(
 306                         task_t task)
 307 {
 308         task_lock(task);
 309         task->priv_flags |= VM_BACKING_STORE_PRIV;
 310         task_unlock(task);
 311         return;
 312 }
 313
 314
 315 void
 316 task_set_64bit(
 317                 task_t task,
 318                 boolean_t is64bit)
 319 {
 320 #if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
 321         thread_t thread;
 322 #endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */
 323
 324         task_lock(task);
 325
 326         if (is64bit) {
 327                 if (task_has_64BitAddr(task))
 328                         goto out;
 329                 task_set_64BitAddr(task);
 330         } else {
 331                 if ( !task_has_64BitAddr(task))
 332                         goto out;
 333                 task_clear_64BitAddr(task);
 334         }
 335         /* FIXME: On x86, the thread save state flavor can diverge from the
 336          * task's 64-bit feature flag due to the 32-bit/64-bit register save
 337          * state dichotomy. Since we can be pre-empted in this interval,
 338          * certain routines may observe the thread as being in an inconsistent
 339          * state with respect to its task's 64-bitness.
 340          */
 341
 342 #if defined(__x86_64__) || defined(__arm64__)
 343         queue_iterate(&task->threads, thread, thread_t, task_threads) {
 344                 thread_mtx_lock(thread);
 345                 machine_thread_switch_addrmode(thread);
 346                 thread_mtx_unlock(thread);
 347
 348 #if defined(__arm64__)
 349                 /* specifically, if running on H9 */
 350                 if (thread == current_thread()) {
 351                         uint64_t arg1, arg2;
 352                         int urgency;
 353                         spl_t spl = splsched();
 354                         /*
 355                          * This call tell that the current thread changed it's 32bitness.
 356                          * Other thread were no more on core when 32bitness was changed,
 357                          * but current_thread() is on core and the previous call to
 358                          * machine_thread_going_on_core() gave 32bitness which is now wrong.
 359                          *
 360                          * This is needed for bring-up, a different callback should be used
 361                          * in the future.
 362                          *
 363                          * TODO: Remove this callout when we no longer support 32-bit code on H9
 364                          */
 365                         thread_lock(thread);
 366                         urgency = thread_get_urgency(thread, &arg1, &arg2);
 367                         machine_thread_going_on_core(thread, urgency, 0, 0, mach_approximate_time());
 368                         thread_unlock(thread);
 369                         splx(spl);
 370                 }
 371 #endif /* defined(__arm64__) */
 372         }
 373 #endif /* defined(__x86_64__) || defined(__arm64__) */
 374
 375 out:
 376         task_unlock(task);
 377 }
 378
 379 void
 380 task_set_platform_binary(
 381                 task_t task,
 382                 boolean_t is_platform)
 383 {
 384         task_lock(task);
 385         if (is_platform) {
 386                 task->t_flags |= TF_PLATFORM;
 387         } else {
 388                 task->t_flags &= ~(TF_PLATFORM);
 389         }
 390         task_unlock(task);
 391 }
 392
 393 void
 394 task_set_dyld_info(
 395     task_t task,
 396     mach_vm_address_t addr,
 397     mach_vm_size_t size)
 398 {
 399         task_lock(task);
 400         task->all_image_info_addr = addr;
 401         task->all_image_info_size = size;
 402     task_unlock(task);
 403 }
 404
 405 void
 406 task_atm_reset(__unused task_t task) {
 407
 408 #if CONFIG_ATM
 409         if (task->atm_context != NULL) {
 410                  atm_task_descriptor_destroy(task->atm_context);
 411                  task->atm_context = NULL;
 412         }
 413 #endif
 414
 415 }
 416
 417 void
 418 task_bank_reset(__unused task_t task) {
 419
 420         if (task->bank_context != NULL) {
 421                  bank_task_destroy(task);
 422         }
 423 }
 424
 425 /*
 426  * NOTE: This should only be called when the P_LINTRANSIT
 427  *       flag is set (the proc_trans lock is held) on the
 428  *       proc associated with the task.
 429  */
 430 void
 431 task_bank_init(__unused task_t task) {
 432
 433         if (task->bank_context != NULL) {
 434                 panic("Task bank init called with non null bank context for task: %p and bank_context: %p", task, task->bank_context);
 435         }
 436         bank_task_initialize(task);
 437 }
 438
 439 void
 440 task_set_did_exec_flag(task_t task)
 441 {
 442         task->t_procflags |= TPF_DID_EXEC;
 443 }
 444
 445 void
 446 task_clear_exec_copy_flag(task_t task)
 447 {
 448         task->t_procflags &= ~TPF_EXEC_COPY;
 449 }
 450
 451 /*
 452  * This wait event is t_procflags instead of t_flags because t_flags is volatile
 453  *
 454  * TODO: store the flags in the same place as the event
 455  * rdar://problem/28501994
 456  */
 457 event_t
 458 task_get_return_wait_event(task_t task)
 459 {
 460         return (event_t)&task->t_procflags;
 461 }
 462
 463 void
 464 task_clear_return_wait(task_t task)
 465 {
 466         task_lock(task);
 467
 468         task->t_flags &= ~TF_LRETURNWAIT;
 469
 470         if (task->t_flags & TF_LRETURNWAITER) {
 471                 thread_wakeup(task_get_return_wait_event(task));
 472                 task->t_flags &= ~TF_LRETURNWAITER;
 473         }
 474
 475         task_unlock(task);
 476 }
 477
 478 void
 479 task_wait_to_return(void)
 480 {
 481         task_t task;
 482
 483         task = current_task();
 484         task_lock(task);
 485
 486         if (task->t_flags & TF_LRETURNWAIT) {
 487                 do {
 488                         task->t_flags |= TF_LRETURNWAITER;
 489                         assert_wait(task_get_return_wait_event(task), THREAD_UNINT);
 490                         task_unlock(task);
 491
 492                         thread_block(THREAD_CONTINUE_NULL);
 493
 494                         task_lock(task);
 495                 } while (task->t_flags & TF_LRETURNWAIT);
 496         }
 497
 498         task_unlock(task);
 499
 500         thread_bootstrap_return();
 501 }
 502
 503 boolean_t
 504 task_is_exec_copy(task_t task)
 505 {
 506         return task_is_exec_copy_internal(task);
 507 }
 508
 509 boolean_t
 510 task_did_exec(task_t task)
 511 {
 512         return task_did_exec_internal(task);
 513 }
 514
 515 boolean_t
 516 task_is_active(task_t task)
 517 {
 518         return task->active;
 519 }
 520
 521 boolean_t
 522 task_is_halting(task_t task)
 523 {
 524         return task->halting;
 525 }
 526
 527 #if TASK_REFERENCE_LEAK_DEBUG
 528 #include <kern/btlog.h>
 529
 530 static btlog_t *task_ref_btlog;
 531 #define TASK_REF_OP_INCR        0x1
 532 #define TASK_REF_OP_DECR        0x2
 533
 534 #define TASK_REF_NUM_RECORDS    100000
 535 #define TASK_REF_BTDEPTH        7
 536
 537 void
 538 task_reference_internal(task_t task)
 539 {
 540         void *       bt[TASK_REF_BTDEPTH];
 541         int             numsaved = 0;
 542
 543         numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
 544
 545         (void)hw_atomic_add(&(task)->ref_count, 1);
 546         btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_INCR,
 547                                         bt, numsaved);
 548 }
 549
 550 uint32_t
 551 task_deallocate_internal(task_t task)
 552 {
 553         void *       bt[TASK_REF_BTDEPTH];
 554         int             numsaved = 0;
 555
 556         numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
 557
 558         btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_DECR,
 559                                         bt, numsaved);
 560         return hw_atomic_sub(&(task)->ref_count, 1);
 561 }
 562
 563 #endif /* TASK_REFERENCE_LEAK_DEBUG */
 564
 565 void
 566 task_init(void)
 567 {
 568
 569         lck_grp_attr_setdefault(&task_lck_grp_attr);
 570         lck_grp_init(&task_lck_grp, "task", &task_lck_grp_attr);
 571         lck_attr_setdefault(&task_lck_attr);
 572         lck_mtx_init(&tasks_threads_lock, &task_lck_grp, &task_lck_attr);
 573         lck_mtx_init(&tasks_corpse_lock, &task_lck_grp, &task_lck_attr);
 574
 575         task_zone = zinit(
 576                         sizeof(struct task),
 577                         task_max * sizeof(struct task),
 578                         TASK_CHUNK * sizeof(struct task),
 579                         "tasks");
 580
 581         zone_change(task_zone, Z_NOENCRYPT, TRUE);
 582
 583 #if CONFIG_EMBEDDED
 584         task_watch_init();
 585 #endif /* CONFIG_EMBEDDED */
 586
 587         /*
 588          * Configure per-task memory limit.
 589          * The boot-arg is interpreted as Megabytes,
 590          * and takes precedence over the device tree.
 591          * Setting the boot-arg to 0 disables task limits.
 592          */
 593         if (!PE_parse_boot_argn("max_task_pmem", &max_task_footprint_mb,
 594                         sizeof (max_task_footprint_mb))) {
 595                 /*
 596                  * No limit was found in boot-args, so go look in the device tree.
 597                  */
 598                 if (!PE_get_default("kern.max_task_pmem", &max_task_footprint_mb,
 599                                 sizeof(max_task_footprint_mb))) {
 600                         /*
 601                          * No limit was found in device tree.
 602                          */
 603                         max_task_footprint_mb = 0;
 604                 }
 605         }
 606
 607         if (max_task_footprint_mb != 0) {
 608 #if CONFIG_MEMORYSTATUS
 609                 if (max_task_footprint_mb < 50) {
 610                                 printf("Warning: max_task_pmem %d below minimum.\n",
 611                                 max_task_footprint_mb);
 612                                 max_task_footprint_mb = 50;
 613                 }
 614                 printf("Limiting task physical memory footprint to %d MB\n",
 615                         max_task_footprint_mb);
 616
 617                 max_task_footprint = (ledger_amount_t)max_task_footprint_mb * 1024 * 1024; // Convert MB to bytes
 618
 619                 /*
 620                  * Configure the per-task memory limit warning level.
 621                  * This is computed as a percentage.
 622                  */
 623                 max_task_footprint_warning_level = 0;
 624
 625                 if (max_mem < 0x40000000) {
 626                         /*
 627                          * On devices with < 1GB of memory:
 628                          *    -- set warnings to 50MB below the per-task limit.
 629                          */
 630                         if (max_task_footprint_mb > 50) {
 631                                 max_task_footprint_warning_level = ((max_task_footprint_mb - 50) * 100) / max_task_footprint_mb;
 632                         }
 633                 } else {
 634                         /*
 635                          * On devices with >= 1GB of memory:
 636                          *    -- set warnings to 100MB below the per-task limit.
 637                          */
 638                         if (max_task_footprint_mb > 100) {
 639                                 max_task_footprint_warning_level = ((max_task_footprint_mb - 100) * 100) / max_task_footprint_mb;
 640                         }
 641                 }
 642
 643                 /*
 644                  * Never allow warning level to land below the default.
 645                  */
 646                 if (max_task_footprint_warning_level < PHYS_FOOTPRINT_WARNING_LEVEL) {
 647                         max_task_footprint_warning_level = PHYS_FOOTPRINT_WARNING_LEVEL;
 648                 }
 649
 650                 printf("Limiting task physical memory warning to %d%%\n", max_task_footprint_warning_level);
 651
 652 #else
 653                 printf("Warning: max_task_pmem specified, but jetsam not configured; ignoring.\n");
 654 #endif /* CONFIG_MEMORYSTATUS */
 655         }
 656
 657 #if MACH_ASSERT
 658         PE_parse_boot_argn("pmap_ledgers_panic", &pmap_ledgers_panic,
 659                           sizeof (pmap_ledgers_panic));
 660 #endif /* MACH_ASSERT */
 661
 662 #if CONFIG_COREDUMP
 663         if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores,
 664                         sizeof (hwm_user_cores))) {
 665                 hwm_user_cores = 0;
 666         }
 667 #endif
 668
 669         proc_init_cpumon_params();
 670
 671         if (!PE_parse_boot_argn("task_wakeups_monitor_rate", &task_wakeups_monitor_rate, sizeof (task_wakeups_monitor_rate))) {
 672                 task_wakeups_monitor_rate = TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT;
 673         }
 674
 675         if (!PE_parse_boot_argn("task_wakeups_monitor_interval", &task_wakeups_monitor_interval, sizeof (task_wakeups_monitor_interval))) {
 676                 task_wakeups_monitor_interval = TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL;
 677         }
 678
 679         if (!PE_parse_boot_argn("task_wakeups_monitor_ustackshots_trigger_pct", &task_wakeups_monitor_ustackshots_trigger_pct,
 680                 sizeof (task_wakeups_monitor_ustackshots_trigger_pct))) {
 681                 task_wakeups_monitor_ustackshots_trigger_pct = TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER;
 682         }
 683
 684         if (!PE_parse_boot_argn("disable_exc_resource", &disable_exc_resource,
 685                 sizeof (disable_exc_resource))) {
 686                 disable_exc_resource = 0;
 687         }
 688
 689         if (!PE_parse_boot_argn("task_iomon_limit_mb", &task_iomon_limit_mb, sizeof (task_iomon_limit_mb))) {
 690                 task_iomon_limit_mb = IOMON_DEFAULT_LIMIT;
 691         }
 692
 693         if (!PE_parse_boot_argn("task_iomon_interval_secs", &task_iomon_interval_secs, sizeof (task_iomon_interval_secs))) {
 694                 task_iomon_interval_secs = IOMON_DEFAULT_INTERVAL;
 695         }
 696
 697         if (!PE_parse_boot_argn("io_telemetry_limit", &io_telemetry_limit, sizeof (io_telemetry_limit))) {
 698                 io_telemetry_limit = IO_TELEMETRY_DEFAULT_LIMIT;
 699         }
 700
 701 /*
 702  * If we have coalitions, coalition_init() will call init_task_ledgers() as it
 703  * sets up the ledgers for the default coalition. If we don't have coalitions,
 704  * then we have to call it now.
 705  */
 706 #if CONFIG_COALITIONS
 707         assert(task_ledger_template);
 708 #else /* CONFIG_COALITIONS */
 709         init_task_ledgers();
 710 #endif /* CONFIG_COALITIONS */
 711
 712 #if TASK_REFERENCE_LEAK_DEBUG
 713         task_ref_btlog = btlog_create(TASK_REF_NUM_RECORDS, TASK_REF_BTDEPTH, TRUE /* caller_will_remove_entries_for_element? */);
 714         assert(task_ref_btlog);
 715 #endif
 716
 717         /*
 718          * Create the kernel task as the first task.
 719          */
 720 #ifdef __LP64__
 721         if (task_create_internal(TASK_NULL, NULL, FALSE, TRUE, TF_NONE, TPF_NONE, &kernel_task) != KERN_SUCCESS)
 722 #else
 723         if (task_create_internal(TASK_NULL, NULL, FALSE, FALSE, TF_NONE, TPF_NONE, &kernel_task) != KERN_SUCCESS)
 724 #endif
 725                 panic("task_init\n");
 726
 727
 728         vm_map_deallocate(kernel_task->map);
 729         kernel_task->map = kernel_map;
 730         lck_spin_init(&dead_task_statistics_lock, &task_lck_grp, &task_lck_attr);
 731 }
 732
 733 /*
 734  * Create a task running in the kernel address space.  It may
 735  * have its own map of size mem_size and may have ipc privileges.
 736  */
 737 kern_return_t
 738 kernel_task_create(
 739         __unused task_t         parent_task,
 740         __unused vm_offset_t            map_base,
 741         __unused vm_size_t              map_size,
 742         __unused task_t         *child_task)
 743 {
 744         return (KERN_INVALID_ARGUMENT);
 745 }
 746
 747 kern_return_t
 748 task_create(
 749         task_t                          parent_task,
 750         __unused ledger_port_array_t    ledger_ports,
 751         __unused mach_msg_type_number_t num_ledger_ports,
 752         __unused boolean_t              inherit_memory,
 753         __unused task_t                 *child_task)    /* OUT */
 754 {
 755         if (parent_task == TASK_NULL)
 756                 return(KERN_INVALID_ARGUMENT);
 757
 758         /*
 759          * No longer supported: too many calls assume that a task has a valid
 760          * process attached.
 761          */
 762         return(KERN_FAILURE);
 763 }
 764
 765 kern_return_t
 766 host_security_create_task_token(
 767         host_security_t                 host_security,
 768         task_t                          parent_task,
 769         __unused security_token_t       sec_token,
 770         __unused audit_token_t          audit_token,
 771         __unused host_priv_t            host_priv,
 772         __unused ledger_port_array_t    ledger_ports,
 773         __unused mach_msg_type_number_t num_ledger_ports,
 774         __unused boolean_t              inherit_memory,
 775         __unused task_t                 *child_task)    /* OUT */
 776 {
 777         if (parent_task == TASK_NULL)
 778                 return(KERN_INVALID_ARGUMENT);
 779
 780         if (host_security == HOST_NULL)
 781                 return(KERN_INVALID_SECURITY);
 782
 783         /*
 784          * No longer supported.
 785          */
 786         return(KERN_FAILURE);
 787 }
 788
 789 /*
 790  * Task ledgers
 791  * ------------
 792  *
 793  * phys_footprint
 794  *   Physical footprint: This is the sum of:
 795  *     + (internal - alternate_accounting)
 796  *     + (internal_compressed - alternate_accounting_compressed)
 797  *     + iokit_mapped
 798  *     + purgeable_nonvolatile
 799  *     + purgeable_nonvolatile_compressed
 800  *     + page_table
 801  *
 802  * internal
 803  *   The task's anonymous memory, which on iOS is always resident.
 804  *
 805  * internal_compressed
 806  *   Amount of this task's internal memory which is held by the compressor.
 807  *   Such memory is no longer actually resident for the task [i.e., resident in its pmap],
 808  *   and could be either decompressed back into memory, or paged out to storage, depending
 809  *   on our implementation.
 810  *
 811  * iokit_mapped
 812  *   IOKit mappings: The total size of all IOKit mappings in this task, regardless of
 813  *   clean/dirty or internal/external state.
 814  *
 815  * alternate_accounting
 816  *   The number of internal dirty pages which are part of IOKit mappings. By definition, these pages
 817  *   are counted in both internal *and* iokit_mapped, so we must subtract them from the total to avoid
 818  *   double counting.
 819  */
 820 void
 821 init_task_ledgers(void)
 822 {
 823         ledger_template_t t;
 824
 825         assert(task_ledger_template == NULL);
 826         assert(kernel_task == TASK_NULL);
 827
 828 #if MACH_ASSERT
 829         PE_parse_boot_argn("pmap_ledgers_panic", &pmap_ledgers_panic,
 830                           sizeof (pmap_ledgers_panic));
 831 #endif /* MACH_ASSERT */
 832
 833         if ((t = ledger_template_create("Per-task ledger")) == NULL)
 834                 panic("couldn't create task ledger template");
 835
 836         task_ledgers.cpu_time = ledger_entry_add(t, "cpu_time", "sched", "ns");
 837         task_ledgers.tkm_private = ledger_entry_add(t, "tkm_private",
 838             "physmem", "bytes");
 839         task_ledgers.tkm_shared = ledger_entry_add(t, "tkm_shared", "physmem",
 840             "bytes");
 841         task_ledgers.phys_mem = ledger_entry_add(t, "phys_mem", "physmem",
 842             "bytes");
 843         task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem",
 844             "bytes");
 845         task_ledgers.internal = ledger_entry_add(t, "internal", "physmem",
 846             "bytes");
 847         task_ledgers.iokit_mapped = ledger_entry_add(t, "iokit_mapped", "mappings",
 848             "bytes");
 849         task_ledgers.alternate_accounting = ledger_entry_add(t, "alternate_accounting", "physmem",
 850             "bytes");
 851         task_ledgers.alternate_accounting_compressed = ledger_entry_add(t, "alternate_accounting_compressed", "physmem",
 852             "bytes");
 853         task_ledgers.page_table = ledger_entry_add(t, "page_table", "physmem",
 854             "bytes");
 855         task_ledgers.phys_footprint = ledger_entry_add(t, "phys_footprint", "physmem",
 856             "bytes");
 857         task_ledgers.internal_compressed = ledger_entry_add(t, "internal_compressed", "physmem",
 858             "bytes");
 859         task_ledgers.purgeable_volatile = ledger_entry_add(t, "purgeable_volatile", "physmem", "bytes");
 860         task_ledgers.purgeable_nonvolatile = ledger_entry_add(t, "purgeable_nonvolatile", "physmem", "bytes");
 861         task_ledgers.purgeable_volatile_compressed = ledger_entry_add(t, "purgeable_volatile_compress", "physmem", "bytes");
 862         task_ledgers.purgeable_nonvolatile_compressed = ledger_entry_add(t, "purgeable_nonvolatile_compress", "physmem", "bytes");
 863         task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power",
 864             "count");
 865         task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power",
 866             "count");
 867
 868 #if CONFIG_SCHED_SFI
 869         sfi_class_id_t class_id, ledger_alias;
 870         for (class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
 871                 task_ledgers.sfi_wait_times[class_id] = -1;
 872         }
 873
 874         /* don't account for UNSPECIFIED */
 875         for (class_id = SFI_CLASS_UNSPECIFIED + 1; class_id < MAX_SFI_CLASS_ID; class_id++) {
 876                 ledger_alias = sfi_get_ledger_alias_for_class(class_id);
 877                 if (ledger_alias != SFI_CLASS_UNSPECIFIED) {
 878                         /* Check to see if alias has been registered yet */
 879                         if (task_ledgers.sfi_wait_times[ledger_alias] != -1) {
 880                                 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias];
 881                         } else {
 882                                 /* Otherwise, initialize it first */
 883                                 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias] = sfi_ledger_entry_add(t, ledger_alias);
 884                         }
 885                 } else {
 886                         task_ledgers.sfi_wait_times[class_id] = sfi_ledger_entry_add(t, class_id);
 887                 }
 888
 889                 if (task_ledgers.sfi_wait_times[class_id] < 0) {
 890                         panic("couldn't create entries for task ledger template for SFI class 0x%x", class_id);
 891                 }
 892         }
 893
 894         assert(task_ledgers.sfi_wait_times[MAX_SFI_CLASS_ID -1] != -1);
 895 #endif /* CONFIG_SCHED_SFI */
 896
 897         task_ledgers.cpu_time_billed_to_me = ledger_entry_add(t, "cpu_time_billed_to_me", "sched", "ns");
 898         task_ledgers.cpu_time_billed_to_others = ledger_entry_add(t, "cpu_time_billed_to_others", "sched", "ns");
 899         task_ledgers.physical_writes = ledger_entry_add(t, "physical_writes", "res", "bytes");
 900         task_ledgers.logical_writes = ledger_entry_add(t, "logical_writes", "res", "bytes");
 901         task_ledgers.energy_billed_to_me = ledger_entry_add(t, "energy_billed_to_me", "power", "nj");
 902         task_ledgers.energy_billed_to_others = ledger_entry_add(t, "energy_billed_to_others", "power", "nj");
 903
 904         if ((task_ledgers.cpu_time < 0) ||
 905             (task_ledgers.tkm_private < 0) ||
 906             (task_ledgers.tkm_shared < 0) ||
 907             (task_ledgers.phys_mem < 0) ||
 908             (task_ledgers.wired_mem < 0) ||
 909             (task_ledgers.internal < 0) ||
 910             (task_ledgers.iokit_mapped < 0) ||
 911             (task_ledgers.alternate_accounting < 0) ||
 912             (task_ledgers.alternate_accounting_compressed < 0) ||
 913             (task_ledgers.page_table < 0) ||
 914             (task_ledgers.phys_footprint < 0) ||
 915             (task_ledgers.internal_compressed < 0) ||
 916             (task_ledgers.purgeable_volatile < 0) ||
 917             (task_ledgers.purgeable_nonvolatile < 0) ||
 918             (task_ledgers.purgeable_volatile_compressed < 0) ||
 919             (task_ledgers.purgeable_nonvolatile_compressed < 0) ||
 920             (task_ledgers.platform_idle_wakeups < 0) ||
 921             (task_ledgers.interrupt_wakeups < 0) ||
 922             (task_ledgers.cpu_time_billed_to_me < 0) || (task_ledgers.cpu_time_billed_to_others < 0) ||
 923             (task_ledgers.physical_writes < 0) ||
 924             (task_ledgers.logical_writes < 0) ||
 925             (task_ledgers.energy_billed_to_me < 0) ||
 926             (task_ledgers.energy_billed_to_others < 0)
 927             ) {
 928                 panic("couldn't create entries for task ledger template");
 929         }
 930
 931         ledger_track_credit_only(t, task_ledgers.phys_footprint);
 932         ledger_track_credit_only(t, task_ledgers.page_table);
 933         ledger_track_credit_only(t, task_ledgers.internal);
 934         ledger_track_credit_only(t, task_ledgers.internal_compressed);
 935         ledger_track_credit_only(t, task_ledgers.iokit_mapped);
 936         ledger_track_credit_only(t, task_ledgers.alternate_accounting);
 937         ledger_track_credit_only(t, task_ledgers.alternate_accounting_compressed);
 938         ledger_track_credit_only(t, task_ledgers.purgeable_volatile);
 939         ledger_track_credit_only(t, task_ledgers.purgeable_nonvolatile);
 940         ledger_track_credit_only(t, task_ledgers.purgeable_volatile_compressed);
 941         ledger_track_credit_only(t, task_ledgers.purgeable_nonvolatile_compressed);
 942
 943         ledger_track_maximum(t, task_ledgers.phys_footprint, 60);
 944 #if MACH_ASSERT
 945         if (pmap_ledgers_panic) {
 946                 ledger_panic_on_negative(t, task_ledgers.phys_footprint);
 947                 ledger_panic_on_negative(t, task_ledgers.page_table);
 948                 ledger_panic_on_negative(t, task_ledgers.internal);
 949                 ledger_panic_on_negative(t, task_ledgers.internal_compressed);
 950                 ledger_panic_on_negative(t, task_ledgers.iokit_mapped);
 951                 ledger_panic_on_negative(t, task_ledgers.alternate_accounting);
 952                 ledger_panic_on_negative(t, task_ledgers.alternate_accounting_compressed);
 953                 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile);
 954                 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile);
 955                 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile_compressed);
 956                 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile_compressed);
 957         }
 958 #endif /* MACH_ASSERT */
 959
 960 #if CONFIG_MEMORYSTATUS
 961         ledger_set_callback(t, task_ledgers.phys_footprint, task_footprint_exceeded, NULL, NULL);
 962 #endif /* CONFIG_MEMORYSTATUS */
 963
 964         ledger_set_callback(t, task_ledgers.interrupt_wakeups,
 965                 task_wakeups_rate_exceeded, NULL, NULL);
 966         ledger_set_callback(t, task_ledgers.physical_writes, task_io_rate_exceeded, (void *)FLAVOR_IO_PHYSICAL_WRITES, NULL);
 967         ledger_set_callback(t, task_ledgers.logical_writes, task_io_rate_exceeded, (void *)FLAVOR_IO_LOGICAL_WRITES, NULL);
 968
 969         ledger_template_complete(t);
 970         task_ledger_template = t;
 971 }
 972
 973 kern_return_t
 974 task_create_internal(
 975         task_t          parent_task,
 976         coalition_t     *parent_coalitions __unused,
 977         boolean_t       inherit_memory,
 978         __unused boolean_t      is_64bit,
 979         uint32_t        t_flags,
 980         uint32_t        t_procflags,
 981         task_t          *child_task)            /* OUT */
 982 {
 983         task_t                  new_task;
 984         vm_shared_region_t      shared_region;
 985         ledger_t                ledger = NULL;
 986
 987         new_task = (task_t) zalloc(task_zone);
 988
 989         if (new_task == TASK_NULL)
 990                 return(KERN_RESOURCE_SHORTAGE);
 991
 992         /* one ref for just being alive; one for our caller */
 993         new_task->ref_count = 2;
 994
 995         /* allocate with active entries */
 996         assert(task_ledger_template != NULL);
 997         if ((ledger = ledger_instantiate(task_ledger_template,
 998                         LEDGER_CREATE_ACTIVE_ENTRIES)) == NULL) {
 999                 zfree(task_zone, new_task);
1000                 return(KERN_RESOURCE_SHORTAGE);
1001         }
1002
1003
1004         new_task->ledger = ledger;
1005
1006 #if defined(CONFIG_SCHED_MULTIQ)
1007         new_task->sched_group = sched_group_create();
1008 #endif
1009
1010         /* if inherit_memory is true, parent_task MUST not be NULL */
1011         if (!(t_flags & TF_CORPSE_FORK) && inherit_memory)
1012                 new_task->map = vm_map_fork(ledger, parent_task->map, 0);
1013         else
1014                 new_task->map = vm_map_create(pmap_create(ledger, 0, is_64bit),
1015                                 (vm_map_offset_t)(VM_MIN_ADDRESS),
1016                                 (vm_map_offset_t)(VM_MAX_ADDRESS), TRUE);
1017
1018         /* Inherit memlock limit from parent */
1019         if (parent_task)
1020                 vm_map_set_user_wire_limit(new_task->map, (vm_size_t)parent_task->map->user_wire_limit);
1021
1022         lck_mtx_init(&new_task->lock, &task_lck_grp, &task_lck_attr);
1023         queue_init(&new_task->threads);
1024         new_task->suspend_count = 0;
1025         new_task->thread_count = 0;
1026         new_task->active_thread_count = 0;
1027         new_task->user_stop_count = 0;
1028         new_task->legacy_stop_count = 0;
1029         new_task->active = TRUE;
1030         new_task->halting = FALSE;
1031         new_task->user_data = NULL;
1032         new_task->priv_flags = 0;
1033         new_task->t_flags = t_flags;
1034         new_task->t_procflags = t_procflags;
1035         new_task->importance = 0;
1036         new_task->crashed_thread_id = 0;
1037         new_task->exec_token = 0;
1038
1039 #if CONFIG_ATM
1040         new_task->atm_context = NULL;
1041 #endif
1042         new_task->bank_context = NULL;
1043
1044 #ifdef MACH_BSD
1045         new_task->bsd_info = NULL;
1046         new_task->corpse_info = NULL;
1047 #endif /* MACH_BSD */
1048
1049 #if CONFIG_MACF
1050         new_task->crash_label = NULL;
1051 #endif
1052
1053 #if CONFIG_MEMORYSTATUS
1054         if (max_task_footprint != 0) {
1055                 ledger_set_limit(ledger, task_ledgers.phys_footprint, max_task_footprint, PHYS_FOOTPRINT_WARNING_LEVEL);
1056         }
1057 #endif /* CONFIG_MEMORYSTATUS */
1058
1059         if (task_wakeups_monitor_rate != 0) {
1060                 uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
1061                 int32_t  rate; // Ignored because of WAKEMON_SET_DEFAULTS
1062                 task_wakeups_monitor_ctl(new_task, &flags, &rate);
1063         }
1064
1065 #if CONFIG_IO_ACCOUNTING
1066         uint32_t flags = IOMON_ENABLE;
1067         task_io_monitor_ctl(new_task, &flags);
1068 #endif /* CONFIG_IO_ACCOUNTING */
1069
1070         machine_task_init(new_task, parent_task, inherit_memory);
1071
1072         new_task->task_debug = NULL;
1073
1074 #if DEVELOPMENT || DEBUG
1075         new_task->task_unnested = FALSE;
1076         new_task->task_disconnected_count = 0;
1077 #endif
1078         queue_init(&new_task->semaphore_list);
1079         new_task->semaphores_owned = 0;
1080
1081         ipc_task_init(new_task, parent_task);
1082
1083         new_task->vtimers = 0;
1084
1085         new_task->shared_region = NULL;
1086
1087         new_task->affinity_space = NULL;
1088
1089         new_task->t_chud = 0;
1090
1091         new_task->pidsuspended = FALSE;
1092         new_task->frozen = FALSE;
1093         new_task->changing_freeze_state = FALSE;
1094         new_task->rusage_cpu_flags = 0;
1095         new_task->rusage_cpu_percentage = 0;
1096         new_task->rusage_cpu_interval = 0;
1097         new_task->rusage_cpu_deadline = 0;
1098         new_task->rusage_cpu_callt = NULL;
1099 #if MACH_ASSERT
1100         new_task->suspends_outstanding = 0;
1101 #endif
1102
1103 #if HYPERVISOR
1104         new_task->hv_task_target = NULL;
1105 #endif /* HYPERVISOR */
1106
1107 #if CONFIG_EMBEDDED
1108         queue_init(&new_task->task_watchers);
1109         new_task->num_taskwatchers  = 0;
1110         new_task->watchapplying  = 0;
1111 #endif /* CONFIG_EMBEDDED */
1112
1113         new_task->mem_notify_reserved = 0;
1114         new_task->memlimit_attrs_reserved = 0;
1115 #if IMPORTANCE_INHERITANCE
1116         new_task->task_imp_base = NULL;
1117 #endif /* IMPORTANCE_INHERITANCE */
1118
1119         new_task->requested_policy = default_task_requested_policy;
1120         new_task->effective_policy = default_task_effective_policy;
1121
1122         if (parent_task != TASK_NULL) {
1123                 new_task->sec_token = parent_task->sec_token;
1124                 new_task->audit_token = parent_task->audit_token;
1125
1126                 /* inherit the parent's shared region */
1127                 shared_region = vm_shared_region_get(parent_task);
1128                 vm_shared_region_set(new_task, shared_region);
1129
1130                 if(task_has_64BitAddr(parent_task))
1131                         task_set_64BitAddr(new_task);
1132                 new_task->all_image_info_addr = parent_task->all_image_info_addr;
1133                 new_task->all_image_info_size = parent_task->all_image_info_size;
1134
1135                 if (inherit_memory && parent_task->affinity_space)
1136                         task_affinity_create(parent_task, new_task);
1137
1138                 new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task);
1139
1140 #if IMPORTANCE_INHERITANCE
1141                 ipc_importance_task_t new_task_imp = IIT_NULL;
1142                 boolean_t inherit_receive = TRUE;
1143
1144                 if (task_is_marked_importance_donor(parent_task)) {
1145                         new_task_imp = ipc_importance_for_task(new_task, FALSE);
1146                         assert(IIT_NULL != new_task_imp);
1147                         ipc_importance_task_mark_donor(new_task_imp, TRUE);
1148                 }
1149 #if CONFIG_EMBEDDED
1150                 /* Embedded only wants to inherit for exec copy task */
1151                 if ((t_procflags & TPF_EXEC_COPY) == 0) {
1152                         inherit_receive = FALSE;
1153                 }
1154 #endif /* CONFIG_EMBEDDED */
1155
1156                 if (inherit_receive) {
1157                         if (task_is_marked_importance_receiver(parent_task)) {
1158                                 if (IIT_NULL == new_task_imp)
1159                                         new_task_imp = ipc_importance_for_task(new_task, FALSE);
1160                                 assert(IIT_NULL != new_task_imp);
1161                                 ipc_importance_task_mark_receiver(new_task_imp, TRUE);
1162                         }
1163                         if (task_is_marked_importance_denap_receiver(parent_task)) {
1164                                 if (IIT_NULL == new_task_imp)
1165                                         new_task_imp = ipc_importance_for_task(new_task, FALSE);
1166                                 assert(IIT_NULL != new_task_imp);
1167                                 ipc_importance_task_mark_denap_receiver(new_task_imp, TRUE);
1168                         }
1169                 }
1170
1171                 if (IIT_NULL != new_task_imp) {
1172                         assert(new_task->task_imp_base == new_task_imp);
1173                         ipc_importance_task_release(new_task_imp);
1174                 }
1175 #endif /* IMPORTANCE_INHERITANCE */
1176
1177                 new_task->priority = BASEPRI_DEFAULT;
1178                 new_task->max_priority = MAXPRI_USER;
1179
1180                 task_policy_create(new_task, parent_task);
1181         } else {
1182                 new_task->sec_token = KERNEL_SECURITY_TOKEN;
1183                 new_task->audit_token = KERNEL_AUDIT_TOKEN;
1184 #ifdef __LP64__
1185                 if(is_64bit)
1186                         task_set_64BitAddr(new_task);
1187 #endif
1188                 new_task->all_image_info_addr = (mach_vm_address_t)0;
1189                 new_task->all_image_info_size = (mach_vm_size_t)0;
1190
1191                 new_task->pset_hint = PROCESSOR_SET_NULL;
1192
1193                 if (kernel_task == TASK_NULL) {
1194                         new_task->priority = BASEPRI_KERNEL;
1195                         new_task->max_priority = MAXPRI_KERNEL;
1196                 } else {
1197                         new_task->priority = BASEPRI_DEFAULT;
1198                         new_task->max_priority = MAXPRI_USER;
1199                 }
1200         }
1201
1202         bzero(new_task->coalition, sizeof(new_task->coalition));
1203         for (int i = 0; i < COALITION_NUM_TYPES; i++)
1204                 queue_chain_init(new_task->task_coalition[i]);
1205
1206         /* Allocate I/O Statistics */
1207         new_task->task_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info));
1208         assert(new_task->task_io_stats != NULL);
1209         bzero(new_task->task_io_stats, sizeof(struct io_stat_info));
1210
1211         bzero(&(new_task->cpu_time_qos_stats), sizeof(struct _cpu_time_qos_stats));
1212
1213         bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics));
1214
1215         /* Copy resource acc. info from Parent for Corpe Forked task. */
1216         if (parent_task != NULL && (t_flags & TF_CORPSE_FORK)) {
1217                 task_rollup_accounting_info(new_task, parent_task);
1218         } else {
1219                 /* Initialize to zero for standard fork/spawn case */
1220                 new_task->total_user_time = 0;
1221                 new_task->total_system_time = 0;
1222                 new_task->total_ptime = 0;
1223                 new_task->faults = 0;
1224                 new_task->pageins = 0;
1225                 new_task->cow_faults = 0;
1226                 new_task->messages_sent = 0;
1227                 new_task->messages_received = 0;
1228                 new_task->syscalls_mach = 0;
1229                 new_task->syscalls_unix = 0;
1230                 new_task->c_switch = 0;
1231                 new_task->p_switch = 0;
1232                 new_task->ps_switch = 0;
1233                 new_task->low_mem_notified_warn = 0;
1234                 new_task->low_mem_notified_critical = 0;
1235                 new_task->purged_memory_warn = 0;
1236                 new_task->purged_memory_critical = 0;
1237                 new_task->low_mem_privileged_listener = 0;
1238                 new_task->memlimit_is_active = 0;
1239                 new_task->memlimit_is_fatal = 0;
1240                 new_task->memlimit_active_exc_resource = 0;
1241                 new_task->memlimit_inactive_exc_resource = 0;
1242                 new_task->task_timer_wakeups_bin_1 = 0;
1243                 new_task->task_timer_wakeups_bin_2 = 0;
1244                 new_task->task_gpu_ns = 0;
1245                 new_task->task_immediate_writes = 0;
1246                 new_task->task_deferred_writes = 0;
1247                 new_task->task_invalidated_writes = 0;
1248                 new_task->task_metadata_writes = 0;
1249                 new_task->task_energy = 0;
1250 #if MONOTONIC
1251                 memset(&new_task->task_monotonic, 0, sizeof(new_task->task_monotonic));
1252 #endif /* MONOTONIC */
1253         }
1254
1255
1256 #if CONFIG_COALITIONS
1257         if (!(t_flags & TF_CORPSE_FORK)) {
1258                 /* TODO: there is no graceful failure path here... */
1259                 if (parent_coalitions && parent_coalitions[COALITION_TYPE_RESOURCE]) {
1260                         coalitions_adopt_task(parent_coalitions, new_task);
1261                 } else if (parent_task && parent_task->coalition[COALITION_TYPE_RESOURCE]) {
1262                         /*
1263                          * all tasks at least have a resource coalition, so
1264                          * if the parent has one then inherit all coalitions
1265                          * the parent is a part of
1266                          */
1267                         coalitions_adopt_task(parent_task->coalition, new_task);
1268                 } else {
1269                         /* TODO: assert that new_task will be PID 1 (launchd) */
1270                         coalitions_adopt_init_task(new_task);
1271                 }
1272                 /*
1273                  * on exec, we need to transfer the coalition roles from the
1274                  * parent task to the exec copy task.
1275                  */
1276                 if (parent_task && (t_procflags & TPF_EXEC_COPY)) {
1277                         int coal_roles[COALITION_NUM_TYPES];
1278                         task_coalition_roles(parent_task, coal_roles);
1279                         (void)coalitions_set_roles(new_task->coalition, new_task, coal_roles);
1280                 }
1281         } else {
1282                 coalitions_adopt_corpse_task(new_task);
1283         }
1284
1285         if (new_task->coalition[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
1286                 panic("created task is not a member of a resource coalition");
1287         }
1288 #endif /* CONFIG_COALITIONS */
1289
1290         new_task->dispatchqueue_offset = 0;
1291         if (parent_task != NULL) {
1292                 new_task->dispatchqueue_offset = parent_task->dispatchqueue_offset;
1293         }
1294
1295         if (vm_backing_store_low && parent_task != NULL)
1296                 new_task->priv_flags |= (parent_task->priv_flags&VM_BACKING_STORE_PRIV);
1297
1298         new_task->task_volatile_objects = 0;
1299         new_task->task_nonvolatile_objects = 0;
1300         new_task->task_purgeable_disowning = FALSE;
1301         new_task->task_purgeable_disowned = FALSE;
1302
1303 #if CONFIG_SECLUDED_MEMORY
1304         new_task->task_can_use_secluded_mem = FALSE;
1305         new_task->task_could_use_secluded_mem = FALSE;
1306         new_task->task_could_also_use_secluded_mem = FALSE;
1307 #endif /* CONFIG_SECLUDED_MEMORY */
1308
1309         queue_init(&new_task->io_user_clients);
1310
1311         ipc_task_enable(new_task);
1312
1313         lck_mtx_lock(&tasks_threads_lock);
1314         queue_enter(&tasks, new_task, task_t, tasks);
1315         tasks_count++;
1316         if (tasks_suspend_state) {
1317             task_suspend_internal(new_task);
1318         }
1319         lck_mtx_unlock(&tasks_threads_lock);
1320
1321         *child_task = new_task;
1322         return(KERN_SUCCESS);
1323 }
1324
1325 /*
1326  *      task_rollup_accounting_info
1327  *
1328  *      Roll up accounting stats. Used to rollup stats
1329  *      for exec copy task and corpse fork.
1330  */
1331 void
1332 task_rollup_accounting_info(task_t to_task, task_t from_task)
1333 {
1334         assert(from_task != to_task);
1335
1336         to_task->total_user_time = from_task->total_user_time;
1337         to_task->total_system_time = from_task->total_system_time;
1338         to_task->total_ptime = from_task->total_ptime;
1339         to_task->faults = from_task->faults;
1340         to_task->pageins = from_task->pageins;
1341         to_task->cow_faults = from_task->cow_faults;
1342         to_task->messages_sent = from_task->messages_sent;
1343         to_task->messages_received = from_task->messages_received;
1344         to_task->syscalls_mach = from_task->syscalls_mach;
1345         to_task->syscalls_unix = from_task->syscalls_unix;
1346         to_task->c_switch = from_task->c_switch;
1347         to_task->p_switch = from_task->p_switch;
1348         to_task->ps_switch = from_task->ps_switch;
1349         to_task->extmod_statistics = from_task->extmod_statistics;
1350         to_task->low_mem_notified_warn = from_task->low_mem_notified_warn;
1351         to_task->low_mem_notified_critical = from_task->low_mem_notified_critical;
1352         to_task->purged_memory_warn = from_task->purged_memory_warn;
1353         to_task->purged_memory_critical = from_task->purged_memory_critical;
1354         to_task->low_mem_privileged_listener = from_task->low_mem_privileged_listener;
1355         *to_task->task_io_stats = *from_task->task_io_stats;
1356         to_task->cpu_time_qos_stats = from_task->cpu_time_qos_stats;
1357         to_task->task_timer_wakeups_bin_1 = from_task->task_timer_wakeups_bin_1;
1358         to_task->task_timer_wakeups_bin_2 = from_task->task_timer_wakeups_bin_2;
1359         to_task->task_gpu_ns = from_task->task_gpu_ns;
1360         to_task->task_immediate_writes = from_task->task_immediate_writes;
1361         to_task->task_deferred_writes = from_task->task_deferred_writes;
1362         to_task->task_invalidated_writes = from_task->task_invalidated_writes;
1363         to_task->task_metadata_writes = from_task->task_metadata_writes;
1364         to_task->task_energy = from_task->task_energy;
1365
1366         /* Skip ledger roll up for memory accounting entries */
1367         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time);
1368         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.platform_idle_wakeups);
1369         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.interrupt_wakeups);
1370 #if CONFIG_SCHED_SFI
1371         for (sfi_class_id_t class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
1372                 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.sfi_wait_times[class_id]);
1373         }
1374 #endif
1375         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_me);
1376         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_others);
1377         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.physical_writes);
1378         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.logical_writes);
1379         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_me);
1380         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_others);
1381 }
1382
/*
 * NOTE(review): not referenced in the part of this file under review;
 * presumably a count of tasks whose importance was dropped -- confirm
 * against its users before relying on this description.
 */
int task_dropped_imp_count = 0;
1384
1385 /*
1386  *      task_deallocate:
1387  *
1388  *      Drop a reference on a task.
1389  */
1390 void
1391 task_deallocate(
1392         task_t          task)
1393 {
1394         ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups;
1395         uint32_t refs;
1396
1397         if (task == TASK_NULL)
1398             return;
1399
1400         refs = task_deallocate_internal(task);
1401
1402 #if IMPORTANCE_INHERITANCE
1403         if (refs > 1)
1404                 return;
1405
1406         atomic_load_explicit(&task->ref_count, memory_order_acquire);
1407
1408         if (refs == 1) {
1409                 /*
1410                  * If last ref potentially comes from the task's importance,
1411                  * disconnect it.  But more task refs may be added before
1412                  * that completes, so wait for the reference to go to zero
1413                  * naturually (it may happen on a recursive task_deallocate()
1414                  * from the ipc_importance_disconnect_task() call).
1415                  */
1416                 if (IIT_NULL != task->task_imp_base)
1417                         ipc_importance_disconnect_task(task);
1418                 return;
1419         }
1420 #else
1421         if (refs > 0)
1422                 return;
1423
1424         atomic_load_explicit(&task->ref_count, memory_order_acquire);
1425
1426 #endif /* IMPORTANCE_INHERITANCE */
1427
1428         lck_mtx_lock(&tasks_threads_lock);
1429         queue_remove(&terminated_tasks, task, task_t, tasks);
1430         terminated_tasks_count--;
1431         lck_mtx_unlock(&tasks_threads_lock);
1432
1433         /*
1434          * remove the reference on atm descriptor
1435          */
1436         task_atm_reset(task);
1437
1438         /*
1439          * remove the reference on bank context
1440          */
1441         task_bank_reset(task);
1442
1443         if (task->task_io_stats)
1444                 kfree(task->task_io_stats, sizeof(struct io_stat_info));
1445
1446         /*
1447          *      Give the machine dependent code a chance
1448          *      to perform cleanup before ripping apart
1449          *      the task.
1450          */
1451         machine_task_terminate(task);
1452
1453         ipc_task_terminate(task);
1454
1455         /* let iokit know */
1456         iokit_task_terminate(task);
1457
1458         if (task->affinity_space)
1459                 task_affinity_deallocate(task);
1460
1461 #if MACH_ASSERT
1462         if (task->ledger != NULL &&
1463             task->map != NULL &&
1464             task->map->pmap != NULL &&
1465             task->map->pmap->ledger != NULL) {
1466                 assert(task->ledger == task->map->pmap->ledger);
1467         }
1468 #endif /* MACH_ASSERT */
1469
1470         vm_purgeable_disown(task);
1471         assert(task->task_purgeable_disowned);
1472         if (task->task_volatile_objects != 0 ||
1473             task->task_nonvolatile_objects != 0) {
1474                 panic("task_deallocate(%p): "
1475                       "volatile_objects=%d nonvolatile_objects=%d\n",
1476                       task,
1477                       task->task_volatile_objects,
1478                       task->task_nonvolatile_objects);
1479         }
1480
1481         vm_map_deallocate(task->map);
1482         is_release(task->itk_space);
1483
1484         ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
1485                            &interrupt_wakeups, &debit);
1486         ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
1487                            &platform_idle_wakeups, &debit);
1488
1489 #if defined(CONFIG_SCHED_MULTIQ)
1490         sched_group_destroy(task->sched_group);
1491 #endif
1492
1493         /* Accumulate statistics for dead tasks */
1494         lck_spin_lock(&dead_task_statistics_lock);
1495         dead_task_statistics.total_user_time += task->total_user_time;
1496         dead_task_statistics.total_system_time += task->total_system_time;
1497
1498         dead_task_statistics.task_interrupt_wakeups += interrupt_wakeups;
1499         dead_task_statistics.task_platform_idle_wakeups += platform_idle_wakeups;
1500
1501         dead_task_statistics.task_timer_wakeups_bin_1 += task->task_timer_wakeups_bin_1;
1502         dead_task_statistics.task_timer_wakeups_bin_2 += task->task_timer_wakeups_bin_2;
1503         dead_task_statistics.total_ptime += task->total_ptime;
1504         dead_task_statistics.total_pset_switches += task->ps_switch;
1505         dead_task_statistics.task_gpu_ns += task->task_gpu_ns;
1506         dead_task_statistics.task_energy += task->task_energy;
1507
1508         lck_spin_unlock(&dead_task_statistics_lock);
1509         lck_mtx_destroy(&task->lock, &task_lck_grp);
1510
1511         if (!ledger_get_entries(task->ledger, task_ledgers.tkm_private, &credit,
1512             &debit)) {
1513                 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc);
1514                 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_private.free);
1515         }
1516         if (!ledger_get_entries(task->ledger, task_ledgers.tkm_shared, &credit,
1517             &debit)) {
1518                 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_shared.alloc);
1519                 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_shared.free);
1520         }
1521         ledger_dereference(task->ledger);
1522
1523 #if TASK_REFERENCE_LEAK_DEBUG
1524         btlog_remove_entries_for_element(task_ref_btlog, task);
1525 #endif
1526
1527 #if CONFIG_COALITIONS
1528         task_release_coalitions(task);
1529 #endif /* CONFIG_COALITIONS */
1530
1531         bzero(task->coalition, sizeof(task->coalition));
1532
1533 #if MACH_BSD
1534         /* clean up collected information since last reference to task is gone */
1535         if (task->corpse_info) {
1536                 void *corpse_info_kernel = kcdata_memory_get_begin_addr(task->corpse_info);
1537                 task_crashinfo_destroy(task->corpse_info);
1538                 task->corpse_info = NULL;
1539                 if (corpse_info_kernel) {
1540                         kfree(corpse_info_kernel, CORPSEINFO_ALLOCATION_SIZE);
1541                 }
1542         }
1543 #endif
1544
1545 #if CONFIG_MACF
1546         if (task->crash_label) {
1547                 mac_exc_free_label(task->crash_label);
1548                 task->crash_label = NULL;
1549         }
1550 #endif
1551
1552         zfree(task_zone, task);
1553 }
1554
1555 /*
1556  *      task_name_deallocate:
1557  *
1558  *      Drop a reference on a task name.
1559  */
1560 void
1561 task_name_deallocate(
1562         task_name_t             task_name)
1563 {
1564         return(task_deallocate((task_t)task_name));
1565 }
1566
1567 /*
1568  *      task_inspect_deallocate:
1569  *
1570  *      Drop a task inspection reference.
1571  */
1572 void
1573 task_inspect_deallocate(
1574         task_inspect_t          task_inspect)
1575 {
1576         return(task_deallocate((task_t)task_inspect));
1577 }
1578
1579 /*
1580  *      task_suspension_token_deallocate:
1581  *
1582  *      Drop a reference on a task suspension token.
1583  */
1584 void
1585 task_suspension_token_deallocate(
1586         task_suspension_token_t         token)
1587 {
1588         return(task_deallocate((task_t)token));
1589 }
1590
1591
1592 /*
1593  * task_collect_crash_info:
1594  *
1595  * collect crash info from bsd and mach based data
1596  */
1597 kern_return_t
1598 task_collect_crash_info(
1599         task_t task,
1600 #ifdef CONFIG_MACF
1601         struct label *crash_label,
1602 #endif
1603         int is_corpse_fork)
1604 {
1605         kern_return_t kr = KERN_SUCCESS;
1606
1607         kcdata_descriptor_t crash_data = NULL;
1608         kcdata_descriptor_t crash_data_release = NULL;
1609         mach_msg_type_number_t size = CORPSEINFO_ALLOCATION_SIZE;
1610         mach_vm_offset_t crash_data_ptr = 0;
1611         void *crash_data_kernel = NULL;
1612         void *crash_data_kernel_release = NULL;
1613 #if CONFIG_MACF
1614         struct label *label, *free_label;
1615 #endif
1616
1617         if (!corpses_enabled()) {
1618                 return KERN_NOT_SUPPORTED;
1619         }
1620
1621 #if CONFIG_MACF
1622         free_label = label = mac_exc_create_label();
1623 #endif
1624
1625         task_lock(task);
1626
1627         assert(is_corpse_fork || task->bsd_info != NULL);
1628         if (task->corpse_info == NULL && (is_corpse_fork || task->bsd_info != NULL)) {
1629 #if CONFIG_MACF
1630                 /* Set the crash label, used by the exception delivery mac hook */
1631                 free_label = task->crash_label; // Most likely NULL.
1632                 task->crash_label = label;
1633                 mac_exc_update_task_crash_label(task, crash_label);
1634 #endif
1635                 task_unlock(task);
1636
1637                 crash_data_kernel = (void *) kalloc(CORPSEINFO_ALLOCATION_SIZE);
1638                 if (crash_data_kernel == NULL) {
1639                         kr = KERN_RESOURCE_SHORTAGE;
1640                         goto out_no_lock;
1641                 }
1642                 bzero(crash_data_kernel, CORPSEINFO_ALLOCATION_SIZE);
1643                 crash_data_ptr = (mach_vm_offset_t) crash_data_kernel;
1644
1645                 /* Do not get a corpse ref for corpse fork */
1646                 crash_data = task_crashinfo_alloc_init((mach_vm_address_t)crash_data_ptr, size,
1647                                 is_corpse_fork ? 0 : CORPSE_CRASHINFO_HAS_REF,
1648                                 KCFLAG_USE_MEMCOPY);
1649                 if (crash_data) {
1650                         task_lock(task);
1651                         crash_data_release = task->corpse_info;
1652                         crash_data_kernel_release = kcdata_memory_get_begin_addr(crash_data_release);
1653                         task->corpse_info = crash_data;
1654
1655                         task_unlock(task);
1656                         kr = KERN_SUCCESS;
1657                 } else {
1658                         kfree(crash_data_kernel, CORPSEINFO_ALLOCATION_SIZE);
1659                         kr = KERN_FAILURE;
1660                 }
1661
1662                 if (crash_data_release != NULL) {
1663                         task_crashinfo_destroy(crash_data_release);
1664                 }
1665                 if (crash_data_kernel_release != NULL) {
1666                         kfree(crash_data_kernel_release, CORPSEINFO_ALLOCATION_SIZE);
1667                 }
1668         } else {
1669                 task_unlock(task);
1670         }
1671
1672 out_no_lock:
1673 #if CONFIG_MACF
1674         if (free_label != NULL) {
1675                 mac_exc_free_label(free_label);
1676         }
1677 #endif
1678         return kr;
1679 }
1680
1681 /*
1682  * task_deliver_crash_notification:
1683  *
1684  * Makes outcall to registered host port for a corpse.
1685  */
1686 kern_return_t
1687 task_deliver_crash_notification(
1688         task_t task,
1689         thread_t thread,
1690         exception_type_t etype,
1691         mach_exception_subcode_t subcode)
1692 {
1693         kcdata_descriptor_t crash_info = task->corpse_info;
1694         thread_t th_iter = NULL;
1695         kern_return_t kr = KERN_SUCCESS;
1696         wait_interrupt_t wsave;
1697         mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
1698         ipc_port_t task_port, old_notify;
1699
1700         if (crash_info == NULL)
1701                 return KERN_FAILURE;
1702
1703         task_lock(task);
1704         if (task_is_a_corpse_fork(task)) {
1705                 /* Populate code with EXC_{RESOURCE,GUARD} for corpse fork */
1706                 code[0] = etype;
1707                 code[1] = subcode;
1708         } else {
1709                 /* Populate code with EXC_CRASH for corpses */
1710                 code[0] = EXC_CRASH;
1711                 code[1] = 0;
1712                 /* Update the code[1] if the boot-arg corpse_for_fatal_memkill is set */
1713                 if (corpse_for_fatal_memkill) {
1714                         code[1] = subcode;
1715                 }
1716         }
1717
1718         queue_iterate(&task->threads, th_iter, thread_t, task_threads)
1719         {
1720                 if (th_iter->corpse_dup == FALSE) {
1721                         ipc_thread_reset(th_iter);
1722                 }
1723         }
1724         task_unlock(task);
1725
1726         /* Arm the no-sender notification for taskport */
1727         task_reference(task);
1728         task_port = convert_task_to_port(task);
1729         ip_lock(task_port);
1730         assert(ip_active(task_port));
1731         ipc_port_nsrequest(task_port, task_port->ip_mscount, ipc_port_make_sonce_locked(task_port), &old_notify);
1732         /* port unlocked */
1733         assert(IP_NULL == old_notify);
1734
1735         wsave = thread_interrupt_level(THREAD_UNINT);
1736         kr = exception_triage_thread(EXC_CORPSE_NOTIFY, code, EXCEPTION_CODE_MAX, thread);
1737         if (kr != KERN_SUCCESS) {
1738                 printf("Failed to send exception EXC_CORPSE_NOTIFY. error code: %d for pid %d\n", kr, task_pid(task));
1739         }
1740
1741         (void)thread_interrupt_level(wsave);
1742
1743         /*
1744          * Drop the send right on task port, will fire the
1745          * no-sender notification if exception deliver failed.
1746          */
1747         ipc_port_release_send(task_port);
1748         return kr;
1749 }
1750
1751 /*
1752  *      task_terminate:
1753  *
1754  *      Terminate the specified task.  See comments on thread_terminate
1755  *      (kern/thread.c) about problems with terminating the "current task."
1756  */
1757
1758 kern_return_t
1759 task_terminate(
1760         task_t          task)
1761 {
1762         if (task == TASK_NULL)
1763                 return (KERN_INVALID_ARGUMENT);
1764
1765         if (task->bsd_info)
1766                 return (KERN_FAILURE);
1767
1768         return (task_terminate_internal(task));
1769 }
1770
1771 #if MACH_ASSERT
1772 extern int proc_pid(struct proc *);
1773 extern void proc_name_kdp(task_t t, char *buf, int size);
1774 #endif /* MACH_ASSERT */
1775
1776 #define VM_MAP_PARTIAL_REAP 0x54  /* 0x150 */
1777 static void
1778 __unused task_partial_reap(task_t task, __unused int pid)
1779 {
1780         unsigned int    reclaimed_resident = 0;
1781         unsigned int    reclaimed_compressed = 0;
1782         uint64_t        task_page_count;
1783
1784         task_page_count = (get_task_phys_footprint(task) / PAGE_SIZE_64);
1785
1786         KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_START),
1787                               pid, task_page_count, 0, 0, 0);
1788
1789         vm_map_partial_reap(task->map, &reclaimed_resident, &reclaimed_compressed);
1790
1791         KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_END),
1792                               pid, reclaimed_resident, reclaimed_compressed, 0, 0);
1793 }
1794
1795 kern_return_t
1796 task_mark_corpse(task_t task)
1797 {
1798         kern_return_t kr = KERN_SUCCESS;
1799         thread_t self_thread;
1800         (void) self_thread;
1801         wait_interrupt_t wsave;
1802 #if CONFIG_MACF
1803         struct label *crash_label = NULL;
1804 #endif
1805
1806         assert(task != kernel_task);
1807         assert(task == current_task());
1808         assert(!task_is_a_corpse(task));
1809
1810 #if CONFIG_MACF
1811         crash_label = mac_exc_create_label_for_proc((struct proc*)task->bsd_info);
1812 #endif
1813
1814         kr = task_collect_crash_info(task,
1815 #if CONFIG_MACF
1816                                                                  crash_label,
1817 #endif
1818                                                                  FALSE);
1819         if (kr != KERN_SUCCESS) {
1820                 goto out;
1821         }
1822
1823         self_thread = current_thread();
1824
1825         wsave = thread_interrupt_level(THREAD_UNINT);
1826         task_lock(task);
1827
1828         task_set_corpse_pending_report(task);
1829         task_set_corpse(task);
1830         task->crashed_thread_id = thread_tid(self_thread);
1831
1832         kr = task_start_halt_locked(task, TRUE);
1833         assert(kr == KERN_SUCCESS);
1834
1835         ipc_task_reset(task);
1836         /* Remove the naked send right for task port, needed to arm no sender notification */
1837         task_set_special_port(task, TASK_KERNEL_PORT, IPC_PORT_NULL);
1838         ipc_task_enable(task);
1839
1840         task_unlock(task);
1841         /* terminate the ipc space */
1842         ipc_space_terminate(task->itk_space);
1843
1844         /* Add it to global corpse task list */
1845         task_add_to_corpse_task_list(task);
1846
1847         task_start_halt(task);
1848         thread_terminate_internal(self_thread);
1849
1850         (void) thread_interrupt_level(wsave);
1851         assert(task->halting == TRUE);
1852
1853 out:
1854 #if CONFIG_MACF
1855         mac_exc_free_label(crash_label);
1856 #endif
1857         return kr;
1858 }
1859
1860 /*
1861  *      task_clear_corpse
1862  *
1863  *      Clears the corpse pending bit on task.
1864  *      Removes inspection bit on the threads.
1865  */
1866 void
1867 task_clear_corpse(task_t task)
1868 {
1869         thread_t th_iter = NULL;
1870
1871         task_lock(task);
1872         queue_iterate(&task->threads, th_iter, thread_t, task_threads)
1873         {
1874                 thread_mtx_lock(th_iter);
1875                 th_iter->inspection = FALSE;
1876                 thread_mtx_unlock(th_iter);
1877         }
1878
1879         thread_terminate_crashed_threads();
1880         /* remove the pending corpse report flag */
1881         task_clear_corpse_pending_report(task);
1882
1883         task_unlock(task);
1884 }
1885
1886 /*
1887  *      task_port_notify
1888  *
1889  *      Called whenever the Mach port system detects no-senders on
1890  *      the task port of a corpse.
1891  *      Each notification that comes in should terminate the task (corpse).
1892  */
1893 void
1894 task_port_notify(mach_msg_header_t *msg)
1895 {
1896         mach_no_senders_notification_t *notification = (void *)msg;
1897         ipc_port_t port = notification->not_header.msgh_remote_port;
1898         task_t task;
1899
1900         assert(ip_active(port));
1901         assert(IKOT_TASK == ip_kotype(port));
1902         task = (task_t) port->ip_kobject;
1903
1904         assert(task_is_a_corpse(task));
1905
1906         /* Remove the task from global corpse task list */
1907         task_remove_from_corpse_task_list(task);
1908
1909         task_clear_corpse(task);
1910         task_terminate_internal(task);
1911 }
1912
1913 /*
1914  *      task_wait_till_threads_terminate_locked
1915  *
1916  *      Wait till all the threads in the task are terminated.
1917  *      Might release the task lock and re-acquire it.
1918  */
1919 void
1920 task_wait_till_threads_terminate_locked(task_t task)
1921 {
1922         /* wait for all the threads in the task to terminate */
1923         while (task->active_thread_count != 0) {
1924                 assert_wait((event_t)&task->active_thread_count, THREAD_UNINT);
1925                 task_unlock(task);
1926                 thread_block(THREAD_CONTINUE_NULL);
1927
1928                 task_lock(task);
1929         }
1930 }
1931
1932 /*
1933  *      task_duplicate_map_and_threads
1934  *
1935  *      Copy vmmap of source task.
1936  *      Copy active threads from source task to destination task.
1937  *      Source task would be suspended during the copy.
1938  */
1939 kern_return_t
1940 task_duplicate_map_and_threads(
1941                 task_t task,
1942                 void *p,
1943                 task_t new_task,
1944                 thread_t *thread_ret,
1945                 uint64_t **udata_buffer,
1946                 int *size,
1947                 int *num_udata)
1948 {
1949         kern_return_t kr = KERN_SUCCESS;
1950         int active;
1951         thread_t thread, self, thread_return = THREAD_NULL;
1952         thread_t new_thread = THREAD_NULL;
1953         thread_t *thread_array;
1954         uint32_t active_thread_count = 0, array_count = 0, i;
1955         vm_map_t oldmap;
1956         uint64_t *buffer = NULL;
1957         int buf_size = 0;
1958         int est_knotes = 0, num_knotes = 0;
1959
1960         self = current_thread();
1961
1962         /*
1963          * Suspend the task to copy thread state, use the internal
1964          * variant so that no user-space process can resume
1965          * the task from under us
1966          */
1967         kr = task_suspend_internal(task);
1968         if (kr != KERN_SUCCESS) {
1969                 return kr;
1970         }
1971
1972         if (task->map->disable_vmentry_reuse == TRUE) {
1973                 /*
1974                  * Quite likely GuardMalloc (or some debugging tool)
1975                  * is being used on this task. And it has gone through
1976                  * its limit. Making a corpse will likely encounter
1977                  * a lot of VM entries that will need COW.
1978                  *
1979                  * Skip it.
1980                  */
1981                 task_resume_internal(task);
1982                 return KERN_FAILURE;
1983         }
1984
1985         /* Check with VM if vm_map_fork is allowed for this task */
1986         if (task_allowed_vm_map_fork(task)) {
1987
1988                 /* Setup new task's vmmap, switch from parent task's map to it COW map */
1989                 oldmap = new_task->map;
1990                 new_task->map = vm_map_fork(new_task->ledger,
1991                                             task->map,
1992                                             (VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
1993                                              VM_MAP_FORK_PRESERVE_PURGEABLE));
1994                 vm_map_deallocate(oldmap);
1995
1996                 /* Get all the udata pointers from kqueue */
1997                 est_knotes = kevent_proc_copy_uptrs(p, NULL, 0);
1998                 if (est_knotes > 0) {
1999                         buf_size = (est_knotes + 32) * sizeof(uint64_t);
2000                         buffer = (uint64_t *) kalloc(buf_size);
2001                         num_knotes = kevent_proc_copy_uptrs(p, buffer, buf_size);
2002                         if (num_knotes > est_knotes + 32) {
2003                                 num_knotes = est_knotes + 32;
2004                         }
2005                 }
2006         }
2007
2008         active_thread_count = task->active_thread_count;
2009         if (active_thread_count == 0) {
2010                 if (buffer != NULL) {
2011                         kfree(buffer, buf_size);
2012                 }
2013                 task_resume_internal(task);
2014                 return KERN_FAILURE;
2015         }
2016
2017         thread_array = (thread_t *) kalloc(sizeof(thread_t) * active_thread_count);
2018
2019         /* Iterate all the threads and drop the task lock before calling thread_create_with_continuation */
2020         task_lock(task);
2021         queue_iterate(&task->threads, thread, thread_t, task_threads) {
2022                 /* Skip inactive threads */
2023                 active = thread->active;
2024                 if (!active) {
2025                         continue;
2026                 }
2027
2028                 if (array_count >= active_thread_count) {
2029                         break;
2030                 }
2031
2032                 thread_array[array_count++] = thread;
2033                 thread_reference(thread);
2034         }
2035         task_unlock(task);
2036
2037         for (i = 0; i < array_count; i++) {
2038
2039                 kr = thread_create_with_continuation(new_task, &new_thread, (thread_continue_t)thread_corpse_continue);
2040                 if (kr != KERN_SUCCESS) {
2041                         break;
2042                 }
2043
2044                 /* Equivalent of current thread in corpse */
2045                 if (thread_array[i] == self) {
2046                         thread_return = new_thread;
2047                         new_task->crashed_thread_id = thread_tid(new_thread);
2048                 } else {
2049                         /* drop the extra ref returned by thread_create_with_continuation */
2050                         thread_deallocate(new_thread);
2051                 }
2052
2053                 kr = thread_dup2(thread_array[i], new_thread);
2054                 if (kr != KERN_SUCCESS) {
2055                         thread_mtx_lock(new_thread);
2056                         new_thread->corpse_dup = TRUE;
2057                         thread_mtx_unlock(new_thread);
2058                         continue;
2059                 }
2060
2061                 /* Copy thread name */
2062                 bsd_copythreadname(new_thread->uthread, thread_array[i]->uthread);
2063                 thread_copy_resource_info(new_thread, thread_array[i]);
2064         }
2065
2066         task_resume_internal(task);
2067
2068         for (i = 0; i < array_count; i++) {
2069                 thread_deallocate(thread_array[i]);
2070         }
2071         kfree(thread_array, sizeof(thread_t) * active_thread_count);
2072
2073         if (kr == KERN_SUCCESS) {
2074                 *thread_ret = thread_return;
2075                 *udata_buffer = buffer;
2076                 *size = buf_size;
2077                 *num_udata = num_knotes;
2078         } else {
2079                 if (thread_return != THREAD_NULL) {
2080                         thread_deallocate(thread_return);
2081                 }
2082                 if (buffer != NULL) {
2083                         kfree(buffer, buf_size);
2084                 }
2085         }
2086
2087         return kr;
2088 }
2089
2090 /*
2091  * Place holder function to be filled by VM to return
2092  * TRUE if vm_map_fork is allowed on the given task.
2093  */
2094 boolean_t
2095 task_allowed_vm_map_fork(task_t task __unused)
2096 {
2097         return memorystatus_allowed_vm_map_fork(task);
2098 }
2099
2100 #if CONFIG_SECLUDED_MEMORY
2101 extern void task_set_can_use_secluded_mem_locked(
2102         task_t          task,
2103         boolean_t       can_use_secluded_mem);
2104 #endif /* CONFIG_SECLUDED_MEMORY */
2105
2106 kern_return_t
2107 task_terminate_internal(
2108         task_t                  task)
2109 {
2110         thread_t                        thread, self;
2111         task_t                          self_task;
2112         boolean_t                       interrupt_save;
2113         int                             pid = 0;
2114
2115         assert(task != kernel_task);
2116
2117         self = current_thread();
2118         self_task = self->task;
2119
2120         /*
2121          *      Get the task locked and make sure that we are not racing
2122          *      with someone else trying to terminate us.
2123          */
2124         if (task == self_task)
2125                 task_lock(task);
2126         else
2127         if (task < self_task) {
2128                 task_lock(task);
2129                 task_lock(self_task);
2130         }
2131         else {
2132                 task_lock(self_task);
2133                 task_lock(task);
2134         }
2135
2136 #if CONFIG_SECLUDED_MEMORY
2137         if (task->task_can_use_secluded_mem) {
2138                 task_set_can_use_secluded_mem_locked(task, FALSE);
2139         }
2140         task->task_could_use_secluded_mem = FALSE;
2141         task->task_could_also_use_secluded_mem = FALSE;
2142 #endif /* CONFIG_SECLUDED_MEMORY */
2143
2144         if (!task->active) {
2145                 /*
2146                  *      Task is already being terminated.
2147                  *      Just return an error. If we are dying, this will
2148                  *      just get us to our AST special handler and that
2149                  *      will get us to finalize the termination of ourselves.
2150                  */
2151                 task_unlock(task);
2152                 if (self_task != task)
2153                         task_unlock(self_task);
2154
2155                 return (KERN_FAILURE);
2156         }
2157
2158         if (task_corpse_pending_report(task)) {
2159                 /*
2160                  *      Task is marked for reporting as corpse.
2161                  *      Just return an error. This will
2162                  *      just get us to our AST special handler and that
2163                  *      will get us to finish the path to death
2164                  */
2165                 task_unlock(task);
2166                 if (self_task != task)
2167                         task_unlock(self_task);
2168
2169                 return (KERN_FAILURE);
2170         }
2171
2172         if (self_task != task)
2173                 task_unlock(self_task);
2174
2175         /*
2176          * Make sure the current thread does not get aborted out of
2177          * the waits inside these operations.
2178          */
2179         interrupt_save = thread_interrupt_level(THREAD_UNINT);
2180
2181         /*
2182          *      Indicate that we want all the threads to stop executing
2183          *      at user space by holding the task (we would have held
2184          *      each thread independently in thread_terminate_internal -
2185          *      but this way we may be more likely to already find it
2186          *      held there).  Mark the task inactive, and prevent
2187          *      further task operations via the task port.
2188          */
2189         task_hold_locked(task);
2190         task->active = FALSE;
2191         ipc_task_disable(task);
2192
2193 #if CONFIG_TELEMETRY
2194         /*
2195          * Notify telemetry that this task is going away.
2196          */
2197         telemetry_task_ctl_locked(task, TF_TELEMETRY, 0);
2198 #endif
2199
2200         /*
2201          *      Terminate each thread in the task.
2202          */
2203         queue_iterate(&task->threads, thread, thread_t, task_threads) {
2204                         thread_terminate_internal(thread);
2205         }
2206
2207 #ifdef MACH_BSD
2208         if (task->bsd_info != NULL && !task_is_exec_copy(task)) {
2209                 pid = proc_pid(task->bsd_info);
2210         }
2211 #endif /* MACH_BSD */
2212
2213         task_unlock(task);
2214
2215         proc_set_task_policy(task, TASK_POLICY_ATTRIBUTE,
2216                              TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);
2217
2218         /* Early object reap phase */
2219
2220 // PR-17045188: Revisit implementation
2221 //        task_partial_reap(task, pid);
2222
2223 #if CONFIG_EMBEDDED
2224         /*
2225          * remove all task watchers
2226          */
2227         task_removewatchers(task);
2228
2229 #endif /* CONFIG_EMBEDDED */
2230
2231         /*
2232          *      Destroy all synchronizers owned by the task.
2233          */
2234         task_synchronizer_destroy_all(task);
2235
2236         /*
2237          *      Destroy the IPC space, leaving just a reference for it.
2238          */
2239         ipc_space_terminate(task->itk_space);
2240
2241 #if 00
2242         /* if some ledgers go negative on tear-down again... */
2243         ledger_disable_panic_on_negative(task->map->pmap->ledger,
2244                                          task_ledgers.phys_footprint);
2245         ledger_disable_panic_on_negative(task->map->pmap->ledger,
2246                                          task_ledgers.internal);
2247         ledger_disable_panic_on_negative(task->map->pmap->ledger,
2248                                          task_ledgers.internal_compressed);
2249         ledger_disable_panic_on_negative(task->map->pmap->ledger,
2250                                          task_ledgers.iokit_mapped);
2251         ledger_disable_panic_on_negative(task->map->pmap->ledger,
2252                                          task_ledgers.alternate_accounting);
2253         ledger_disable_panic_on_negative(task->map->pmap->ledger,
2254                                          task_ledgers.alternate_accounting_compressed);
2255 #endif
2256
2257         /*
2258          * If the current thread is a member of the task
2259          * being terminated, then the last reference to
2260          * the task will not be dropped until the thread
2261          * is finally reaped.  To avoid incurring the
2262          * expense of removing the address space regions
2263          * at reap time, we do it explictly here.
2264          */
2265
2266         vm_map_lock(task->map);
2267         vm_map_disable_hole_optimization(task->map);
2268         vm_map_unlock(task->map);
2269
2270 #if MACH_ASSERT
2271         /*
2272          * Identify the pmap's process, in case the pmap ledgers drift
2273          * and we have to report it.
2274          */
2275         char procname[17];
2276         if (task->bsd_info && !task_is_exec_copy(task)) {
2277                 pid = proc_pid(task->bsd_info);
2278                 proc_name_kdp(task, procname, sizeof (procname));
2279         } else {
2280                 pid = 0;
2281                 strlcpy(procname, "<unknown>", sizeof (procname));
2282         }
2283         pmap_set_process(task->map->pmap, pid, procname);
2284 #endif /* MACH_ASSERT */
2285
2286         vm_map_remove(task->map,
2287                       task->map->min_offset,
2288                       task->map->max_offset,
2289                       /*
2290                        * Final cleanup:
2291                        * + no unnesting
2292                        * + remove immutable mappings
2293                        */
2294                       (VM_MAP_REMOVE_NO_UNNESTING |
2295                        VM_MAP_REMOVE_IMMUTABLE));
2296
2297         /* release our shared region */
2298         vm_shared_region_set(task, NULL);
2299
2300
2301         lck_mtx_lock(&tasks_threads_lock);
2302         queue_remove(&tasks, task, task_t, tasks);
2303         queue_enter(&terminated_tasks, task, task_t, tasks);
2304         tasks_count--;
2305         terminated_tasks_count++;
2306         lck_mtx_unlock(&tasks_threads_lock);
2307
2308         /*
2309          * We no longer need to guard against being aborted, so restore
2310          * the previous interruptible state.
2311          */
2312         thread_interrupt_level(interrupt_save);
2313
2314 #if KPERF
2315         /* force the task to release all ctrs */
2316         if (task->t_chud & TASK_KPC_FORCED_ALL_CTRS)
2317                 kpc_force_all_ctrs(task, 0);
2318 #endif
2319
2320 #if CONFIG_COALITIONS
2321         /*
2322          * Leave our coalitions. (drop activation but not reference)
2323          */
2324         coalitions_remove_task(task);
2325 #endif
2326
2327         /*
2328          * Get rid of the task active reference on itself.
2329          */
2330         task_deallocate(task);
2331
2332         return (KERN_SUCCESS);
2333 }
2334
2335 void
2336 tasks_system_suspend(boolean_t suspend)
2337 {
2338         task_t task;
2339
2340         lck_mtx_lock(&tasks_threads_lock);
2341         assert(tasks_suspend_state != suspend);
2342         tasks_suspend_state = suspend;
2343         queue_iterate(&tasks, task, task_t, tasks) {
2344                 if (task == kernel_task) {
2345                         continue;
2346                 }
2347                 suspend ? task_suspend_internal(task) : task_resume_internal(task);
2348         }
2349         lck_mtx_unlock(&tasks_threads_lock);
2350 }
2351
2352 /*
2353  * task_start_halt:
2354  *
2355  *      Shut the current task down (except for the current thread) in
2356  *      preparation for dramatic changes to the task (probably exec).
2357  *      We hold the task and mark all other threads in the task for
2358  *      termination.
2359  */
2360 kern_return_t
2361 task_start_halt(task_t task)
2362 {
2363         kern_return_t kr = KERN_SUCCESS;
2364         task_lock(task);
2365         kr = task_start_halt_locked(task, FALSE);
2366         task_unlock(task);
2367         return kr;
2368 }
2369
2370 static kern_return_t
2371 task_start_halt_locked(task_t task, boolean_t should_mark_corpse)
2372 {
2373         thread_t thread, self;
2374         uint64_t dispatchqueue_offset;
2375
2376         assert(task != kernel_task);
2377
2378         self = current_thread();
2379
2380         if (task != self->task && !task_is_a_corpse_fork(task))
2381                 return (KERN_INVALID_ARGUMENT);
2382
2383         if (task->halting || !task->active || !self->active) {
2384                 /*
2385                  * Task or current thread is already being terminated.
2386                  * Hurry up and return out of the current kernel context
2387                  * so that we run our AST special handler to terminate
2388                  * ourselves.
2389                  */
2390                 return (KERN_FAILURE);
2391         }
2392
2393         task->halting = TRUE;
2394
2395         /*
2396          * Mark all the threads to keep them from starting any more
2397          * user-level execution.  The thread_terminate_internal code
2398          * would do this on a thread by thread basis anyway, but this
2399          * gives us a better chance of not having to wait there.
2400          */
2401         task_hold_locked(task);
2402         dispatchqueue_offset = get_dispatchqueue_offset_from_proc(task->bsd_info);
2403
2404         /*
2405          * Terminate all the other threads in the task.
2406          */
2407         queue_iterate(&task->threads, thread, thread_t, task_threads)
2408         {
2409                 if (should_mark_corpse) {
2410                         thread_mtx_lock(thread);
2411                         thread->inspection = TRUE;
2412                         thread_mtx_unlock(thread);
2413                 }
2414                 if (thread != self)
2415                         thread_terminate_internal(thread);
2416         }
2417         task->dispatchqueue_offset = dispatchqueue_offset;
2418
2419         task_release_locked(task);
2420
2421         return KERN_SUCCESS;
2422 }
2423
2424
2425 /*
2426  * task_complete_halt:
2427  *
2428  *      Complete task halt by waiting for threads to terminate, then clean
2429  *      up task resources (VM, port namespace, etc...) and then let the
2430  *      current thread go in the (practically empty) task context.
2431  *
2432  *      Note: task->halting flag is not cleared in order to avoid creation
2433  *      of new thread in old exec'ed task.
2434  */
2435 void
2436 task_complete_halt(task_t task)
2437 {
2438         task_lock(task);
2439         assert(task->halting);
2440         assert(task == current_task());
2441
2442         /*
2443          *      Wait for the other threads to get shut down.
2444          *      When the last other thread is reaped, we'll be
2445          *      woken up.
2446          */
2447         if (task->thread_count > 1) {
2448                 assert_wait((event_t)&task->halting, THREAD_UNINT);
2449                 task_unlock(task);
2450                 thread_block(THREAD_CONTINUE_NULL);
2451         } else {
2452                 task_unlock(task);
2453         }
2454
2455         /*
2456          *      Give the machine dependent code a chance
2457          *      to perform cleanup of task-level resources
2458          *      associated with the current thread before
2459          *      ripping apart the task.
2460          */
2461         machine_task_terminate(task);
2462
2463         /*
2464          *      Destroy all synchronizers owned by the task.
2465          */
2466         task_synchronizer_destroy_all(task);
2467
2468         /*
2469          *      Destroy the contents of the IPC space, leaving just
2470          *      a reference for it.
2471          */
2472         ipc_space_clean(task->itk_space);
2473
2474         /*
2475          * Clean out the address space, as we are going to be
2476          * getting a new one.
2477          */
2478         vm_map_remove(task->map, task->map->min_offset,
2479                       task->map->max_offset,
2480                       /*
2481                        * Final cleanup:
2482                        * + no unnesting
2483                        * + remove immutable mappings
2484                        */
2485                       (VM_MAP_REMOVE_NO_UNNESTING |
2486                        VM_MAP_REMOVE_IMMUTABLE));
2487
2488         /*
2489          * Kick out any IOKitUser handles to the task. At best they're stale,
2490          * at worst someone is racing a SUID exec.
2491          */
2492         iokit_task_terminate(task);
2493 }
2494
2495 /*
2496  *      task_hold_locked:
2497  *
2498  *      Suspend execution of the specified task.
2499  *      This is a recursive-style suspension of the task, a count of
2500  *      suspends is maintained.
2501  *
2502  *      CONDITIONS: the task is locked and active.
2503  */
2504 void
2505 task_hold_locked(
2506         task_t          task)
2507 {
2508         thread_t        thread;
2509
2510         assert(task->active);
2511
2512         if (task->suspend_count++ > 0)
2513                 return;
2514
2515         /*
2516          *      Iterate through all the threads and hold them.
2517          */
2518         queue_iterate(&task->threads, thread, thread_t, task_threads) {
2519                 thread_mtx_lock(thread);
2520                 thread_hold(thread);
2521                 thread_mtx_unlock(thread);
2522         }
2523 }
2524
2525 /*
2526  *      task_hold:
2527  *
2528  *      Same as the internal routine above, except that is must lock
2529  *      and verify that the task is active.  This differs from task_suspend
2530  *      in that it places a kernel hold on the task rather than just a
2531  *      user-level hold.  This keeps users from over resuming and setting
2532  *      it running out from under the kernel.
2533  *
2534  *      CONDITIONS: the caller holds a reference on the task
2535  */
2536 kern_return_t
2537 task_hold(
2538         task_t          task)
2539 {
2540         if (task == TASK_NULL)
2541                 return (KERN_INVALID_ARGUMENT);
2542
2543         task_lock(task);
2544
2545         if (!task->active) {
2546                 task_unlock(task);
2547
2548                 return (KERN_FAILURE);
2549         }
2550
2551         task_hold_locked(task);
2552         task_unlock(task);
2553
2554         return (KERN_SUCCESS);
2555 }
2556
2557 kern_return_t
2558 task_wait(
2559                 task_t          task,
2560                 boolean_t       until_not_runnable)
2561 {
2562         if (task == TASK_NULL)
2563                 return (KERN_INVALID_ARGUMENT);
2564
2565         task_lock(task);
2566
2567         if (!task->active) {
2568                 task_unlock(task);
2569
2570                 return (KERN_FAILURE);
2571         }
2572
2573         task_wait_locked(task, until_not_runnable);
2574         task_unlock(task);
2575
2576         return (KERN_SUCCESS);
2577 }
2578
2579 /*
2580  *      task_wait_locked:
2581  *
2582  *      Wait for all threads in task to stop.
2583  *
2584  * Conditions:
2585  *      Called with task locked, active, and held.
2586  */
2587 void
2588 task_wait_locked(
2589         task_t          task,
2590         boolean_t               until_not_runnable)
2591 {
2592         thread_t        thread, self;
2593
2594         assert(task->active);
2595         assert(task->suspend_count > 0);
2596
2597         self = current_thread();
2598
2599         /*
2600          *      Iterate through all the threads and wait for them to
2601          *      stop.  Do not wait for the current thread if it is within
2602          *      the task.
2603          */
2604         queue_iterate(&task->threads, thread, thread_t, task_threads) {
2605                 if (thread != self)
2606                         thread_wait(thread, until_not_runnable);
2607         }
2608 }
2609
2610 /*
2611  *      task_release_locked:
2612  *
2613  *      Release a kernel hold on a task.
2614  *
2615  *      CONDITIONS: the task is locked and active
2616  */
2617 void
2618 task_release_locked(
2619         task_t          task)
2620 {
2621         thread_t        thread;
2622
2623         assert(task->active);
2624         assert(task->suspend_count > 0);
2625
2626         if (--task->suspend_count > 0)
2627                 return;
2628
2629         queue_iterate(&task->threads, thread, thread_t, task_threads) {
2630                 thread_mtx_lock(thread);
2631                 thread_release(thread);
2632                 thread_mtx_unlock(thread);
2633         }
2634 }
2635
2636 /*
2637  *      task_release:
2638  *
2639  *      Same as the internal routine above, except that it must lock
2640  *      and verify that the task is active.
2641  *
2642  *      CONDITIONS: The caller holds a reference to the task
2643  */
2644 kern_return_t
2645 task_release(
2646         task_t          task)
2647 {
2648         if (task == TASK_NULL)
2649                 return (KERN_INVALID_ARGUMENT);
2650
2651         task_lock(task);
2652
2653         if (!task->active) {
2654                 task_unlock(task);
2655
2656                 return (KERN_FAILURE);
2657         }
2658
2659         task_release_locked(task);
2660         task_unlock(task);
2661
2662         return (KERN_SUCCESS);
2663 }
2664
2665 kern_return_t
2666 task_threads(
2667         task_t                                  task,
2668         thread_act_array_t              *threads_out,
2669         mach_msg_type_number_t  *count)
2670 {
2671         mach_msg_type_number_t  actual;
2672         thread_t                                *thread_list;
2673         thread_t                                thread;
2674         vm_size_t                               size, size_needed;
2675         void                                    *addr;
2676         unsigned int                    i, j;
2677
2678         if (task == TASK_NULL)
2679                 return (KERN_INVALID_ARGUMENT);
2680
2681         size = 0; addr = NULL;
2682
2683         for (;;) {
2684                 task_lock(task);
2685                 if (!task->active) {
2686                         task_unlock(task);
2687
2688                         if (size != 0)
2689                                 kfree(addr, size);
2690
2691                         return (KERN_FAILURE);
2692                 }
2693
2694                 actual = task->thread_count;
2695
2696                 /* do we have the memory we need? */
2697                 size_needed = actual * sizeof (mach_port_t);
2698                 if (size_needed <= size)
2699                         break;
2700
2701                 /* unlock the task and allocate more memory */
2702                 task_unlock(task);
2703
2704                 if (size != 0)
2705                         kfree(addr, size);
2706
2707                 assert(size_needed > 0);
2708                 size = size_needed;
2709
2710                 addr = kalloc(size);
2711                 if (addr == 0)
2712                         return (KERN_RESOURCE_SHORTAGE);
2713         }
2714
2715         /* OK, have memory and the task is locked & active */
2716         thread_list = (thread_t *)addr;
2717
2718         i = j = 0;
2719
2720         for (thread = (thread_t)queue_first(&task->threads); i < actual;
2721                                 ++i, thread = (thread_t)queue_next(&thread->task_threads)) {
2722                 thread_reference_internal(thread);
2723                 thread_list[j++] = thread;
2724         }
2725
2726         assert(queue_end(&task->threads, (queue_entry_t)thread));
2727
2728         actual = j;
2729         size_needed = actual * sizeof (mach_port_t);
2730
2731         /* can unlock task now that we've got the thread refs */
2732         task_unlock(task);
2733
2734         if (actual == 0) {
2735                 /* no threads, so return null pointer and deallocate memory */
2736
2737                 *threads_out = NULL;
2738                 *count = 0;
2739
2740                 if (size != 0)
2741                         kfree(addr, size);
2742         }
2743         else {
2744                 /* if we allocated too much, must copy */
2745
2746                 if (size_needed < size) {
2747                         void *newaddr;
2748
2749                         newaddr = kalloc(size_needed);
2750                         if (newaddr == 0) {
2751                                 for (i = 0; i < actual; ++i)
2752                                         thread_deallocate(thread_list[i]);
2753                                 kfree(addr, size);
2754                                 return (KERN_RESOURCE_SHORTAGE);
2755                         }
2756
2757                         bcopy(addr, newaddr, size_needed);
2758                         kfree(addr, size);
2759                         thread_list = (thread_t *)newaddr;
2760                 }
2761
2762                 *threads_out = thread_list;
2763                 *count = actual;
2764
2765                 /* do the conversion that Mig should handle */
2766
2767                 for (i = 0; i < actual; ++i)
2768                         ((ipc_port_t *) thread_list)[i] = convert_thread_to_port(thread_list[i]);
2769         }
2770
2771         return (KERN_SUCCESS);
2772 }
2773
2774 #define TASK_HOLD_NORMAL        0
2775 #define TASK_HOLD_PIDSUSPEND    1
2776 #define TASK_HOLD_LEGACY        2
2777 #define TASK_HOLD_LEGACY_ALL    3
2778
2779 static kern_return_t
2780 place_task_hold    (
2781         task_t task,
2782         int mode)
2783 {
2784         if (!task->active && !task_is_a_corpse(task)) {
2785                 return (KERN_FAILURE);
2786         }
2787
2788         /* Return success for corpse task */
2789         if (task_is_a_corpse(task)) {
2790                 return KERN_SUCCESS;
2791         }
2792
2793         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2794             MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_SUSPEND) | DBG_FUNC_NONE,
2795             task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
2796             task->user_stop_count, task->user_stop_count + 1, 0);
2797
2798 #if MACH_ASSERT
2799         current_task()->suspends_outstanding++;
2800 #endif
2801
2802         if (mode == TASK_HOLD_LEGACY)
2803                 task->legacy_stop_count++;
2804
2805         if (task->user_stop_count++ > 0) {
2806                 /*
2807                  *      If the stop count was positive, the task is
2808                  *      already stopped and we can exit.
2809                  */
2810                 return (KERN_SUCCESS);
2811         }
2812
2813         /*
2814          * Put a kernel-level hold on the threads in the task (all
2815          * user-level task suspensions added together represent a
2816          * single kernel-level hold).  We then wait for the threads
2817          * to stop executing user code.
2818          */
2819         task_hold_locked(task);
2820         task_wait_locked(task, FALSE);
2821
2822         return (KERN_SUCCESS);
2823 }
2824
2825 static kern_return_t
2826 release_task_hold    (
2827         task_t          task,
2828         int                     mode)
2829 {
2830         boolean_t release = FALSE;
2831
2832         if (!task->active && !task_is_a_corpse(task)) {
2833                 return (KERN_FAILURE);
2834         }
2835
2836         /* Return success for corpse task */
2837         if (task_is_a_corpse(task)) {
2838                 return KERN_SUCCESS;
2839         }
2840
2841         if (mode == TASK_HOLD_PIDSUSPEND) {
2842             if (task->pidsuspended == FALSE) {
2843                     return (KERN_FAILURE);
2844             }
2845             task->pidsuspended = FALSE;
2846         }
2847
2848         if (task->user_stop_count > (task->pidsuspended ? 1 : 0)) {
2849
2850                 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2851                     MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_RESUME) | DBG_FUNC_NONE,
2852                     task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
2853                     task->user_stop_count, mode, task->legacy_stop_count);
2854
2855 #if MACH_ASSERT
2856                 /*
2857                  * This is obviously not robust; if we suspend one task and then resume a different one,
2858                  * we'll fly under the radar. This is only meant to catch the common case of a crashed
2859                  * or buggy suspender.
2860                  */
2861                 current_task()->suspends_outstanding--;
2862 #endif
2863
2864                 if (mode == TASK_HOLD_LEGACY_ALL) {
2865                         if (task->legacy_stop_count >= task->user_stop_count) {
2866                                 task->user_stop_count = 0;
2867                                 release = TRUE;
2868                         } else {
2869                                 task->user_stop_count -= task->legacy_stop_count;
2870                         }
2871                         task->legacy_stop_count = 0;
2872                 } else {
2873                         if (mode == TASK_HOLD_LEGACY && task->legacy_stop_count > 0)
2874                                 task->legacy_stop_count--;
2875                         if (--task->user_stop_count == 0)
2876                                 release = TRUE;
2877                 }
2878         }
2879         else {
2880                 return (KERN_FAILURE);
2881         }
2882
2883         /*
2884          *      Release the task if necessary.
2885          */
2886         if (release)
2887                 task_release_locked(task);
2888
2889     return (KERN_SUCCESS);
2890 }
2891
2892
2893 /*
2894  *      task_suspend:
2895  *
2896  *      Implement an (old-fashioned) user-level suspension on a task.
2897  *
2898  *      Because the user isn't expecting to have to manage a suspension
2899  *      token, we'll track it for him in the kernel in the form of a naked
2900  *      send right to the task's resume port.  All such send rights
2901  *      account for a single suspension against the task (unlike task_suspend2()
2902  *      where each caller gets a unique suspension count represented by a
2903  *      unique send-once right).
2904  *
2905  * Conditions:
2906  *      The caller holds a reference to the task
2907  */
2908 kern_return_t
2909 task_suspend(
2910         task_t          task)
2911 {
2912         kern_return_t                   kr;
2913         mach_port_t                     port, send, old_notify;
2914         mach_port_name_t                name;
2915
2916         if (task == TASK_NULL || task == kernel_task)
2917                 return (KERN_INVALID_ARGUMENT);
2918
2919         task_lock(task);
2920
2921         /*
2922          * Claim a send right on the task resume port, and request a no-senders
2923          * notification on that port (if none outstanding).
2924          */
2925         if (task->itk_resume == IP_NULL) {
2926                 task->itk_resume = ipc_port_alloc_kernel();
2927                 if (!IP_VALID(task->itk_resume))
2928                         panic("failed to create resume port");
2929                 ipc_kobject_set(task->itk_resume, (ipc_kobject_t)task, IKOT_TASK_RESUME);
2930         }
2931
2932         port = task->itk_resume;
2933         ip_lock(port);
2934         assert(ip_active(port));
2935
2936         send = ipc_port_make_send_locked(port);
2937         assert(IP_VALID(send));
2938
2939         if (port->ip_nsrequest == IP_NULL) {
2940                 ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
2941                 assert(old_notify == IP_NULL);
2942                 /* port unlocked */
2943         } else {
2944                 ip_unlock(port);
2945         }
2946
2947         /*
2948          * place a legacy hold on the task.
2949          */
2950         kr = place_task_hold(task, TASK_HOLD_LEGACY);
2951         if (kr != KERN_SUCCESS) {
2952                 task_unlock(task);
2953                 ipc_port_release_send(send);
2954                 return kr;
2955         }
2956
2957         task_unlock(task);
2958
2959         /*
2960          * Copyout the send right into the calling task's IPC space.  It won't know it is there,
2961          * but we'll look it up when calling a traditional resume.  Any IPC operations that
2962          * deallocate the send right will auto-release the suspension.
2963          */
2964         if ((kr = ipc_kmsg_copyout_object(current_task()->itk_space, (ipc_object_t)send,
2965                 MACH_MSG_TYPE_MOVE_SEND, &name)) != KERN_SUCCESS) {
2966                 printf("warning: %s(%d) failed to copyout suspension token for pid %d with error: %d\n",
2967                                 proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
2968                                 task_pid(task), kr);
2969                 return (kr);
2970         }
2971
2972         return (kr);
2973 }
2974
2975 /*
2976  *      task_resume:
2977  *              Release a user hold on a task.
2978  *
2979  * Conditions:
2980  *              The caller holds a reference to the task
2981  */
2982 kern_return_t
2983 task_resume(
2984         task_t  task)
2985 {
2986         kern_return_t    kr;
2987         mach_port_name_t resume_port_name;
2988         ipc_entry_t              resume_port_entry;
2989         ipc_space_t              space = current_task()->itk_space;
2990
2991         if (task == TASK_NULL || task == kernel_task )
2992                 return (KERN_INVALID_ARGUMENT);
2993
2994         /* release a legacy task hold */
2995         task_lock(task);
2996         kr = release_task_hold(task, TASK_HOLD_LEGACY);
2997         task_unlock(task);
2998
2999         is_write_lock(space);
3000         if (is_active(space) && IP_VALID(task->itk_resume) &&
3001             ipc_hash_lookup(space, (ipc_object_t)task->itk_resume, &resume_port_name, &resume_port_entry) == TRUE) {
3002                 /*
3003                  * We found a suspension token in the caller's IPC space. Release a send right to indicate that
3004                  * we are holding one less legacy hold on the task from this caller.  If the release failed,
3005                  * go ahead and drop all the rights, as someone either already released our holds or the task
3006                  * is gone.
3007                  */
3008                 if (kr == KERN_SUCCESS)
3009                         ipc_right_dealloc(space, resume_port_name, resume_port_entry);
3010                 else
3011                         ipc_right_destroy(space, resume_port_name, resume_port_entry, FALSE, 0);
3012                 /* space unlocked */
3013         } else {
3014                 is_write_unlock(space);
3015                 if (kr == KERN_SUCCESS)
3016                         printf("warning: %s(%d) performed out-of-band resume on pid %d\n",
3017                                proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
3018                                task_pid(task));
3019         }
3020
3021         return kr;
3022 }
3023
3024 /*
3025  * Suspend the target task.
3026  * Making/holding a token/reference/port is the callers responsibility.
3027  */
3028 kern_return_t
3029 task_suspend_internal(task_t task)
3030 {
3031         kern_return_t    kr;
3032
3033         if (task == TASK_NULL || task == kernel_task)
3034                 return (KERN_INVALID_ARGUMENT);
3035
3036         task_lock(task);
3037         kr = place_task_hold(task, TASK_HOLD_NORMAL);
3038         task_unlock(task);
3039         return (kr);
3040 }
3041
3042 /*
3043  * Suspend the target task, and return a suspension token. The token
3044  * represents a reference on the suspended task.
3045  */
3046 kern_return_t
3047 task_suspend2(
3048         task_t                  task,
3049         task_suspension_token_t *suspend_token)
3050 {
3051         kern_return_t    kr;
3052
3053         kr = task_suspend_internal(task);
3054         if (kr != KERN_SUCCESS) {
3055                 *suspend_token = TASK_NULL;
3056                 return (kr);
3057         }
3058
3059         /*
3060          * Take a reference on the target task and return that to the caller
3061          * as a "suspension token," which can be converted into an SO right to
3062          * the now-suspended task's resume port.
3063          */
3064         task_reference_internal(task);
3065         *suspend_token = task;
3066
3067         return (KERN_SUCCESS);
3068 }
3069
3070 /*
3071  * Resume the task
3072  * (reference/token/port management is caller's responsibility).
3073  */
3074 kern_return_t
3075 task_resume_internal(
3076         task_suspension_token_t         task)
3077 {
3078         kern_return_t kr;
3079
3080         if (task == TASK_NULL || task == kernel_task)
3081                 return (KERN_INVALID_ARGUMENT);
3082
3083         task_lock(task);
3084         kr = release_task_hold(task, TASK_HOLD_NORMAL);
3085         task_unlock(task);
3086         return (kr);
3087 }
3088
3089 /*
3090  * Resume the task using a suspension token. Consumes the token's ref.
3091  */
3092 kern_return_t
3093 task_resume2(
3094         task_suspension_token_t         task)
3095 {
3096         kern_return_t kr;
3097
3098         kr = task_resume_internal(task);
3099         task_suspension_token_deallocate(task);
3100
3101         return (kr);
3102 }
3103
3104 boolean_t
3105 task_suspension_notify(mach_msg_header_t *request_header)
3106 {
3107         ipc_port_t port = (ipc_port_t) request_header->msgh_remote_port;
3108         task_t task = convert_port_to_task_suspension_token(port);
3109         mach_msg_type_number_t not_count;
3110
3111         if (task == TASK_NULL || task == kernel_task)
3112                 return TRUE;  /* nothing to do */
3113
3114         switch (request_header->msgh_id) {
3115
3116         case MACH_NOTIFY_SEND_ONCE:
3117                 /* release the hold held by this specific send-once right */
3118                 task_lock(task);
3119                 release_task_hold(task, TASK_HOLD_NORMAL);
3120                 task_unlock(task);
3121                 break;
3122
3123         case MACH_NOTIFY_NO_SENDERS:
3124                 not_count = ((mach_no_senders_notification_t *)request_header)->not_count;
3125
3126                 task_lock(task);
3127                 ip_lock(port);
3128                 if (port->ip_mscount == not_count) {
3129
3130                         /* release all the [remaining] outstanding legacy holds */
3131                         assert(port->ip_nsrequest == IP_NULL);
3132                         ip_unlock(port);
3133                         release_task_hold(task, TASK_HOLD_LEGACY_ALL);
3134                         task_unlock(task);
3135
3136                 } else if (port->ip_nsrequest == IP_NULL) {
3137                         ipc_port_t old_notify;
3138
3139                         task_unlock(task);
3140                         /* new send rights, re-arm notification at current make-send count */
3141                         ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
3142                         assert(old_notify == IP_NULL);
3143                         /* port unlocked */
3144                 } else {
3145                         ip_unlock(port);
3146                         task_unlock(task);
3147                 }
3148                 break;
3149
3150         default:
3151                 break;
3152         }
3153
3154         task_suspension_token_deallocate(task); /* drop token reference */
3155         return TRUE;
3156 }
3157
3158 kern_return_t
3159 task_pidsuspend_locked(task_t task)
3160 {
3161         kern_return_t kr;
3162
3163         if (task->pidsuspended) {
3164                 kr = KERN_FAILURE;
3165                 goto out;
3166         }
3167
3168         task->pidsuspended = TRUE;
3169
3170         kr = place_task_hold(task, TASK_HOLD_PIDSUSPEND);
3171         if (kr != KERN_SUCCESS) {
3172                 task->pidsuspended = FALSE;
3173         }
3174 out:
3175         return(kr);
3176 }
3177
3178
3179 /*
3180  *      task_pidsuspend:
3181  *
3182  *      Suspends a task by placing a hold on its threads.
3183  *
3184  * Conditions:
3185  *      The caller holds a reference to the task
3186  */
3187 kern_return_t
3188 task_pidsuspend(
3189         task_t          task)
3190 {
3191         kern_return_t    kr;
3192
3193         if (task == TASK_NULL || task == kernel_task)
3194                 return (KERN_INVALID_ARGUMENT);
3195
3196         task_lock(task);
3197
3198         kr = task_pidsuspend_locked(task);
3199
3200         task_unlock(task);
3201
3202         return (kr);
3203 }
3204
3205 /*
3206  *      task_pidresume:
3207  *              Resumes a previously suspended task.
3208  *
3209  * Conditions:
3210  *              The caller holds a reference to the task
3211  */
3212 kern_return_t
3213 task_pidresume(
3214         task_t  task)
3215 {
3216         kern_return_t    kr;
3217
3218         if (task == TASK_NULL || task == kernel_task)
3219                 return (KERN_INVALID_ARGUMENT);
3220
3221         task_lock(task);
3222
3223 #if CONFIG_FREEZE
3224
3225         while (task->changing_freeze_state) {
3226
3227                 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
3228                 task_unlock(task);
3229                 thread_block(THREAD_CONTINUE_NULL);
3230
3231                 task_lock(task);
3232         }
3233         task->changing_freeze_state = TRUE;
3234 #endif
3235
3236         kr = release_task_hold(task, TASK_HOLD_PIDSUSPEND);
3237
3238         task_unlock(task);
3239
3240 #if CONFIG_FREEZE
3241
3242         task_lock(task);
3243
3244         if (kr == KERN_SUCCESS)
3245                 task->frozen = FALSE;
3246         task->changing_freeze_state = FALSE;
3247         thread_wakeup(&task->changing_freeze_state);
3248
3249         task_unlock(task);
3250 #endif
3251
3252         return (kr);
3253 }
3254
3255
#if DEVELOPMENT || DEBUG

extern void IOSleep(int);

/*
 *      task_disconnect_page_mappings:
 *
 *      Strip the mappings from the pmap of the specified task so that the
 *      task has to re-fault every page it is actively using; this lets a
 *      debug tool approximate the task's true working set.
 *
 *      Returns KERN_INVALID_ARGUMENT for TASK_NULL or kernel_task,
 *      KERN_SUCCESS otherwise.
 */
kern_return_t
task_disconnect_page_mappings(task_t task)
{
        int     pass;

        if (task == TASK_NULL || task == kernel_task) {
                return (KERN_INVALID_ARGUMENT);
        }

        /*
         * We only engage while at least one thread in the task is
         * runnable, but then keep sweeping for a while (the limit of 100
         * sweeps is arbitrary and to be revisited with experience) to get
         * a better view of which areas within a page are being visited,
         * rather than only seeing the first fault on a page after the
         * task becomes runnable.  A future version may block until a
         * thread in the task is made runnable; for now the user-level
         * debug tool driving the sysctl polls periodically.
         */
        for (pass = 0; pass < 100; pass++) {
                thread_t        cur;
                boolean_t       any_runnable = FALSE;
                boolean_t       unnest_now = FALSE;
                int             pages;

                task_lock(task);

                /* Stop scanning at the first thread with TH_RUN set. */
                queue_iterate(&task->threads, cur, thread_t, task_threads) {
                        if (cur->state & TH_RUN) {
                                any_runnable = TRUE;
                                break;
                        }
                }

                /* Count one disconnect per call, not per sweep. */
                if (pass == 0) {
                        task->task_disconnected_count++;
                }

                /* Request unnesting only once in the task's lifetime. */
                if (task->task_unnested == FALSE && any_runnable == TRUE) {
                        task->task_unnested = TRUE;
                        unnest_now = TRUE;
                }

                task_unlock(task);

                if (any_runnable == FALSE) {
                        break;
                }

                KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_START,
                                          task, unnest_now, task->task_disconnected_count, 0, 0);

                pages = vm_map_disconnect_page_mappings(task->map, unnest_now);

                KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_END,
                                          task, pages, 0, 0, 0);

                /* Pause briefly after every fifth sweep. */
                if ((pass % 5) == 4) {
                        IOSleep(1);
                }
        }

        return (KERN_SUCCESS);
}

#endif
3331
3332
3333 #if CONFIG_FREEZE
3334
3335 /*
3336  *      task_freeze:
3337  *
3338  *      Freeze a task.
3339  *
3340  * Conditions:
3341  *      The caller holds a reference to the task
3342  */
3343 extern void             vm_wake_compactor_swapper(void);
3344 extern queue_head_t     c_swapout_list_head;
3345
3346 kern_return_t
3347 task_freeze(
3348         task_t    task,
3349         uint32_t           *purgeable_count,
3350         uint32_t           *wired_count,
3351         uint32_t           *clean_count,
3352         uint32_t           *dirty_count,
3353         uint32_t           dirty_budget,
3354         boolean_t          *shared,
3355         boolean_t          walk_only)
3356 {
3357         kern_return_t kr = KERN_SUCCESS;
3358
3359         if (task == TASK_NULL || task == kernel_task)
3360                 return (KERN_INVALID_ARGUMENT);
3361
3362         task_lock(task);
3363
3364         while (task->changing_freeze_state) {
3365
3366                 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
3367                 task_unlock(task);
3368                 thread_block(THREAD_CONTINUE_NULL);
3369
3370                 task_lock(task);
3371         }
3372         if (task->frozen) {
3373                 task_unlock(task);
3374                 return (KERN_FAILURE);
3375         }
3376         task->changing_freeze_state = TRUE;
3377
3378         task_unlock(task);
3379
3380         if (walk_only) {
3381                 panic("task_freeze - walk_only == TRUE");
3382         } else {
3383                 kr = vm_map_freeze(task->map, purgeable_count, wired_count, clean_count, dirty_count, dirty_budget, shared);
3384         }
3385
3386         task_lock(task);
3387
3388         if (walk_only == FALSE && kr == KERN_SUCCESS)
3389                 task->frozen = TRUE;
3390         task->changing_freeze_state = FALSE;
3391         thread_wakeup(&task->changing_freeze_state);
3392
3393         task_unlock(task);
3394
3395         if (VM_CONFIG_COMPRESSOR_IS_PRESENT) {
3396                 vm_wake_compactor_swapper();
3397                 /*
3398                  * We do an explicit wakeup of the swapout thread here
3399                  * because the compact_and_swap routines don't have
3400                  * knowledge about these kind of "per-task packed c_segs"
3401                  * and so will not be evaluating whether we need to do
3402                  * a wakeup there.
3403                  */
3404                 thread_wakeup((event_t)&c_swapout_list_head);
3405         }
3406
3407         return (kr);
3408 }
3409
3410 /*
3411  *      task_thaw:
3412  *
3413  *      Thaw a currently frozen task.
3414  *
3415  * Conditions:
3416  *      The caller holds a reference to the task
3417  */
3418 kern_return_t
3419 task_thaw(
3420         task_t          task)
3421 {
3422         if (task == TASK_NULL || task == kernel_task)
3423                 return (KERN_INVALID_ARGUMENT);
3424
3425         task_lock(task);
3426
3427         while (task->changing_freeze_state) {
3428
3429                 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
3430                 task_unlock(task);
3431                 thread_block(THREAD_CONTINUE_NULL);
3432
3433                 task_lock(task);
3434         }
3435         if (!task->frozen) {
3436                 task_unlock(task);
3437                 return (KERN_FAILURE);
3438         }
3439         task->frozen = FALSE;
3440
3441         task_unlock(task);
3442
3443         return (KERN_SUCCESS);
3444 }
3445
3446 #endif /* CONFIG_FREEZE */
3447
3448 kern_return_t
3449 host_security_set_task_token(
3450         host_security_t  host_security,
3451         task_t           task,
3452         security_token_t sec_token,
3453         audit_token_t    audit_token,
3454         host_priv_t      host_priv)
3455 {
3456         ipc_port_t       host_port;
3457         kern_return_t    kr;
3458
3459         if (task == TASK_NULL)
3460                 return(KERN_INVALID_ARGUMENT);
3461
3462         if (host_security == HOST_NULL)
3463                 return(KERN_INVALID_SECURITY);
3464
3465         task_lock(task);
3466         task->sec_token = sec_token;
3467         task->audit_token = audit_token;
3468
3469         task_unlock(task);
3470
3471         if (host_priv != HOST_PRIV_NULL) {
3472                 kr = host_get_host_priv_port(host_priv, &host_port);
3473         } else {
3474                 kr = host_get_host_port(host_priv_self(), &host_port);
3475         }
3476         assert(kr == KERN_SUCCESS);
3477         kr = task_set_special_port(task, TASK_HOST_PORT, host_port);
3478         return(kr);
3479 }
3480
3481 kern_return_t
3482 task_send_trace_memory(
3483         task_t        target_task,
3484         __unused uint32_t pid,
3485         __unused uint64_t uniqueid)
3486 {
3487         kern_return_t kr = KERN_INVALID_ARGUMENT;
3488         if (target_task == TASK_NULL)
3489                 return (KERN_INVALID_ARGUMENT);
3490
3491 #if CONFIG_ATM
3492         kr = atm_send_proc_inspect_notification(target_task,
3493                                   pid,
3494                                   uniqueid);
3495
3496 #endif
3497         return (kr);
3498 }
3499 /*
3500  * This routine was added, pretty much exclusively, for registering the
3501  * RPC glue vector for in-kernel short circuited tasks.  Rather than
3502  * removing it completely, I have only disabled that feature (which was
3503  * the only feature at the time).  It just appears that we are going to
3504  * want to add some user data to tasks in the future (i.e. bsd info,
3505  * task names, etc...), so I left it in the formal task interface.
3506  */
3507 kern_return_t
3508 task_set_info(
3509         task_t          task,
3510         task_flavor_t   flavor,
3511         __unused task_info_t    task_info_in,           /* pointer to IN array */
3512         __unused mach_msg_type_number_t task_info_count)
3513 {
3514         if (task == TASK_NULL)
3515                 return(KERN_INVALID_ARGUMENT);
3516
3517         switch (flavor) {
3518
3519 #if CONFIG_ATM
3520                 case TASK_TRACE_MEMORY_INFO:
3521                 {
3522                         if (task_info_count != TASK_TRACE_MEMORY_INFO_COUNT)
3523                                 return (KERN_INVALID_ARGUMENT);
3524
3525                         assert(task_info_in != NULL);
3526                         task_trace_memory_info_t mem_info;
3527                         mem_info = (task_trace_memory_info_t) task_info_in;
3528                         kern_return_t kr = atm_register_trace_memory(task,
3529                                                 mem_info->user_memory_address,
3530                                                 mem_info->buffer_size);
3531                         return kr;
3532                 }
3533
3534 #endif
3535             default:
3536                 return (KERN_INVALID_ARGUMENT);
3537         }
3538         return (KERN_SUCCESS);
3539 }
3540
3541 int radar_20146450 = 1;
3542 kern_return_t
3543 task_info(
3544         task_t                  task,
3545         task_flavor_t           flavor,
3546         task_info_t             task_info_out,
3547         mach_msg_type_number_t  *task_info_count)
3548 {
3549         kern_return_t error = KERN_SUCCESS;
3550         mach_msg_type_number_t  original_task_info_count;
3551
3552         if (task == TASK_NULL)
3553                 return (KERN_INVALID_ARGUMENT);
3554
3555         original_task_info_count = *task_info_count;
3556         task_lock(task);
3557
3558         if ((task != current_task()) && (!task->active)) {
3559                 task_unlock(task);
3560                 return (KERN_INVALID_ARGUMENT);
3561         }
3562
3563         switch (flavor) {
3564
3565         case TASK_BASIC_INFO_32:
3566         case TASK_BASIC2_INFO_32:
3567 #if defined(__arm__) || defined(__arm64__)
3568         case TASK_BASIC_INFO_64:
3569 #endif
3570         {
3571                 task_basic_info_32_t    basic_info;
3572                 vm_map_t                                map;
3573                 clock_sec_t                             secs;
3574                 clock_usec_t                    usecs;
3575
3576                 if (*task_info_count < TASK_BASIC_INFO_32_COUNT) {
3577                     error = KERN_INVALID_ARGUMENT;
3578                     break;
3579                 }
3580
3581                 basic_info = (task_basic_info_32_t)task_info_out;
3582
3583                 map = (task == kernel_task)? kernel_map: task->map;
3584                 basic_info->virtual_size = (typeof(basic_info->virtual_size))map->size;
3585                 if (flavor == TASK_BASIC2_INFO_32) {
3586                         /*
3587                          * The "BASIC2" flavor gets the maximum resident
3588                          * size instead of the current resident size...
3589                          */
3590                         basic_info->resident_size = pmap_resident_max(map->pmap);
3591                 } else {
3592                         basic_info->resident_size = pmap_resident_count(map->pmap);
3593                 }
3594                 basic_info->resident_size *= PAGE_SIZE;
3595
3596                 basic_info->policy = ((task != kernel_task)?
3597                                                                                   POLICY_TIMESHARE: POLICY_RR);
3598                 basic_info->suspend_count = task->user_stop_count;
3599
3600                 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
3601                 basic_info->user_time.seconds =
3602                         (typeof(basic_info->user_time.seconds))secs;
3603                 basic_info->user_time.microseconds = usecs;
3604
3605                 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
3606                 basic_info->system_time.seconds =
3607                         (typeof(basic_info->system_time.seconds))secs;
3608                 basic_info->system_time.microseconds = usecs;
3609
3610                 *task_info_count = TASK_BASIC_INFO_32_COUNT;
3611                 break;
3612         }
3613
3614 #if defined(__arm__) || defined(__arm64__)
3615         case TASK_BASIC_INFO_64_2:
3616         {
3617                 task_basic_info_64_2_t  basic_info;
3618                 vm_map_t                                map;
3619                 clock_sec_t                             secs;
3620                 clock_usec_t                    usecs;
3621
3622                 if (*task_info_count < TASK_BASIC_INFO_64_2_COUNT) {
3623                     error = KERN_INVALID_ARGUMENT;
3624                     break;
3625                 }
3626
3627                 basic_info = (task_basic_info_64_2_t)task_info_out;
3628
3629                 map = (task == kernel_task)? kernel_map: task->map;
3630                 basic_info->virtual_size  = map->size;
3631                 basic_info->resident_size =
3632                         (mach_vm_size_t)(pmap_resident_count(map->pmap))
3633                         * PAGE_SIZE_64;
3634
3635                 basic_info->policy = ((task != kernel_task)?
3636                                                                                   POLICY_TIMESHARE: POLICY_RR);
3637                 basic_info->suspend_count = task->user_stop_count;
3638
3639                 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
3640                 basic_info->user_time.seconds =
3641                         (typeof(basic_info->user_time.seconds))secs;
3642                 basic_info->user_time.microseconds = usecs;
3643
3644                 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
3645                 basic_info->system_time.seconds =
3646                         (typeof(basic_info->system_time.seconds))secs;
3647                 basic_info->system_time.microseconds = usecs;
3648
3649                 *task_info_count = TASK_BASIC_INFO_64_2_COUNT;
3650                 break;
3651         }
3652
3653 #else /* defined(__arm__) || defined(__arm64__) */
3654         case TASK_BASIC_INFO_64:
3655         {
3656                 task_basic_info_64_t    basic_info;
3657                 vm_map_t                                map;
3658                 clock_sec_t                             secs;
3659                 clock_usec_t                    usecs;
3660
3661                 if (*task_info_count < TASK_BASIC_INFO_64_COUNT) {
3662                     error = KERN_INVALID_ARGUMENT;
3663                     break;
3664                 }
3665
3666                 basic_info = (task_basic_info_64_t)task_info_out;
3667
3668                 map = (task == kernel_task)? kernel_map: task->map;
3669                 basic_info->virtual_size  = map->size;
3670                 basic_info->resident_size =
3671                         (mach_vm_size_t)(pmap_resident_count(map->pmap))
3672                         * PAGE_SIZE_64;
3673
3674                 basic_info->policy = ((task != kernel_task)?
3675                                                                                   POLICY_TIMESHARE: POLICY_RR);
3676                 basic_info->suspend_count = task->user_stop_count;
3677
3678                 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
3679                 basic_info->user_time.seconds =
3680                         (typeof(basic_info->user_time.seconds))secs;
3681                 basic_info->user_time.microseconds = usecs;
3682
3683                 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
3684                 basic_info->system_time.seconds =
3685                         (typeof(basic_info->system_time.seconds))secs;
3686                 basic_info->system_time.microseconds = usecs;
3687
3688                 *task_info_count = TASK_BASIC_INFO_64_COUNT;
3689                 break;
3690         }
3691 #endif /* defined(__arm__) || defined(__arm64__) */
3692
3693         case MACH_TASK_BASIC_INFO:
3694         {
3695                 mach_task_basic_info_t  basic_info;
3696                 vm_map_t                map;
3697                 clock_sec_t             secs;
3698                 clock_usec_t            usecs;
3699
3700                 if (*task_info_count < MACH_TASK_BASIC_INFO_COUNT) {
3701                     error = KERN_INVALID_ARGUMENT;
3702                     break;
3703                 }
3704
3705                 basic_info = (mach_task_basic_info_t)task_info_out;
3706
3707                 map = (task == kernel_task) ? kernel_map : task->map;
3708
3709                 basic_info->virtual_size  = map->size;
3710
3711                 basic_info->resident_size =
3712                     (mach_vm_size_t)(pmap_resident_count(map->pmap));
3713                 basic_info->resident_size *= PAGE_SIZE_64;
3714
3715                 basic_info->resident_size_max =
3716                     (mach_vm_size_t)(pmap_resident_max(map->pmap));
3717                 basic_info->resident_size_max *= PAGE_SIZE_64;
3718
3719                 basic_info->policy = ((task != kernel_task) ?
3720                                       POLICY_TIMESHARE : POLICY_RR);
3721
3722                 basic_info->suspend_count = task->user_stop_count;
3723
3724                 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
3725                 basic_info->user_time.seconds =
3726                     (typeof(basic_info->user_time.seconds))secs;
3727                 basic_info->user_time.microseconds = usecs;
3728
3729                 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
3730                 basic_info->system_time.seconds =
3731                     (typeof(basic_info->system_time.seconds))secs;
3732                 basic_info->system_time.microseconds = usecs;
3733
3734                 *task_info_count = MACH_TASK_BASIC_INFO_COUNT;
3735                 break;
3736         }
3737
3738         case TASK_THREAD_TIMES_INFO:
3739         {
3740                 task_thread_times_info_t        times_info;
3741                 thread_t                                        thread;
3742
3743                 if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) {
3744                     error = KERN_INVALID_ARGUMENT;
3745                     break;
3746                 }
3747
3748                 times_info = (task_thread_times_info_t) task_info_out;
3749                 times_info->user_time.seconds = 0;
3750                 times_info->user_time.microseconds = 0;
3751                 times_info->system_time.seconds = 0;
3752                 times_info->system_time.microseconds = 0;
3753
3754
3755                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3756                         time_value_t    user_time, system_time;
3757
3758                         if (thread->options & TH_OPT_IDLE_THREAD)
3759                                 continue;
3760
3761                         thread_read_times(thread, &user_time, &system_time);
3762
3763                         time_value_add(&times_info->user_time, &user_time);
3764                         time_value_add(&times_info->system_time, &system_time);
3765                 }
3766
3767                 *task_info_count = TASK_THREAD_TIMES_INFO_COUNT;
3768                 break;
3769         }
3770
3771         case TASK_ABSOLUTETIME_INFO:
3772         {
3773                 task_absolutetime_info_t        info;
3774                 thread_t                        thread;
3775
3776                 if (*task_info_count < TASK_ABSOLUTETIME_INFO_COUNT) {
3777                         error = KERN_INVALID_ARGUMENT;
3778                         break;
3779                 }
3780
3781                 info = (task_absolutetime_info_t)task_info_out;
3782                 info->threads_user = info->threads_system = 0;
3783
3784
3785                 info->total_user = task->total_user_time;
3786                 info->total_system = task->total_system_time;
3787
3788                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3789                         uint64_t        tval;
3790                         spl_t           x;
3791
3792                         if (thread->options & TH_OPT_IDLE_THREAD)
3793                                 continue;
3794
3795                         x = splsched();
3796                         thread_lock(thread);
3797
3798                         tval = timer_grab(&thread->user_timer);
3799                         info->threads_user += tval;
3800                         info->total_user += tval;
3801
3802                         tval = timer_grab(&thread->system_timer);
3803                         if (thread->precise_user_kernel_time) {
3804                                 info->threads_system += tval;
3805                                 info->total_system += tval;
3806                         } else {
3807                                 /* system_timer may represent either sys or user */
3808                                 info->threads_user += tval;
3809                                 info->total_user += tval;
3810                         }
3811
3812                         thread_unlock(thread);
3813                         splx(x);
3814                 }
3815
3816
3817                 *task_info_count = TASK_ABSOLUTETIME_INFO_COUNT;
3818                 break;
3819         }
3820
3821         case TASK_DYLD_INFO:
3822         {
3823                 task_dyld_info_t info;
3824
3825                 /*
3826                  * We added the format field to TASK_DYLD_INFO output.  For
3827                  * temporary backward compatibility, accept the fact that
3828                  * clients may ask for the old version - distinguished by the
3829                  * size of the expected result structure.
3830                  */
3831 #define TASK_LEGACY_DYLD_INFO_COUNT \
3832                 offsetof(struct task_dyld_info, all_image_info_format)/sizeof(natural_t)
3833
3834                 if (*task_info_count < TASK_LEGACY_DYLD_INFO_COUNT) {
3835                         error = KERN_INVALID_ARGUMENT;
3836                         break;
3837                 }
3838
3839                 info = (task_dyld_info_t)task_info_out;
3840                 info->all_image_info_addr = task->all_image_info_addr;
3841                 info->all_image_info_size = task->all_image_info_size;
3842
3843                 /* only set format on output for those expecting it */
3844                 if (*task_info_count >= TASK_DYLD_INFO_COUNT) {
3845                         info->all_image_info_format = task_has_64BitAddr(task) ?
3846                                                  TASK_DYLD_ALL_IMAGE_INFO_64 :
3847                                                  TASK_DYLD_ALL_IMAGE_INFO_32 ;
3848                         *task_info_count = TASK_DYLD_INFO_COUNT;
3849                 } else {
3850                         *task_info_count = TASK_LEGACY_DYLD_INFO_COUNT;
3851                 }
3852                 break;
3853         }
3854
3855         case TASK_EXTMOD_INFO:
3856         {
3857                 task_extmod_info_t info;
3858                 void *p;
3859
3860                 if (*task_info_count < TASK_EXTMOD_INFO_COUNT) {
3861                         error = KERN_INVALID_ARGUMENT;
3862                         break;
3863                 }
3864
3865                 info = (task_extmod_info_t)task_info_out;
3866
3867                 p = get_bsdtask_info(task);
3868                 if (p) {
3869                         proc_getexecutableuuid(p, info->task_uuid, sizeof(info->task_uuid));
3870                 } else {
3871                         bzero(info->task_uuid, sizeof(info->task_uuid));
3872                 }
3873                 info->extmod_statistics = task->extmod_statistics;
3874                 *task_info_count = TASK_EXTMOD_INFO_COUNT;
3875
3876                 break;
3877         }
3878
3879         case TASK_KERNELMEMORY_INFO:
3880         {
3881                 task_kernelmemory_info_t        tkm_info;
3882                 ledger_amount_t                 credit, debit;
3883
3884                 if (*task_info_count < TASK_KERNELMEMORY_INFO_COUNT) {
3885                    error = KERN_INVALID_ARGUMENT;
3886                    break;
3887                 }
3888
3889                 tkm_info = (task_kernelmemory_info_t) task_info_out;
3890                 tkm_info->total_palloc = 0;
3891                 tkm_info->total_pfree = 0;
3892                 tkm_info->total_salloc = 0;
3893                 tkm_info->total_sfree = 0;
3894
3895                 if (task == kernel_task) {
3896                         /*
3897                          * All shared allocs/frees from other tasks count against
3898                          * the kernel private memory usage.  If we are looking up
3899                          * info for the kernel task, gather from everywhere.
3900                          */
3901                         task_unlock(task);
3902
3903                         /* start by accounting for all the terminated tasks against the kernel */
3904                         tkm_info->total_palloc = tasks_tkm_private.alloc + tasks_tkm_shared.alloc;
3905                         tkm_info->total_pfree = tasks_tkm_private.free + tasks_tkm_shared.free;
3906
3907                         /* count all other task/thread shared alloc/free against the kernel */
3908                         lck_mtx_lock(&tasks_threads_lock);
3909
3910                         /* XXX this really shouldn't be using the function parameter 'task' as a local var! */
3911                         queue_iterate(&tasks, task, task_t, tasks) {
3912                                 if (task == kernel_task) {
3913                                         if (ledger_get_entries(task->ledger,
3914                                             task_ledgers.tkm_private, &credit,
3915                                             &debit) == KERN_SUCCESS) {
3916                                                 tkm_info->total_palloc += credit;
3917                                                 tkm_info->total_pfree += debit;
3918                                         }
3919                                 }
3920                                 if (!ledger_get_entries(task->ledger,
3921                                     task_ledgers.tkm_shared, &credit, &debit)) {
3922                                         tkm_info->total_palloc += credit;
3923                                         tkm_info->total_pfree += debit;
3924                                 }
3925                         }
3926                         lck_mtx_unlock(&tasks_threads_lock);
3927                 } else {
3928                         if (!ledger_get_entries(task->ledger,
3929                             task_ledgers.tkm_private, &credit, &debit)) {
3930                                 tkm_info->total_palloc = credit;
3931                                 tkm_info->total_pfree = debit;
3932                         }
3933                         if (!ledger_get_entries(task->ledger,
3934                             task_ledgers.tkm_shared, &credit, &debit)) {
3935                                 tkm_info->total_salloc = credit;
3936                                 tkm_info->total_sfree = debit;
3937                         }
3938                         task_unlock(task);
3939                 }
3940
3941                 *task_info_count = TASK_KERNELMEMORY_INFO_COUNT;
3942                 return KERN_SUCCESS;
3943         }
3944
3945         /* OBSOLETE */
3946         case TASK_SCHED_FIFO_INFO:
3947         {
3948
3949                 if (*task_info_count < POLICY_FIFO_BASE_COUNT) {
3950                         error = KERN_INVALID_ARGUMENT;
3951                         break;
3952                 }
3953
3954                 error = KERN_INVALID_POLICY;
3955                 break;
3956         }
3957
3958         /* OBSOLETE */
3959         case TASK_SCHED_RR_INFO:
3960         {
3961                 policy_rr_base_t        rr_base;
3962                 uint32_t quantum_time;
3963                 uint64_t quantum_ns;
3964
3965                 if (*task_info_count < POLICY_RR_BASE_COUNT) {
3966                         error = KERN_INVALID_ARGUMENT;
3967                         break;
3968                 }
3969
3970                 rr_base = (policy_rr_base_t) task_info_out;
3971
3972                 if (task != kernel_task) {
3973                         error = KERN_INVALID_POLICY;
3974                         break;
3975                 }
3976
3977                 rr_base->base_priority = task->priority;
3978
3979                 quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
3980                 absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
3981
3982                 rr_base->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
3983
3984                 *task_info_count = POLICY_RR_BASE_COUNT;
3985                 break;
3986         }
3987
3988         /* OBSOLETE */
3989         case TASK_SCHED_TIMESHARE_INFO:
3990         {
3991                 policy_timeshare_base_t ts_base;
3992
3993                 if (*task_info_count < POLICY_TIMESHARE_BASE_COUNT) {
3994                         error = KERN_INVALID_ARGUMENT;
3995                         break;
3996                 }
3997
3998                 ts_base = (policy_timeshare_base_t) task_info_out;
3999
4000                 if (task == kernel_task) {
4001                         error = KERN_INVALID_POLICY;
4002                         break;
4003                 }
4004
4005                 ts_base->base_priority = task->priority;
4006
4007                 *task_info_count = POLICY_TIMESHARE_BASE_COUNT;
4008                 break;
4009         }
4010
4011         case TASK_SECURITY_TOKEN:
4012         {
4013                 security_token_t        *sec_token_p;
4014
4015                 if (*task_info_count < TASK_SECURITY_TOKEN_COUNT) {
4016                     error = KERN_INVALID_ARGUMENT;
4017                     break;
4018                 }
4019
4020                 sec_token_p = (security_token_t *) task_info_out;
4021
4022                 *sec_token_p = task->sec_token;
4023
4024                 *task_info_count = TASK_SECURITY_TOKEN_COUNT;
4025                 break;
4026         }
4027
4028         case TASK_AUDIT_TOKEN:
4029         {
4030                 audit_token_t   *audit_token_p;
4031
4032                 if (*task_info_count < TASK_AUDIT_TOKEN_COUNT) {
4033                     error = KERN_INVALID_ARGUMENT;
4034                     break;
4035                 }
4036
4037                 audit_token_p = (audit_token_t *) task_info_out;
4038
4039                 *audit_token_p = task->audit_token;
4040
4041                 *task_info_count = TASK_AUDIT_TOKEN_COUNT;
4042                 break;
4043         }
4044
4045         case TASK_SCHED_INFO:
4046                 error = KERN_INVALID_ARGUMENT;
4047                 break;
4048
4049         case TASK_EVENTS_INFO:
4050         {
4051                 task_events_info_t      events_info;
4052                 thread_t                        thread;
4053
4054                 if (*task_info_count < TASK_EVENTS_INFO_COUNT) {
4055                    error = KERN_INVALID_ARGUMENT;
4056                    break;
4057                 }
4058
4059                 events_info = (task_events_info_t) task_info_out;
4060
4061
4062                 events_info->faults = task->faults;
4063                 events_info->pageins = task->pageins;
4064                 events_info->cow_faults = task->cow_faults;
4065                 events_info->messages_sent = task->messages_sent;
4066                 events_info->messages_received = task->messages_received;
4067                 events_info->syscalls_mach = task->syscalls_mach;
4068                 events_info->syscalls_unix = task->syscalls_unix;
4069
4070                 events_info->csw = task->c_switch;
4071
4072                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4073                         events_info->csw           += thread->c_switch;
4074                         events_info->syscalls_mach += thread->syscalls_mach;
4075                         events_info->syscalls_unix += thread->syscalls_unix;
4076                 }
4077
4078
4079                 *task_info_count = TASK_EVENTS_INFO_COUNT;
4080                 break;
4081         }
4082         case TASK_AFFINITY_TAG_INFO:
4083         {
4084                 if (*task_info_count < TASK_AFFINITY_TAG_INFO_COUNT) {
4085                     error = KERN_INVALID_ARGUMENT;
4086                     break;
4087                 }
4088
4089                 error = task_affinity_info(task, task_info_out, task_info_count);
4090                 break;
4091         }
4092         case TASK_POWER_INFO:
4093         {
4094                 if (*task_info_count < TASK_POWER_INFO_COUNT) {
4095                         error = KERN_INVALID_ARGUMENT;
4096                         break;
4097                 }
4098
4099                 task_power_info_locked(task, (task_power_info_t)task_info_out, NULL, NULL);
4100                 break;
4101         }
4102
4103         case TASK_POWER_INFO_V2:
4104         {
4105                 if (*task_info_count < TASK_POWER_INFO_V2_COUNT_OLD) {
4106                         error = KERN_INVALID_ARGUMENT;
4107                         break;
4108                 }
4109                 task_power_info_v2_t tpiv2 = (task_power_info_v2_t) task_info_out;
4110                 task_power_info_locked(task, &tpiv2->cpu_energy, &tpiv2->gpu_energy, tpiv2);
4111                 break;
4112         }
4113
4114         case TASK_VM_INFO:
4115         case TASK_VM_INFO_PURGEABLE:
4116         {
4117                 task_vm_info_t          vm_info;
4118                 vm_map_t                map;
4119
4120                 if (*task_info_count < TASK_VM_INFO_REV0_COUNT) {
4121                     error = KERN_INVALID_ARGUMENT;
4122                     break;
4123                 }
4124
4125                 vm_info = (task_vm_info_t)task_info_out;
4126
4127                 if (task == kernel_task) {
4128                         map = kernel_map;
4129                         /* no lock */
4130                 } else {
4131                         map = task->map;
4132                         vm_map_lock_read(map);
4133                 }
4134
4135                 vm_info->virtual_size = (typeof(vm_info->virtual_size))map->size;
4136                 vm_info->region_count = map->hdr.nentries;
4137                 vm_info->page_size = vm_map_page_size(map);
4138
4139                 vm_info->resident_size = pmap_resident_count(map->pmap);
4140                 vm_info->resident_size *= PAGE_SIZE;
4141                 vm_info->resident_size_peak = pmap_resident_max(map->pmap);
4142                 vm_info->resident_size_peak *= PAGE_SIZE;
4143
4144 #define _VM_INFO(_name) \
4145         vm_info->_name = ((mach_vm_size_t) map->pmap->stats._name) * PAGE_SIZE
4146
4147                 _VM_INFO(device);
4148                 _VM_INFO(device_peak);
4149                 _VM_INFO(external);
4150                 _VM_INFO(external_peak);
4151                 _VM_INFO(internal);
4152                 _VM_INFO(internal_peak);
4153                 _VM_INFO(reusable);
4154                 _VM_INFO(reusable_peak);
4155                 _VM_INFO(compressed);
4156                 _VM_INFO(compressed_peak);
4157                 _VM_INFO(compressed_lifetime);
4158
4159                 vm_info->purgeable_volatile_pmap = 0;
4160                 vm_info->purgeable_volatile_resident = 0;
4161                 vm_info->purgeable_volatile_virtual = 0;
4162                 if (task == kernel_task) {
4163                         /*
4164                          * We do not maintain the detailed stats for the
4165                          * kernel_pmap, so just count everything as
4166                          * "internal"...
4167                          */
4168                         vm_info->internal = vm_info->resident_size;
4169                         /*
4170                          * ... but since the memory held by the VM compressor
4171                          * in the kernel address space ought to be attributed
4172                          * to user-space tasks, we subtract it from "internal"
4173                          * to give memory reporting tools a more accurate idea
4174                          * of what the kernel itself is actually using, instead
4175                          * of making it look like the kernel is leaking memory
4176                          * when the system is under memory pressure.
4177                          */
4178                         vm_info->internal -= (VM_PAGE_COMPRESSOR_COUNT *
4179                                               PAGE_SIZE);
4180                 } else {
4181                         mach_vm_size_t  volatile_virtual_size;
4182                         mach_vm_size_t  volatile_resident_size;
4183                         mach_vm_size_t  volatile_compressed_size;
4184                         mach_vm_size_t  volatile_pmap_size;
4185                         mach_vm_size_t  volatile_compressed_pmap_size;
4186                         kern_return_t   kr;
4187
4188                         if (flavor == TASK_VM_INFO_PURGEABLE) {
4189                                 kr = vm_map_query_volatile(
4190                                         map,
4191                                         &volatile_virtual_size,
4192                                         &volatile_resident_size,
4193                                         &volatile_compressed_size,
4194                                         &volatile_pmap_size,
4195                                         &volatile_compressed_pmap_size);
4196                                 if (kr == KERN_SUCCESS) {
4197                                         vm_info->purgeable_volatile_pmap =
4198                                                 volatile_pmap_size;
4199                                         if (radar_20146450) {
4200                                         vm_info->compressed -=
4201                                                 volatile_compressed_pmap_size;
4202                                         }
4203                                         vm_info->purgeable_volatile_resident =
4204                                                 volatile_resident_size;
4205                                         vm_info->purgeable_volatile_virtual =
4206                                                 volatile_virtual_size;
4207                                 }
4208                         }
4209                 }
4210                 *task_info_count = TASK_VM_INFO_REV0_COUNT;
4211
4212                 if (original_task_info_count >= TASK_VM_INFO_REV1_COUNT) {
4213                         vm_info->phys_footprint =
4214                                 (mach_vm_size_t) get_task_phys_footprint(task);
4215                         *task_info_count = TASK_VM_INFO_REV1_COUNT;
4216                 }
4217                 if (original_task_info_count >= TASK_VM_INFO_REV2_COUNT) {
4218                         vm_info->min_address = map->min_offset;
4219                         vm_info->max_address = map->max_offset;
4220                         *task_info_count = TASK_VM_INFO_REV2_COUNT;
4221                 }
4222
4223                 if (task != kernel_task) {
4224                         vm_map_unlock_read(map);
4225                 }
4226
4227                 break;
4228         }
4229
4230         case TASK_WAIT_STATE_INFO:
4231         {
4232                 /*
4233                  * Deprecated flavor. Currently allowing some results until all users
4234                  * stop calling it. The results may not be accurate.
4235          */
4236                 task_wait_state_info_t  wait_state_info;
4237                 uint64_t total_sfi_ledger_val = 0;
4238
4239                 if (*task_info_count < TASK_WAIT_STATE_INFO_COUNT) {
4240                    error = KERN_INVALID_ARGUMENT;
4241                    break;
4242                 }
4243
4244                 wait_state_info = (task_wait_state_info_t) task_info_out;
4245
4246                 wait_state_info->total_wait_state_time = 0;
4247                 bzero(wait_state_info->_reserved, sizeof(wait_state_info->_reserved));
4248
4249 #if CONFIG_SCHED_SFI
4250                 int i, prev_lentry = -1;
4251                 int64_t  val_credit, val_debit;
4252
4253                 for (i = 0; i < MAX_SFI_CLASS_ID; i++){
4254                         val_credit =0;
4255                         /*
4256                          * checking with prev_lentry != entry ensures adjacent classes
4257                          * which share the same ledger do not add wait times twice.
4258                          * Note: Use ledger() call to get data for each individual sfi class.
4259                          */
4260                         if (prev_lentry != task_ledgers.sfi_wait_times[i] &&
4261                                 KERN_SUCCESS == ledger_get_entries(task->ledger,
4262                                                 task_ledgers.sfi_wait_times[i], &val_credit, &val_debit)) {
4263                                 total_sfi_ledger_val += val_credit;
4264                         }
4265                         prev_lentry = task_ledgers.sfi_wait_times[i];
4266                 }
4267
4268 #endif /* CONFIG_SCHED_SFI */
4269                 wait_state_info->total_wait_sfi_state_time = total_sfi_ledger_val;
4270                 *task_info_count = TASK_WAIT_STATE_INFO_COUNT;
4271
4272                 break;
4273         }
4274         case TASK_VM_INFO_PURGEABLE_ACCOUNT:
4275         {
4276 #if DEVELOPMENT || DEBUG
4277                 pvm_account_info_t      acnt_info;
4278
4279                 if (*task_info_count < PVM_ACCOUNT_INFO_COUNT) {
4280                         error = KERN_INVALID_ARGUMENT;
4281                         break;
4282                 }
4283
4284                 if (task_info_out == NULL) {
4285                         error = KERN_INVALID_ARGUMENT;
4286                         break;
4287                 }
4288
4289                 acnt_info = (pvm_account_info_t) task_info_out;
4290
4291                 error = vm_purgeable_account(task, acnt_info);
4292
4293                 *task_info_count = PVM_ACCOUNT_INFO_COUNT;
4294
4295                 break;
4296 #else /* DEVELOPMENT || DEBUG */
4297                 error = KERN_NOT_SUPPORTED;
4298                 break;
4299 #endif /* DEVELOPMENT || DEBUG */
4300         }
4301         case TASK_FLAGS_INFO:
4302         {
4303                 task_flags_info_t               flags_info;
4304
4305                 if (*task_info_count < TASK_FLAGS_INFO_COUNT) {
4306                     error = KERN_INVALID_ARGUMENT;
4307                     break;
4308                 }
4309
4310                 flags_info = (task_flags_info_t)task_info_out;
4311
4312                 /* only publish the 64-bit flag of the task */
4313                 flags_info->flags = task->t_flags & TF_64B_ADDR;
4314
4315                 *task_info_count = TASK_FLAGS_INFO_COUNT;
4316                 break;
4317         }
4318
4319         case TASK_DEBUG_INFO_INTERNAL:
4320         {
4321 #if DEVELOPMENT || DEBUG
4322                 task_debug_info_internal_t dbg_info;
4323                 if (*task_info_count < TASK_DEBUG_INFO_INTERNAL_COUNT) {
4324                         error = KERN_NOT_SUPPORTED;
4325                         break;
4326                 }
4327
4328                 if (task_info_out == NULL) {
4329                         error = KERN_INVALID_ARGUMENT;
4330                         break;
4331                 }
4332                 dbg_info = (task_debug_info_internal_t) task_info_out;
4333                 dbg_info->ipc_space_size = 0;
4334                 if (task->itk_space){
4335                         dbg_info->ipc_space_size = task->itk_space->is_table_size;
4336                 }
4337
4338                 error = KERN_SUCCESS;
4339                 *task_info_count = TASK_DEBUG_INFO_INTERNAL_COUNT;
4340                 break;
4341 #else /* DEVELOPMENT || DEBUG */
4342                 error = KERN_NOT_SUPPORTED;
4343                 break;
4344 #endif /* DEVELOPMENT || DEBUG */
4345         }
4346         default:
4347                 error = KERN_INVALID_ARGUMENT;
4348         }
4349
4350         task_unlock(task);
4351         return (error);
4352 }
4353
4354 /*
4355  * task_info_from_user
4356  *
4357  * When calling task_info from user space,
4358  * this function will be executed as mig server side
4359  * instead of calling directly into task_info.
4360  * This gives the possibility to perform more security
4361  * checks on task_port.
4362  *
4363  * In the case of TASK_DYLD_INFO, we require the more
4364  * privileged task_port not the less-privileged task_name_port.
4365  *
4366  */
4367 kern_return_t
4368 task_info_from_user(
4369         mach_port_t             task_port,
4370         task_flavor_t           flavor,
4371         task_info_t             task_info_out,
4372         mach_msg_type_number_t  *task_info_count)
4373 {
4374         task_t task;
4375         kern_return_t ret;
4376
4377         if (flavor == TASK_DYLD_INFO)
4378                 task = convert_port_to_task(task_port);
4379         else
4380                 task = convert_port_to_task_name(task_port);
4381
4382         ret = task_info(task, flavor, task_info_out, task_info_count);
4383
4384         task_deallocate(task);
4385
4386         return ret;
4387 }
4388
4389 /*
4390  *      task_power_info
4391  *
4392  *      Returns power stats for the task.
4393  *      Note: Called with task locked.
4394  */
4395 void
4396 task_power_info_locked(
4397         task_t                  task,
4398         task_power_info_t       info,
4399         gpu_energy_data_t       ginfo,
4400         task_power_info_v2_t    infov2)
4401 {
4402         thread_t                thread;
4403         ledger_amount_t         tmp;
4404
4405         task_lock_assert_owned(task);
4406
4407         ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
4408                 (ledger_amount_t *)&info->task_interrupt_wakeups, &tmp);
4409         ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
4410                 (ledger_amount_t *)&info->task_platform_idle_wakeups, &tmp);
4411
4412         info->task_timer_wakeups_bin_1 = task->task_timer_wakeups_bin_1;
4413         info->task_timer_wakeups_bin_2 = task->task_timer_wakeups_bin_2;
4414
4415         info->total_user = task->total_user_time;
4416         info->total_system = task->total_system_time;
4417
4418 #if CONFIG_EMBEDDED
4419         if (infov2) {
4420                 infov2->task_energy = task->task_energy;
4421         }
4422 #endif
4423
4424         if (ginfo) {
4425                 ginfo->task_gpu_utilisation = task->task_gpu_ns;
4426         }
4427
4428         if (infov2) {
4429                 infov2->task_ptime = task->total_ptime;
4430                 infov2->task_pset_switches = task->ps_switch;
4431         }
4432
4433         queue_iterate(&task->threads, thread, thread_t, task_threads) {
4434                 uint64_t        tval;
4435                 spl_t           x;
4436
4437                 if (thread->options & TH_OPT_IDLE_THREAD)
4438                         continue;
4439
4440                 x = splsched();
4441                 thread_lock(thread);
4442
4443                 info->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
4444                 info->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
4445
4446 #if CONFIG_EMBEDDED
4447                 if (infov2) {
4448                         infov2->task_energy += ml_energy_stat(thread);
4449                 }
4450 #endif
4451
4452                 tval = timer_grab(&thread->user_timer);
4453                 info->total_user += tval;
4454
4455                 if (infov2) {
4456                         tval = timer_grab(&thread->ptime);
4457                         infov2->task_ptime += tval;
4458                         infov2->task_pset_switches += thread->ps_switch;
4459                 }
4460
4461                 tval = timer_grab(&thread->system_timer);
4462                 if (thread->precise_user_kernel_time) {
4463                         info->total_system += tval;
4464                 } else {
4465                         /* system_timer may represent either sys or user */
4466                         info->total_user += tval;
4467                 }
4468
4469                 if (ginfo) {
4470                         ginfo->task_gpu_utilisation += ml_gpu_stat(thread);
4471                 }
4472                 thread_unlock(thread);
4473                 splx(x);
4474         }
4475 }
4476
4477 /*
4478  *      task_gpu_utilisation
4479  *
4480  *      Returns the total gpu time used by the all the threads of the task
4481  *  (both dead and alive)
4482  */
4483 uint64_t
4484 task_gpu_utilisation(
4485         task_t  task)
4486 {
4487         uint64_t gpu_time = 0;
4488 #if !CONFIG_EMBEDDED
4489         thread_t thread;
4490
4491         task_lock(task);
4492         gpu_time += task->task_gpu_ns;
4493
4494         queue_iterate(&task->threads, thread, thread_t, task_threads) {
4495                 spl_t x;
4496                 x = splsched();
4497                 thread_lock(thread);
4498                 gpu_time += ml_gpu_stat(thread);
4499                 thread_unlock(thread);
4500                 splx(x);
4501         }
4502
4503         task_unlock(task);
4504 #else /* CONFIG_EMBEDDED */
4505         /* silence compiler warning */
4506         (void)task;
4507 #endif /* !CONFIG_EMBEDDED */
4508         return gpu_time;
4509 }
4510
4511 /*
4512  *      task_energy
4513  *
4514  *      Returns the total energy used by the all the threads of the task
4515  *  (both dead and alive)
4516  */
4517 uint64_t
4518 task_energy(
4519         task_t  task)
4520 {
4521         uint64_t energy = 0;
4522         thread_t thread;
4523
4524         task_lock(task);
4525         energy += task->task_energy;
4526
4527         queue_iterate(&task->threads, thread, thread_t, task_threads) {
4528                 spl_t x;
4529                 x = splsched();
4530                 thread_lock(thread);
4531                 energy += ml_energy_stat(thread);
4532                 thread_unlock(thread);
4533                 splx(x);
4534         }
4535
4536         task_unlock(task);
4537         return energy;
4538 }
4539
4540
4541 uint64_t
4542 task_cpu_ptime(
4543         __unused task_t  task)
4544 {
4545     return 0;
4546 }
4547
4548
4549 kern_return_t
4550 task_purgable_info(
4551         task_t                  task,
4552         task_purgable_info_t    *stats)
4553 {
4554         if (task == TASK_NULL || stats == NULL)
4555                 return KERN_INVALID_ARGUMENT;
4556         /* Take task reference */
4557         task_reference(task);
4558         vm_purgeable_stats((vm_purgeable_info_t)stats, task);
4559         /* Drop task reference */
4560         task_deallocate(task);
4561         return KERN_SUCCESS;
4562 }
4563
4564 void
4565 task_vtimer_set(
4566         task_t          task,
4567         integer_t       which)
4568 {
4569         thread_t        thread;
4570         spl_t           x;
4571
4572         task_lock(task);
4573
4574         task->vtimers |= which;
4575
4576         switch (which) {
4577
4578         case TASK_VTIMER_USER:
4579                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4580                         x = splsched();
4581                         thread_lock(thread);
4582                         if (thread->precise_user_kernel_time)
4583                                 thread->vtimer_user_save = timer_grab(&thread->user_timer);
4584                         else
4585                                 thread->vtimer_user_save = timer_grab(&thread->system_timer);
4586                         thread_unlock(thread);
4587                         splx(x);
4588                 }
4589                 break;
4590
4591         case TASK_VTIMER_PROF:
4592                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4593                         x = splsched();
4594                         thread_lock(thread);
4595                         thread->vtimer_prof_save = timer_grab(&thread->user_timer);
4596                         thread->vtimer_prof_save += timer_grab(&thread->system_timer);
4597                         thread_unlock(thread);
4598                         splx(x);
4599                 }
4600                 break;
4601
4602         case TASK_VTIMER_RLIM:
4603                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4604                         x = splsched();
4605                         thread_lock(thread);
4606                         thread->vtimer_rlim_save = timer_grab(&thread->user_timer);
4607                         thread->vtimer_rlim_save += timer_grab(&thread->system_timer);
4608                         thread_unlock(thread);
4609                         splx(x);
4610                 }
4611                 break;
4612         }
4613
4614         task_unlock(task);
4615 }
4616
4617 void
4618 task_vtimer_clear(
4619         task_t          task,
4620         integer_t       which)
4621 {
4622         assert(task == current_task());
4623
4624         task_lock(task);
4625
4626         task->vtimers &= ~which;
4627
4628         task_unlock(task);
4629 }
4630
4631 void
4632 task_vtimer_update(
4633 __unused
4634         task_t          task,
4635         integer_t       which,
4636         uint32_t        *microsecs)
4637 {
4638         thread_t        thread = current_thread();
4639         uint32_t        tdelt = 0;
4640         clock_sec_t     secs = 0;
4641         uint64_t        tsum;
4642
4643         assert(task == current_task());
4644
4645         spl_t s = splsched();
4646         thread_lock(thread);
4647
4648         if ((task->vtimers & which) != (uint32_t)which) {
4649                 thread_unlock(thread);
4650                 splx(s);
4651                 return;
4652         }
4653
4654         switch (which) {
4655
4656         case TASK_VTIMER_USER:
4657                 if (thread->precise_user_kernel_time) {
4658                         tdelt = (uint32_t)timer_delta(&thread->user_timer,
4659                                                                 &thread->vtimer_user_save);
4660                 } else {
4661                         tdelt = (uint32_t)timer_delta(&thread->system_timer,
4662                                                                 &thread->vtimer_user_save);
4663                 }
4664                 absolutetime_to_microtime(tdelt, &secs, microsecs);
4665                 break;
4666
4667         case TASK_VTIMER_PROF:
4668                 tsum = timer_grab(&thread->user_timer);
4669                 tsum += timer_grab(&thread->system_timer);
4670                 tdelt = (uint32_t)(tsum - thread->vtimer_prof_save);
4671                 absolutetime_to_microtime(tdelt, &secs, microsecs);
4672                 /* if the time delta is smaller than a usec, ignore */
4673                 if (*microsecs != 0)
4674                         thread->vtimer_prof_save = tsum;
4675                 break;
4676
4677         case TASK_VTIMER_RLIM:
4678                 tsum = timer_grab(&thread->user_timer);
4679                 tsum += timer_grab(&thread->system_timer);
4680                 tdelt = (uint32_t)(tsum - thread->vtimer_rlim_save);
4681                 thread->vtimer_rlim_save = tsum;
4682                 absolutetime_to_microtime(tdelt, &secs, microsecs);
4683                 break;
4684         }
4685
4686         thread_unlock(thread);
4687         splx(s);
4688 }
4689
4690 /*
4691  *      task_assign:
4692  *
4693  *      Change the assigned processor set for the task
4694  */
4695 kern_return_t
4696 task_assign(
4697         __unused task_t         task,
4698         __unused processor_set_t        new_pset,
4699         __unused boolean_t      assign_threads)
4700 {
4701         return(KERN_FAILURE);
4702 }
4703
4704 /*
4705  *      task_assign_default:
4706  *
4707  *      Version of task_assign to assign to default processor set.
4708  */
4709 kern_return_t
4710 task_assign_default(
4711         task_t          task,
4712         boolean_t       assign_threads)
4713 {
4714     return (task_assign(task, &pset0, assign_threads));
4715 }
4716
4717 /*
4718  *      task_get_assignment
4719  *
4720  *      Return name of processor set that task is assigned to.
4721  */
4722 kern_return_t
4723 task_get_assignment(
4724         task_t          task,
4725         processor_set_t *pset)
4726 {
4727         if (!task || !task->active)
4728                 return KERN_FAILURE;
4729
4730         *pset = &pset0;
4731
4732         return KERN_SUCCESS;
4733 }
4734
4735 uint64_t
4736 get_task_dispatchqueue_offset(
4737                 task_t          task)
4738 {
4739         return task->dispatchqueue_offset;
4740 }
4741
4742 /*
4743  *      task_policy
4744  *
4745  *      Set scheduling policy and parameters, both base and limit, for
4746  *      the given task. Policy must be a policy which is enabled for the
4747  *      processor set. Change contained threads if requested.
4748  */
4749 kern_return_t
4750 task_policy(
4751         __unused task_t                 task,
4752         __unused policy_t                       policy_id,
4753         __unused policy_base_t          base,
4754         __unused mach_msg_type_number_t count,
4755         __unused boolean_t                      set_limit,
4756         __unused boolean_t                      change)
4757 {
4758         return(KERN_FAILURE);
4759 }
4760
4761 /*
4762  *      task_set_policy
4763  *
4764  *      Set scheduling policy and parameters, both base and limit, for
4765  *      the given task. Policy can be any policy implemented by the
4766  *      processor set, whether enabled or not. Change contained threads
4767  *      if requested.
4768  */
4769 kern_return_t
4770 task_set_policy(
4771         __unused task_t                 task,
4772         __unused processor_set_t                pset,
4773         __unused policy_t                       policy_id,
4774         __unused policy_base_t          base,
4775         __unused mach_msg_type_number_t base_count,
4776         __unused policy_limit_t         limit,
4777         __unused mach_msg_type_number_t limit_count,
4778         __unused boolean_t                      change)
4779 {
4780         return(KERN_FAILURE);
4781 }
4782
4783 kern_return_t
4784 task_set_ras_pc(
4785         __unused task_t task,
4786         __unused vm_offset_t    pc,
4787         __unused vm_offset_t    endpc)
4788 {
4789         return KERN_FAILURE;
4790 }
4791
4792 void
4793 task_synchronizer_destroy_all(task_t task)
4794 {
4795         /*
4796          *  Destroy owned semaphores
4797          */
4798         semaphore_destroy_all(task);
4799 }
4800
4801 /*
4802  * Install default (machine-dependent) initial thread state
4803  * on the task.  Subsequent thread creation will have this initial
4804  * state set on the thread by machine_thread_inherit_taskwide().
4805  * Flavors and structures are exactly the same as those to thread_set_state()
4806  */
4807 kern_return_t
4808 task_set_state(
4809         task_t task,
4810         int flavor,
4811         thread_state_t state,
4812         mach_msg_type_number_t state_count)
4813 {
4814         kern_return_t ret;
4815
4816         if (task == TASK_NULL) {
4817                 return (KERN_INVALID_ARGUMENT);
4818         }
4819
4820         task_lock(task);
4821
4822         if (!task->active) {
4823                 task_unlock(task);
4824                 return (KERN_FAILURE);
4825         }
4826
4827         ret = machine_task_set_state(task, flavor, state, state_count);
4828
4829         task_unlock(task);
4830         return ret;
4831 }
4832
4833 /*
4834  * Examine the default (machine-dependent) initial thread state
4835  * on the task, as set by task_set_state().  Flavors and structures
4836  * are exactly the same as those passed to thread_get_state().
4837  */
4838 kern_return_t
4839 task_get_state(
4840         task_t  task,
4841         int     flavor,
4842         thread_state_t state,
4843         mach_msg_type_number_t *state_count)
4844 {
4845         kern_return_t ret;
4846
4847         if (task == TASK_NULL) {
4848                 return (KERN_INVALID_ARGUMENT);
4849         }
4850
4851         task_lock(task);
4852
4853         if (!task->active) {
4854                 task_unlock(task);
4855                 return (KERN_FAILURE);
4856         }
4857
4858         ret = machine_task_get_state(task, flavor, state, state_count);
4859
4860         task_unlock(task);
4861         return ret;
4862 }
4863
4864
4865 static kern_return_t __attribute__((noinline,not_tail_called))
4866 PROC_VIOLATED_GUARD__SEND_EXC_GUARD_AND_SUSPEND(
4867         mach_exception_code_t code,
4868         mach_exception_subcode_t subcode,
4869         void *reason)
4870 {
4871 #ifdef MACH_BSD
4872         if (1 == proc_selfpid())
4873                 return KERN_NOT_SUPPORTED;              // initproc is immune
4874 #endif
4875         mach_exception_data_type_t codes[EXCEPTION_CODE_MAX] = {
4876                 [0] = code,
4877                 [1] = subcode,
4878         };
4879         task_t task = current_task();
4880         kern_return_t kr;
4881
4882         /* (See jetsam-related comments below) */
4883
4884         proc_memstat_terminated(task->bsd_info, TRUE);
4885         kr = task_enqueue_exception_with_corpse(task, EXC_GUARD, codes, 2, reason);
4886         proc_memstat_terminated(task->bsd_info, FALSE);
4887         return kr;
4888 }
4889
4890 extern kern_return_t
4891 task_violated_guard(mach_exception_code_t, mach_exception_subcode_t, void *);
4892
4893 kern_return_t
4894 task_violated_guard(
4895         mach_exception_code_t code,
4896         mach_exception_subcode_t subcode,
4897         void *reason)
4898 {
4899         return PROC_VIOLATED_GUARD__SEND_EXC_GUARD_AND_SUSPEND(code, subcode, reason);
4900 }
4901
4902
4903 #if CONFIG_MEMORYSTATUS
4904
4905 boolean_t
4906 task_get_memlimit_is_active(task_t task)
4907 {
4908         assert (task != NULL);
4909
4910         if (task->memlimit_is_active == 1) {
4911                 return(TRUE);
4912         } else {
4913                 return (FALSE);
4914         }
4915 }
4916
4917 void
4918 task_set_memlimit_is_active(task_t task, boolean_t memlimit_is_active)
4919 {
4920         assert (task != NULL);
4921
4922         if (memlimit_is_active) {
4923                 task->memlimit_is_active = 1;
4924         } else {
4925                 task->memlimit_is_active = 0;
4926         }
4927 }
4928
4929 boolean_t
4930 task_get_memlimit_is_fatal(task_t task)
4931 {
4932         assert(task != NULL);
4933
4934         if (task->memlimit_is_fatal == 1) {
4935                 return(TRUE);
4936         } else {
4937                 return(FALSE);
4938         }
4939 }
4940
4941 void
4942 task_set_memlimit_is_fatal(task_t task, boolean_t memlimit_is_fatal)
4943 {
4944         assert (task != NULL);
4945
4946         if (memlimit_is_fatal) {
4947                 task->memlimit_is_fatal = 1;
4948         } else {
4949                 task->memlimit_is_fatal = 0;
4950         }
4951 }
4952
4953 boolean_t
4954 task_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
4955 {
4956         boolean_t triggered = FALSE;
4957
4958         assert(task == current_task());
4959
4960         /*
4961          * Returns true, if task has already triggered an exc_resource exception.
4962          */
4963
4964         if (memlimit_is_active) {
4965                 triggered = (task->memlimit_active_exc_resource ? TRUE : FALSE);
4966         } else {
4967                 triggered = (task->memlimit_inactive_exc_resource ? TRUE : FALSE);
4968         }
4969
4970         return(triggered);
4971 }
4972
4973 void
4974 task_mark_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
4975 {
4976         assert(task == current_task());
4977
4978         /*
4979          * We allow one exc_resource per process per active/inactive limit.
4980          * The limit's fatal attribute does not come into play.
4981          */
4982
4983         if (memlimit_is_active) {
4984                 task->memlimit_active_exc_resource = 1;
4985         } else {
4986                 task->memlimit_inactive_exc_resource = 1;
4987         }
4988 }
4989
4990 #define HWM_USERCORE_MINSPACE 250 // free space (in MB) required *after* core file creation
4991
4992 void __attribute__((noinline))
4993 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, boolean_t is_fatal)
4994 {
4995         task_t                                          task            = current_task();
4996         int                                                     pid         = 0;
4997         const char                                      *procname       = "unknown";
4998         mach_exception_data_type_t      code[EXCEPTION_CODE_MAX];
4999
5000 #ifdef MACH_BSD
5001         pid = proc_selfpid();
5002
5003         if (pid == 1) {
5004                 /*
5005                  * Cannot have ReportCrash analyzing
5006                  * a suspended initproc.
5007                  */
5008                 return;
5009         }
5010
5011         if (task->bsd_info != NULL)
5012                 procname = proc_name_address(current_task()->bsd_info);
5013 #endif
5014 #if CONFIG_COREDUMP
5015         if (hwm_user_cores) {
5016                 int                             error;
5017                 uint64_t                starttime, end;
5018                 clock_sec_t             secs = 0;
5019                 uint32_t                microsecs = 0;
5020
5021                 starttime = mach_absolute_time();
5022                 /*
5023                  * Trigger a coredump of this process. Don't proceed unless we know we won't
5024                  * be filling up the disk; and ignore the core size resource limit for this
5025                  * core file.
5026                  */
5027                 if ((error = coredump(current_task()->bsd_info, HWM_USERCORE_MINSPACE, COREDUMP_IGNORE_ULIMIT)) != 0) {
5028                         printf("couldn't take coredump of %s[%d]: %d\n", procname, pid, error);
5029                 }
5030                 /*
5031                 * coredump() leaves the task suspended.
5032                 */
5033                 task_resume_internal(current_task());
5034
5035                 end = mach_absolute_time();
5036                 absolutetime_to_microtime(end - starttime, &secs, &microsecs);
5037                 printf("coredump of %s[%d] taken in %d secs %d microsecs\n",
5038                        proc_name_address(current_task()->bsd_info), pid, (int)secs, microsecs);
5039         }
5040 #endif /* CONFIG_COREDUMP */
5041
5042         if (disable_exc_resource) {
5043                 printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
5044                         "supressed by a boot-arg.\n", procname, pid, max_footprint_mb);
5045                 return;
5046         }
5047
5048         /*
5049          * A task that has triggered an EXC_RESOURCE, should not be
5050          * jetsammed when the device is under memory pressure.  Here
5051          * we set the P_MEMSTAT_TERMINATED flag so that the process
5052          * will be skipped if the memorystatus_thread wakes up.
5053          */
5054         proc_memstat_terminated(current_task()->bsd_info, TRUE);
5055
5056         code[0] = code[1] = 0;
5057         EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY);
5058         EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK);
5059         EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb);
5060
5061         /* Do not generate a corpse fork if the violation is a fatal one */
5062         if (is_fatal || exc_via_corpse_forking == 0) {
5063                 /* Do not send a EXC_RESOURCE is corpse_for_fatal_memkill is set */
5064                 if (corpse_for_fatal_memkill == 0) {
5065                         /*
5066                          * Use the _internal_ variant so that no user-space
5067                          * process can resume our task from under us.
5068                          */
5069                         task_suspend_internal(task);
5070                         exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
5071                         task_resume_internal(task);
5072                 }
5073         } else {
5074                 if (audio_active) {
5075                         printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
5076                         "supressed due to audio playback.\n", procname, pid, max_footprint_mb);
5077                 } else {
5078                         task_enqueue_exception_with_corpse(task, EXC_RESOURCE,
5079                                 code, EXCEPTION_CODE_MAX, NULL);
5080                 }
5081         }
5082
5083         /*
5084          * After the EXC_RESOURCE has been handled, we must clear the
5085          * P_MEMSTAT_TERMINATED flag so that the process can again be
5086          * considered for jetsam if the memorystatus_thread wakes up.
5087          */
5088         proc_memstat_terminated(current_task()->bsd_info, FALSE);  /* clear the flag */
5089 }
5090
5091 /*
5092  * Callback invoked when a task exceeds its physical footprint limit.
5093  */
5094 void
5095 task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1)
5096 {
5097         ledger_amount_t max_footprint, max_footprint_mb;
5098         task_t task;
5099         boolean_t is_warning;
5100         boolean_t memlimit_is_active;
5101         boolean_t memlimit_is_fatal;
5102
5103         if (warning == LEDGER_WARNING_DIPPED_BELOW) {
5104                 /*
5105                  * Task memory limits only provide a warning on the way up.
5106                  */
5107                 return;
5108         } else if (warning == LEDGER_WARNING_ROSE_ABOVE) {
5109                 /*
5110                  * This task is in danger of violating a memory limit,
5111                  * It has exceeded a percentage level of the limit.
5112                  */
5113                 is_warning = TRUE;
5114         } else {
5115                 /*
5116                  * The task has exceeded the physical footprint limit.
5117                  * This is not a warning but a true limit violation.
5118                  */
5119                 is_warning = FALSE;
5120         }
5121
5122         task = current_task();
5123
5124         ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &max_footprint);
5125         max_footprint_mb = max_footprint >> 20;
5126
5127         memlimit_is_active = task_get_memlimit_is_active(task);
5128         memlimit_is_fatal = task_get_memlimit_is_fatal(task);
5129
5130         /*
5131          * If this is an actual violation (not a warning), then generate EXC_RESOURCE exception.
5132          * We only generate the exception once per process per memlimit (active/inactive limit).
5133          * To enforce this, we monitor state based on the  memlimit's active/inactive attribute
5134          * and we disable it by marking that memlimit as exception triggered.
5135          */
5136         if ((is_warning == FALSE) && (!task_has_triggered_exc_resource(task, memlimit_is_active))) {
5137                 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND((int)max_footprint_mb, memlimit_is_fatal);
5138                 memorystatus_log_exception((int)max_footprint_mb, memlimit_is_active, memlimit_is_fatal);
5139                 task_mark_has_triggered_exc_resource(task, memlimit_is_active);
5140         }
5141
5142         memorystatus_on_ledger_footprint_exceeded(is_warning, memlimit_is_active, memlimit_is_fatal);
5143 }
5144
5145 extern int proc_check_footprint_priv(void);
5146
5147 kern_return_t
5148 task_set_phys_footprint_limit(
5149         task_t task,
5150         int new_limit_mb,
5151         int *old_limit_mb)
5152 {
5153         kern_return_t error;
5154
5155         boolean_t memlimit_is_active;
5156         boolean_t memlimit_is_fatal;
5157
5158         if ((error = proc_check_footprint_priv())) {
5159                 return (KERN_NO_ACCESS);
5160         }
5161
5162         /*
5163          * This call should probably be obsoleted.
5164          * But for now, we default to current state.
5165          */
5166         memlimit_is_active = task_get_memlimit_is_active(task);
5167         memlimit_is_fatal = task_get_memlimit_is_fatal(task);
5168
5169         return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, memlimit_is_active, memlimit_is_fatal);
5170 }
5171
5172 kern_return_t
5173 task_convert_phys_footprint_limit(
5174         int limit_mb,
5175         int *converted_limit_mb)
5176 {
5177         if (limit_mb == -1) {
5178                 /*
5179                  * No limit
5180                  */
5181                 if (max_task_footprint != 0) {
5182                         *converted_limit_mb = (int)(max_task_footprint / 1024 / 1024);   /* bytes to MB */
5183                 } else {
5184                         *converted_limit_mb = (int)(LEDGER_LIMIT_INFINITY >> 20);
5185                 }
5186         } else {
5187                 /* nothing to convert */
5188                 *converted_limit_mb = limit_mb;
5189         }
5190         return (KERN_SUCCESS);
5191 }
5192
5193
5194 kern_return_t
5195 task_set_phys_footprint_limit_internal(
5196         task_t task,
5197         int new_limit_mb,
5198         int *old_limit_mb,
5199         boolean_t memlimit_is_active,
5200         boolean_t memlimit_is_fatal)
5201 {
5202         ledger_amount_t old;
5203
5204         ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &old);
5205
5206         /*
5207          * Check that limit >> 20 will not give an "unexpected" 32-bit
5208          * result. There are, however, implicit assumptions that -1 mb limit
5209          * equates to LEDGER_LIMIT_INFINITY.
5210          */
5211         assert(((old & 0xFFF0000000000000LL) == 0) || (old == LEDGER_LIMIT_INFINITY));
5212
5213         if (old_limit_mb) {
5214                 *old_limit_mb = (int)(old >> 20);
5215         }
5216
5217         if (new_limit_mb == -1) {
5218                 /*
5219                  * Caller wishes to remove the limit.
5220                  */
5221                 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
5222                                  max_task_footprint ? max_task_footprint : LEDGER_LIMIT_INFINITY,
5223                                  max_task_footprint ? max_task_footprint_warning_level : 0);
5224
5225                 task_lock(task);
5226                 task_set_memlimit_is_active(task, memlimit_is_active);
5227                 task_set_memlimit_is_fatal(task, memlimit_is_fatal);
5228                 task_unlock(task);
5229
5230                 return (KERN_SUCCESS);
5231         }
5232
5233 #ifdef CONFIG_NOMONITORS
5234         return (KERN_SUCCESS);
5235 #endif /* CONFIG_NOMONITORS */
5236
5237         task_lock(task);
5238
5239         if ((memlimit_is_active == task_get_memlimit_is_active(task)) &&
5240             (memlimit_is_fatal == task_get_memlimit_is_fatal(task)) &&
5241             (((ledger_amount_t)new_limit_mb << 20) == old)) {
5242                 /*
5243                  * memlimit state is not changing
5244                  */
5245                 task_unlock(task);
5246                 return(KERN_SUCCESS);
5247         }
5248
5249         task_set_memlimit_is_active(task, memlimit_is_active);
5250         task_set_memlimit_is_fatal(task, memlimit_is_fatal);
5251
5252         ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
5253                 (ledger_amount_t)new_limit_mb << 20, PHYS_FOOTPRINT_WARNING_LEVEL);
5254
5255         if (task == current_task()) {
5256                 ledger_check_new_balance(current_thread(), task->ledger,
5257                                          task_ledgers.phys_footprint);
5258         }
5259
5260         task_unlock(task);
5261
5262         return (KERN_SUCCESS);
5263 }
5264
5265 kern_return_t
5266 task_get_phys_footprint_limit(
5267         task_t task,
5268         int *limit_mb)
5269 {
5270         ledger_amount_t limit;
5271
5272         ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &limit);
5273         /*
5274          * Check that limit >> 20 will not give an "unexpected" signed, 32-bit
5275          * result. There are, however, implicit assumptions that -1 mb limit
5276          * equates to LEDGER_LIMIT_INFINITY.
5277          */
5278         assert(((limit & 0xFFF0000000000000LL) == 0) || (limit == LEDGER_LIMIT_INFINITY));
5279         *limit_mb = (int)(limit >> 20);
5280
5281         return (KERN_SUCCESS);
5282 }
5283 #else /* CONFIG_MEMORYSTATUS */
5284 kern_return_t
5285 task_set_phys_footprint_limit(
5286         __unused task_t task,
5287         __unused int new_limit_mb,
5288         __unused int *old_limit_mb)
5289 {
5290         return (KERN_FAILURE);
5291 }
5292
5293 kern_return_t
5294 task_get_phys_footprint_limit(
5295         __unused task_t task,
5296         __unused int *limit_mb)
5297 {
5298         return (KERN_FAILURE);
5299 }
5300 #endif /* CONFIG_MEMORYSTATUS */
5301
5302 /*
5303  * We need to export some functions to other components that
5304  * are currently implemented in macros within the osfmk
5305  * component.  Just export them as functions of the same name.
5306  */
5307 boolean_t is_kerneltask(task_t t)
5308 {
5309         if (t == kernel_task)
5310                 return (TRUE);
5311
5312         return (FALSE);
5313 }
5314
5315 boolean_t is_corpsetask(task_t t)
5316 {
5317         return (task_is_a_corpse(t));
5318 }
5319
5320 #undef current_task
5321 task_t current_task(void);
5322 task_t current_task(void)
5323 {
5324         return (current_task_fast());
5325 }
5326
5327 #undef task_reference
5328 void task_reference(task_t task);
5329 void
5330 task_reference(
5331         task_t          task)
5332 {
5333         if (task != TASK_NULL)
5334                 task_reference_internal(task);
5335 }
5336
5337 /* defined in bsd/kern/kern_prot.c */
5338 extern int get_audit_token_pid(audit_token_t *audit_token);
5339
5340 int task_pid(task_t task)
5341 {
5342         if (task)
5343                 return get_audit_token_pid(&task->audit_token);
5344         return -1;
5345 }
5346
5347
5348 /*
5349  * This routine finds a thread in a task by its unique id
5350  * Returns a referenced thread or THREAD_NULL if the thread was not found
5351  *
5352  * TODO: This is super inefficient - it's an O(threads in task) list walk!
5353  *       We should make a tid hash, or transition all tid clients to thread ports
5354  *
5355  * Precondition: No locks held (will take task lock)
5356  */
5357 thread_t
5358 task_findtid(task_t task, uint64_t tid)
5359 {
5360         thread_t self           = current_thread();
5361         thread_t found_thread   = THREAD_NULL;
5362         thread_t iter_thread    = THREAD_NULL;
5363
5364         /* Short-circuit the lookup if we're looking up ourselves */
5365         if (tid == self->thread_id || tid == TID_NULL) {
5366                 assert(self->task == task);
5367
5368                 thread_reference(self);
5369
5370                 return self;
5371         }
5372
5373         task_lock(task);
5374
5375         queue_iterate(&task->threads, iter_thread, thread_t, task_threads) {
5376                 if (iter_thread->thread_id == tid) {
5377                         found_thread = iter_thread;
5378                         thread_reference(found_thread);
5379                         break;
5380                 }
5381         }
5382
5383         task_unlock(task);
5384
5385         return (found_thread);
5386 }
5387
5388 int pid_from_task(task_t task)
5389 {
5390         int pid = -1;
5391
5392         if (task->bsd_info) {
5393                 pid = proc_pid(task->bsd_info);
5394         } else {
5395                 pid = task_pid(task);
5396         }
5397
5398         return pid;
5399 }
5400
5401 /*
5402  * Control the CPU usage monitor for a task.
5403  */
5404 kern_return_t
5405 task_cpu_usage_monitor_ctl(task_t task, uint32_t *flags)
5406 {
5407         int error = KERN_SUCCESS;
5408
5409         if (*flags & CPUMON_MAKE_FATAL) {
5410                 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_CPUMON;
5411         } else {
5412                 error = KERN_INVALID_ARGUMENT;
5413         }
5414
5415         return error;
5416 }
5417
5418 /*
5419  * Control the wakeups monitor for a task.
5420  */
5421 kern_return_t
5422 task_wakeups_monitor_ctl(task_t task, uint32_t *flags, int32_t *rate_hz)
5423 {
5424         ledger_t ledger = task->ledger;
5425
5426         task_lock(task);
5427         if (*flags & WAKEMON_GET_PARAMS) {
5428                 ledger_amount_t limit;
5429                 uint64_t                period;
5430
5431                 ledger_get_limit(ledger, task_ledgers.interrupt_wakeups, &limit);
5432                 ledger_get_period(ledger, task_ledgers.interrupt_wakeups, &period);
5433
5434                 if (limit != LEDGER_LIMIT_INFINITY) {
5435                         /*
5436                          * An active limit means the wakeups monitor is enabled.
5437                          */
5438                         *rate_hz = (int32_t)(limit / (int64_t)(period / NSEC_PER_SEC));
5439                         *flags = WAKEMON_ENABLE;
5440                         if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
5441                                 *flags |= WAKEMON_MAKE_FATAL;
5442                         }
5443                 } else {
5444                         *flags = WAKEMON_DISABLE;
5445                         *rate_hz = -1;
5446                 }
5447
5448                 /*
5449                  * If WAKEMON_GET_PARAMS is present in flags, all other flags are ignored.
5450                  */
5451                 task_unlock(task);
5452                 return KERN_SUCCESS;
5453         }
5454
5455         if (*flags & WAKEMON_ENABLE) {
5456                 if (*flags & WAKEMON_SET_DEFAULTS) {
5457                         *rate_hz = task_wakeups_monitor_rate;
5458                 }
5459
5460 #ifndef CONFIG_NOMONITORS
5461                 if (*flags & WAKEMON_MAKE_FATAL) {
5462                         task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
5463                 }
5464 #endif /* CONFIG_NOMONITORS */
5465
5466                 if (*rate_hz <= 0) {
5467                         task_unlock(task);
5468                         return KERN_INVALID_ARGUMENT;
5469                 }
5470
5471 #ifndef CONFIG_NOMONITORS
5472                 ledger_set_limit(ledger, task_ledgers.interrupt_wakeups, *rate_hz * task_wakeups_monitor_interval,
5473                         task_wakeups_monitor_ustackshots_trigger_pct);
5474                 ledger_set_period(ledger, task_ledgers.interrupt_wakeups, task_wakeups_monitor_interval * NSEC_PER_SEC);
5475                 ledger_enable_callback(ledger, task_ledgers.interrupt_wakeups);
5476 #endif /* CONFIG_NOMONITORS */
5477         } else if (*flags & WAKEMON_DISABLE) {
5478                 /*
5479                  * Caller wishes to disable wakeups monitor on the task.
5480                  *
5481                  * Disable telemetry if it was triggered by the wakeups monitor, and
5482                  * remove the limit & callback on the wakeups ledger entry.
5483                  */
5484 #if CONFIG_TELEMETRY
5485                 telemetry_task_ctl_locked(task, TF_WAKEMON_WARNING, 0);
5486 #endif
5487                 ledger_disable_refill(ledger, task_ledgers.interrupt_wakeups);
5488                 ledger_disable_callback(ledger, task_ledgers.interrupt_wakeups);
5489         }
5490
5491         task_unlock(task);
5492         return KERN_SUCCESS;
5493 }
5494
5495 void
5496 task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1)
5497 {
5498         if (warning == LEDGER_WARNING_ROSE_ABOVE) {
5499 #if CONFIG_TELEMETRY
5500                 /*
5501                  * This task is in danger of violating the wakeups monitor. Enable telemetry on this task
5502                  * so there are micro-stackshots available if and when EXC_RESOURCE is triggered.
5503                  */
5504                 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 1);
5505 #endif
5506                 return;
5507         }
5508
5509 #if CONFIG_TELEMETRY
5510         /*
5511          * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
5512          * exceeded the limit, turn telemetry off for the task.
5513          */
5514         telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 0);
5515 #endif
5516
5517         if (warning == 0) {
5518                 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS();
5519         }
5520 }
5521
5522 void __attribute__((noinline))
5523 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void)
5524 {
5525         task_t                      task        = current_task();
5526         int                         pid         = 0;
5527         const char                  *procname   = "unknown";
5528         boolean_t                   fatal;
5529         kern_return_t               kr;
5530 #ifdef EXC_RESOURCE_MONITORS
5531         mach_exception_data_type_t  code[EXCEPTION_CODE_MAX];
5532 #endif /* EXC_RESOURCE_MONITORS */
5533         struct ledger_entry_info    lei;
5534
5535 #ifdef MACH_BSD
5536         pid = proc_selfpid();
5537         if (task->bsd_info != NULL)
5538                 procname = proc_name_address(current_task()->bsd_info);
5539 #endif
5540
5541         ledger_get_entry_info(task->ledger, task_ledgers.interrupt_wakeups, &lei);
5542
5543         /*
5544          * Disable the exception notification so we don't overwhelm
5545          * the listener with an endless stream of redundant exceptions.
5546          * TODO: detect whether another thread is already reporting the violation.
5547          */
5548         uint32_t flags = WAKEMON_DISABLE;
5549         task_wakeups_monitor_ctl(task, &flags, NULL);
5550
5551         fatal = task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
5552         trace_resource_violation(RMON_CPUWAKES_VIOLATED, &lei);
5553         os_log(OS_LOG_DEFAULT, "process %s[%d] caught waking the CPU %llu times "
5554                "over ~%llu seconds, averaging %llu wakes / second and "
5555                "violating a %slimit of %llu wakes over %llu seconds.\n",
5556                procname, pid,
5557                lei.lei_balance, lei.lei_last_refill / NSEC_PER_SEC,
5558                    lei.lei_last_refill == 0 ? 0 :
5559                                 (NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill),
5560                fatal ? "FATAL " : "",
5561                    lei.lei_limit, lei.lei_refill_period / NSEC_PER_SEC);
5562
5563         kr = send_resource_violation(send_cpu_wakes_violation, task, &lei,
5564                                      fatal ? kRNFatalLimitFlag : 0);
5565         if (kr) {
5566                 printf("send_resource_violation(CPU wakes, ...): error %#x\n", kr);
5567         }
5568
5569 #ifdef EXC_RESOURCE_MONITORS
5570         if (disable_exc_resource) {
5571                 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
5572                         "supressed by a boot-arg\n", procname, pid);
5573                 return;
5574         }
5575         if (audio_active) {
5576                 os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
5577                        "supressed due to audio playback\n", procname, pid);
5578                 return;
5579         }
5580         if (lei.lei_last_refill == 0) {
5581                 os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
5582                        "supressed due to lei.lei_last_refill = 0 \n", procname, pid);
5583         }
5584
5585         code[0] = code[1] = 0;
5586         EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_WAKEUPS);
5587         EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_WAKEUPS_MONITOR);
5588         EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_PERMITTED(code[0],
5589                             NSEC_PER_SEC * lei.lei_limit / lei.lei_refill_period);
5590         EXC_RESOURCE_CPUMONITOR_ENCODE_OBSERVATION_INTERVAL(code[0],
5591                             lei.lei_last_refill);
5592         EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_OBSERVED(code[1],
5593                             NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill);
5594         exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
5595 #endif /* EXC_RESOURCE_MONITORS */
5596
5597         if (fatal) {
5598                 task_terminate_internal(task);
5599         }
5600 }
5601
5602 static boolean_t
5603 global_update_logical_writes(int64_t io_delta)
5604 {
5605         int64_t old_count, new_count;
5606         boolean_t needs_telemetry;
5607
5608         do {
5609                 new_count = old_count = global_logical_writes_count;
5610                 new_count += io_delta;
5611                 if (new_count >= io_telemetry_limit) {
5612                         new_count = 0;
5613                         needs_telemetry = TRUE;
5614                 } else {
5615                         needs_telemetry = FALSE;
5616                 }
5617         } while(!OSCompareAndSwap64(old_count, new_count, &global_logical_writes_count));
5618         return needs_telemetry;
5619 }
5620
5621 void task_update_logical_writes(task_t task, uint32_t io_size, int flags, void *vp)
5622 {
5623         int64_t io_delta = 0;
5624         boolean_t needs_telemetry = FALSE;
5625
5626         if ((!task) || (!io_size) || (!vp))
5627                 return;
5628
5629         KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_DATA_WRITE)) | DBG_FUNC_NONE,
5630                                                         task_pid(task), io_size, flags, (uintptr_t)VM_KERNEL_ADDRPERM(vp), 0);
5631         DTRACE_IO4(logical_writes, struct task *, task, uint32_t, io_size, int, flags, vnode *, vp);
5632         switch(flags) {
5633                 case TASK_WRITE_IMMEDIATE:
5634                         OSAddAtomic64(io_size, (SInt64 *)&(task->task_immediate_writes));
5635                         ledger_credit(task->ledger, task_ledgers.logical_writes, io_size);
5636                         break;
5637                 case TASK_WRITE_DEFERRED:
5638                         OSAddAtomic64(io_size, (SInt64 *)&(task->task_deferred_writes));
5639                         ledger_credit(task->ledger, task_ledgers.logical_writes, io_size);
5640                         break;
5641                 case TASK_WRITE_INVALIDATED:
5642                         OSAddAtomic64(io_size, (SInt64 *)&(task->task_invalidated_writes));
5643                         ledger_debit(task->ledger, task_ledgers.logical_writes, io_size);
5644                         break;
5645                 case TASK_WRITE_METADATA:
5646                         OSAddAtomic64(io_size, (SInt64 *)&(task->task_metadata_writes));
5647                         ledger_credit(task->ledger, task_ledgers.logical_writes, io_size);
5648                         break;
5649         }
5650
5651         io_delta = (flags == TASK_WRITE_INVALIDATED) ? ((int64_t)io_size * -1ll) : ((int64_t)io_size);
5652         if (io_telemetry_limit != 0) {
5653                 /* If io_telemetry_limit is 0, disable global updates and I/O telemetry */
5654                 needs_telemetry = global_update_logical_writes(io_delta);
5655                 if (needs_telemetry) {
5656                         act_set_io_telemetry_ast(current_thread());
5657                 }
5658         }
5659 }
5660
5661 /*
5662  * Control the I/O monitor for a task.
5663  */
5664 kern_return_t
5665 task_io_monitor_ctl(task_t task, uint32_t *flags)
5666 {
5667         ledger_t ledger = task->ledger;
5668
5669         task_lock(task);
5670         if (*flags & IOMON_ENABLE) {
5671                 /* Configure the physical I/O ledger */
5672                 ledger_set_limit(ledger, task_ledgers.physical_writes, (task_iomon_limit_mb * 1024 * 1024), 0);
5673                 ledger_set_period(ledger, task_ledgers.physical_writes, (task_iomon_interval_secs * NSEC_PER_SEC));
5674
5675                 /* Configure the logical I/O ledger */
5676                 ledger_set_limit(ledger, task_ledgers.logical_writes, (task_iomon_limit_mb * 1024 * 1024), 0);
5677                 ledger_set_period(ledger, task_ledgers.logical_writes, (task_iomon_interval_secs * NSEC_PER_SEC));
5678
5679         } else if (*flags & IOMON_DISABLE) {
5680                 /*
5681                  * Caller wishes to disable I/O monitor on the task.
5682                  */
5683                 ledger_disable_refill(ledger, task_ledgers.physical_writes);
5684                 ledger_disable_callback(ledger, task_ledgers.physical_writes);
5685                 ledger_disable_refill(ledger, task_ledgers.logical_writes);
5686                 ledger_disable_callback(ledger, task_ledgers.logical_writes);
5687         }
5688
5689         task_unlock(task);
5690         return KERN_SUCCESS;
5691 }
5692
5693 void
5694 task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1)
5695 {
5696         if (warning == 0) {
5697                 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO((int)param0);
5698         }
5699 }
5700
5701 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor)
5702 {
5703         int                             pid = 0;
5704         task_t                          task = current_task();
5705 #ifdef EXC_RESOURCE_MONITORS
5706         mach_exception_data_type_t      code[EXCEPTION_CODE_MAX];
5707 #endif /* EXC_RESOURCE_MONITORS */
5708         struct ledger_entry_info        lei;
5709         kern_return_t                   kr;
5710
5711 #ifdef MACH_BSD
5712         pid = proc_selfpid();
5713 #endif
5714         /*
5715          * Get the ledger entry info. We need to do this before disabling the exception
5716          * to get correct values for all fields.
5717          */
5718         switch(flavor) {
5719                 case FLAVOR_IO_PHYSICAL_WRITES:
5720                         ledger_get_entry_info(task->ledger, task_ledgers.physical_writes, &lei);
5721                         break;
5722                 case FLAVOR_IO_LOGICAL_WRITES:
5723                         ledger_get_entry_info(task->ledger, task_ledgers.logical_writes, &lei);
5724                         break;
5725         }
5726
5727
5728         /*
5729          * Disable the exception notification so we don't overwhelm
5730          * the listener with an endless stream of redundant exceptions.
5731          * TODO: detect whether another thread is already reporting the violation.
5732          */
5733         uint32_t flags = IOMON_DISABLE;
5734         task_io_monitor_ctl(task, &flags);
5735
5736         if (flavor == FLAVOR_IO_LOGICAL_WRITES) {
5737                 trace_resource_violation(RMON_LOGWRITES_VIOLATED, &lei);
5738         }
5739         os_log(OS_LOG_DEFAULT, "process [%d] caught causing excessive I/O (flavor: %d). Task I/O: %lld MB. [Limit : %lld MB per %lld secs]\n",
5740                 pid, flavor, (lei.lei_balance / (1024 * 1024)), (lei.lei_limit / (1024 * 1024)), (lei.lei_refill_period / NSEC_PER_SEC));
5741
5742         kr = send_resource_violation(send_disk_writes_violation, task, &lei, kRNFlagsNone);
5743         if (kr) {
5744                 printf("send_resource_violation(disk_writes, ...): error %#x\n", kr);
5745         }
5746
5747 #ifdef EXC_RESOURCE_MONITORS
5748         code[0] = code[1] = 0;
5749         EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_IO);
5750         EXC_RESOURCE_ENCODE_FLAVOR(code[0], flavor);
5751         EXC_RESOURCE_IO_ENCODE_INTERVAL(code[0], (lei.lei_refill_period / NSEC_PER_SEC));
5752         EXC_RESOURCE_IO_ENCODE_LIMIT(code[0], (lei.lei_limit / (1024 * 1024)));
5753         EXC_RESOURCE_IO_ENCODE_OBSERVED(code[1], (lei.lei_balance / (1024 * 1024)));
5754         exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
5755 #endif /* EXC_RESOURCE_MONITORS */
5756 }
5757
5758 /* Placeholders for the task set/get voucher interfaces */
5759 kern_return_t
5760 task_get_mach_voucher(
5761         task_t                  task,
5762         mach_voucher_selector_t __unused which,
5763         ipc_voucher_t           *voucher)
5764 {
5765         if (TASK_NULL == task)
5766                 return KERN_INVALID_TASK;
5767
5768         *voucher = NULL;
5769         return KERN_SUCCESS;
5770 }
5771
5772 kern_return_t
5773 task_set_mach_voucher(
5774         task_t                  task,
5775         ipc_voucher_t           __unused voucher)
5776 {
5777         if (TASK_NULL == task)
5778                 return KERN_INVALID_TASK;
5779
5780         return KERN_SUCCESS;
5781 }
5782
5783 kern_return_t
5784 task_swap_mach_voucher(
5785         task_t                  task,
5786         ipc_voucher_t           new_voucher,
5787         ipc_voucher_t           *in_out_old_voucher)
5788 {
5789         if (TASK_NULL == task)
5790                 return KERN_INVALID_TASK;
5791
5792         *in_out_old_voucher = new_voucher;
5793         return KERN_SUCCESS;
5794 }
5795
5796 void task_set_gpu_denied(task_t task, boolean_t denied)
5797 {
5798         task_lock(task);
5799
5800         if (denied) {
5801                 task->t_flags |= TF_GPU_DENIED;
5802         } else {
5803                 task->t_flags &= ~TF_GPU_DENIED;
5804         }
5805
5806         task_unlock(task);
5807 }
5808
5809 boolean_t task_is_gpu_denied(task_t task)
5810 {
5811         /* We don't need the lock to read this flag */
5812         return (task->t_flags & TF_GPU_DENIED) ? TRUE : FALSE;
5813 }
5814
5815
5816 uint64_t get_task_memory_region_count(task_t task)
5817 {
5818         vm_map_t map;
5819         map = (task == kernel_task) ? kernel_map: task->map;
5820         return((uint64_t)get_map_nentries(map));
5821 }
5822
5823 static void
5824 kdebug_trace_dyld_internal(uint32_t base_code,
5825         struct dyld_kernel_image_info *info)
5826 {
5827         static_assert(sizeof(info->uuid) >= 16);
5828
5829 #if defined(__LP64__)
5830         uint64_t *uuid = (uint64_t *)&(info->uuid);
5831
5832         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
5833                 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code), uuid[0],
5834                 uuid[1], info->load_addr,
5835                 (uint64_t)info->fsid.val[0] | ((uint64_t)info->fsid.val[1] << 32),
5836                 0);
5837         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
5838                 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 1),
5839                 (uint64_t)info->fsobjid.fid_objno |
5840                 ((uint64_t)info->fsobjid.fid_generation << 32),
5841                 0, 0, 0, 0);
5842 #else /* defined(__LP64__) */
5843         uint32_t *uuid = (uint32_t *)&(info->uuid);
5844
5845         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
5846                 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 2), uuid[0],
5847                 uuid[1], uuid[2], uuid[3], 0);
5848         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
5849                 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 3),
5850                 (uint32_t)info->load_addr, info->fsid.val[0], info->fsid.val[1],
5851                 info->fsobjid.fid_objno, 0);
5852         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
5853                 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 4),
5854                 info->fsobjid.fid_generation, 0, 0, 0, 0);
5855 #endif /* !defined(__LP64__) */
5856 }
5857
5858 static kern_return_t
5859 kdebug_trace_dyld(task_t task, uint32_t base_code,
5860         vm_map_copy_t infos_copy, mach_msg_type_number_t infos_len)
5861 {
5862         kern_return_t kr;
5863         dyld_kernel_image_info_array_t infos;
5864         vm_map_offset_t map_data;
5865         vm_offset_t data;
5866
5867         if (!infos_copy) {
5868                 return KERN_INVALID_ADDRESS;
5869         }
5870
5871         if (!kdebug_enable ||
5872                 !kdebug_debugid_enabled(KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, 0)))
5873         {
5874                 vm_map_copy_discard(infos_copy);
5875                 return KERN_SUCCESS;
5876         }
5877
5878         if (task == NULL || task != current_task()) {
5879                 return KERN_INVALID_TASK;
5880         }
5881
5882         kr = vm_map_copyout(ipc_kernel_map, &map_data, (vm_map_copy_t)infos_copy);
5883         if (kr != KERN_SUCCESS) {
5884                 return kr;
5885         }
5886
5887         infos = CAST_DOWN(dyld_kernel_image_info_array_t, map_data);
5888
5889         for (mach_msg_type_number_t i = 0; i < infos_len; i++) {
5890                 kdebug_trace_dyld_internal(base_code, &(infos[i]));
5891         }
5892
5893         data = CAST_DOWN(vm_offset_t, map_data);
5894         mach_vm_deallocate(ipc_kernel_map, data, infos_len * sizeof(infos[0]));
5895         return KERN_SUCCESS;
5896 }
5897
5898 kern_return_t
5899 task_register_dyld_image_infos(task_t task,
5900                                dyld_kernel_image_info_array_t infos_copy,
5901                                mach_msg_type_number_t infos_len)
5902 {
5903         return kdebug_trace_dyld(task, DBG_DYLD_UUID_MAP_A,
5904                 (vm_map_copy_t)infos_copy, infos_len);
5905 }
5906
5907 kern_return_t
5908 task_unregister_dyld_image_infos(task_t task,
5909                                  dyld_kernel_image_info_array_t infos_copy,
5910                                  mach_msg_type_number_t infos_len)
5911 {
5912         return kdebug_trace_dyld(task, DBG_DYLD_UUID_UNMAP_A,
5913                 (vm_map_copy_t)infos_copy, infos_len);
5914 }
5915
5916 kern_return_t
5917 task_get_dyld_image_infos(__unused task_t task,
5918                           __unused dyld_kernel_image_info_array_t * dyld_images,
5919                           __unused mach_msg_type_number_t * dyld_imagesCnt)
5920 {
5921         return KERN_NOT_SUPPORTED;
5922 }
5923
5924 kern_return_t
5925 task_register_dyld_shared_cache_image_info(task_t task,
5926                                            dyld_kernel_image_info_t cache_img,
5927                                            __unused boolean_t no_cache,
5928                                            __unused boolean_t private_cache)
5929 {
5930         if (task == NULL || task != current_task()) {
5931                 return KERN_INVALID_TASK;
5932         }
5933
5934         kdebug_trace_dyld_internal(DBG_DYLD_UUID_SHARED_CACHE_A, &cache_img);
5935         return KERN_SUCCESS;
5936 }
5937
5938 kern_return_t
5939 task_register_dyld_set_dyld_state(__unused task_t task,
5940                                   __unused uint8_t dyld_state)
5941 {
5942         return KERN_NOT_SUPPORTED;
5943 }
5944
5945 kern_return_t
5946 task_register_dyld_get_process_state(__unused task_t task,
5947                                      __unused dyld_kernel_process_info_t * dyld_process_state)
5948 {
5949         return KERN_NOT_SUPPORTED;
5950 }
5951
5952 kern_return_t
5953 task_inspect(task_inspect_t task_insp, task_inspect_flavor_t flavor,
5954                 task_inspect_info_t info_out, mach_msg_type_number_t *size_in_out)
5955 {
5956 #if MONOTONIC
5957         task_t task = (task_t)task_insp;
5958         kern_return_t kr = KERN_SUCCESS;
5959         mach_msg_type_number_t size;
5960
5961         if (task == TASK_NULL) {
5962                 return KERN_INVALID_ARGUMENT;
5963         }
5964
5965         size = *size_in_out;
5966
5967         switch (flavor) {
5968         case TASK_INSPECT_BASIC_COUNTS: {
5969                 struct task_inspect_basic_counts *bc;
5970                 uint64_t task_counts[MT_CORE_NFIXED];
5971
5972                 if (size < TASK_INSPECT_BASIC_COUNTS_COUNT) {
5973                         kr = KERN_INVALID_ARGUMENT;
5974                         break;
5975                 }
5976
5977                 mt_fixed_task_counts(task, task_counts);
5978                 bc = (struct task_inspect_basic_counts *)info_out;
5979 #ifdef MT_CORE_INSTRS
5980                 bc->instructions = task_counts[MT_CORE_INSTRS];
5981 #else /* defined(MT_CORE_INSTRS) */
5982                 bc->instructions = 0;
5983 #endif /* !defined(MT_CORE_INSTRS) */
5984                 bc->cycles = task_counts[MT_CORE_CYCLES];
5985                 size = TASK_INSPECT_BASIC_COUNTS_COUNT;
5986                 break;
5987         }
5988         default:
5989                 kr = KERN_INVALID_ARGUMENT;
5990                 break;
5991         }
5992
5993         if (kr == KERN_SUCCESS) {
5994                 *size_in_out = size;
5995         }
5996         return kr;
5997 #else /* MONOTONIC */
5998 #pragma unused(task_insp, flavor, info_out, size_in_out)
5999         return KERN_NOT_SUPPORTED;
6000 #endif /* !MONOTONIC */
6001 }
6002
6003 #if CONFIG_SECLUDED_MEMORY
6004 int num_tasks_can_use_secluded_mem = 0;
6005
6006 void
6007 task_set_can_use_secluded_mem(
6008         task_t          task,
6009         boolean_t       can_use_secluded_mem)
6010 {
6011         if (!task->task_could_use_secluded_mem) {
6012                 return;
6013         }
6014         task_lock(task);
6015         task_set_can_use_secluded_mem_locked(task, can_use_secluded_mem);
6016         task_unlock(task);
6017 }
6018
6019 void
6020 task_set_can_use_secluded_mem_locked(
6021         task_t          task,
6022         boolean_t       can_use_secluded_mem)
6023 {
6024         assert(task->task_could_use_secluded_mem);
6025         if (can_use_secluded_mem &&
6026             secluded_for_apps && /* global boot-arg */
6027             !task->task_can_use_secluded_mem) {
6028                 assert(num_tasks_can_use_secluded_mem >= 0);
6029                 OSAddAtomic(+1,
6030                             (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
6031                 task->task_can_use_secluded_mem = TRUE;
6032         } else if (!can_use_secluded_mem &&
6033                    task->task_can_use_secluded_mem) {
6034                 assert(num_tasks_can_use_secluded_mem > 0);
6035                 OSAddAtomic(-1,
6036                             (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
6037                 task->task_can_use_secluded_mem = FALSE;
6038         }
6039 }
6040
6041 void
6042 task_set_could_use_secluded_mem(
6043         task_t          task,
6044         boolean_t       could_use_secluded_mem)
6045 {
6046         task->task_could_use_secluded_mem = could_use_secluded_mem;
6047 }
6048
6049 void
6050 task_set_could_also_use_secluded_mem(
6051         task_t          task,
6052         boolean_t       could_also_use_secluded_mem)
6053 {
6054         task->task_could_also_use_secluded_mem = could_also_use_secluded_mem;
6055 }
6056
6057 boolean_t
6058 task_can_use_secluded_mem(
6059         task_t  task)
6060 {
6061         if (task->task_can_use_secluded_mem) {
6062                 assert(task->task_could_use_secluded_mem);
6063                 assert(num_tasks_can_use_secluded_mem > 0);
6064                 return TRUE;
6065         }
6066         if (task->task_could_also_use_secluded_mem &&
6067             num_tasks_can_use_secluded_mem > 0) {
6068                 assert(num_tasks_can_use_secluded_mem > 0);
6069                 return TRUE;
6070         }
6071         return FALSE;
6072 }
6073
6074 boolean_t
6075 task_could_use_secluded_mem(
6076         task_t  task)
6077 {
6078         return task->task_could_use_secluded_mem;
6079 }
6080 #endif /* CONFIG_SECLUDED_MEMORY */
6081
6082 queue_head_t *
6083 task_io_user_clients(task_t task)
6084 {
6085     return (&task->io_user_clients);
6086 }
6087
6088 void
6089 task_copy_fields_for_exec(task_t dst_task, task_t src_task)
6090 {
6091         dst_task->vtimers = src_task->vtimers;
6092 }