osfmk/kern/task.c

   1 /*
   2  * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * @OSF_FREE_COPYRIGHT@
  30  */
  31 /*
  32  * Mach Operating System
  33  * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
  34  * All Rights Reserved.
  35  *
  36  * Permission to use, copy, modify and distribute this software and its
  37  * documentation is hereby granted, provided that both the copyright
  38  * notice and this permission notice appear in all copies of the
  39  * software, derivative works or modified versions, and any portions
  40  * thereof, and that both notices appear in supporting documentation.
  41  *
  42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
  44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  45  *
  46  * Carnegie Mellon requests users of this software to return to
  47  *
  48  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  49  *  School of Computer Science
  50  *  Carnegie Mellon University
  51  *  Pittsburgh PA 15213-3890
  52  *
  53  * any improvements or extensions that they make and grant Carnegie Mellon
  54  * the rights to redistribute these changes.
  55  */
  56 /*
  57  *      File:   kern/task.c
  58  *      Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub,
  59  *              David Black
  60  *
  61  *      Task management primitives implementation.
  62  */
  63 /*
  64  * Copyright (c) 1993 The University of Utah and
  65  * the Computer Systems Laboratory (CSL).  All rights reserved.
  66  *
  67  * Permission to use, copy, modify and distribute this software and its
  68  * documentation is hereby granted, provided that both the copyright
  69  * notice and this permission notice appear in all copies of the
  70  * software, derivative works or modified versions, and any portions
  71  * thereof, and that both notices appear in supporting documentation.
  72  *
  73  * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
  74  * IS" CONDITION.  THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
  75  * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  76  *
  77  * CSL requests users of this software to return to csl-dist@cs.utah.edu any
  78  * improvements that they make and grant CSL redistribution rights.
  79  *
  80  */
  81 /*
  82  * NOTICE: This file was modified by McAfee Research in 2004 to introduce
  83  * support for mandatory and extensible security protections.  This notice
  84  * is included in support of clause 2.2 (b) of the Apple Public License,
  85  * Version 2.0.
  86  * Copyright (c) 2005 SPARTA, Inc.
  87  */
  88
  89 #include <mach/mach_types.h>
  90 #include <mach/boolean.h>
  91 #include <mach/host_priv.h>
  92 #include <mach/machine/vm_types.h>
  93 #include <mach/vm_param.h>
  94 #include <mach/mach_vm.h>
  95 #include <mach/semaphore.h>
  96 #include <mach/task_info.h>
  97 #include <mach/task_inspect.h>
  98 #include <mach/task_special_ports.h>
  99 #include <mach/sdt.h>
 100
 101 #include <ipc/ipc_importance.h>
 102 #include <ipc/ipc_types.h>
 103 #include <ipc/ipc_space.h>
 104 #include <ipc/ipc_entry.h>
 105 #include <ipc/ipc_hash.h>
 106
 107 #include <kern/kern_types.h>
 108 #include <kern/mach_param.h>
 109 #include <kern/misc_protos.h>
 110 #include <kern/task.h>
 111 #include <kern/thread.h>
 112 #include <kern/coalition.h>
 113 #include <kern/zalloc.h>
 114 #include <kern/kalloc.h>
 115 #include <kern/kern_cdata.h>
 116 #include <kern/processor.h>
 117 #include <kern/sched_prim.h>    /* for thread_wakeup */
 118 #include <kern/ipc_tt.h>
 119 #include <kern/host.h>
 120 #include <kern/clock.h>
 121 #include <kern/timer.h>
 122 #include <kern/assert.h>
 123 #include <kern/sync_lock.h>
 124 #include <kern/affinity.h>
 125 #include <kern/exc_resource.h>
 126 #include <kern/machine.h>
 127 #include <kern/policy_internal.h>
 128
 129 #include <corpses/task_corpse.h>
 130 #if CONFIG_TELEMETRY
 131 #include <kern/telemetry.h>
 132 #endif
 133
 134 #if MONOTONIC
 135 #include <kern/monotonic.h>
 136 #include <machine/monotonic.h>
 137 #endif /* MONOTONIC */
 138
 139 #include <os/log.h>
 140
 141 #include <vm/pmap.h>
 142 #include <vm/vm_map.h>
 143 #include <vm/vm_kern.h>         /* for kernel_map, ipc_kernel_map */
 144 #include <vm/vm_pageout.h>
 145 #include <vm/vm_protos.h>
 146 #include <vm/vm_purgeable_internal.h>
 147
 148 #include <sys/resource.h>
 149 #include <sys/signalvar.h> /* for coredump */
 150
 151 /*
 152  * Exported interfaces
 153  */
 154
 155 #include <mach/task_server.h>
 156 #include <mach/mach_host_server.h>
 157 #include <mach/host_security_server.h>
 158 #include <mach/mach_port_server.h>
 159
 160 #include <vm/vm_shared_region.h>
 161
 162 #include <libkern/OSDebug.h>
 163 #include <libkern/OSAtomic.h>
 164 #include <libkern/section_keywords.h>
 165
 166 #if CONFIG_ATM
 167 #include <atm/atm_internal.h>
 168 #endif
 169
 170 #include <kern/sfi.h>           /* picks up ledger.h */
 171
 172 #if CONFIG_MACF
 173 #include <security/mac_mach_internal.h>
 174 #endif
 175
  176 #if KPERF
  177 extern int kpc_force_all_ctrs(task_t, int);
  178 #endif
  179
  180 task_t                  kernel_task;    /* the kernel's own task; created as the first task by task_init() */
  181 zone_t                  task_zone;      /* zone of struct task elements, set up in task_init() */
  182 lck_attr_t      task_lck_attr;  /* lock attributes, defaulted in task_init() */
  183 lck_grp_t       task_lck_grp;   /* the "task" lock group, initialized in task_init() */
  184 lck_grp_attr_t  task_lck_grp_attr;      /* attributes used to initialize task_lck_grp */
  185
  186 extern int exc_via_corpse_forking;
  187 extern int corpse_for_fatal_memkill;
  188 extern boolean_t proc_send_synchronous_EXC_RESOURCE(void *p);
  189
  190 /* Flag set by core audio when audio is playing. Used to stifle EXC_RESOURCE generation when active. */
  191 int audio_active = 0;
  192
  193 zinfo_usage_store_t tasks_tkm_private;
  194 zinfo_usage_store_t tasks_tkm_shared;
  195
  196 /* A container to accumulate statistics for expired tasks */
  197 expired_task_statistics_t               dead_task_statistics;
  198 lck_spin_t              dead_task_statistics_lock;      /* spin lock initialized at the end of task_init(); presumably guards dead_task_statistics -- confirm */
 199
  200 ledger_template_t task_ledger_template = NULL;  /* asserted non-NULL in task_init() when CONFIG_COALITIONS is set */
  201
  202 SECURITY_READ_ONLY_LATE(struct _task_ledger_indices) task_ledgers __attribute__((used)) = /* every index starts at -1; presumably assigned by init_task_ledgers() -- confirm */
  203 {.cpu_time = -1,
  204  .tkm_private = -1,
  205  .tkm_shared = -1,
  206  .phys_mem = -1,
  207  .wired_mem = -1,
  208  .internal = -1,
  209  .iokit_mapped = -1,
  210  .alternate_accounting = -1,
  211  .alternate_accounting_compressed = -1,
  212  .page_table = -1,
  213  .phys_footprint = -1,
  214  .internal_compressed = -1,
  215  .purgeable_volatile = -1,
  216  .purgeable_nonvolatile = -1,
  217  .purgeable_volatile_compressed = -1,
  218  .purgeable_nonvolatile_compressed = -1,
  219  .network_volatile = -1,
  220  .network_nonvolatile = -1,
  221  .network_volatile_compressed = -1,
  222  .network_nonvolatile_compressed = -1,
  223  .platform_idle_wakeups = -1,
  224  .interrupt_wakeups = -1,
  225 #if !CONFIG_EMBEDDED
  226  .sfi_wait_times = { 0 /* initialized at runtime */},
  227 #endif /* !CONFIG_EMBEDDED */
  228  .cpu_time_billed_to_me = -1,
  229  .cpu_time_billed_to_others = -1,
  230  .physical_writes = -1,
  231  .logical_writes = -1,
  232  .energy_billed_to_me = -1,
  233  .energy_billed_to_others = -1,
  234  .pages_grabbed = -1,
  235  .pages_grabbed_kern = -1,
  236  .pages_grabbed_iopl = -1,
  237  .pages_grabbed_upl = -1};
 238
  239 /* System sleep state */
  240 boolean_t tasks_suspend_state;
  241
  242
  243 void init_task_ledgers(void);   /* called from task_init() when CONFIG_COALITIONS is off; otherwise coalition_init() calls it */
  244 void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1);
  245 void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1);
  246 void task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1);
  247 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void);
  248 void __attribute__((noinline)) PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, boolean_t is_fatal);
  249 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor);
  250
  251 kern_return_t task_suspend_internal(task_t);
  252 kern_return_t task_resume_internal(task_t);
  253 static kern_return_t task_start_halt_locked(task_t task, boolean_t should_mark_corpse);
  254
  255 extern kern_return_t iokit_task_terminate(task_t task);
  256
  257 extern kern_return_t exception_deliver(thread_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t, struct exception_action *, lck_mtx_t *);
  258 extern void bsd_copythreadname(void *dst_uth, void *src_uth);
  259 extern kern_return_t thread_resume(thread_t thread);
 260
  261 // Warn tasks when they hit 80% of their memory limit. Also the floor task_init() applies to the computed warning level.
  262 #define PHYS_FOOTPRINT_WARNING_LEVEL 80
  263
  264 #define TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT              150 /* wakeups per second */
  265 #define TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL   300 /* in seconds. */
  266
  267 /*
  268  * Level (in terms of percentage of the limit) at which the wakeups monitor triggers telemetry.
  269  *
  270  * (i.e. when the task's wakeups rate exceeds 70% of the limit, start taking user
  271  *  stacktraces, aka micro-stackshots)
  272  */
  273 #define TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER        70
  274
  275 int task_wakeups_monitor_interval; /* In seconds. Time period over which wakeups rate is observed */
  276 int task_wakeups_monitor_rate;     /* In hz. Maximum allowable wakeups per task before EXC_RESOURCE is sent */
  277
  278 int task_wakeups_monitor_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
  279
  280 int disable_exc_resource; /* Global override to suppress EXC_RESOURCE for resource monitor violations. */
  281
  282 ledger_amount_t max_task_footprint = 0;  /* Per-task limit on physical memory consumption in bytes     */
  283 int max_task_footprint_warning_level = 0;  /* Per-task limit warning percentage */
  284 int max_task_footprint_mb = 0;  /* Per-task limit on physical memory consumption in megabytes */
  285
  286 /* I/O Monitor Limits */
  287 #define IOMON_DEFAULT_LIMIT                     (20480ull)      /* MB of logical/physical I/O */
  288 #define IOMON_DEFAULT_INTERVAL                  (86400ull)      /* in seconds */
  289
  290 uint64_t task_iomon_limit_mb;           /* Per-task I/O monitor limit in MBs */
  291 uint64_t task_iomon_interval_secs;      /* Per-task I/O monitor interval in secs */
  292
  293 #define IO_TELEMETRY_DEFAULT_LIMIT              (10ll * 1024ll * 1024ll)
  294 int64_t io_telemetry_limit;                     /* Threshold to take a microstackshot (0 indicates I/O telemetry is turned off) */
  295 int64_t global_logical_writes_count = 0;        /* Global count for logical writes */
  296 static boolean_t global_update_logical_writes(int64_t);
  297
  298 #define TASK_MAX_THREAD_LIMIT 256
  299
  300 #if MACH_ASSERT
  301 int pmap_ledgers_panic = 1;
  302 int pmap_ledgers_panic_leeway = 3;
  303 #endif /* MACH_ASSERT */
  304
  305 int task_max = CONFIG_TASK_MAX; /* Max number of tasks */
  306
  307 #if CONFIG_COREDUMP
  308 int hwm_user_cores = 0; /* high watermark violations generate user core files */
  309 #endif
 310
  311 #ifdef MACH_BSD /* routines declared locally with extern rather than through a header */
  312 extern void     proc_getexecutableuuid(void *, unsigned char *, unsigned long);
  313 extern int      proc_pid(struct proc *p);
  314 extern int      proc_selfpid(void);
  315 extern struct proc *current_proc(void);
  316 extern char     *proc_name_address(struct proc *p);
  317 extern uint64_t get_dispatchqueue_offset_from_proc(void *);
  318 extern int kevent_proc_copy_uptrs(void *proc, uint64_t *buf, int bufsize);
  319 extern void workq_proc_suspended(struct proc *p);
  320 extern void workq_proc_resumed(struct proc *p);
  321
  322 #if CONFIG_MEMORYSTATUS
  323 extern void     proc_memstat_terminated(struct proc* p, boolean_t set);
  324 extern void     memorystatus_on_ledger_footprint_exceeded(int warning, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal);
  325 extern void     memorystatus_log_exception(const int max_footprint_mb, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal);
  326 extern boolean_t memorystatus_allowed_vm_map_fork(task_t task);
  327
  328 #if DEVELOPMENT || DEBUG
  329 extern void memorystatus_abort_vm_map_fork(task_t);
  330 #endif
  331
  332 #endif /* CONFIG_MEMORYSTATUS */
  333
  334 #endif /* MACH_BSD */
  335
  336 #if DEVELOPMENT || DEBUG
  337 int exc_resource_threads_enabled;       /* "exc_resource_threads" boot-arg; task_init() defaults it to 1 */
  338 #endif /* DEVELOPMENT || DEBUG */
  339
  340 #if (DEVELOPMENT || DEBUG) && TASK_EXC_GUARD_DELIVER_CORPSE
  341 uint32_t task_exc_guard_default = TASK_EXC_GUARD_MP_DELIVER | TASK_EXC_GUARD_MP_CORPSE |
  342     TASK_EXC_GUARD_VM_DELIVER | TASK_EXC_GUARD_VM_CORPSE;
  343 #else
  344 uint32_t task_exc_guard_default = 0;    /* task_init() lets the "task_exc_guard_default" boot-arg override this on DEVELOPMENT/DEBUG kernels */
  345 #endif
 346
  347 /* Forward declarations of file-local (static) helpers */
  348
  349 static void task_hold_locked(task_t task);
  350 static void task_wait_locked(task_t task, boolean_t until_not_runnable);
  351 static void task_release_locked(task_t task);
  352
  353 static void task_synchronizer_destroy_all(task_t task);
 354
 355
 356 void
 357 task_set_64bit(
 358         task_t task,
 359         boolean_t is_64bit,
 360         boolean_t is_64bit_data)
 361 {
 362 #if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
 363         thread_t thread;
 364 #endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */
 365
 366         task_lock(task);
 367
 368         /*
 369          * Switching to/from 64-bit address spaces
 370          */
 371         if (is_64bit) {
 372                 if (!task_has_64Bit_addr(task)) {
 373                         task_set_64Bit_addr(task);
 374                 }
 375         } else {
 376                 if (task_has_64Bit_addr(task)) {
 377                         task_clear_64Bit_addr(task);
 378                 }
 379         }
 380
 381         /*
 382          * Switching to/from 64-bit register state.
 383          */
 384         if (is_64bit_data) {
 385                 if (task_has_64Bit_data(task)) {
 386                         goto out;
 387                 }
 388
 389                 task_set_64Bit_data(task);
 390         } else {
 391                 if (!task_has_64Bit_data(task)) {
 392                         goto out;
 393                 }
 394
 395                 task_clear_64Bit_data(task);
 396         }
 397
 398         /* FIXME: On x86, the thread save state flavor can diverge from the
 399          * task's 64-bit feature flag due to the 32-bit/64-bit register save
 400          * state dichotomy. Since we can be pre-empted in this interval,
 401          * certain routines may observe the thread as being in an inconsistent
 402          * state with respect to its task's 64-bitness.
 403          */
 404
 405 #if defined(__x86_64__) || defined(__arm64__)
 406         queue_iterate(&task->threads, thread, thread_t, task_threads) {
 407                 thread_mtx_lock(thread);
 408                 machine_thread_switch_addrmode(thread);
 409                 thread_mtx_unlock(thread);
 410
 411 #if defined(__arm64__)
 412                 /* specifically, if running on H9 */
 413                 if (thread == current_thread()) {
 414                         uint64_t arg1, arg2;
 415                         int urgency;
 416                         spl_t spl = splsched();
 417                         /*
 418                          * This call tell that the current thread changed it's 32bitness.
 419                          * Other thread were no more on core when 32bitness was changed,
 420                          * but current_thread() is on core and the previous call to
 421                          * machine_thread_going_on_core() gave 32bitness which is now wrong.
 422                          *
 423                          * This is needed for bring-up, a different callback should be used
 424                          * in the future.
 425                          *
 426                          * TODO: Remove this callout when we no longer support 32-bit code on H9
 427                          */
 428                         thread_lock(thread);
 429                         urgency = thread_get_urgency(thread, &arg1, &arg2);
 430                         machine_thread_going_on_core(thread, urgency, 0, 0, mach_approximate_time());
 431                         thread_unlock(thread);
 432                         splx(spl);
 433                 }
 434 #endif /* defined(__arm64__) */
 435         }
 436 #endif /* defined(__x86_64__) || defined(__arm64__) */
 437
 438 out:
 439         task_unlock(task);
 440 }
 441
 442 boolean_t
 443 task_get_64bit_data(task_t task)
 444 {
 445         return task_has_64Bit_data(task);
 446 }
 447
 448 void
 449 task_set_platform_binary(
 450         task_t task,
 451         boolean_t is_platform)
 452 {
 453         task_lock(task);
 454         if (is_platform) {
 455                 task->t_flags |= TF_PLATFORM;
 456         } else {
 457                 task->t_flags &= ~(TF_PLATFORM);
 458         }
 459         task_unlock(task);
 460 }
 461
 462 /*
 463  * Set or clear per-task TF_CA_CLIENT_WI flag according to specified argument.
 464  * Returns "false" if flag is already set, and "true" in other cases.
 465  */
 466 bool
 467 task_set_ca_client_wi(
 468         task_t task,
 469         boolean_t set_or_clear)
 470 {
 471         bool ret = true;
 472         task_lock(task);
 473         if (set_or_clear) {
 474                 /* Tasks can have only one CA_CLIENT work interval */
 475                 if (task->t_flags & TF_CA_CLIENT_WI) {
 476                         ret = false;
 477                 } else {
 478                         task->t_flags |= TF_CA_CLIENT_WI;
 479                 }
 480         } else {
 481                 task->t_flags &= ~TF_CA_CLIENT_WI;
 482         }
 483         task_unlock(task);
 484         return ret;
 485 }
 486
 487 void
 488 task_set_dyld_info(
 489         task_t task,
 490         mach_vm_address_t addr,
 491         mach_vm_size_t size)
 492 {
 493         task_lock(task);
 494         task->all_image_info_addr = addr;
 495         task->all_image_info_size = size;
 496         task_unlock(task);
 497 }
 498
 499 void
 500 task_atm_reset(__unused task_t task)
 501 {
 502 #if CONFIG_ATM
 503         if (task->atm_context != NULL) {
 504                 atm_task_descriptor_destroy(task->atm_context);
 505                 task->atm_context = NULL;
 506         }
 507 #endif
 508 }
 509
 510 void
 511 task_bank_reset(__unused task_t task)
 512 {
 513         if (task->bank_context != NULL) {
 514                 bank_task_destroy(task);
 515         }
 516 }
 517
 518 /*
 519  * NOTE: This should only be called when the P_LINTRANSIT
 520  *       flag is set (the proc_trans lock is held) on the
 521  *       proc associated with the task.
 522  */
 523 void
 524 task_bank_init(__unused task_t task)
 525 {
 526         if (task->bank_context != NULL) {
 527                 panic("Task bank init called with non null bank context for task: %p and bank_context: %p", task, task->bank_context);
 528         }
 529         bank_task_initialize(task);
 530 }
 531
 532 void
 533 task_set_did_exec_flag(task_t task)
 534 {
 535         task->t_procflags |= TPF_DID_EXEC;
 536 }
 537
 538 void
 539 task_clear_exec_copy_flag(task_t task)
 540 {
 541         task->t_procflags &= ~TPF_EXEC_COPY;
 542 }
 543
 544 /*
 545  * This wait event is t_procflags instead of t_flags because t_flags is volatile
 546  *
 547  * TODO: store the flags in the same place as the event
 548  * rdar://problem/28501994
 549  */
 550 event_t
 551 task_get_return_wait_event(task_t task)
 552 {
 553         return (event_t)&task->t_procflags;
 554 }
 555
 556 void
 557 task_clear_return_wait(task_t task)
 558 {
 559         task_lock(task);
 560
 561         task->t_flags &= ~TF_LRETURNWAIT;
 562
 563         if (task->t_flags & TF_LRETURNWAITER) {
 564                 thread_wakeup(task_get_return_wait_event(task));
 565                 task->t_flags &= ~TF_LRETURNWAITER;
 566         }
 567
 568         task_unlock(task);
 569 }
 570
 571 void __attribute__((noreturn))
 572 task_wait_to_return(void)
 573 {
 574         task_t task;
 575
 576         task = current_task();
 577         task_lock(task);
 578
 579         if (task->t_flags & TF_LRETURNWAIT) {
 580                 do {
 581                         task->t_flags |= TF_LRETURNWAITER;
 582                         assert_wait(task_get_return_wait_event(task), THREAD_UNINT);
 583                         task_unlock(task);
 584
 585                         thread_block(THREAD_CONTINUE_NULL);
 586
 587                         task_lock(task);
 588                 } while (task->t_flags & TF_LRETURNWAIT);
 589         }
 590
 591         task_unlock(task);
 592
 593 #if CONFIG_MACF
 594         /*
 595          * Before jumping to userspace and allowing this process to execute any code,
 596          * notify any interested parties.
 597          */
 598         mac_proc_notify_exec_complete(current_proc());
 599 #endif
 600
 601         thread_bootstrap_return();
 602 }
 603
 604 #ifdef CONFIG_32BIT_TELEMETRY
 605 boolean_t
 606 task_consume_32bit_log_flag(task_t task)
 607 {
 608         if ((task->t_procflags & TPF_LOG_32BIT_TELEMETRY) != 0) {
 609                 task->t_procflags &= ~TPF_LOG_32BIT_TELEMETRY;
 610                 return TRUE;
 611         } else {
 612                 return FALSE;
 613         }
 614 }
 615
 616 void
 617 task_set_32bit_log_flag(task_t task)
 618 {
 619         task->t_procflags |= TPF_LOG_32BIT_TELEMETRY;
 620 }
 621 #endif /* CONFIG_32BIT_TELEMETRY */
 622
 623 boolean_t
 624 task_is_exec_copy(task_t task)
 625 {
 626         return task_is_exec_copy_internal(task);
 627 }
 628
 629 boolean_t
 630 task_did_exec(task_t task)
 631 {
 632         return task_did_exec_internal(task);
 633 }
 634
 635 boolean_t
 636 task_is_active(task_t task)
 637 {
 638         return task->active;
 639 }
 640
 641 boolean_t
 642 task_is_halting(task_t task)
 643 {
 644         return task->halting;
 645 }
 646
 647 #if TASK_REFERENCE_LEAK_DEBUG
 648 #include <kern/btlog.h>
 649
 650 static btlog_t *task_ref_btlog;
 651 #define TASK_REF_OP_INCR        0x1
 652 #define TASK_REF_OP_DECR        0x2
 653
 654 #define TASK_REF_NUM_RECORDS    100000
 655 #define TASK_REF_BTDEPTH        7
 656
 657 void
 658 task_reference_internal(task_t task)
 659 {
 660         void *       bt[TASK_REF_BTDEPTH];
 661         int             numsaved = 0;
 662
 663         os_ref_retain(&task->ref_count);
 664
 665         numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
 666         btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_INCR,
 667             bt, numsaved);
 668 }
 669
 670 os_ref_count_t
 671 task_deallocate_internal(task_t task)
 672 {
 673         void *       bt[TASK_REF_BTDEPTH];
 674         int             numsaved = 0;
 675
 676         numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
 677         btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_DECR,
 678             bt, numsaved);
 679
 680         return os_ref_release(&task->ref_count);
 681 }
 682
 683 #endif /* TASK_REFERENCE_LEAK_DEBUG */
 684
 685 void
 686 task_init(void)
 687 {
 688         lck_grp_attr_setdefault(&task_lck_grp_attr);
 689         lck_grp_init(&task_lck_grp, "task", &task_lck_grp_attr);
 690         lck_attr_setdefault(&task_lck_attr);
 691         lck_mtx_init(&tasks_threads_lock, &task_lck_grp, &task_lck_attr);
 692         lck_mtx_init(&tasks_corpse_lock, &task_lck_grp, &task_lck_attr);
 693
 694         task_zone = zinit(
 695                 sizeof(struct task),
 696                 task_max * sizeof(struct task),
 697                 TASK_CHUNK * sizeof(struct task),
 698                 "tasks");
 699
 700         zone_change(task_zone, Z_NOENCRYPT, TRUE);
 701
 702 #if CONFIG_EMBEDDED
 703         task_watch_init();
 704 #endif /* CONFIG_EMBEDDED */
 705
 706         /*
 707          * Configure per-task memory limit.
 708          * The boot-arg is interpreted as Megabytes,
 709          * and takes precedence over the device tree.
 710          * Setting the boot-arg to 0 disables task limits.
 711          */
 712         if (!PE_parse_boot_argn("max_task_pmem", &max_task_footprint_mb,
 713             sizeof(max_task_footprint_mb))) {
 714                 /*
 715                  * No limit was found in boot-args, so go look in the device tree.
 716                  */
 717                 if (!PE_get_default("kern.max_task_pmem", &max_task_footprint_mb,
 718                     sizeof(max_task_footprint_mb))) {
 719                         /*
 720                          * No limit was found in device tree.
 721                          */
 722                         max_task_footprint_mb = 0;
 723                 }
 724         }
 725
 726         if (max_task_footprint_mb != 0) {
 727 #if CONFIG_MEMORYSTATUS
 728                 if (max_task_footprint_mb < 50) {
 729                         printf("Warning: max_task_pmem %d below minimum.\n",
 730                             max_task_footprint_mb);
 731                         max_task_footprint_mb = 50;
 732                 }
 733                 printf("Limiting task physical memory footprint to %d MB\n",
 734                     max_task_footprint_mb);
 735
 736                 max_task_footprint = (ledger_amount_t)max_task_footprint_mb * 1024 * 1024; // Convert MB to bytes
 737
 738                 /*
 739                  * Configure the per-task memory limit warning level.
 740                  * This is computed as a percentage.
 741                  */
 742                 max_task_footprint_warning_level = 0;
 743
 744                 if (max_mem < 0x40000000) {
 745                         /*
 746                          * On devices with < 1GB of memory:
 747                          *    -- set warnings to 50MB below the per-task limit.
 748                          */
 749                         if (max_task_footprint_mb > 50) {
 750                                 max_task_footprint_warning_level = ((max_task_footprint_mb - 50) * 100) / max_task_footprint_mb;
 751                         }
 752                 } else {
 753                         /*
 754                          * On devices with >= 1GB of memory:
 755                          *    -- set warnings to 100MB below the per-task limit.
 756                          */
 757                         if (max_task_footprint_mb > 100) {
 758                                 max_task_footprint_warning_level = ((max_task_footprint_mb - 100) * 100) / max_task_footprint_mb;
 759                         }
 760                 }
 761
 762                 /*
 763                  * Never allow warning level to land below the default.
 764                  */
 765                 if (max_task_footprint_warning_level < PHYS_FOOTPRINT_WARNING_LEVEL) {
 766                         max_task_footprint_warning_level = PHYS_FOOTPRINT_WARNING_LEVEL;
 767                 }
 768
 769                 printf("Limiting task physical memory warning to %d%%\n", max_task_footprint_warning_level);
 770
 771 #else
 772                 printf("Warning: max_task_pmem specified, but jetsam not configured; ignoring.\n");
 773 #endif /* CONFIG_MEMORYSTATUS */
 774         }
 775
 776 #if DEVELOPMENT || DEBUG
 777         if (!PE_parse_boot_argn("exc_resource_threads",
 778             &exc_resource_threads_enabled,
 779             sizeof(exc_resource_threads_enabled))) {
 780                 exc_resource_threads_enabled = 1;
 781         }
 782         PE_parse_boot_argn("task_exc_guard_default",
 783             &task_exc_guard_default,
 784             sizeof(task_exc_guard_default));
 785 #endif /* DEVELOPMENT || DEBUG */
 786
 787 #if CONFIG_COREDUMP
 788         if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores,
 789             sizeof(hwm_user_cores))) {
 790                 hwm_user_cores = 0;
 791         }
 792 #endif
 793
 794         proc_init_cpumon_params();
 795
 796         if (!PE_parse_boot_argn("task_wakeups_monitor_rate", &task_wakeups_monitor_rate, sizeof(task_wakeups_monitor_rate))) {
 797                 task_wakeups_monitor_rate = TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT;
 798         }
 799
 800         if (!PE_parse_boot_argn("task_wakeups_monitor_interval", &task_wakeups_monitor_interval, sizeof(task_wakeups_monitor_interval))) {
 801                 task_wakeups_monitor_interval = TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL;
 802         }
 803
 804         if (!PE_parse_boot_argn("task_wakeups_monitor_ustackshots_trigger_pct", &task_wakeups_monitor_ustackshots_trigger_pct,
 805             sizeof(task_wakeups_monitor_ustackshots_trigger_pct))) {
 806                 task_wakeups_monitor_ustackshots_trigger_pct = TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER;
 807         }
 808
 809         if (!PE_parse_boot_argn("disable_exc_resource", &disable_exc_resource,
 810             sizeof(disable_exc_resource))) {
 811                 disable_exc_resource = 0;
 812         }
 813
 814         if (!PE_parse_boot_argn("task_iomon_limit_mb", &task_iomon_limit_mb, sizeof(task_iomon_limit_mb))) {
 815                 task_iomon_limit_mb = IOMON_DEFAULT_LIMIT;
 816         }
 817
 818         if (!PE_parse_boot_argn("task_iomon_interval_secs", &task_iomon_interval_secs, sizeof(task_iomon_interval_secs))) {
 819                 task_iomon_interval_secs = IOMON_DEFAULT_INTERVAL;
 820         }
 821
 822         if (!PE_parse_boot_argn("io_telemetry_limit", &io_telemetry_limit, sizeof(io_telemetry_limit))) {
 823                 io_telemetry_limit = IO_TELEMETRY_DEFAULT_LIMIT;
 824         }
 825
 826 /*
 827  * If we have coalitions, coalition_init() will call init_task_ledgers() as it
 828  * sets up the ledgers for the default coalition. If we don't have coalitions,
 829  * then we have to call it now.
 830  */
 831 #if CONFIG_COALITIONS
 832         assert(task_ledger_template);
 833 #else /* CONFIG_COALITIONS */
 834         init_task_ledgers();
 835 #endif /* CONFIG_COALITIONS */
 836
 837 #if TASK_REFERENCE_LEAK_DEBUG
 838         task_ref_btlog = btlog_create(TASK_REF_NUM_RECORDS, TASK_REF_BTDEPTH, TRUE /* caller_will_remove_entries_for_element? */);
 839         assert(task_ref_btlog);
 840 #endif
 841
 842         /*
 843          * Create the kernel task as the first task.
 844          */
 845 #ifdef __LP64__
 846         if (task_create_internal(TASK_NULL, NULL, FALSE, TRUE, TRUE, TF_NONE, TPF_NONE, &kernel_task) != KERN_SUCCESS)
 847 #else
 848         if (task_create_internal(TASK_NULL, NULL, FALSE, FALSE, FALSE, TF_NONE, TPF_NONE, &kernel_task) != KERN_SUCCESS)
 849 #endif
 850         { panic("task_init\n");}
 851
 852
 853         vm_map_deallocate(kernel_task->map);
 854         kernel_task->map = kernel_map;
 855         lck_spin_init(&dead_task_statistics_lock, &task_lck_grp, &task_lck_attr);
 856 }
 857
 858 /*
 859  * Create a task running in the kernel address space.  It may
 860  * have its own map of size mem_size and may have ipc privileges.
 861  */
 862 kern_return_t
 863 kernel_task_create(
 864         __unused task_t         parent_task,
 865         __unused vm_offset_t            map_base,
 866         __unused vm_size_t              map_size,
 867         __unused task_t         *child_task)
 868 {
 869         return KERN_INVALID_ARGUMENT;
 870 }
 871
 872 kern_return_t
 873 task_create(
 874         task_t                          parent_task,
 875         __unused ledger_port_array_t    ledger_ports,
 876         __unused mach_msg_type_number_t num_ledger_ports,
 877         __unused boolean_t              inherit_memory,
 878         __unused task_t                 *child_task)    /* OUT */
 879 {
 880         if (parent_task == TASK_NULL) {
 881                 return KERN_INVALID_ARGUMENT;
 882         }
 883
 884         /*
 885          * No longer supported: too many calls assume that a task has a valid
 886          * process attached.
 887          */
 888         return KERN_FAILURE;
 889 }
 890
 891 kern_return_t
 892 host_security_create_task_token(
 893         host_security_t                 host_security,
 894         task_t                          parent_task,
 895         __unused security_token_t       sec_token,
 896         __unused audit_token_t          audit_token,
 897         __unused host_priv_t            host_priv,
 898         __unused ledger_port_array_t    ledger_ports,
 899         __unused mach_msg_type_number_t num_ledger_ports,
 900         __unused boolean_t              inherit_memory,
 901         __unused task_t                 *child_task)    /* OUT */
 902 {
 903         if (parent_task == TASK_NULL) {
 904                 return KERN_INVALID_ARGUMENT;
 905         }
 906
 907         if (host_security == HOST_NULL) {
 908                 return KERN_INVALID_SECURITY;
 909         }
 910
 911         /*
 912          * No longer supported.
 913          */
 914         return KERN_FAILURE;
 915 }
 916
 917 /*
 918  * Task ledgers
 919  * ------------
 920  *
 921  * phys_footprint
 922  *   Physical footprint: This is the sum of:
 923  *     + (internal - alternate_accounting)
 924  *     + (internal_compressed - alternate_accounting_compressed)
 925  *     + iokit_mapped
 926  *     + purgeable_nonvolatile
 927  *     + purgeable_nonvolatile_compressed
 928  *     + page_table
 929  *
 930  * internal
 931  *   The task's anonymous memory, which on iOS is always resident.
 932  *
 933  * internal_compressed
 934  *   Amount of this task's internal memory which is held by the compressor.
 935  *   Such memory is no longer actually resident for the task [i.e., resident in its pmap],
 936  *   and could be either decompressed back into memory, or paged out to storage, depending
 937  *   on our implementation.
 938  *
 939  * iokit_mapped
 940  *   IOKit mappings: The total size of all IOKit mappings in this task, regardless of
 941  *    clean/dirty or internal/external state].
 942  *
 943  * alternate_accounting
 944  *   The number of internal dirty pages which are part of IOKit mappings. By definition, these pages
 945  *   are counted in both internal *and* iokit_mapped, so we must subtract them from the total to avoid
 946  *   double counting.
 947  *
 948  * pages_grabbed
 949  *   pages_grabbed counts all page grabs in a task.  It is also broken out into three subtypes
 950  *   which track UPL, IOPL and Kernel page grabs.
 951  */
 952 void
 953 init_task_ledgers(void)
 954 {
 955         ledger_template_t t;
 956
 957         assert(task_ledger_template == NULL);
 958         assert(kernel_task == TASK_NULL);
 959
 960 #if MACH_ASSERT
 961         PE_parse_boot_argn("pmap_ledgers_panic",
 962             &pmap_ledgers_panic,
 963             sizeof(pmap_ledgers_panic));
 964         PE_parse_boot_argn("pmap_ledgers_panic_leeway",
 965             &pmap_ledgers_panic_leeway,
 966             sizeof(pmap_ledgers_panic_leeway));
 967 #endif /* MACH_ASSERT */
 968
 969         if ((t = ledger_template_create("Per-task ledger")) == NULL) {
 970                 panic("couldn't create task ledger template");
 971         }
 972
 973         task_ledgers.cpu_time = ledger_entry_add(t, "cpu_time", "sched", "ns");
 974         task_ledgers.tkm_private = ledger_entry_add(t, "tkm_private",
 975             "physmem", "bytes");
 976         task_ledgers.tkm_shared = ledger_entry_add(t, "tkm_shared", "physmem",
 977             "bytes");
 978         task_ledgers.phys_mem = ledger_entry_add(t, "phys_mem", "physmem",
 979             "bytes");
 980         task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem",
 981             "bytes");
 982         task_ledgers.internal = ledger_entry_add(t, "internal", "physmem",
 983             "bytes");
 984         task_ledgers.iokit_mapped = ledger_entry_add(t, "iokit_mapped", "mappings",
 985             "bytes");
 986         task_ledgers.alternate_accounting = ledger_entry_add(t, "alternate_accounting", "physmem",
 987             "bytes");
 988         task_ledgers.alternate_accounting_compressed = ledger_entry_add(t, "alternate_accounting_compressed", "physmem",
 989             "bytes");
 990         task_ledgers.page_table = ledger_entry_add(t, "page_table", "physmem",
 991             "bytes");
 992         task_ledgers.phys_footprint = ledger_entry_add(t, "phys_footprint", "physmem",
 993             "bytes");
 994         task_ledgers.internal_compressed = ledger_entry_add(t, "internal_compressed", "physmem",
 995             "bytes");
 996         task_ledgers.purgeable_volatile = ledger_entry_add(t, "purgeable_volatile", "physmem", "bytes");
 997         task_ledgers.purgeable_nonvolatile = ledger_entry_add(t, "purgeable_nonvolatile", "physmem", "bytes");
 998         task_ledgers.purgeable_volatile_compressed = ledger_entry_add(t, "purgeable_volatile_compress", "physmem", "bytes");
 999         task_ledgers.purgeable_nonvolatile_compressed = ledger_entry_add(t, "purgeable_nonvolatile_compress", "physmem", "bytes");
1000         task_ledgers.pages_grabbed = ledger_entry_add(t, "pages_grabbed", "physmem", "count");
1001         task_ledgers.pages_grabbed_kern = ledger_entry_add(t, "pages_grabbed_kern", "physmem", "count");
1002         task_ledgers.pages_grabbed_iopl = ledger_entry_add(t, "pages_grabbed_iopl", "physmem", "count");
1003         task_ledgers.pages_grabbed_upl = ledger_entry_add(t, "pages_grabbed_upl", "physmem", "count");
1004
1005         task_ledgers.network_volatile = ledger_entry_add(t, "network_volatile", "physmem", "bytes");
1006         task_ledgers.network_nonvolatile = ledger_entry_add(t, "network_nonvolatile", "physmem", "bytes");
1007         task_ledgers.network_volatile_compressed = ledger_entry_add(t, "network_volatile_compressed", "physmem", "bytes");
1008         task_ledgers.network_nonvolatile_compressed = ledger_entry_add(t, "network_nonvolatile_compressed", "physmem", "bytes");
1009
1010         task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power",
1011             "count");
1012         task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power",
1013             "count");
1014
1015 #if CONFIG_SCHED_SFI
1016         sfi_class_id_t class_id, ledger_alias;
1017         for (class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
1018                 task_ledgers.sfi_wait_times[class_id] = -1;
1019         }
1020
1021         /* don't account for UNSPECIFIED */
1022         for (class_id = SFI_CLASS_UNSPECIFIED + 1; class_id < MAX_SFI_CLASS_ID; class_id++) {
1023                 ledger_alias = sfi_get_ledger_alias_for_class(class_id);
1024                 if (ledger_alias != SFI_CLASS_UNSPECIFIED) {
1025                         /* Check to see if alias has been registered yet */
1026                         if (task_ledgers.sfi_wait_times[ledger_alias] != -1) {
1027                                 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias];
1028                         } else {
1029                                 /* Otherwise, initialize it first */
1030                                 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias] = sfi_ledger_entry_add(t, ledger_alias);
1031                         }
1032                 } else {
1033                         task_ledgers.sfi_wait_times[class_id] = sfi_ledger_entry_add(t, class_id);
1034                 }
1035
1036                 if (task_ledgers.sfi_wait_times[class_id] < 0) {
1037                         panic("couldn't create entries for task ledger template for SFI class 0x%x", class_id);
1038                 }
1039         }
1040
1041         assert(task_ledgers.sfi_wait_times[MAX_SFI_CLASS_ID - 1] != -1);
1042 #endif /* CONFIG_SCHED_SFI */
1043
1044         task_ledgers.cpu_time_billed_to_me = ledger_entry_add(t, "cpu_time_billed_to_me", "sched", "ns");
1045         task_ledgers.cpu_time_billed_to_others = ledger_entry_add(t, "cpu_time_billed_to_others", "sched", "ns");
1046         task_ledgers.physical_writes = ledger_entry_add(t, "physical_writes", "res", "bytes");
1047         task_ledgers.logical_writes = ledger_entry_add(t, "logical_writes", "res", "bytes");
1048         task_ledgers.energy_billed_to_me = ledger_entry_add(t, "energy_billed_to_me", "power", "nj");
1049         task_ledgers.energy_billed_to_others = ledger_entry_add(t, "energy_billed_to_others", "power", "nj");
1050
1051         if ((task_ledgers.cpu_time < 0) ||
1052             (task_ledgers.tkm_private < 0) ||
1053             (task_ledgers.tkm_shared < 0) ||
1054             (task_ledgers.phys_mem < 0) ||
1055             (task_ledgers.wired_mem < 0) ||
1056             (task_ledgers.internal < 0) ||
1057             (task_ledgers.iokit_mapped < 0) ||
1058             (task_ledgers.alternate_accounting < 0) ||
1059             (task_ledgers.alternate_accounting_compressed < 0) ||
1060             (task_ledgers.page_table < 0) ||
1061             (task_ledgers.phys_footprint < 0) ||
1062             (task_ledgers.internal_compressed < 0) ||
1063             (task_ledgers.purgeable_volatile < 0) ||
1064             (task_ledgers.purgeable_nonvolatile < 0) ||
1065             (task_ledgers.purgeable_volatile_compressed < 0) ||
1066             (task_ledgers.purgeable_nonvolatile_compressed < 0) ||
1067             (task_ledgers.network_volatile < 0) ||
1068             (task_ledgers.network_nonvolatile < 0) ||
1069             (task_ledgers.network_volatile_compressed < 0) ||
1070             (task_ledgers.network_nonvolatile_compressed < 0) ||
1071             (task_ledgers.platform_idle_wakeups < 0) ||
1072             (task_ledgers.interrupt_wakeups < 0) ||
1073             (task_ledgers.cpu_time_billed_to_me < 0) || (task_ledgers.cpu_time_billed_to_others < 0) ||
1074             (task_ledgers.physical_writes < 0) ||
1075             (task_ledgers.logical_writes < 0) ||
1076             (task_ledgers.energy_billed_to_me < 0) ||
1077             (task_ledgers.energy_billed_to_others < 0)
1078             ) {
1079                 panic("couldn't create entries for task ledger template");
1080         }
1081
1082         ledger_track_credit_only(t, task_ledgers.phys_footprint);
1083         ledger_track_credit_only(t, task_ledgers.page_table);
1084         ledger_track_credit_only(t, task_ledgers.internal);
1085         ledger_track_credit_only(t, task_ledgers.internal_compressed);
1086         ledger_track_credit_only(t, task_ledgers.iokit_mapped);
1087         ledger_track_credit_only(t, task_ledgers.alternate_accounting);
1088         ledger_track_credit_only(t, task_ledgers.alternate_accounting_compressed);
1089         ledger_track_credit_only(t, task_ledgers.purgeable_volatile);
1090         ledger_track_credit_only(t, task_ledgers.purgeable_nonvolatile);
1091         ledger_track_credit_only(t, task_ledgers.purgeable_volatile_compressed);
1092         ledger_track_credit_only(t, task_ledgers.purgeable_nonvolatile_compressed);
1093         ledger_track_credit_only(t, task_ledgers.pages_grabbed);
1094         ledger_track_credit_only(t, task_ledgers.pages_grabbed_kern);
1095         ledger_track_credit_only(t, task_ledgers.pages_grabbed_iopl);
1096         ledger_track_credit_only(t, task_ledgers.pages_grabbed_upl);
1097
1098         ledger_track_credit_only(t, task_ledgers.network_volatile);
1099         ledger_track_credit_only(t, task_ledgers.network_nonvolatile);
1100         ledger_track_credit_only(t, task_ledgers.network_volatile_compressed);
1101         ledger_track_credit_only(t, task_ledgers.network_nonvolatile_compressed);
1102
1103         ledger_track_maximum(t, task_ledgers.phys_footprint, 60);
1104 #if MACH_ASSERT
1105         if (pmap_ledgers_panic) {
1106                 ledger_panic_on_negative(t, task_ledgers.phys_footprint);
1107                 ledger_panic_on_negative(t, task_ledgers.page_table);
1108                 ledger_panic_on_negative(t, task_ledgers.internal);
1109                 ledger_panic_on_negative(t, task_ledgers.internal_compressed);
1110                 ledger_panic_on_negative(t, task_ledgers.iokit_mapped);
1111                 ledger_panic_on_negative(t, task_ledgers.alternate_accounting);
1112                 ledger_panic_on_negative(t, task_ledgers.alternate_accounting_compressed);
1113                 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile);
1114                 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile);
1115                 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile_compressed);
1116                 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile_compressed);
1117
1118                 ledger_panic_on_negative(t, task_ledgers.network_volatile);
1119                 ledger_panic_on_negative(t, task_ledgers.network_nonvolatile);
1120                 ledger_panic_on_negative(t, task_ledgers.network_volatile_compressed);
1121                 ledger_panic_on_negative(t, task_ledgers.network_nonvolatile_compressed);
1122         }
1123 #endif /* MACH_ASSERT */
1124
1125 #if CONFIG_MEMORYSTATUS
1126         ledger_set_callback(t, task_ledgers.phys_footprint, task_footprint_exceeded, NULL, NULL);
1127 #endif /* CONFIG_MEMORYSTATUS */
1128
1129         ledger_set_callback(t, task_ledgers.interrupt_wakeups,
1130             task_wakeups_rate_exceeded, NULL, NULL);
1131         ledger_set_callback(t, task_ledgers.physical_writes, task_io_rate_exceeded, (void *)FLAVOR_IO_PHYSICAL_WRITES, NULL);
1132         ledger_set_callback(t, task_ledgers.logical_writes, task_io_rate_exceeded, (void *)FLAVOR_IO_LOGICAL_WRITES, NULL);
1133
1134         ledger_template_complete(t);
1135         task_ledger_template = t;
1136 }
1137
1138 os_refgrp_decl(static, task_refgrp, "task", NULL);
1139
1140 kern_return_t
1141 task_create_internal(
1142         task_t          parent_task,
1143         coalition_t     *parent_coalitions __unused,
1144         boolean_t       inherit_memory,
1145         __unused boolean_t      is_64bit,
1146         boolean_t is_64bit_data,
1147         uint32_t        t_flags,
1148         uint32_t        t_procflags,
1149         task_t          *child_task)            /* OUT */
1150 {
1151         task_t                  new_task;
1152         vm_shared_region_t      shared_region;
1153         ledger_t                ledger = NULL;
1154
1155         new_task = (task_t) zalloc(task_zone);
1156
1157         if (new_task == TASK_NULL) {
1158                 return KERN_RESOURCE_SHORTAGE;
1159         }
1160
1161         /* one ref for just being alive; one for our caller */
1162         os_ref_init_count(&new_task->ref_count, &task_refgrp, 2);
1163
1164         /* allocate with active entries */
1165         assert(task_ledger_template != NULL);
1166         if ((ledger = ledger_instantiate(task_ledger_template,
1167             LEDGER_CREATE_ACTIVE_ENTRIES)) == NULL) {
1168                 zfree(task_zone, new_task);
1169                 return KERN_RESOURCE_SHORTAGE;
1170         }
1171
1172
1173         new_task->ledger = ledger;
1174
1175 #if defined(CONFIG_SCHED_MULTIQ)
1176         new_task->sched_group = sched_group_create();
1177 #endif
1178
1179         /* if inherit_memory is true, parent_task MUST not be NULL */
1180         if (!(t_flags & TF_CORPSE_FORK) && inherit_memory) {
1181                 new_task->map = vm_map_fork(ledger, parent_task->map, 0);
1182         } else {
1183                 new_task->map = vm_map_create(pmap_create(ledger, 0, is_64bit),
1184                     (vm_map_offset_t)(VM_MIN_ADDRESS),
1185                     (vm_map_offset_t)(VM_MAX_ADDRESS), TRUE);
1186         }
1187
1188         /* Inherit memlock limit from parent */
1189         if (parent_task) {
1190                 vm_map_set_user_wire_limit(new_task->map, (vm_size_t)parent_task->map->user_wire_limit);
1191         }
1192
1193         lck_mtx_init(&new_task->lock, &task_lck_grp, &task_lck_attr);
1194         queue_init(&new_task->threads);
1195         new_task->suspend_count = 0;
1196         new_task->thread_count = 0;
1197         new_task->active_thread_count = 0;
1198         new_task->user_stop_count = 0;
1199         new_task->legacy_stop_count = 0;
1200         new_task->active = TRUE;
1201         new_task->halting = FALSE;
1202         new_task->priv_flags = 0;
1203         new_task->t_flags = t_flags;
1204         new_task->t_procflags = t_procflags;
1205         new_task->importance = 0;
1206         new_task->crashed_thread_id = 0;
1207         new_task->exec_token = 0;
1208
1209         new_task->task_exc_guard = task_exc_guard_default;
1210
1211 #if CONFIG_ATM
1212         new_task->atm_context = NULL;
1213 #endif
1214         new_task->bank_context = NULL;
1215
1216 #ifdef MACH_BSD
1217         new_task->bsd_info = NULL;
1218         new_task->corpse_info = NULL;
1219 #endif /* MACH_BSD */
1220
1221 #if CONFIG_MACF
1222         new_task->crash_label = NULL;
1223 #endif
1224
1225 #if CONFIG_MEMORYSTATUS
1226         if (max_task_footprint != 0) {
1227                 ledger_set_limit(ledger, task_ledgers.phys_footprint, max_task_footprint, PHYS_FOOTPRINT_WARNING_LEVEL);
1228         }
1229 #endif /* CONFIG_MEMORYSTATUS */
1230
1231         if (task_wakeups_monitor_rate != 0) {
1232                 uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
1233                 int32_t  rate; // Ignored because of WAKEMON_SET_DEFAULTS
1234                 task_wakeups_monitor_ctl(new_task, &flags, &rate);
1235         }
1236
1237 #if CONFIG_IO_ACCOUNTING
1238         uint32_t flags = IOMON_ENABLE;
1239         task_io_monitor_ctl(new_task, &flags);
1240 #endif /* CONFIG_IO_ACCOUNTING */
1241
1242         machine_task_init(new_task, parent_task, inherit_memory);
1243
1244         new_task->task_debug = NULL;
1245
1246 #if DEVELOPMENT || DEBUG
1247         new_task->task_unnested = FALSE;
1248         new_task->task_disconnected_count = 0;
1249 #endif
1250         queue_init(&new_task->semaphore_list);
1251         new_task->semaphores_owned = 0;
1252
1253         ipc_task_init(new_task, parent_task);
1254
1255         new_task->vtimers = 0;
1256
1257         new_task->shared_region = NULL;
1258
1259         new_task->affinity_space = NULL;
1260
1261         new_task->t_kpc = 0;
1262
1263         new_task->pidsuspended = FALSE;
1264         new_task->frozen = FALSE;
1265         new_task->changing_freeze_state = FALSE;
1266         new_task->rusage_cpu_flags = 0;
1267         new_task->rusage_cpu_percentage = 0;
1268         new_task->rusage_cpu_interval = 0;
1269         new_task->rusage_cpu_deadline = 0;
1270         new_task->rusage_cpu_callt = NULL;
1271 #if MACH_ASSERT
1272         new_task->suspends_outstanding = 0;
1273 #endif
1274
1275 #if HYPERVISOR
1276         new_task->hv_task_target = NULL;
1277 #endif /* HYPERVISOR */
1278
1279 #if CONFIG_EMBEDDED
1280         queue_init(&new_task->task_watchers);
1281         new_task->num_taskwatchers  = 0;
1282         new_task->watchapplying  = 0;
1283 #endif /* CONFIG_EMBEDDED */
1284
1285         new_task->mem_notify_reserved = 0;
1286         new_task->memlimit_attrs_reserved = 0;
1287
1288         new_task->requested_policy = default_task_requested_policy;
1289         new_task->effective_policy = default_task_effective_policy;
1290
1291         task_importance_init_from_parent(new_task, parent_task);
1292
1293         if (parent_task != TASK_NULL) {
1294                 new_task->sec_token = parent_task->sec_token;
1295                 new_task->audit_token = parent_task->audit_token;
1296
1297                 /* inherit the parent's shared region */
1298                 shared_region = vm_shared_region_get(parent_task);
1299                 vm_shared_region_set(new_task, shared_region);
1300
1301                 if (task_has_64Bit_addr(parent_task)) {
1302                         task_set_64Bit_addr(new_task);
1303                 }
1304
1305                 if (task_has_64Bit_data(parent_task)) {
1306                         task_set_64Bit_data(new_task);
1307                 }
1308
1309                 new_task->all_image_info_addr = parent_task->all_image_info_addr;
1310                 new_task->all_image_info_size = parent_task->all_image_info_size;
1311
1312                 if (inherit_memory && parent_task->affinity_space) {
1313                         task_affinity_create(parent_task, new_task);
1314                 }
1315
1316                 new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task);
1317
1318 #if DEBUG || DEVELOPMENT
1319                 if (parent_task->t_flags & TF_NO_SMT) {
1320                         new_task->t_flags |= TF_NO_SMT;
1321                 }
1322 #endif
1323
1324                 new_task->priority = BASEPRI_DEFAULT;
1325                 new_task->max_priority = MAXPRI_USER;
1326
1327                 task_policy_create(new_task, parent_task);
1328         } else {
1329                 new_task->sec_token = KERNEL_SECURITY_TOKEN;
1330                 new_task->audit_token = KERNEL_AUDIT_TOKEN;
1331 #ifdef __LP64__
1332                 if (is_64bit) {
1333                         task_set_64Bit_addr(new_task);
1334                 }
1335 #endif
1336
1337                 if (is_64bit_data) {
1338                         task_set_64Bit_data(new_task);
1339                 }
1340
1341                 new_task->all_image_info_addr = (mach_vm_address_t)0;
1342                 new_task->all_image_info_size = (mach_vm_size_t)0;
1343
1344                 new_task->pset_hint = PROCESSOR_SET_NULL;
1345
1346                 if (kernel_task == TASK_NULL) {
1347                         new_task->priority = BASEPRI_KERNEL;
1348                         new_task->max_priority = MAXPRI_KERNEL;
1349                 } else {
1350                         new_task->priority = BASEPRI_DEFAULT;
1351                         new_task->max_priority = MAXPRI_USER;
1352                 }
1353         }
1354
1355         bzero(new_task->coalition, sizeof(new_task->coalition));
1356         for (int i = 0; i < COALITION_NUM_TYPES; i++) {
1357                 queue_chain_init(new_task->task_coalition[i]);
1358         }
1359
1360         /* Allocate I/O Statistics */
1361         new_task->task_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info));
1362         assert(new_task->task_io_stats != NULL);
1363         bzero(new_task->task_io_stats, sizeof(struct io_stat_info));
1364
1365         bzero(&(new_task->cpu_time_eqos_stats), sizeof(new_task->cpu_time_eqos_stats));
1366         bzero(&(new_task->cpu_time_rqos_stats), sizeof(new_task->cpu_time_rqos_stats));
1367
1368         bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics));
1369
1370         /* Copy resource acc. info from Parent for Corpe Forked task. */
1371         if (parent_task != NULL && (t_flags & TF_CORPSE_FORK)) {
1372                 task_rollup_accounting_info(new_task, parent_task);
1373         } else {
1374                 /* Initialize to zero for standard fork/spawn case */
1375                 new_task->total_user_time = 0;
1376                 new_task->total_system_time = 0;
1377                 new_task->total_ptime = 0;
1378                 new_task->total_runnable_time = 0;
1379                 new_task->faults = 0;
1380                 new_task->pageins = 0;
1381                 new_task->cow_faults = 0;
1382                 new_task->messages_sent = 0;
1383                 new_task->messages_received = 0;
1384                 new_task->syscalls_mach = 0;
1385                 new_task->syscalls_unix = 0;
1386                 new_task->c_switch = 0;
1387                 new_task->p_switch = 0;
1388                 new_task->ps_switch = 0;
1389                 new_task->low_mem_notified_warn = 0;
1390                 new_task->low_mem_notified_critical = 0;
1391                 new_task->purged_memory_warn = 0;
1392                 new_task->purged_memory_critical = 0;
1393                 new_task->low_mem_privileged_listener = 0;
1394                 new_task->memlimit_is_active = 0;
1395                 new_task->memlimit_is_fatal = 0;
1396                 new_task->memlimit_active_exc_resource = 0;
1397                 new_task->memlimit_inactive_exc_resource = 0;
1398                 new_task->task_timer_wakeups_bin_1 = 0;
1399                 new_task->task_timer_wakeups_bin_2 = 0;
1400                 new_task->task_gpu_ns = 0;
1401                 new_task->task_immediate_writes = 0;
1402                 new_task->task_deferred_writes = 0;
1403                 new_task->task_invalidated_writes = 0;
1404                 new_task->task_metadata_writes = 0;
1405                 new_task->task_energy = 0;
1406 #if MONOTONIC
1407                 memset(&new_task->task_monotonic, 0, sizeof(new_task->task_monotonic));
1408 #endif /* MONOTONIC */
1409         }
1410
1411
1412 #if CONFIG_COALITIONS
1413         if (!(t_flags & TF_CORPSE_FORK)) {
1414                 /* TODO: there is no graceful failure path here... */
1415                 if (parent_coalitions && parent_coalitions[COALITION_TYPE_RESOURCE]) {
1416                         coalitions_adopt_task(parent_coalitions, new_task);
1417                 } else if (parent_task && parent_task->coalition[COALITION_TYPE_RESOURCE]) {
1418                         /*
1419                          * all tasks at least have a resource coalition, so
1420                          * if the parent has one then inherit all coalitions
1421                          * the parent is a part of
1422                          */
1423                         coalitions_adopt_task(parent_task->coalition, new_task);
1424                 } else {
1425                         /* TODO: assert that new_task will be PID 1 (launchd) */
1426                         coalitions_adopt_init_task(new_task);
1427                 }
1428                 /*
1429                  * on exec, we need to transfer the coalition roles from the
1430                  * parent task to the exec copy task.
1431                  */
1432                 if (parent_task && (t_procflags & TPF_EXEC_COPY)) {
1433                         int coal_roles[COALITION_NUM_TYPES];
1434                         task_coalition_roles(parent_task, coal_roles);
1435                         (void)coalitions_set_roles(new_task->coalition, new_task, coal_roles);
1436                 }
1437         } else {
1438                 coalitions_adopt_corpse_task(new_task);
1439         }
1440
1441         if (new_task->coalition[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
1442                 panic("created task is not a member of a resource coalition");
1443         }
1444 #endif /* CONFIG_COALITIONS */
1445
1446         new_task->dispatchqueue_offset = 0;
1447         if (parent_task != NULL) {
1448                 new_task->dispatchqueue_offset = parent_task->dispatchqueue_offset;
1449         }
1450
1451         new_task->task_volatile_objects = 0;
1452         new_task->task_nonvolatile_objects = 0;
1453         new_task->task_purgeable_disowning = FALSE;
1454         new_task->task_purgeable_disowned = FALSE;
1455         queue_init(&new_task->task_objq);
1456         task_objq_lock_init(new_task);
1457
1458 #if __arm64__
1459         new_task->task_legacy_footprint = FALSE;
1460 #endif /* __arm64__ */
1461         new_task->task_region_footprint = FALSE;
1462         new_task->task_has_crossed_thread_limit = FALSE;
1463         new_task->task_thread_limit = 0;
1464 #if CONFIG_SECLUDED_MEMORY
1465         new_task->task_can_use_secluded_mem = FALSE;
1466         new_task->task_could_use_secluded_mem = FALSE;
1467         new_task->task_could_also_use_secluded_mem = FALSE;
1468         new_task->task_suppressed_secluded = FALSE;
1469 #endif /* CONFIG_SECLUDED_MEMORY */
1470
1471         /*
1472          * t_flags is set up above. But since we don't
1473          * support darkwake mode being set that way
1474          * currently, we clear it out here explicitly.
1475          */
1476         new_task->t_flags &= ~(TF_DARKWAKE_MODE);
1477
1478         queue_init(&new_task->io_user_clients);
1479
1480         ipc_task_enable(new_task);
1481
1482         lck_mtx_lock(&tasks_threads_lock);
1483         queue_enter(&tasks, new_task, task_t, tasks);
1484         tasks_count++;
1485         if (tasks_suspend_state) {
1486                 task_suspend_internal(new_task);
1487         }
1488         lck_mtx_unlock(&tasks_threads_lock);
1489
1490         *child_task = new_task;
1491         return KERN_SUCCESS;
1492 }
1493
1494 /*
1495  *      task_rollup_accounting_info
1496  *
1497  *      Roll up accounting stats. Used to rollup stats
1498  *      for exec copy task and corpse fork.
1499  */
1500 void
1501 task_rollup_accounting_info(task_t to_task, task_t from_task)
1502 {
1503         assert(from_task != to_task);
1504
1505         to_task->total_user_time = from_task->total_user_time;
1506         to_task->total_system_time = from_task->total_system_time;
1507         to_task->total_ptime = from_task->total_ptime;
1508         to_task->total_runnable_time = from_task->total_runnable_time;
1509         to_task->faults = from_task->faults;
1510         to_task->pageins = from_task->pageins;
1511         to_task->cow_faults = from_task->cow_faults;
1512         to_task->messages_sent = from_task->messages_sent;
1513         to_task->messages_received = from_task->messages_received;
1514         to_task->syscalls_mach = from_task->syscalls_mach;
1515         to_task->syscalls_unix = from_task->syscalls_unix;
1516         to_task->c_switch = from_task->c_switch;
1517         to_task->p_switch = from_task->p_switch;
1518         to_task->ps_switch = from_task->ps_switch;
1519         to_task->extmod_statistics = from_task->extmod_statistics;
1520         to_task->low_mem_notified_warn = from_task->low_mem_notified_warn;
1521         to_task->low_mem_notified_critical = from_task->low_mem_notified_critical;
1522         to_task->purged_memory_warn = from_task->purged_memory_warn;
1523         to_task->purged_memory_critical = from_task->purged_memory_critical;
1524         to_task->low_mem_privileged_listener = from_task->low_mem_privileged_listener;
1525         *to_task->task_io_stats = *from_task->task_io_stats;
1526         to_task->cpu_time_eqos_stats = from_task->cpu_time_eqos_stats;
1527         to_task->cpu_time_rqos_stats = from_task->cpu_time_rqos_stats;
1528         to_task->task_timer_wakeups_bin_1 = from_task->task_timer_wakeups_bin_1;
1529         to_task->task_timer_wakeups_bin_2 = from_task->task_timer_wakeups_bin_2;
1530         to_task->task_gpu_ns = from_task->task_gpu_ns;
1531         to_task->task_immediate_writes = from_task->task_immediate_writes;
1532         to_task->task_deferred_writes = from_task->task_deferred_writes;
1533         to_task->task_invalidated_writes = from_task->task_invalidated_writes;
1534         to_task->task_metadata_writes = from_task->task_metadata_writes;
1535         to_task->task_energy = from_task->task_energy;
1536
1537         /* Skip ledger roll up for memory accounting entries */
1538         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time);
1539         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.platform_idle_wakeups);
1540         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.interrupt_wakeups);
1541 #if CONFIG_SCHED_SFI
1542         for (sfi_class_id_t class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
1543                 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.sfi_wait_times[class_id]);
1544         }
1545 #endif
1546         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_me);
1547         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_others);
1548         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.physical_writes);
1549         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.logical_writes);
1550         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_me);
1551         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_others);
1552 }
1553
/*
 * Global counter, zero at boot.
 * NOTE(review): presumably counts dropped importance references; no
 * update site is visible in this part of the file -- confirm against users.
 */
int task_dropped_imp_count = 0;
1555
1556 /*
1557  *      task_deallocate:
1558  *
1559  *      Drop a reference on a task.
1560  */
1561 void
1562 task_deallocate(
1563         task_t          task)
1564 {
1565         ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups;
1566         os_ref_count_t refs;
1567
1568         if (task == TASK_NULL) {
1569                 return;
1570         }
1571
1572         refs = task_deallocate_internal(task);
1573
1574 #if IMPORTANCE_INHERITANCE
1575         if (refs == 1) {
1576                 /*
1577                  * If last ref potentially comes from the task's importance,
1578                  * disconnect it.  But more task refs may be added before
1579                  * that completes, so wait for the reference to go to zero
1580                  * naturally (it may happen on a recursive task_deallocate()
1581                  * from the ipc_importance_disconnect_task() call).
1582                  */
1583                 if (IIT_NULL != task->task_imp_base) {
1584                         ipc_importance_disconnect_task(task);
1585                 }
1586                 return;
1587         }
1588 #endif /* IMPORTANCE_INHERITANCE */
1589
1590         if (refs > 0) {
1591                 return;
1592         }
1593
1594         lck_mtx_lock(&tasks_threads_lock);
1595         queue_remove(&terminated_tasks, task, task_t, tasks);
1596         terminated_tasks_count--;
1597         lck_mtx_unlock(&tasks_threads_lock);
1598
1599         /*
1600          * remove the reference on atm descriptor
1601          */
1602         task_atm_reset(task);
1603
1604         /*
1605          * remove the reference on bank context
1606          */
1607         task_bank_reset(task);
1608
1609         if (task->task_io_stats) {
1610                 kfree(task->task_io_stats, sizeof(struct io_stat_info));
1611         }
1612
1613         /*
1614          *      Give the machine dependent code a chance
1615          *      to perform cleanup before ripping apart
1616          *      the task.
1617          */
1618         machine_task_terminate(task);
1619
1620         ipc_task_terminate(task);
1621
1622         /* let iokit know */
1623         iokit_task_terminate(task);
1624
1625         if (task->affinity_space) {
1626                 task_affinity_deallocate(task);
1627         }
1628
1629 #if MACH_ASSERT
1630         if (task->ledger != NULL &&
1631             task->map != NULL &&
1632             task->map->pmap != NULL &&
1633             task->map->pmap->ledger != NULL) {
1634                 assert(task->ledger == task->map->pmap->ledger);
1635         }
1636 #endif /* MACH_ASSERT */
1637
1638         vm_purgeable_disown(task);
1639         assert(task->task_purgeable_disowned);
1640         if (task->task_volatile_objects != 0 ||
1641             task->task_nonvolatile_objects != 0) {
1642                 panic("task_deallocate(%p): "
1643                     "volatile_objects=%d nonvolatile_objects=%d\n",
1644                     task,
1645                     task->task_volatile_objects,
1646                     task->task_nonvolatile_objects);
1647         }
1648
1649         vm_map_deallocate(task->map);
1650         is_release(task->itk_space);
1651
1652         ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
1653             &interrupt_wakeups, &debit);
1654         ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
1655             &platform_idle_wakeups, &debit);
1656
1657 #if defined(CONFIG_SCHED_MULTIQ)
1658         sched_group_destroy(task->sched_group);
1659 #endif
1660
1661         /* Accumulate statistics for dead tasks */
1662         lck_spin_lock(&dead_task_statistics_lock);
1663         dead_task_statistics.total_user_time += task->total_user_time;
1664         dead_task_statistics.total_system_time += task->total_system_time;
1665
1666         dead_task_statistics.task_interrupt_wakeups += interrupt_wakeups;
1667         dead_task_statistics.task_platform_idle_wakeups += platform_idle_wakeups;
1668
1669         dead_task_statistics.task_timer_wakeups_bin_1 += task->task_timer_wakeups_bin_1;
1670         dead_task_statistics.task_timer_wakeups_bin_2 += task->task_timer_wakeups_bin_2;
1671         dead_task_statistics.total_ptime += task->total_ptime;
1672         dead_task_statistics.total_pset_switches += task->ps_switch;
1673         dead_task_statistics.task_gpu_ns += task->task_gpu_ns;
1674         dead_task_statistics.task_energy += task->task_energy;
1675
1676         lck_spin_unlock(&dead_task_statistics_lock);
1677         lck_mtx_destroy(&task->lock, &task_lck_grp);
1678
1679         if (!ledger_get_entries(task->ledger, task_ledgers.tkm_private, &credit,
1680             &debit)) {
1681                 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc);
1682                 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_private.free);
1683         }
1684         if (!ledger_get_entries(task->ledger, task_ledgers.tkm_shared, &credit,
1685             &debit)) {
1686                 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_shared.alloc);
1687                 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_shared.free);
1688         }
1689         ledger_dereference(task->ledger);
1690
1691 #if TASK_REFERENCE_LEAK_DEBUG
1692         btlog_remove_entries_for_element(task_ref_btlog, task);
1693 #endif
1694
1695 #if CONFIG_COALITIONS
1696         task_release_coalitions(task);
1697 #endif /* CONFIG_COALITIONS */
1698
1699         bzero(task->coalition, sizeof(task->coalition));
1700
1701 #if MACH_BSD
1702         /* clean up collected information since last reference to task is gone */
1703         if (task->corpse_info) {
1704                 void *corpse_info_kernel = kcdata_memory_get_begin_addr(task->corpse_info);
1705                 task_crashinfo_destroy(task->corpse_info);
1706                 task->corpse_info = NULL;
1707                 if (corpse_info_kernel) {
1708                         kfree(corpse_info_kernel, CORPSEINFO_ALLOCATION_SIZE);
1709                 }
1710         }
1711 #endif
1712
1713 #if CONFIG_MACF
1714         if (task->crash_label) {
1715                 mac_exc_free_label(task->crash_label);
1716                 task->crash_label = NULL;
1717         }
1718 #endif
1719
1720         assert(queue_empty(&task->task_objq));
1721
1722         zfree(task_zone, task);
1723 }
1724
1725 /*
1726  *      task_name_deallocate:
1727  *
1728  *      Drop a reference on a task name.
1729  */
1730 void
1731 task_name_deallocate(
1732         task_name_t             task_name)
1733 {
1734         return task_deallocate((task_t)task_name);
1735 }
1736
1737 /*
1738  *      task_inspect_deallocate:
1739  *
1740  *      Drop a task inspection reference.
1741  */
1742 void
1743 task_inspect_deallocate(
1744         task_inspect_t          task_inspect)
1745 {
1746         return task_deallocate((task_t)task_inspect);
1747 }
1748
1749 /*
1750  *      task_suspension_token_deallocate:
1751  *
1752  *      Drop a reference on a task suspension token.
1753  */
1754 void
1755 task_suspension_token_deallocate(
1756         task_suspension_token_t         token)
1757 {
1758         return task_deallocate((task_t)token);
1759 }
1760
1761
1762 /*
1763  * task_collect_crash_info:
1764  *
1765  * collect crash info from bsd and mach based data
1766  */
1767 kern_return_t
1768 task_collect_crash_info(
1769         task_t task,
1770 #ifdef CONFIG_MACF
1771         struct label *crash_label,
1772 #endif
1773         int is_corpse_fork)
1774 {
1775         kern_return_t kr = KERN_SUCCESS;
1776
1777         kcdata_descriptor_t crash_data = NULL;
1778         kcdata_descriptor_t crash_data_release = NULL;
1779         mach_msg_type_number_t size = CORPSEINFO_ALLOCATION_SIZE;
1780         mach_vm_offset_t crash_data_ptr = 0;
1781         void *crash_data_kernel = NULL;
1782         void *crash_data_kernel_release = NULL;
1783 #if CONFIG_MACF
1784         struct label *label, *free_label;
1785 #endif
1786
1787         if (!corpses_enabled()) {
1788                 return KERN_NOT_SUPPORTED;
1789         }
1790
1791 #if CONFIG_MACF
1792         free_label = label = mac_exc_create_label();
1793 #endif
1794
1795         task_lock(task);
1796
1797         assert(is_corpse_fork || task->bsd_info != NULL);
1798         if (task->corpse_info == NULL && (is_corpse_fork || task->bsd_info != NULL)) {
1799 #if CONFIG_MACF
1800                 /* Set the crash label, used by the exception delivery mac hook */
1801                 free_label = task->crash_label; // Most likely NULL.
1802                 task->crash_label = label;
1803                 mac_exc_update_task_crash_label(task, crash_label);
1804 #endif
1805                 task_unlock(task);
1806
1807                 crash_data_kernel = (void *) kalloc(CORPSEINFO_ALLOCATION_SIZE);
1808                 if (crash_data_kernel == NULL) {
1809                         kr = KERN_RESOURCE_SHORTAGE;
1810                         goto out_no_lock;
1811                 }
1812                 bzero(crash_data_kernel, CORPSEINFO_ALLOCATION_SIZE);
1813                 crash_data_ptr = (mach_vm_offset_t) crash_data_kernel;
1814
1815                 /* Do not get a corpse ref for corpse fork */
1816                 crash_data = task_crashinfo_alloc_init((mach_vm_address_t)crash_data_ptr, size,
1817                     is_corpse_fork ? 0 : CORPSE_CRASHINFO_HAS_REF,
1818                     KCFLAG_USE_MEMCOPY);
1819                 if (crash_data) {
1820                         task_lock(task);
1821                         crash_data_release = task->corpse_info;
1822                         crash_data_kernel_release = kcdata_memory_get_begin_addr(crash_data_release);
1823                         task->corpse_info = crash_data;
1824
1825                         task_unlock(task);
1826                         kr = KERN_SUCCESS;
1827                 } else {
1828                         kfree(crash_data_kernel, CORPSEINFO_ALLOCATION_SIZE);
1829                         kr = KERN_FAILURE;
1830                 }
1831
1832                 if (crash_data_release != NULL) {
1833                         task_crashinfo_destroy(crash_data_release);
1834                 }
1835                 if (crash_data_kernel_release != NULL) {
1836                         kfree(crash_data_kernel_release, CORPSEINFO_ALLOCATION_SIZE);
1837                 }
1838         } else {
1839                 task_unlock(task);
1840         }
1841
1842 out_no_lock:
1843 #if CONFIG_MACF
1844         if (free_label != NULL) {
1845                 mac_exc_free_label(free_label);
1846         }
1847 #endif
1848         return kr;
1849 }
1850
1851 /*
1852  * task_deliver_crash_notification:
1853  *
1854  * Makes outcall to registered host port for a corpse.
1855  */
1856 kern_return_t
1857 task_deliver_crash_notification(
1858         task_t task,
1859         thread_t thread,
1860         exception_type_t etype,
1861         mach_exception_subcode_t subcode)
1862 {
1863         kcdata_descriptor_t crash_info = task->corpse_info;
1864         thread_t th_iter = NULL;
1865         kern_return_t kr = KERN_SUCCESS;
1866         wait_interrupt_t wsave;
1867         mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
1868         ipc_port_t task_port, old_notify;
1869
1870         if (crash_info == NULL) {
1871                 return KERN_FAILURE;
1872         }
1873
1874         task_lock(task);
1875         if (task_is_a_corpse_fork(task)) {
1876                 /* Populate code with EXC_{RESOURCE,GUARD} for corpse fork */
1877                 code[0] = etype;
1878                 code[1] = subcode;
1879         } else {
1880                 /* Populate code with EXC_CRASH for corpses */
1881                 code[0] = EXC_CRASH;
1882                 code[1] = 0;
1883                 /* Update the code[1] if the boot-arg corpse_for_fatal_memkill is set */
1884                 if (corpse_for_fatal_memkill) {
1885                         code[1] = subcode;
1886                 }
1887         }
1888
1889         queue_iterate(&task->threads, th_iter, thread_t, task_threads)
1890         {
1891                 if (th_iter->corpse_dup == FALSE) {
1892                         ipc_thread_reset(th_iter);
1893                 }
1894         }
1895         task_unlock(task);
1896
1897         /* Arm the no-sender notification for taskport */
1898         task_reference(task);
1899         task_port = convert_task_to_port(task);
1900         ip_lock(task_port);
1901         assert(ip_active(task_port));
1902         ipc_port_nsrequest(task_port, task_port->ip_mscount, ipc_port_make_sonce_locked(task_port), &old_notify);
1903         /* port unlocked */
1904         assert(IP_NULL == old_notify);
1905
1906         wsave = thread_interrupt_level(THREAD_UNINT);
1907         kr = exception_triage_thread(EXC_CORPSE_NOTIFY, code, EXCEPTION_CODE_MAX, thread);
1908         if (kr != KERN_SUCCESS) {
1909                 printf("Failed to send exception EXC_CORPSE_NOTIFY. error code: %d for pid %d\n", kr, task_pid(task));
1910         }
1911
1912         (void)thread_interrupt_level(wsave);
1913
1914         /*
1915          * Drop the send right on task port, will fire the
1916          * no-sender notification if exception deliver failed.
1917          */
1918         ipc_port_release_send(task_port);
1919         return kr;
1920 }
1921
1922 /*
1923  *      task_terminate:
1924  *
1925  *      Terminate the specified task.  See comments on thread_terminate
1926  *      (kern/thread.c) about problems with terminating the "current task."
1927  */
1928
1929 kern_return_t
1930 task_terminate(
1931         task_t          task)
1932 {
1933         if (task == TASK_NULL) {
1934                 return KERN_INVALID_ARGUMENT;
1935         }
1936
1937         if (task->bsd_info) {
1938                 return KERN_FAILURE;
1939         }
1940
1941         return task_terminate_internal(task);
1942 }
1943
#if MACH_ASSERT
/* Helpers defined outside this file; declared here for MACH_ASSERT builds. */
extern int  proc_pid(struct proc *);
extern void proc_name_kdp(task_t t, char *buf, int size);
#endif /* MACH_ASSERT */
1948
1949 #define VM_MAP_PARTIAL_REAP 0x54  /* 0x150 */
1950 static void
1951 __unused task_partial_reap(task_t task, __unused int pid)
1952 {
1953         unsigned int    reclaimed_resident = 0;
1954         unsigned int    reclaimed_compressed = 0;
1955         uint64_t        task_page_count;
1956
1957         task_page_count = (get_task_phys_footprint(task) / PAGE_SIZE_64);
1958
1959         KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_START),
1960             pid, task_page_count, 0, 0, 0);
1961
1962         vm_map_partial_reap(task->map, &reclaimed_resident, &reclaimed_compressed);
1963
1964         KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_END),
1965             pid, reclaimed_resident, reclaimed_compressed, 0, 0);
1966 }
1967
1968 kern_return_t
1969 task_mark_corpse(task_t task)
1970 {
1971         kern_return_t kr = KERN_SUCCESS;
1972         thread_t self_thread;
1973         (void) self_thread;
1974         wait_interrupt_t wsave;
1975 #if CONFIG_MACF
1976         struct label *crash_label = NULL;
1977 #endif
1978
1979         assert(task != kernel_task);
1980         assert(task == current_task());
1981         assert(!task_is_a_corpse(task));
1982
1983 #if CONFIG_MACF
1984         crash_label = mac_exc_create_label_for_proc((struct proc*)task->bsd_info);
1985 #endif
1986
1987         kr = task_collect_crash_info(task,
1988 #if CONFIG_MACF
1989             crash_label,
1990 #endif
1991             FALSE);
1992         if (kr != KERN_SUCCESS) {
1993                 goto out;
1994         }
1995
1996         self_thread = current_thread();
1997
1998         wsave = thread_interrupt_level(THREAD_UNINT);
1999         task_lock(task);
2000
2001         task_set_corpse_pending_report(task);
2002         task_set_corpse(task);
2003         task->crashed_thread_id = thread_tid(self_thread);
2004
2005         kr = task_start_halt_locked(task, TRUE);
2006         assert(kr == KERN_SUCCESS);
2007
2008         ipc_task_reset(task);
2009         /* Remove the naked send right for task port, needed to arm no sender notification */
2010         task_set_special_port(task, TASK_KERNEL_PORT, IPC_PORT_NULL);
2011         ipc_task_enable(task);
2012
2013         task_unlock(task);
2014         /* terminate the ipc space */
2015         ipc_space_terminate(task->itk_space);
2016
2017         /* Add it to global corpse task list */
2018         task_add_to_corpse_task_list(task);
2019
2020         task_start_halt(task);
2021         thread_terminate_internal(self_thread);
2022
2023         (void) thread_interrupt_level(wsave);
2024         assert(task->halting == TRUE);
2025
2026 out:
2027 #if CONFIG_MACF
2028         mac_exc_free_label(crash_label);
2029 #endif
2030         return kr;
2031 }
2032
2033 /*
2034  *      task_clear_corpse
2035  *
2036  *      Clears the corpse pending bit on task.
2037  *      Removes inspection bit on the threads.
2038  */
2039 void
2040 task_clear_corpse(task_t task)
2041 {
2042         thread_t th_iter = NULL;
2043
2044         task_lock(task);
2045         queue_iterate(&task->threads, th_iter, thread_t, task_threads)
2046         {
2047                 thread_mtx_lock(th_iter);
2048                 th_iter->inspection = FALSE;
2049                 thread_mtx_unlock(th_iter);
2050         }
2051
2052         thread_terminate_crashed_threads();
2053         /* remove the pending corpse report flag */
2054         task_clear_corpse_pending_report(task);
2055
2056         task_unlock(task);
2057 }
2058
2059 /*
2060  *      task_port_notify
2061  *
2062  *      Called whenever the Mach port system detects no-senders on
2063  *      the task port of a corpse.
2064  *      Each notification that comes in should terminate the task (corpse).
2065  */
2066 void
2067 task_port_notify(mach_msg_header_t *msg)
2068 {
2069         mach_no_senders_notification_t *notification = (void *)msg;
2070         ipc_port_t port = notification->not_header.msgh_remote_port;
2071         task_t task;
2072
2073         assert(ip_active(port));
2074         assert(IKOT_TASK == ip_kotype(port));
2075         task = (task_t) port->ip_kobject;
2076
2077         assert(task_is_a_corpse(task));
2078
2079         /* Remove the task from global corpse task list */
2080         task_remove_from_corpse_task_list(task);
2081
2082         task_clear_corpse(task);
2083         task_terminate_internal(task);
2084 }
2085
2086 /*
2087  *      task_wait_till_threads_terminate_locked
2088  *
2089  *      Wait till all the threads in the task are terminated.
2090  *      Might release the task lock and re-acquire it.
2091  */
2092 void
2093 task_wait_till_threads_terminate_locked(task_t task)
2094 {
2095         /* wait for all the threads in the task to terminate */
2096         while (task->active_thread_count != 0) {
2097                 assert_wait((event_t)&task->active_thread_count, THREAD_UNINT);
2098                 task_unlock(task);
2099                 thread_block(THREAD_CONTINUE_NULL);
2100
2101                 task_lock(task);
2102         }
2103 }
2104
2105 /*
2106  *      task_duplicate_map_and_threads
2107  *
2108  *      Copy vmmap of source task.
2109  *      Copy active threads from source task to destination task.
2110  *      Source task would be suspended during the copy.
2111  */
2112 kern_return_t
2113 task_duplicate_map_and_threads(
2114         task_t task,
2115         void *p,
2116         task_t new_task,
2117         thread_t *thread_ret,
2118         uint64_t **udata_buffer,
2119         int *size,
2120         int *num_udata)
2121 {
2122         kern_return_t kr = KERN_SUCCESS;
2123         int active;
2124         thread_t thread, self, thread_return = THREAD_NULL;
2125         thread_t new_thread = THREAD_NULL, first_thread = THREAD_NULL;
2126         thread_t *thread_array;
2127         uint32_t active_thread_count = 0, array_count = 0, i;
2128         vm_map_t oldmap;
2129         uint64_t *buffer = NULL;
2130         int buf_size = 0;
2131         int est_knotes = 0, num_knotes = 0;
2132
2133         self = current_thread();
2134
2135         /*
2136          * Suspend the task to copy thread state, use the internal
2137          * variant so that no user-space process can resume
2138          * the task from under us
2139          */
2140         kr = task_suspend_internal(task);
2141         if (kr != KERN_SUCCESS) {
2142                 return kr;
2143         }
2144
2145         if (task->map->disable_vmentry_reuse == TRUE) {
2146                 /*
2147                  * Quite likely GuardMalloc (or some debugging tool)
2148                  * is being used on this task. And it has gone through
2149                  * its limit. Making a corpse will likely encounter
2150                  * a lot of VM entries that will need COW.
2151                  *
2152                  * Skip it.
2153                  */
2154 #if DEVELOPMENT || DEBUG
2155                 memorystatus_abort_vm_map_fork(task);
2156 #endif
2157                 task_resume_internal(task);
2158                 return KERN_FAILURE;
2159         }
2160
2161         /* Check with VM if vm_map_fork is allowed for this task */
2162         if (memorystatus_allowed_vm_map_fork(task)) {
2163                 /* Setup new task's vmmap, switch from parent task's map to it COW map */
2164                 oldmap = new_task->map;
2165                 new_task->map = vm_map_fork(new_task->ledger,
2166                     task->map,
2167                     (VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
2168                     VM_MAP_FORK_PRESERVE_PURGEABLE |
2169                     VM_MAP_FORK_CORPSE_FOOTPRINT));
2170                 vm_map_deallocate(oldmap);
2171
2172                 /* copy ledgers that impact the memory footprint */
2173                 vm_map_copy_footprint_ledgers(task, new_task);
2174
2175                 /* Get all the udata pointers from kqueue */
2176                 est_knotes = kevent_proc_copy_uptrs(p, NULL, 0);
2177                 if (est_knotes > 0) {
2178                         buf_size = (est_knotes + 32) * sizeof(uint64_t);
2179                         buffer = (uint64_t *) kalloc(buf_size);
2180                         num_knotes = kevent_proc_copy_uptrs(p, buffer, buf_size);
2181                         if (num_knotes > est_knotes + 32) {
2182                                 num_knotes = est_knotes + 32;
2183                         }
2184                 }
2185         }
2186
2187         active_thread_count = task->active_thread_count;
2188         if (active_thread_count == 0) {
2189                 if (buffer != NULL) {
2190                         kfree(buffer, buf_size);
2191                 }
2192                 task_resume_internal(task);
2193                 return KERN_FAILURE;
2194         }
2195
2196         thread_array = (thread_t *) kalloc(sizeof(thread_t) * active_thread_count);
2197
2198         /* Iterate all the threads and drop the task lock before calling thread_create_with_continuation */
2199         task_lock(task);
2200         queue_iterate(&task->threads, thread, thread_t, task_threads) {
2201                 /* Skip inactive threads */
2202                 active = thread->active;
2203                 if (!active) {
2204                         continue;
2205                 }
2206
2207                 if (array_count >= active_thread_count) {
2208                         break;
2209                 }
2210
2211                 thread_array[array_count++] = thread;
2212                 thread_reference(thread);
2213         }
2214         task_unlock(task);
2215
2216         for (i = 0; i < array_count; i++) {
2217                 kr = thread_create_with_continuation(new_task, &new_thread, (thread_continue_t)thread_corpse_continue);
2218                 if (kr != KERN_SUCCESS) {
2219                         break;
2220                 }
2221
2222                 /* Equivalent of current thread in corpse */
2223                 if (thread_array[i] == self) {
2224                         thread_return = new_thread;
2225                         new_task->crashed_thread_id = thread_tid(new_thread);
2226                 } else if (first_thread == NULL) {
2227                         first_thread = new_thread;
2228                 } else {
2229                         /* drop the extra ref returned by thread_create_with_continuation */
2230                         thread_deallocate(new_thread);
2231                 }
2232
2233                 kr = thread_dup2(thread_array[i], new_thread);
2234                 if (kr != KERN_SUCCESS) {
2235                         thread_mtx_lock(new_thread);
2236                         new_thread->corpse_dup = TRUE;
2237                         thread_mtx_unlock(new_thread);
2238                         continue;
2239                 }
2240
2241                 /* Copy thread name */
2242                 bsd_copythreadname(new_thread->uthread, thread_array[i]->uthread);
2243                 new_thread->thread_tag = thread_array[i]->thread_tag;
2244                 thread_copy_resource_info(new_thread, thread_array[i]);
2245         }
2246
2247         /* return the first thread if we couldn't find the equivalent of current */
2248         if (thread_return == THREAD_NULL) {
2249                 thread_return = first_thread;
2250         } else if (first_thread != THREAD_NULL) {
2251                 /* drop the extra ref returned by thread_create_with_continuation */
2252                 thread_deallocate(first_thread);
2253         }
2254
2255         task_resume_internal(task);
2256
2257         for (i = 0; i < array_count; i++) {
2258                 thread_deallocate(thread_array[i]);
2259         }
2260         kfree(thread_array, sizeof(thread_t) * active_thread_count);
2261
2262         if (kr == KERN_SUCCESS) {
2263                 *thread_ret = thread_return;
2264                 *udata_buffer = buffer;
2265                 *size = buf_size;
2266                 *num_udata = num_knotes;
2267         } else {
2268                 if (thread_return != THREAD_NULL) {
2269                         thread_deallocate(thread_return);
2270                 }
2271                 if (buffer != NULL) {
2272                         kfree(buffer, buf_size);
2273                 }
2274         }
2275
2276         return kr;
2277 }
2278
#if CONFIG_SECLUDED_MEMORY
/* Defined outside this part of the file; declared here for CONFIG_SECLUDED_MEMORY builds. */
extern void task_set_can_use_secluded_mem_locked(task_t task, boolean_t can_use_secluded_mem);
#endif /* CONFIG_SECLUDED_MEMORY */
2284
2285 kern_return_t
2286 task_terminate_internal(
2287         task_t                  task)
2288 {
2289         thread_t                        thread, self;
2290         task_t                          self_task;
2291         boolean_t                       interrupt_save;
2292         int                             pid = 0;
2293
2294         assert(task != kernel_task);
2295
2296         self = current_thread();
2297         self_task = self->task;
2298
2299         /*
2300          *      Get the task locked and make sure that we are not racing
2301          *      with someone else trying to terminate us.
2302          */
2303         if (task == self_task) {
2304                 task_lock(task);
2305         } else if (task < self_task) {
2306                 task_lock(task);
2307                 task_lock(self_task);
2308         } else {
2309                 task_lock(self_task);
2310                 task_lock(task);
2311         }
2312
2313 #if CONFIG_SECLUDED_MEMORY
2314         if (task->task_can_use_secluded_mem) {
2315                 task_set_can_use_secluded_mem_locked(task, FALSE);
2316         }
2317         task->task_could_use_secluded_mem = FALSE;
2318         task->task_could_also_use_secluded_mem = FALSE;
2319
2320         if (task->task_suppressed_secluded) {
2321                 stop_secluded_suppression(task);
2322         }
2323 #endif /* CONFIG_SECLUDED_MEMORY */
2324
2325         if (!task->active) {
2326                 /*
2327                  *      Task is already being terminated.
2328                  *      Just return an error. If we are dying, this will
2329                  *      just get us to our AST special handler and that
2330                  *      will get us to finalize the termination of ourselves.
2331                  */
2332                 task_unlock(task);
2333                 if (self_task != task) {
2334                         task_unlock(self_task);
2335                 }
2336
2337                 return KERN_FAILURE;
2338         }
2339
2340         if (task_corpse_pending_report(task)) {
2341                 /*
2342                  *      Task is marked for reporting as corpse.
2343                  *      Just return an error. This will
2344                  *      just get us to our AST special handler and that
2345                  *      will get us to finish the path to death
2346                  */
2347                 task_unlock(task);
2348                 if (self_task != task) {
2349                         task_unlock(self_task);
2350                 }
2351
2352                 return KERN_FAILURE;
2353         }
2354
2355         if (self_task != task) {
2356                 task_unlock(self_task);
2357         }
2358
2359         /*
2360          * Make sure the current thread does not get aborted out of
2361          * the waits inside these operations.
2362          */
2363         interrupt_save = thread_interrupt_level(THREAD_UNINT);
2364
2365         /*
2366          *      Indicate that we want all the threads to stop executing
2367          *      at user space by holding the task (we would have held
2368          *      each thread independently in thread_terminate_internal -
2369          *      but this way we may be more likely to already find it
2370          *      held there).  Mark the task inactive, and prevent
2371          *      further task operations via the task port.
2372          */
2373         task_hold_locked(task);
2374         task->active = FALSE;
2375         ipc_task_disable(task);
2376
2377 #if CONFIG_TELEMETRY
2378         /*
2379          * Notify telemetry that this task is going away.
2380          */
2381         telemetry_task_ctl_locked(task, TF_TELEMETRY, 0);
2382 #endif
2383
2384         /*
2385          *      Terminate each thread in the task.
2386          */
2387         queue_iterate(&task->threads, thread, thread_t, task_threads) {
2388                 thread_terminate_internal(thread);
2389         }
2390
2391 #ifdef MACH_BSD
2392         if (task->bsd_info != NULL && !task_is_exec_copy(task)) {
2393                 pid = proc_pid(task->bsd_info);
2394         }
2395 #endif /* MACH_BSD */
2396
2397         task_unlock(task);
2398
2399         proc_set_task_policy(task, TASK_POLICY_ATTRIBUTE,
2400             TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);
2401
2402         /* Early object reap phase */
2403
2404 // PR-17045188: Revisit implementation
2405 //        task_partial_reap(task, pid);
2406
2407 #if CONFIG_EMBEDDED
2408         /*
2409          * remove all task watchers
2410          */
2411         task_removewatchers(task);
2412
2413 #endif /* CONFIG_EMBEDDED */
2414
2415         /*
2416          *      Destroy all synchronizers owned by the task.
2417          */
2418         task_synchronizer_destroy_all(task);
2419
2420         /*
2421          *      Destroy the IPC space, leaving just a reference for it.
2422          */
2423         ipc_space_terminate(task->itk_space);
2424
2425 #if 00
2426         /* if some ledgers go negative on tear-down again... */
2427         ledger_disable_panic_on_negative(task->map->pmap->ledger,
2428             task_ledgers.phys_footprint);
2429         ledger_disable_panic_on_negative(task->map->pmap->ledger,
2430             task_ledgers.internal);
2431         ledger_disable_panic_on_negative(task->map->pmap->ledger,
2432             task_ledgers.internal_compressed);
2433         ledger_disable_panic_on_negative(task->map->pmap->ledger,
2434             task_ledgers.iokit_mapped);
2435         ledger_disable_panic_on_negative(task->map->pmap->ledger,
2436             task_ledgers.alternate_accounting);
2437         ledger_disable_panic_on_negative(task->map->pmap->ledger,
2438             task_ledgers.alternate_accounting_compressed);
2439 #endif
2440
2441         /*
2442          * If the current thread is a member of the task
2443          * being terminated, then the last reference to
2444          * the task will not be dropped until the thread
2445          * is finally reaped.  To avoid incurring the
2446          * expense of removing the address space regions
2447          * at reap time, we do it explicitly here.
2448          */
2449
2450         vm_map_lock(task->map);
2451         vm_map_disable_hole_optimization(task->map);
2452         vm_map_unlock(task->map);
2453
2454 #if MACH_ASSERT
2455         /*
2456          * Identify the pmap's process, in case the pmap ledgers drift
2457          * and we have to report it.
2458          */
2459         char procname[17];
2460         if (task->bsd_info && !task_is_exec_copy(task)) {
2461                 pid = proc_pid(task->bsd_info);
2462                 proc_name_kdp(task, procname, sizeof(procname));
2463         } else {
2464                 pid = 0;
2465                 strlcpy(procname, "<unknown>", sizeof(procname));
2466         }
2467         pmap_set_process(task->map->pmap, pid, procname);
2468 #endif /* MACH_ASSERT */
2469
2470         vm_map_remove(task->map,
2471             task->map->min_offset,
2472             task->map->max_offset,
2473             /*
2474              * Final cleanup:
2475              * + no unnesting
2476              * + remove immutable mappings
2477              * + allow gaps in range
2478              */
2479             (VM_MAP_REMOVE_NO_UNNESTING |
2480             VM_MAP_REMOVE_IMMUTABLE |
2481             VM_MAP_REMOVE_GAPS_OK));
2482
2483         /* release our shared region */
2484         vm_shared_region_set(task, NULL);
2485
2486
2487         lck_mtx_lock(&tasks_threads_lock);
2488         queue_remove(&tasks, task, task_t, tasks);
2489         queue_enter(&terminated_tasks, task, task_t, tasks);
2490         tasks_count--;
2491         terminated_tasks_count++;
2492         lck_mtx_unlock(&tasks_threads_lock);
2493
2494         /*
2495          * We no longer need to guard against being aborted, so restore
2496          * the previous interruptible state.
2497          */
2498         thread_interrupt_level(interrupt_save);
2499
2500 #if KPC
2501         /* force the task to release all ctrs */
2502         if (task->t_kpc & TASK_KPC_FORCED_ALL_CTRS) {
2503                 kpc_force_all_ctrs(task, 0);
2504         }
2505 #endif /* KPC */
2506
2507 #if CONFIG_COALITIONS
2508         /*
2509          * Leave our coalitions. (drop activation but not reference)
2510          */
2511         coalitions_remove_task(task);
2512 #endif
2513
2514         /*
2515          * Get rid of the task active reference on itself.
2516          */
2517         task_deallocate(task);
2518
2519         return KERN_SUCCESS;
2520 }
2521
2522 void
2523 tasks_system_suspend(boolean_t suspend)
2524 {
2525         task_t task;
2526
2527         lck_mtx_lock(&tasks_threads_lock);
2528         assert(tasks_suspend_state != suspend);
2529         tasks_suspend_state = suspend;
2530         queue_iterate(&tasks, task, task_t, tasks) {
2531                 if (task == kernel_task) {
2532                         continue;
2533                 }
2534                 suspend ? task_suspend_internal(task) : task_resume_internal(task);
2535         }
2536         lck_mtx_unlock(&tasks_threads_lock);
2537 }
2538
2539 /*
2540  * task_start_halt:
2541  *
2542  *      Shut the current task down (except for the current thread) in
2543  *      preparation for dramatic changes to the task (probably exec).
2544  *      We hold the task and mark all other threads in the task for
2545  *      termination.
2546  */
2547 kern_return_t
2548 task_start_halt(task_t task)
2549 {
2550         kern_return_t kr = KERN_SUCCESS;
2551         task_lock(task);
2552         kr = task_start_halt_locked(task, FALSE);
2553         task_unlock(task);
2554         return kr;
2555 }
2556
2557 static kern_return_t
2558 task_start_halt_locked(task_t task, boolean_t should_mark_corpse)
2559 {
2560         thread_t thread, self;
2561         uint64_t dispatchqueue_offset;
2562
2563         assert(task != kernel_task);
2564
2565         self = current_thread();
2566
2567         if (task != self->task && !task_is_a_corpse_fork(task)) {
2568                 return KERN_INVALID_ARGUMENT;
2569         }
2570
2571         if (task->halting || !task->active || !self->active) {
2572                 /*
2573                  * Task or current thread is already being terminated.
2574                  * Hurry up and return out of the current kernel context
2575                  * so that we run our AST special handler to terminate
2576                  * ourselves.
2577                  */
2578                 return KERN_FAILURE;
2579         }
2580
2581         task->halting = TRUE;
2582
2583         /*
2584          * Mark all the threads to keep them from starting any more
2585          * user-level execution.  The thread_terminate_internal code
2586          * would do this on a thread by thread basis anyway, but this
2587          * gives us a better chance of not having to wait there.
2588          */
2589         task_hold_locked(task);
2590         dispatchqueue_offset = get_dispatchqueue_offset_from_proc(task->bsd_info);
2591
2592         /*
2593          * Terminate all the other threads in the task.
2594          */
2595         queue_iterate(&task->threads, thread, thread_t, task_threads)
2596         {
2597                 if (should_mark_corpse) {
2598                         thread_mtx_lock(thread);
2599                         thread->inspection = TRUE;
2600                         thread_mtx_unlock(thread);
2601                 }
2602                 if (thread != self) {
2603                         thread_terminate_internal(thread);
2604                 }
2605         }
2606         task->dispatchqueue_offset = dispatchqueue_offset;
2607
2608         task_release_locked(task);
2609
2610         return KERN_SUCCESS;
2611 }
2612
2613
2614 /*
2615  * task_complete_halt:
2616  *
2617  *      Complete task halt by waiting for threads to terminate, then clean
2618  *      up task resources (VM, port namespace, etc...) and then let the
2619  *      current thread go in the (practically empty) task context.
2620  *
2621  *      Note: task->halting flag is not cleared in order to avoid creation
2622  *      of new thread in old exec'ed task.
2623  */
2624 void
2625 task_complete_halt(task_t task)
2626 {
2627         task_lock(task);
2628         assert(task->halting);
2629         assert(task == current_task());
2630
2631         /*
2632          *      Wait for the other threads to get shut down.
2633          *      When the last other thread is reaped, we'll be
2634          *      woken up.
2635          */
2636         if (task->thread_count > 1) {
2637                 assert_wait((event_t)&task->halting, THREAD_UNINT);
2638                 task_unlock(task);
2639                 thread_block(THREAD_CONTINUE_NULL);
2640         } else {
2641                 task_unlock(task);
2642         }
2643
2644         /*
2645          *      Give the machine dependent code a chance
2646          *      to perform cleanup of task-level resources
2647          *      associated with the current thread before
2648          *      ripping apart the task.
2649          */
2650         machine_task_terminate(task);
2651
2652         /*
2653          *      Destroy all synchronizers owned by the task.
2654          */
2655         task_synchronizer_destroy_all(task);
2656
2657         /*
2658          *      Destroy the contents of the IPC space, leaving just
2659          *      a reference for it.
2660          */
2661         ipc_space_clean(task->itk_space);
2662
2663         /*
2664          * Clean out the address space, as we are going to be
2665          * getting a new one.
2666          */
2667         vm_map_remove(task->map, task->map->min_offset,
2668             task->map->max_offset,
2669             /*
2670              * Final cleanup:
2671              * + no unnesting
2672              * + remove immutable mappings
2673              * + allow gaps in the range
2674              */
2675             (VM_MAP_REMOVE_NO_UNNESTING |
2676             VM_MAP_REMOVE_IMMUTABLE |
2677             VM_MAP_REMOVE_GAPS_OK));
2678
2679         /*
2680          * Kick out any IOKitUser handles to the task. At best they're stale,
2681          * at worst someone is racing a SUID exec.
2682          */
2683         iokit_task_terminate(task);
2684 }
2685
2686 /*
2687  *      task_hold_locked:
2688  *
2689  *      Suspend execution of the specified task.
2690  *      This is a recursive-style suspension of the task, a count of
2691  *      suspends is maintained.
2692  *
2693  *      CONDITIONS: the task is locked and active.
2694  */
2695 void
2696 task_hold_locked(
2697         task_t          task)
2698 {
2699         thread_t        thread;
2700
2701         assert(task->active);
2702
2703         if (task->suspend_count++ > 0) {
2704                 return;
2705         }
2706
2707         if (task->bsd_info) {
2708                 workq_proc_suspended(task->bsd_info);
2709         }
2710
2711         /*
2712          *      Iterate through all the threads and hold them.
2713          */
2714         queue_iterate(&task->threads, thread, thread_t, task_threads) {
2715                 thread_mtx_lock(thread);
2716                 thread_hold(thread);
2717                 thread_mtx_unlock(thread);
2718         }
2719 }
2720
2721 /*
2722  *      task_hold:
2723  *
2724  *      Same as the internal routine above, except that is must lock
2725  *      and verify that the task is active.  This differs from task_suspend
2726  *      in that it places a kernel hold on the task rather than just a
2727  *      user-level hold.  This keeps users from over resuming and setting
2728  *      it running out from under the kernel.
2729  *
2730  *      CONDITIONS: the caller holds a reference on the task
2731  */
2732 kern_return_t
2733 task_hold(
2734         task_t          task)
2735 {
2736         if (task == TASK_NULL) {
2737                 return KERN_INVALID_ARGUMENT;
2738         }
2739
2740         task_lock(task);
2741
2742         if (!task->active) {
2743                 task_unlock(task);
2744
2745                 return KERN_FAILURE;
2746         }
2747
2748         task_hold_locked(task);
2749         task_unlock(task);
2750
2751         return KERN_SUCCESS;
2752 }
2753
2754 kern_return_t
2755 task_wait(
2756         task_t          task,
2757         boolean_t       until_not_runnable)
2758 {
2759         if (task == TASK_NULL) {
2760                 return KERN_INVALID_ARGUMENT;
2761         }
2762
2763         task_lock(task);
2764
2765         if (!task->active) {
2766                 task_unlock(task);
2767
2768                 return KERN_FAILURE;
2769         }
2770
2771         task_wait_locked(task, until_not_runnable);
2772         task_unlock(task);
2773
2774         return KERN_SUCCESS;
2775 }
2776
2777 /*
2778  *      task_wait_locked:
2779  *
2780  *      Wait for all threads in task to stop.
2781  *
2782  * Conditions:
2783  *      Called with task locked, active, and held.
2784  */
2785 void
2786 task_wait_locked(
2787         task_t          task,
2788         boolean_t               until_not_runnable)
2789 {
2790         thread_t        thread, self;
2791
2792         assert(task->active);
2793         assert(task->suspend_count > 0);
2794
2795         self = current_thread();
2796
2797         /*
2798          *      Iterate through all the threads and wait for them to
2799          *      stop.  Do not wait for the current thread if it is within
2800          *      the task.
2801          */
2802         queue_iterate(&task->threads, thread, thread_t, task_threads) {
2803                 if (thread != self) {
2804                         thread_wait(thread, until_not_runnable);
2805                 }
2806         }
2807 }
2808
2809 /*
2810  *      task_release_locked:
2811  *
2812  *      Release a kernel hold on a task.
2813  *
2814  *      CONDITIONS: the task is locked and active
2815  */
2816 void
2817 task_release_locked(
2818         task_t          task)
2819 {
2820         thread_t        thread;
2821
2822         assert(task->active);
2823         assert(task->suspend_count > 0);
2824
2825         if (--task->suspend_count > 0) {
2826                 return;
2827         }
2828
2829         if (task->bsd_info) {
2830                 workq_proc_resumed(task->bsd_info);
2831         }
2832
2833         queue_iterate(&task->threads, thread, thread_t, task_threads) {
2834                 thread_mtx_lock(thread);
2835                 thread_release(thread);
2836                 thread_mtx_unlock(thread);
2837         }
2838 }
2839
2840 /*
2841  *      task_release:
2842  *
2843  *      Same as the internal routine above, except that it must lock
2844  *      and verify that the task is active.
2845  *
2846  *      CONDITIONS: The caller holds a reference to the task
2847  */
2848 kern_return_t
2849 task_release(
2850         task_t          task)
2851 {
2852         if (task == TASK_NULL) {
2853                 return KERN_INVALID_ARGUMENT;
2854         }
2855
2856         task_lock(task);
2857
2858         if (!task->active) {
2859                 task_unlock(task);
2860
2861                 return KERN_FAILURE;
2862         }
2863
2864         task_release_locked(task);
2865         task_unlock(task);
2866
2867         return KERN_SUCCESS;
2868 }
2869
2870 kern_return_t
2871 task_threads(
2872         task_t                                  task,
2873         thread_act_array_t              *threads_out,
2874         mach_msg_type_number_t  *count)
2875 {
2876         mach_msg_type_number_t  actual;
2877         thread_t                                *thread_list;
2878         thread_t                                thread;
2879         vm_size_t                               size, size_needed;
2880         void                                    *addr;
2881         unsigned int                    i, j;
2882
2883         if (task == TASK_NULL) {
2884                 return KERN_INVALID_ARGUMENT;
2885         }
2886
2887         size = 0; addr = NULL;
2888
2889         for (;;) {
2890                 task_lock(task);
2891                 if (!task->active) {
2892                         task_unlock(task);
2893
2894                         if (size != 0) {
2895                                 kfree(addr, size);
2896                         }
2897
2898                         return KERN_FAILURE;
2899                 }
2900
2901                 actual = task->thread_count;
2902
2903                 /* do we have the memory we need? */
2904                 size_needed = actual * sizeof(mach_port_t);
2905                 if (size_needed <= size) {
2906                         break;
2907                 }
2908
2909                 /* unlock the task and allocate more memory */
2910                 task_unlock(task);
2911
2912                 if (size != 0) {
2913                         kfree(addr, size);
2914                 }
2915
2916                 assert(size_needed > 0);
2917                 size = size_needed;
2918
2919                 addr = kalloc(size);
2920                 if (addr == 0) {
2921                         return KERN_RESOURCE_SHORTAGE;
2922                 }
2923         }
2924
2925         /* OK, have memory and the task is locked & active */
2926         thread_list = (thread_t *)addr;
2927
2928         i = j = 0;
2929
2930         for (thread = (thread_t)queue_first(&task->threads); i < actual;
2931             ++i, thread = (thread_t)queue_next(&thread->task_threads)) {
2932                 thread_reference_internal(thread);
2933                 thread_list[j++] = thread;
2934         }
2935
2936         assert(queue_end(&task->threads, (queue_entry_t)thread));
2937
2938         actual = j;
2939         size_needed = actual * sizeof(mach_port_t);
2940
2941         /* can unlock task now that we've got the thread refs */
2942         task_unlock(task);
2943
2944         if (actual == 0) {
2945                 /* no threads, so return null pointer and deallocate memory */
2946
2947                 *threads_out = NULL;
2948                 *count = 0;
2949
2950                 if (size != 0) {
2951                         kfree(addr, size);
2952                 }
2953         } else {
2954                 /* if we allocated too much, must copy */
2955
2956                 if (size_needed < size) {
2957                         void *newaddr;
2958
2959                         newaddr = kalloc(size_needed);
2960                         if (newaddr == 0) {
2961                                 for (i = 0; i < actual; ++i) {
2962                                         thread_deallocate(thread_list[i]);
2963                                 }
2964                                 kfree(addr, size);
2965                                 return KERN_RESOURCE_SHORTAGE;
2966                         }
2967
2968                         bcopy(addr, newaddr, size_needed);
2969                         kfree(addr, size);
2970                         thread_list = (thread_t *)newaddr;
2971                 }
2972
2973                 *threads_out = thread_list;
2974                 *count = actual;
2975
2976                 /* do the conversion that Mig should handle */
2977
2978                 for (i = 0; i < actual; ++i) {
2979                         ((ipc_port_t *) thread_list)[i] = convert_thread_to_port(thread_list[i]);
2980                 }
2981         }
2982
2983         return KERN_SUCCESS;
2984 }
2985
/*
 * Hold modes passed as the "mode" argument of place_task_hold() and
 * release_task_hold().
 */
/* Plain hold: only user_stop_count is adjusted. */
#define TASK_HOLD_NORMAL        0
/* On release: requires task->pidsuspended to be set, and clears it. */
#define TASK_HOLD_PIDSUSPEND    1
/* Legacy hold: legacy_stop_count is adjusted along with user_stop_count. */
#define TASK_HOLD_LEGACY        2
/* On release: drops every outstanding legacy hold in one go. */
#define TASK_HOLD_LEGACY_ALL    3
2990
2991 static kern_return_t
2992 place_task_hold(
2993         task_t task,
2994         int mode)
2995 {
2996         if (!task->active && !task_is_a_corpse(task)) {
2997                 return KERN_FAILURE;
2998         }
2999
3000         /* Return success for corpse task */
3001         if (task_is_a_corpse(task)) {
3002                 return KERN_SUCCESS;
3003         }
3004
3005         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
3006             MACHDBG_CODE(DBG_MACH_IPC, MACH_TASK_SUSPEND) | DBG_FUNC_NONE,
3007             task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
3008             task->user_stop_count, task->user_stop_count + 1, 0);
3009
3010 #if MACH_ASSERT
3011         current_task()->suspends_outstanding++;
3012 #endif
3013
3014         if (mode == TASK_HOLD_LEGACY) {
3015                 task->legacy_stop_count++;
3016         }
3017
3018         if (task->user_stop_count++ > 0) {
3019                 /*
3020                  *      If the stop count was positive, the task is
3021                  *      already stopped and we can exit.
3022                  */
3023                 return KERN_SUCCESS;
3024         }
3025
3026         /*
3027          * Put a kernel-level hold on the threads in the task (all
3028          * user-level task suspensions added together represent a
3029          * single kernel-level hold).  We then wait for the threads
3030          * to stop executing user code.
3031          */
3032         task_hold_locked(task);
3033         task_wait_locked(task, FALSE);
3034
3035         return KERN_SUCCESS;
3036 }
3037
3038 static kern_return_t
3039 release_task_hold(
3040         task_t          task,
3041         int                     mode)
3042 {
3043         boolean_t release = FALSE;
3044
3045         if (!task->active && !task_is_a_corpse(task)) {
3046                 return KERN_FAILURE;
3047         }
3048
3049         /* Return success for corpse task */
3050         if (task_is_a_corpse(task)) {
3051                 return KERN_SUCCESS;
3052         }
3053
3054         if (mode == TASK_HOLD_PIDSUSPEND) {
3055                 if (task->pidsuspended == FALSE) {
3056                         return KERN_FAILURE;
3057                 }
3058                 task->pidsuspended = FALSE;
3059         }
3060
3061         if (task->user_stop_count > (task->pidsuspended ? 1 : 0)) {
3062                 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
3063                     MACHDBG_CODE(DBG_MACH_IPC, MACH_TASK_RESUME) | DBG_FUNC_NONE,
3064                     task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
3065                     task->user_stop_count, mode, task->legacy_stop_count);
3066
3067 #if MACH_ASSERT
3068                 /*
3069                  * This is obviously not robust; if we suspend one task and then resume a different one,
3070                  * we'll fly under the radar. This is only meant to catch the common case of a crashed
3071                  * or buggy suspender.
3072                  */
3073                 current_task()->suspends_outstanding--;
3074 #endif
3075
3076                 if (mode == TASK_HOLD_LEGACY_ALL) {
3077                         if (task->legacy_stop_count >= task->user_stop_count) {
3078                                 task->user_stop_count = 0;
3079                                 release = TRUE;
3080                         } else {
3081                                 task->user_stop_count -= task->legacy_stop_count;
3082                         }
3083                         task->legacy_stop_count = 0;
3084                 } else {
3085                         if (mode == TASK_HOLD_LEGACY && task->legacy_stop_count > 0) {
3086                                 task->legacy_stop_count--;
3087                         }
3088                         if (--task->user_stop_count == 0) {
3089                                 release = TRUE;
3090                         }
3091                 }
3092         } else {
3093                 return KERN_FAILURE;
3094         }
3095
3096         /*
3097          *      Release the task if necessary.
3098          */
3099         if (release) {
3100                 task_release_locked(task);
3101         }
3102
3103         return KERN_SUCCESS;
3104 }
3105
3106
3107 /*
3108  *      task_suspend:
3109  *
3110  *      Implement an (old-fashioned) user-level suspension on a task.
3111  *
3112  *      Because the user isn't expecting to have to manage a suspension
3113  *      token, we'll track it for him in the kernel in the form of a naked
3114  *      send right to the task's resume port.  All such send rights
3115  *      account for a single suspension against the task (unlike task_suspend2()
3116  *      where each caller gets a unique suspension count represented by a
3117  *      unique send-once right).
3118  *
3119  * Conditions:
3120  *      The caller holds a reference to the task
3121  */
3122 kern_return_t
3123 task_suspend(
3124         task_t          task)
3125 {
3126         kern_return_t                   kr;
3127         mach_port_t                     port, send, old_notify;
3128         mach_port_name_t                name;
3129
3130         if (task == TASK_NULL || task == kernel_task) {
3131                 return KERN_INVALID_ARGUMENT;
3132         }
3133
3134         task_lock(task);
3135
3136         /*
3137          * Claim a send right on the task resume port, and request a no-senders
3138          * notification on that port (if none outstanding).
3139          */
3140         if (task->itk_resume == IP_NULL) {
3141                 task->itk_resume = ipc_port_alloc_kernel();
3142                 if (!IP_VALID(task->itk_resume)) {
3143                         panic("failed to create resume port");
3144                 }
3145                 ipc_kobject_set(task->itk_resume, (ipc_kobject_t)task, IKOT_TASK_RESUME);
3146         }
3147
3148         port = task->itk_resume;
3149         ip_lock(port);
3150         assert(ip_active(port));
3151
3152         send = ipc_port_make_send_locked(port);
3153         assert(IP_VALID(send));
3154
3155         if (port->ip_nsrequest == IP_NULL) {
3156                 ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
3157                 assert(old_notify == IP_NULL);
3158                 /* port unlocked */
3159         } else {
3160                 ip_unlock(port);
3161         }
3162
3163         /*
3164          * place a legacy hold on the task.
3165          */
3166         kr = place_task_hold(task, TASK_HOLD_LEGACY);
3167         if (kr != KERN_SUCCESS) {
3168                 task_unlock(task);
3169                 ipc_port_release_send(send);
3170                 return kr;
3171         }
3172
3173         task_unlock(task);
3174
3175         /*
3176          * Copyout the send right into the calling task's IPC space.  It won't know it is there,
3177          * but we'll look it up when calling a traditional resume.  Any IPC operations that
3178          * deallocate the send right will auto-release the suspension.
3179          */
3180         if ((kr = ipc_kmsg_copyout_object(current_task()->itk_space, (ipc_object_t)send,
3181             MACH_MSG_TYPE_MOVE_SEND, &name)) != KERN_SUCCESS) {
3182                 printf("warning: %s(%d) failed to copyout suspension token for pid %d with error: %d\n",
3183                     proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
3184                     task_pid(task), kr);
3185                 return kr;
3186         }
3187
3188         return kr;
3189 }
3190
3191 /*
3192  *      task_resume:
3193  *              Release a user hold on a task.
3194  *
3195  * Conditions:
3196  *              The caller holds a reference to the task
3197  */
3198 kern_return_t
3199 task_resume(
3200         task_t  task)
3201 {
3202         kern_return_t    kr;
3203         mach_port_name_t resume_port_name;
3204         ipc_entry_t              resume_port_entry;
3205         ipc_space_t              space = current_task()->itk_space;
3206
3207         if (task == TASK_NULL || task == kernel_task) {
3208                 return KERN_INVALID_ARGUMENT;
3209         }
3210
3211         /* release a legacy task hold */
3212         task_lock(task);
3213         kr = release_task_hold(task, TASK_HOLD_LEGACY);
3214         task_unlock(task);
3215
3216         is_write_lock(space);
3217         if (is_active(space) && IP_VALID(task->itk_resume) &&
3218             ipc_hash_lookup(space, (ipc_object_t)task->itk_resume, &resume_port_name, &resume_port_entry) == TRUE) {
3219                 /*
3220                  * We found a suspension token in the caller's IPC space. Release a send right to indicate that
3221                  * we are holding one less legacy hold on the task from this caller.  If the release failed,
3222                  * go ahead and drop all the rights, as someone either already released our holds or the task
3223                  * is gone.
3224                  */
3225                 if (kr == KERN_SUCCESS) {
3226                         ipc_right_dealloc(space, resume_port_name, resume_port_entry);
3227                 } else {
3228                         ipc_right_destroy(space, resume_port_name, resume_port_entry, FALSE, 0);
3229                 }
3230                 /* space unlocked */
3231         } else {
3232                 is_write_unlock(space);
3233                 if (kr == KERN_SUCCESS) {
3234                         printf("warning: %s(%d) performed out-of-band resume on pid %d\n",
3235                             proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
3236                             task_pid(task));
3237                 }
3238         }
3239
3240         return kr;
3241 }
3242
3243 /*
3244  * Suspend the target task.
3245  * Making/holding a token/reference/port is the callers responsibility.
3246  */
3247 kern_return_t
3248 task_suspend_internal(task_t task)
3249 {
3250         kern_return_t    kr;
3251
3252         if (task == TASK_NULL || task == kernel_task) {
3253                 return KERN_INVALID_ARGUMENT;
3254         }
3255
3256         task_lock(task);
3257         kr = place_task_hold(task, TASK_HOLD_NORMAL);
3258         task_unlock(task);
3259         return kr;
3260 }
3261
3262 /*
3263  * Suspend the target task, and return a suspension token. The token
3264  * represents a reference on the suspended task.
3265  */
3266 kern_return_t
3267 task_suspend2(
3268         task_t                  task,
3269         task_suspension_token_t *suspend_token)
3270 {
3271         kern_return_t    kr;
3272
3273         kr = task_suspend_internal(task);
3274         if (kr != KERN_SUCCESS) {
3275                 *suspend_token = TASK_NULL;
3276                 return kr;
3277         }
3278
3279         /*
3280          * Take a reference on the target task and return that to the caller
3281          * as a "suspension token," which can be converted into an SO right to
3282          * the now-suspended task's resume port.
3283          */
3284         task_reference_internal(task);
3285         *suspend_token = task;
3286
3287         return KERN_SUCCESS;
3288 }
3289
3290 /*
3291  * Resume the task
3292  * (reference/token/port management is caller's responsibility).
3293  */
3294 kern_return_t
3295 task_resume_internal(
3296         task_suspension_token_t         task)
3297 {
3298         kern_return_t kr;
3299
3300         if (task == TASK_NULL || task == kernel_task) {
3301                 return KERN_INVALID_ARGUMENT;
3302         }
3303
3304         task_lock(task);
3305         kr = release_task_hold(task, TASK_HOLD_NORMAL);
3306         task_unlock(task);
3307         return kr;
3308 }
3309
3310 /*
3311  * Resume the task using a suspension token. Consumes the token's ref.
3312  */
3313 kern_return_t
3314 task_resume2(
3315         task_suspension_token_t         task)
3316 {
3317         kern_return_t kr;
3318
3319         kr = task_resume_internal(task);
3320         task_suspension_token_deallocate(task);
3321
3322         return kr;
3323 }
3324
3325 boolean_t
3326 task_suspension_notify(mach_msg_header_t *request_header)
3327 {
3328         ipc_port_t port = (ipc_port_t) request_header->msgh_remote_port;
3329         task_t task = convert_port_to_task_suspension_token(port);
3330         mach_msg_type_number_t not_count;
3331
3332         if (task == TASK_NULL || task == kernel_task) {
3333                 return TRUE;  /* nothing to do */
3334         }
3335         switch (request_header->msgh_id) {
3336         case MACH_NOTIFY_SEND_ONCE:
3337                 /* release the hold held by this specific send-once right */
3338                 task_lock(task);
3339                 release_task_hold(task, TASK_HOLD_NORMAL);
3340                 task_unlock(task);
3341                 break;
3342
3343         case MACH_NOTIFY_NO_SENDERS:
3344                 not_count = ((mach_no_senders_notification_t *)request_header)->not_count;
3345
3346                 task_lock(task);
3347                 ip_lock(port);
3348                 if (port->ip_mscount == not_count) {
3349                         /* release all the [remaining] outstanding legacy holds */
3350                         assert(port->ip_nsrequest == IP_NULL);
3351                         ip_unlock(port);
3352                         release_task_hold(task, TASK_HOLD_LEGACY_ALL);
3353                         task_unlock(task);
3354                 } else if (port->ip_nsrequest == IP_NULL) {
3355                         ipc_port_t old_notify;
3356
3357                         task_unlock(task);
3358                         /* new send rights, re-arm notification at current make-send count */
3359                         ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
3360                         assert(old_notify == IP_NULL);
3361                         /* port unlocked */
3362                 } else {
3363                         ip_unlock(port);
3364                         task_unlock(task);
3365                 }
3366                 break;
3367
3368         default:
3369                 break;
3370         }
3371
3372         task_suspension_token_deallocate(task); /* drop token reference */
3373         return TRUE;
3374 }
3375
3376 kern_return_t
3377 task_pidsuspend_locked(task_t task)
3378 {
3379         kern_return_t kr;
3380
3381         if (task->pidsuspended) {
3382                 kr = KERN_FAILURE;
3383                 goto out;
3384         }
3385
3386         task->pidsuspended = TRUE;
3387
3388         kr = place_task_hold(task, TASK_HOLD_PIDSUSPEND);
3389         if (kr != KERN_SUCCESS) {
3390                 task->pidsuspended = FALSE;
3391         }
3392 out:
3393         return kr;
3394 }
3395
3396
3397 /*
3398  *      task_pidsuspend:
3399  *
3400  *      Suspends a task by placing a hold on its threads.
3401  *
3402  * Conditions:
3403  *      The caller holds a reference to the task
3404  */
3405 kern_return_t
3406 task_pidsuspend(
3407         task_t          task)
3408 {
3409         kern_return_t    kr;
3410
3411         if (task == TASK_NULL || task == kernel_task) {
3412                 return KERN_INVALID_ARGUMENT;
3413         }
3414
3415         task_lock(task);
3416
3417         kr = task_pidsuspend_locked(task);
3418
3419         task_unlock(task);
3420
3421         return kr;
3422 }
3423
3424 /*
3425  *      task_pidresume:
3426  *              Resumes a previously suspended task.
3427  *
3428  * Conditions:
3429  *              The caller holds a reference to the task
3430  */
3431 kern_return_t
3432 task_pidresume(
3433         task_t  task)
3434 {
3435         kern_return_t    kr;
3436
3437         if (task == TASK_NULL || task == kernel_task) {
3438                 return KERN_INVALID_ARGUMENT;
3439         }
3440
3441         task_lock(task);
3442
3443 #if CONFIG_FREEZE
3444
3445         while (task->changing_freeze_state) {
3446                 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
3447                 task_unlock(task);
3448                 thread_block(THREAD_CONTINUE_NULL);
3449
3450                 task_lock(task);
3451         }
3452         task->changing_freeze_state = TRUE;
3453 #endif
3454
3455         kr = release_task_hold(task, TASK_HOLD_PIDSUSPEND);
3456
3457         task_unlock(task);
3458
3459 #if CONFIG_FREEZE
3460
3461         task_lock(task);
3462
3463         if (kr == KERN_SUCCESS) {
3464                 task->frozen = FALSE;
3465         }
3466         task->changing_freeze_state = FALSE;
3467         thread_wakeup(&task->changing_freeze_state);
3468
3469         task_unlock(task);
3470 #endif
3471
3472         return kr;
3473 }
3474
3475
3476 #if DEVELOPMENT || DEBUG
3477
3478 extern void IOSleep(int);
3479
3480 kern_return_t
3481 task_disconnect_page_mappings(task_t task)
3482 {
3483         int     n;
3484
3485         if (task == TASK_NULL || task == kernel_task) {
3486                 return KERN_INVALID_ARGUMENT;
3487         }
3488
3489         /*
3490          * this function is used to strip all of the mappings from
3491          * the pmap for the specified task to force the task to
3492          * re-fault all of the pages it is actively using... this
3493          * allows us to approximate the true working set of the
3494          * specified task.  We only engage if at least 1 of the
3495          * threads in the task is runnable, but we want to continuously
3496          * sweep (at least for a while - I've arbitrarily set the limit at
3497          * 100 sweeps to be re-looked at as we gain experience) to get a better
3498          * view into what areas within a page are being visited (as opposed to only
3499          * seeing the first fault of a page after the task becomes
3500          * runnable)...  in the future I may
3501          * try to block until awakened by a thread in this task
3502          * being made runnable, but for now we'll periodically poll from the
3503          * user level debug tool driving the sysctl
3504          */
3505         for (n = 0; n < 100; n++) {
3506                 thread_t        thread;
3507                 boolean_t       runnable;
3508                 boolean_t       do_unnest;
3509                 int             page_count;
3510
3511                 runnable = FALSE;
3512                 do_unnest = FALSE;
3513
3514                 task_lock(task);
3515
3516                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3517                         if (thread->state & TH_RUN) {
3518                                 runnable = TRUE;
3519                                 break;
3520                         }
3521                 }
3522                 if (n == 0) {
3523                         task->task_disconnected_count++;
3524                 }
3525
3526                 if (task->task_unnested == FALSE) {
3527                         if (runnable == TRUE) {
3528                                 task->task_unnested = TRUE;
3529                                 do_unnest = TRUE;
3530                         }
3531                 }
3532                 task_unlock(task);
3533
3534                 if (runnable == FALSE) {
3535                         break;
3536                 }
3537
3538                 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_START,
3539                     task, do_unnest, task->task_disconnected_count, 0, 0);
3540
3541                 page_count = vm_map_disconnect_page_mappings(task->map, do_unnest);
3542
3543                 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_END,
3544                     task, page_count, 0, 0, 0);
3545
3546                 if ((n % 5) == 4) {
3547                         IOSleep(1);
3548                 }
3549         }
3550         return KERN_SUCCESS;
3551 }
3552
3553 #endif
3554
3555
3556 #if CONFIG_FREEZE
3557
3558 /*
3559  *      task_freeze:
3560  *
3561  *      Freeze a task.
3562  *
3563  * Conditions:
3564  *      The caller holds a reference to the task
3565  */
3566 extern void             vm_wake_compactor_swapper(void);
3567 extern queue_head_t     c_swapout_list_head;
3568
3569 kern_return_t
3570 task_freeze(
3571         task_t    task,
3572         uint32_t           *purgeable_count,
3573         uint32_t           *wired_count,
3574         uint32_t           *clean_count,
3575         uint32_t           *dirty_count,
3576         uint32_t           dirty_budget,
3577         uint32_t           *shared_count,
3578         int                *freezer_error_code,
3579         boolean_t          eval_only)
3580 {
3581         kern_return_t kr = KERN_SUCCESS;
3582
3583         if (task == TASK_NULL || task == kernel_task) {
3584                 return KERN_INVALID_ARGUMENT;
3585         }
3586
3587         task_lock(task);
3588
3589         while (task->changing_freeze_state) {
3590                 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
3591                 task_unlock(task);
3592                 thread_block(THREAD_CONTINUE_NULL);
3593
3594                 task_lock(task);
3595         }
3596         if (task->frozen) {
3597                 task_unlock(task);
3598                 return KERN_FAILURE;
3599         }
3600         task->changing_freeze_state = TRUE;
3601
3602         task_unlock(task);
3603
3604         kr = vm_map_freeze(task->map,
3605             purgeable_count,
3606             wired_count,
3607             clean_count,
3608             dirty_count,
3609             dirty_budget,
3610             shared_count,
3611             freezer_error_code,
3612             eval_only);
3613
3614         task_lock(task);
3615
3616         if ((kr == KERN_SUCCESS) && (eval_only == FALSE)) {
3617                 task->frozen = TRUE;
3618         }
3619
3620         task->changing_freeze_state = FALSE;
3621         thread_wakeup(&task->changing_freeze_state);
3622
3623         task_unlock(task);
3624
3625         if (VM_CONFIG_COMPRESSOR_IS_PRESENT &&
3626             (eval_only == FALSE)) {
3627                 vm_wake_compactor_swapper();
3628                 /*
3629                  * We do an explicit wakeup of the swapout thread here
3630                  * because the compact_and_swap routines don't have
3631                  * knowledge about these kind of "per-task packed c_segs"
3632                  * and so will not be evaluating whether we need to do
3633                  * a wakeup there.
3634                  */
3635                 thread_wakeup((event_t)&c_swapout_list_head);
3636         }
3637
3638         return kr;
3639 }
3640
3641 /*
3642  *      task_thaw:
3643  *
3644  *      Thaw a currently frozen task.
3645  *
3646  * Conditions:
3647  *      The caller holds a reference to the task
3648  */
3649 kern_return_t
3650 task_thaw(
3651         task_t          task)
3652 {
3653         if (task == TASK_NULL || task == kernel_task) {
3654                 return KERN_INVALID_ARGUMENT;
3655         }
3656
3657         task_lock(task);
3658
3659         while (task->changing_freeze_state) {
3660                 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
3661                 task_unlock(task);
3662                 thread_block(THREAD_CONTINUE_NULL);
3663
3664                 task_lock(task);
3665         }
3666         if (!task->frozen) {
3667                 task_unlock(task);
3668                 return KERN_FAILURE;
3669         }
3670         task->frozen = FALSE;
3671
3672         task_unlock(task);
3673
3674         return KERN_SUCCESS;
3675 }
3676
3677 #endif /* CONFIG_FREEZE */
3678
3679 kern_return_t
3680 host_security_set_task_token(
3681         host_security_t  host_security,
3682         task_t           task,
3683         security_token_t sec_token,
3684         audit_token_t    audit_token,
3685         host_priv_t      host_priv)
3686 {
3687         ipc_port_t       host_port;
3688         kern_return_t    kr;
3689
3690         if (task == TASK_NULL) {
3691                 return KERN_INVALID_ARGUMENT;
3692         }
3693
3694         if (host_security == HOST_NULL) {
3695                 return KERN_INVALID_SECURITY;
3696         }
3697
3698         task_lock(task);
3699         task->sec_token = sec_token;
3700         task->audit_token = audit_token;
3701
3702         task_unlock(task);
3703
3704         if (host_priv != HOST_PRIV_NULL) {
3705                 kr = host_get_host_priv_port(host_priv, &host_port);
3706         } else {
3707                 kr = host_get_host_port(host_priv_self(), &host_port);
3708         }
3709         assert(kr == KERN_SUCCESS);
3710         kr = task_set_special_port(task, TASK_HOST_PORT, host_port);
3711         return kr;
3712 }
3713
3714 kern_return_t
3715 task_send_trace_memory(
3716         __unused task_t   target_task,
3717         __unused uint32_t pid,
3718         __unused uint64_t uniqueid)
3719 {
3720         return KERN_INVALID_ARGUMENT;
3721 }
3722
3723 /*
3724  * This routine was added, pretty much exclusively, for registering the
3725  * RPC glue vector for in-kernel short circuited tasks.  Rather than
3726  * removing it completely, I have only disabled that feature (which was
3727  * the only feature at the time).  It just appears that we are going to
3728  * want to add some user data to tasks in the future (i.e. bsd info,
3729  * task names, etc...), so I left it in the formal task interface.
3730  */
3731 kern_return_t
3732 task_set_info(
3733         task_t          task,
3734         task_flavor_t   flavor,
3735         __unused task_info_t    task_info_in,           /* pointer to IN array */
3736         __unused mach_msg_type_number_t task_info_count)
3737 {
3738         if (task == TASK_NULL) {
3739                 return KERN_INVALID_ARGUMENT;
3740         }
3741
3742         switch (flavor) {
3743 #if CONFIG_ATM
3744         case TASK_TRACE_MEMORY_INFO:
3745         {
3746                 if (task_info_count != TASK_TRACE_MEMORY_INFO_COUNT) {
3747                         return KERN_INVALID_ARGUMENT;
3748                 }
3749
3750                 assert(task_info_in != NULL);
3751                 task_trace_memory_info_t mem_info;
3752                 mem_info = (task_trace_memory_info_t) task_info_in;
3753                 kern_return_t kr = atm_register_trace_memory(task,
3754                     mem_info->user_memory_address,
3755                     mem_info->buffer_size);
3756                 return kr;
3757         }
3758
3759 #endif
3760         default:
3761                 return KERN_INVALID_ARGUMENT;
3762         }
3763         return KERN_SUCCESS;
3764 }
3765
3766 int radar_20146450 = 1;
3767 kern_return_t
3768 task_info(
3769         task_t                  task,
3770         task_flavor_t           flavor,
3771         task_info_t             task_info_out,
3772         mach_msg_type_number_t  *task_info_count)
3773 {
3774         kern_return_t error = KERN_SUCCESS;
3775         mach_msg_type_number_t  original_task_info_count;
3776
3777         if (task == TASK_NULL) {
3778                 return KERN_INVALID_ARGUMENT;
3779         }
3780
3781         original_task_info_count = *task_info_count;
3782         task_lock(task);
3783
3784         if ((task != current_task()) && (!task->active)) {
3785                 task_unlock(task);
3786                 return KERN_INVALID_ARGUMENT;
3787         }
3788
3789         switch (flavor) {
3790         case TASK_BASIC_INFO_32:
3791         case TASK_BASIC2_INFO_32:
3792 #if defined(__arm__) || defined(__arm64__)
3793         case TASK_BASIC_INFO_64:
3794 #endif
3795                 {
3796                         task_basic_info_32_t    basic_info;
3797                         vm_map_t                                map;
3798                         clock_sec_t                             secs;
3799                         clock_usec_t                    usecs;
3800
3801                         if (*task_info_count < TASK_BASIC_INFO_32_COUNT) {
3802                                 error = KERN_INVALID_ARGUMENT;
3803                                 break;
3804                         }
3805
3806                         basic_info = (task_basic_info_32_t)task_info_out;
3807
3808                         map = (task == kernel_task)? kernel_map: task->map;
3809                         basic_info->virtual_size = (typeof(basic_info->virtual_size))map->size;
3810                         if (flavor == TASK_BASIC2_INFO_32) {
3811                                 /*
3812                                  * The "BASIC2" flavor gets the maximum resident
3813                                  * size instead of the current resident size...
3814                                  */
3815                                 basic_info->resident_size = pmap_resident_max(map->pmap);
3816                         } else {
3817                                 basic_info->resident_size = pmap_resident_count(map->pmap);
3818                         }
3819                         basic_info->resident_size *= PAGE_SIZE;
3820
3821                         basic_info->policy = ((task != kernel_task)?
3822                             POLICY_TIMESHARE: POLICY_RR);
3823                         basic_info->suspend_count = task->user_stop_count;
3824
3825                         absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
3826                         basic_info->user_time.seconds =
3827                             (typeof(basic_info->user_time.seconds))secs;
3828                         basic_info->user_time.microseconds = usecs;
3829
3830                         absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
3831                         basic_info->system_time.seconds =
3832                             (typeof(basic_info->system_time.seconds))secs;
3833                         basic_info->system_time.microseconds = usecs;
3834
3835                         *task_info_count = TASK_BASIC_INFO_32_COUNT;
3836                         break;
3837                 }
3838
3839 #if defined(__arm__) || defined(__arm64__)
3840         case TASK_BASIC_INFO_64_2:
3841         {
3842                 task_basic_info_64_2_t  basic_info;
3843                 vm_map_t                                map;
3844                 clock_sec_t                             secs;
3845                 clock_usec_t                    usecs;
3846
3847                 if (*task_info_count < TASK_BASIC_INFO_64_2_COUNT) {
3848                         error = KERN_INVALID_ARGUMENT;
3849                         break;
3850                 }
3851
3852                 basic_info = (task_basic_info_64_2_t)task_info_out;
3853
3854                 map = (task == kernel_task)? kernel_map: task->map;
3855                 basic_info->virtual_size  = map->size;
3856                 basic_info->resident_size =
3857                     (mach_vm_size_t)(pmap_resident_count(map->pmap))
3858                     * PAGE_SIZE_64;
3859
3860                 basic_info->policy = ((task != kernel_task)?
3861                     POLICY_TIMESHARE: POLICY_RR);
3862                 basic_info->suspend_count = task->user_stop_count;
3863
3864                 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
3865                 basic_info->user_time.seconds =
3866                     (typeof(basic_info->user_time.seconds))secs;
3867                 basic_info->user_time.microseconds = usecs;
3868
3869                 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
3870                 basic_info->system_time.seconds =
3871                     (typeof(basic_info->system_time.seconds))secs;
3872                 basic_info->system_time.microseconds = usecs;
3873
3874                 *task_info_count = TASK_BASIC_INFO_64_2_COUNT;
3875                 break;
3876         }
3877
3878 #else /* defined(__arm__) || defined(__arm64__) */
3879         case TASK_BASIC_INFO_64:
3880         {
3881                 task_basic_info_64_t    basic_info;
3882                 vm_map_t                                map;
3883                 clock_sec_t                             secs;
3884                 clock_usec_t                    usecs;
3885
3886                 if (*task_info_count < TASK_BASIC_INFO_64_COUNT) {
3887                         error = KERN_INVALID_ARGUMENT;
3888                         break;
3889                 }
3890
3891                 basic_info = (task_basic_info_64_t)task_info_out;
3892
3893                 map = (task == kernel_task)? kernel_map: task->map;
3894                 basic_info->virtual_size  = map->size;
3895                 basic_info->resident_size =
3896                     (mach_vm_size_t)(pmap_resident_count(map->pmap))
3897                     * PAGE_SIZE_64;
3898
3899                 basic_info->policy = ((task != kernel_task)?
3900                     POLICY_TIMESHARE: POLICY_RR);
3901                 basic_info->suspend_count = task->user_stop_count;
3902
3903                 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
3904                 basic_info->user_time.seconds =
3905                     (typeof(basic_info->user_time.seconds))secs;
3906                 basic_info->user_time.microseconds = usecs;
3907
3908                 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
3909                 basic_info->system_time.seconds =
3910                     (typeof(basic_info->system_time.seconds))secs;
3911                 basic_info->system_time.microseconds = usecs;
3912
3913                 *task_info_count = TASK_BASIC_INFO_64_COUNT;
3914                 break;
3915         }
3916 #endif /* defined(__arm__) || defined(__arm64__) */
3917
3918         case MACH_TASK_BASIC_INFO:
3919         {
3920                 mach_task_basic_info_t  basic_info;
3921                 vm_map_t                map;
3922                 clock_sec_t             secs;
3923                 clock_usec_t            usecs;
3924
3925                 if (*task_info_count < MACH_TASK_BASIC_INFO_COUNT) {
3926                         error = KERN_INVALID_ARGUMENT;
3927                         break;
3928                 }
3929
3930                 basic_info = (mach_task_basic_info_t)task_info_out;
3931
3932                 map = (task == kernel_task) ? kernel_map : task->map;
3933
3934                 basic_info->virtual_size  = map->size;
3935
3936                 basic_info->resident_size =
3937                     (mach_vm_size_t)(pmap_resident_count(map->pmap));
3938                 basic_info->resident_size *= PAGE_SIZE_64;
3939
3940                 basic_info->resident_size_max =
3941                     (mach_vm_size_t)(pmap_resident_max(map->pmap));
3942                 basic_info->resident_size_max *= PAGE_SIZE_64;
3943
3944                 basic_info->policy = ((task != kernel_task) ?
3945                     POLICY_TIMESHARE : POLICY_RR);
3946
3947                 basic_info->suspend_count = task->user_stop_count;
3948
3949                 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
3950                 basic_info->user_time.seconds =
3951                     (typeof(basic_info->user_time.seconds))secs;
3952                 basic_info->user_time.microseconds = usecs;
3953
3954                 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
3955                 basic_info->system_time.seconds =
3956                     (typeof(basic_info->system_time.seconds))secs;
3957                 basic_info->system_time.microseconds = usecs;
3958
3959                 *task_info_count = MACH_TASK_BASIC_INFO_COUNT;
3960                 break;
3961         }
3962
3963         case TASK_THREAD_TIMES_INFO:
3964         {
3965                 task_thread_times_info_t        times_info;
3966                 thread_t                                        thread;
3967
3968                 if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) {
3969                         error = KERN_INVALID_ARGUMENT;
3970                         break;
3971                 }
3972
3973                 times_info = (task_thread_times_info_t) task_info_out;
3974                 times_info->user_time.seconds = 0;
3975                 times_info->user_time.microseconds = 0;
3976                 times_info->system_time.seconds = 0;
3977                 times_info->system_time.microseconds = 0;
3978
3979
3980                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3981                         time_value_t    user_time, system_time;
3982
3983                         if (thread->options & TH_OPT_IDLE_THREAD) {
3984                                 continue;
3985                         }
3986
3987                         thread_read_times(thread, &user_time, &system_time, NULL);
3988
3989                         time_value_add(&times_info->user_time, &user_time);
3990                         time_value_add(&times_info->system_time, &system_time);
3991                 }
3992
3993                 *task_info_count = TASK_THREAD_TIMES_INFO_COUNT;
3994                 break;
3995         }
3996
3997         case TASK_ABSOLUTETIME_INFO:
3998         {
3999                 task_absolutetime_info_t        info;
4000                 thread_t                        thread;
4001
4002                 if (*task_info_count < TASK_ABSOLUTETIME_INFO_COUNT) {
4003                         error = KERN_INVALID_ARGUMENT;
4004                         break;
4005                 }
4006
4007                 info = (task_absolutetime_info_t)task_info_out;
4008                 info->threads_user = info->threads_system = 0;
4009
4010
4011                 info->total_user = task->total_user_time;
4012                 info->total_system = task->total_system_time;
4013
4014                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4015                         uint64_t        tval;
4016                         spl_t           x;
4017
4018                         if (thread->options & TH_OPT_IDLE_THREAD) {
4019                                 continue;
4020                         }
4021
4022                         x = splsched();
4023                         thread_lock(thread);
4024
4025                         tval = timer_grab(&thread->user_timer);
4026                         info->threads_user += tval;
4027                         info->total_user += tval;
4028
4029                         tval = timer_grab(&thread->system_timer);
4030                         if (thread->precise_user_kernel_time) {
4031                                 info->threads_system += tval;
4032                                 info->total_system += tval;
4033                         } else {
4034                                 /* system_timer may represent either sys or user */
4035                                 info->threads_user += tval;
4036                                 info->total_user += tval;
4037                         }
4038
4039                         thread_unlock(thread);
4040                         splx(x);
4041                 }
4042
4043
4044                 *task_info_count = TASK_ABSOLUTETIME_INFO_COUNT;
4045                 break;
4046         }
4047
4048         case TASK_DYLD_INFO:
4049         {
4050                 task_dyld_info_t info;
4051
4052                 /*
4053                  * We added the format field to TASK_DYLD_INFO output.  For
4054                  * temporary backward compatibility, accept the fact that
4055                  * clients may ask for the old version - distinguished by the
4056                  * size of the expected result structure.
4057                  */
4058 #define TASK_LEGACY_DYLD_INFO_COUNT \
4059                 offsetof(struct task_dyld_info, all_image_info_format)/sizeof(natural_t)
4060
4061                 if (*task_info_count < TASK_LEGACY_DYLD_INFO_COUNT) {
4062                         error = KERN_INVALID_ARGUMENT;
4063                         break;
4064                 }
4065
4066                 info = (task_dyld_info_t)task_info_out;
4067                 info->all_image_info_addr = task->all_image_info_addr;
4068                 info->all_image_info_size = task->all_image_info_size;
4069
4070                 /* only set format on output for those expecting it */
4071                 if (*task_info_count >= TASK_DYLD_INFO_COUNT) {
4072                         info->all_image_info_format = task_has_64Bit_addr(task) ?
4073                             TASK_DYLD_ALL_IMAGE_INFO_64 :
4074                             TASK_DYLD_ALL_IMAGE_INFO_32;
4075                         *task_info_count = TASK_DYLD_INFO_COUNT;
4076                 } else {
4077                         *task_info_count = TASK_LEGACY_DYLD_INFO_COUNT;
4078                 }
4079                 break;
4080         }
4081
4082         case TASK_EXTMOD_INFO:
4083         {
4084                 task_extmod_info_t info;
4085                 void *p;
4086
4087                 if (*task_info_count < TASK_EXTMOD_INFO_COUNT) {
4088                         error = KERN_INVALID_ARGUMENT;
4089                         break;
4090                 }
4091
4092                 info = (task_extmod_info_t)task_info_out;
4093
4094                 p = get_bsdtask_info(task);
4095                 if (p) {
4096                         proc_getexecutableuuid(p, info->task_uuid, sizeof(info->task_uuid));
4097                 } else {
4098                         bzero(info->task_uuid, sizeof(info->task_uuid));
4099                 }
4100                 info->extmod_statistics = task->extmod_statistics;
4101                 *task_info_count = TASK_EXTMOD_INFO_COUNT;
4102
4103                 break;
4104         }
4105
4106         case TASK_KERNELMEMORY_INFO:
4107         {
4108                 task_kernelmemory_info_t        tkm_info;
4109                 ledger_amount_t                 credit, debit;
4110
4111                 if (*task_info_count < TASK_KERNELMEMORY_INFO_COUNT) {
4112                         error = KERN_INVALID_ARGUMENT;
4113                         break;
4114                 }
4115
4116                 tkm_info = (task_kernelmemory_info_t) task_info_out;
4117                 tkm_info->total_palloc = 0;
4118                 tkm_info->total_pfree = 0;
4119                 tkm_info->total_salloc = 0;
4120                 tkm_info->total_sfree = 0;
4121
4122                 if (task == kernel_task) {
4123                         /*
4124                          * All shared allocs/frees from other tasks count against
4125                          * the kernel private memory usage.  If we are looking up
4126                          * info for the kernel task, gather from everywhere.
4127                          */
4128                         task_unlock(task);
4129
4130                         /* start by accounting for all the terminated tasks against the kernel */
4131                         tkm_info->total_palloc = tasks_tkm_private.alloc + tasks_tkm_shared.alloc;
4132                         tkm_info->total_pfree = tasks_tkm_private.free + tasks_tkm_shared.free;
4133
4134                         /* count all other task/thread shared alloc/free against the kernel */
4135                         lck_mtx_lock(&tasks_threads_lock);
4136
4137                         /* XXX this really shouldn't be using the function parameter 'task' as a local var! */
4138                         queue_iterate(&tasks, task, task_t, tasks) {
4139                                 if (task == kernel_task) {
4140                                         if (ledger_get_entries(task->ledger,
4141                                             task_ledgers.tkm_private, &credit,
4142                                             &debit) == KERN_SUCCESS) {
4143                                                 tkm_info->total_palloc += credit;
4144                                                 tkm_info->total_pfree += debit;
4145                                         }
4146                                 }
4147                                 if (!ledger_get_entries(task->ledger,
4148                                     task_ledgers.tkm_shared, &credit, &debit)) {
4149                                         tkm_info->total_palloc += credit;
4150                                         tkm_info->total_pfree += debit;
4151                                 }
4152                         }
4153                         lck_mtx_unlock(&tasks_threads_lock);
4154                 } else {
4155                         if (!ledger_get_entries(task->ledger,
4156                             task_ledgers.tkm_private, &credit, &debit)) {
4157                                 tkm_info->total_palloc = credit;
4158                                 tkm_info->total_pfree = debit;
4159                         }
4160                         if (!ledger_get_entries(task->ledger,
4161                             task_ledgers.tkm_shared, &credit, &debit)) {
4162                                 tkm_info->total_salloc = credit;
4163                                 tkm_info->total_sfree = debit;
4164                         }
4165                         task_unlock(task);
4166                 }
4167
4168                 *task_info_count = TASK_KERNELMEMORY_INFO_COUNT;
4169                 return KERN_SUCCESS;
4170         }
4171
4172         /* OBSOLETE */
4173         case TASK_SCHED_FIFO_INFO:
4174         {
4175                 if (*task_info_count < POLICY_FIFO_BASE_COUNT) {
4176                         error = KERN_INVALID_ARGUMENT;
4177                         break;
4178                 }
4179
4180                 error = KERN_INVALID_POLICY;
4181                 break;
4182         }
4183
4184         /* OBSOLETE */
4185         case TASK_SCHED_RR_INFO:
4186         {
4187                 policy_rr_base_t        rr_base;
4188                 uint32_t quantum_time;
4189                 uint64_t quantum_ns;
4190
4191                 if (*task_info_count < POLICY_RR_BASE_COUNT) {
4192                         error = KERN_INVALID_ARGUMENT;
4193                         break;
4194                 }
4195
4196                 rr_base = (policy_rr_base_t) task_info_out;
4197
4198                 if (task != kernel_task) {
4199                         error = KERN_INVALID_POLICY;
4200                         break;
4201                 }
4202
4203                 rr_base->base_priority = task->priority;
4204
4205                 quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
4206                 absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
4207
4208                 rr_base->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
4209
4210                 *task_info_count = POLICY_RR_BASE_COUNT;
4211                 break;
4212         }
4213
4214         /* OBSOLETE */
4215         case TASK_SCHED_TIMESHARE_INFO:
4216         {
4217                 policy_timeshare_base_t ts_base;
4218
4219                 if (*task_info_count < POLICY_TIMESHARE_BASE_COUNT) {
4220                         error = KERN_INVALID_ARGUMENT;
4221                         break;
4222                 }
4223
4224                 ts_base = (policy_timeshare_base_t) task_info_out;
4225
4226                 if (task == kernel_task) {
4227                         error = KERN_INVALID_POLICY;
4228                         break;
4229                 }
4230
4231                 ts_base->base_priority = task->priority;
4232
4233                 *task_info_count = POLICY_TIMESHARE_BASE_COUNT;
4234                 break;
4235         }
4236
4237         case TASK_SECURITY_TOKEN:
4238         {
4239                 security_token_t        *sec_token_p;
4240
4241                 if (*task_info_count < TASK_SECURITY_TOKEN_COUNT) {
4242                         error = KERN_INVALID_ARGUMENT;
4243                         break;
4244                 }
4245
4246                 sec_token_p = (security_token_t *) task_info_out;
4247
4248                 *sec_token_p = task->sec_token;
4249
4250                 *task_info_count = TASK_SECURITY_TOKEN_COUNT;
4251                 break;
4252         }
4253
4254         case TASK_AUDIT_TOKEN:
4255         {
4256                 audit_token_t   *audit_token_p;
4257
4258                 if (*task_info_count < TASK_AUDIT_TOKEN_COUNT) {
4259                         error = KERN_INVALID_ARGUMENT;
4260                         break;
4261                 }
4262
4263                 audit_token_p = (audit_token_t *) task_info_out;
4264
4265                 *audit_token_p = task->audit_token;
4266
4267                 *task_info_count = TASK_AUDIT_TOKEN_COUNT;
4268                 break;
4269         }
4270
4271         case TASK_SCHED_INFO:
4272                 error = KERN_INVALID_ARGUMENT;
4273                 break;
4274
4275         case TASK_EVENTS_INFO:
4276         {
4277                 task_events_info_t      events_info;
4278                 thread_t                        thread;
4279
4280                 if (*task_info_count < TASK_EVENTS_INFO_COUNT) {
4281                         error = KERN_INVALID_ARGUMENT;
4282                         break;
4283                 }
4284
4285                 events_info = (task_events_info_t) task_info_out;
4286
4287
4288                 events_info->faults = task->faults;
4289                 events_info->pageins = task->pageins;
4290                 events_info->cow_faults = task->cow_faults;
4291                 events_info->messages_sent = task->messages_sent;
4292                 events_info->messages_received = task->messages_received;
4293                 events_info->syscalls_mach = task->syscalls_mach;
4294                 events_info->syscalls_unix = task->syscalls_unix;
4295
4296                 events_info->csw = task->c_switch;
4297
4298                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4299                         events_info->csw           += thread->c_switch;
4300                         events_info->syscalls_mach += thread->syscalls_mach;
4301                         events_info->syscalls_unix += thread->syscalls_unix;
4302                 }
4303
4304
4305                 *task_info_count = TASK_EVENTS_INFO_COUNT;
4306                 break;
4307         }
4308         case TASK_AFFINITY_TAG_INFO:
4309         {
4310                 if (*task_info_count < TASK_AFFINITY_TAG_INFO_COUNT) {
4311                         error = KERN_INVALID_ARGUMENT;
4312                         break;
4313                 }
4314
4315                 error = task_affinity_info(task, task_info_out, task_info_count);
4316                 break;
4317         }
4318         case TASK_POWER_INFO:
4319         {
4320                 if (*task_info_count < TASK_POWER_INFO_COUNT) {
4321                         error = KERN_INVALID_ARGUMENT;
4322                         break;
4323                 }
4324
4325                 task_power_info_locked(task, (task_power_info_t)task_info_out, NULL, NULL);
4326                 break;
4327         }
4328
4329         case TASK_POWER_INFO_V2:
4330         {
4331                 if (*task_info_count < TASK_POWER_INFO_V2_COUNT_OLD) {
4332                         error = KERN_INVALID_ARGUMENT;
4333                         break;
4334                 }
4335                 task_power_info_v2_t tpiv2 = (task_power_info_v2_t) task_info_out;
4336                 task_power_info_locked(task, &tpiv2->cpu_energy, &tpiv2->gpu_energy, tpiv2);
4337                 break;
4338         }
4339
4340         case TASK_VM_INFO:
4341         case TASK_VM_INFO_PURGEABLE:
4342         {
4343                 task_vm_info_t          vm_info;
4344                 vm_map_t                map;
4345
4346                 if (*task_info_count < TASK_VM_INFO_REV0_COUNT) {
4347                         error = KERN_INVALID_ARGUMENT;
4348                         break;
4349                 }
4350
4351                 vm_info = (task_vm_info_t)task_info_out;
4352
4353                 if (task == kernel_task) {
4354                         map = kernel_map;
4355                         /* no lock */
4356                 } else {
4357                         map = task->map;
4358                         vm_map_lock_read(map);
4359                 }
4360
4361                 vm_info->virtual_size = (typeof(vm_info->virtual_size))map->size;
4362                 vm_info->region_count = map->hdr.nentries;
4363                 vm_info->page_size = vm_map_page_size(map);
4364
4365                 vm_info->resident_size = pmap_resident_count(map->pmap);
4366                 vm_info->resident_size *= PAGE_SIZE;
4367                 vm_info->resident_size_peak = pmap_resident_max(map->pmap);
4368                 vm_info->resident_size_peak *= PAGE_SIZE;
4369
4370 #define _VM_INFO(_name) \
4371         vm_info->_name = ((mach_vm_size_t) map->pmap->stats._name) * PAGE_SIZE
4372
4373                 _VM_INFO(device);
4374                 _VM_INFO(device_peak);
4375                 _VM_INFO(external);
4376                 _VM_INFO(external_peak);
4377                 _VM_INFO(internal);
4378                 _VM_INFO(internal_peak);
4379                 _VM_INFO(reusable);
4380                 _VM_INFO(reusable_peak);
4381                 _VM_INFO(compressed);
4382                 _VM_INFO(compressed_peak);
4383                 _VM_INFO(compressed_lifetime);
4384
4385                 vm_info->purgeable_volatile_pmap = 0;
4386                 vm_info->purgeable_volatile_resident = 0;
4387                 vm_info->purgeable_volatile_virtual = 0;
4388                 if (task == kernel_task) {
4389                         /*
4390                          * We do not maintain the detailed stats for the
4391                          * kernel_pmap, so just count everything as
4392                          * "internal"...
4393                          */
4394                         vm_info->internal = vm_info->resident_size;
4395                         /*
4396                          * ... but since the memory held by the VM compressor
4397                          * in the kernel address space ought to be attributed
4398                          * to user-space tasks, we subtract it from "internal"
4399                          * to give memory reporting tools a more accurate idea
4400                          * of what the kernel itself is actually using, instead
4401                          * of making it look like the kernel is leaking memory
4402                          * when the system is under memory pressure.
4403                          */
4404                         vm_info->internal -= (VM_PAGE_COMPRESSOR_COUNT *
4405                             PAGE_SIZE);
4406                 } else {
4407                         mach_vm_size_t  volatile_virtual_size;
4408                         mach_vm_size_t  volatile_resident_size;
4409                         mach_vm_size_t  volatile_compressed_size;
4410                         mach_vm_size_t  volatile_pmap_size;
4411                         mach_vm_size_t  volatile_compressed_pmap_size;
4412                         kern_return_t   kr;
4413
4414                         if (flavor == TASK_VM_INFO_PURGEABLE) {
4415                                 kr = vm_map_query_volatile(
4416                                         map,
4417                                         &volatile_virtual_size,
4418                                         &volatile_resident_size,
4419                                         &volatile_compressed_size,
4420                                         &volatile_pmap_size,
4421                                         &volatile_compressed_pmap_size);
4422                                 if (kr == KERN_SUCCESS) {
4423                                         vm_info->purgeable_volatile_pmap =
4424                                             volatile_pmap_size;
4425                                         if (radar_20146450) {
4426                                                 vm_info->compressed -=
4427                                                     volatile_compressed_pmap_size;
4428                                         }
4429                                         vm_info->purgeable_volatile_resident =
4430                                             volatile_resident_size;
4431                                         vm_info->purgeable_volatile_virtual =
4432                                             volatile_virtual_size;
4433                                 }
4434                         }
4435                 }
4436                 *task_info_count = TASK_VM_INFO_REV0_COUNT;
4437
4438                 if (original_task_info_count >= TASK_VM_INFO_REV1_COUNT) {
4439                         vm_info->phys_footprint =
4440                             (mach_vm_size_t) get_task_phys_footprint(task);
4441                         *task_info_count = TASK_VM_INFO_REV1_COUNT;
4442                 }
4443                 if (original_task_info_count >= TASK_VM_INFO_REV2_COUNT) {
4444                         vm_info->min_address = map->min_offset;
4445                         vm_info->max_address = map->max_offset;
4446                         *task_info_count = TASK_VM_INFO_REV2_COUNT;
4447                 }
4448
4449                 if (task != kernel_task) {
4450                         vm_map_unlock_read(map);
4451                 }
4452
4453                 break;
4454         }
4455
4456         case TASK_WAIT_STATE_INFO:
4457         {
4458                 /*
4459                  * Deprecated flavor. Currently allowing some results until all users
4460                  * stop calling it. The results may not be accurate.
4461                  */
4462                 task_wait_state_info_t  wait_state_info;
4463                 uint64_t total_sfi_ledger_val = 0;
4464
4465                 if (*task_info_count < TASK_WAIT_STATE_INFO_COUNT) {
4466                         error = KERN_INVALID_ARGUMENT;
4467                         break;
4468                 }
4469
4470                 wait_state_info = (task_wait_state_info_t) task_info_out;
4471
4472                 wait_state_info->total_wait_state_time = 0;
4473                 bzero(wait_state_info->_reserved, sizeof(wait_state_info->_reserved));
4474
4475 #if CONFIG_SCHED_SFI
4476                 int i, prev_lentry = -1;
4477                 int64_t  val_credit, val_debit;
4478
4479                 for (i = 0; i < MAX_SFI_CLASS_ID; i++) {
4480                         val_credit = 0;
4481                         /*
4482                          * checking with prev_lentry != entry ensures adjacent classes
4483                          * which share the same ledger do not add wait times twice.
4484                          * Note: Use ledger() call to get data for each individual sfi class.
4485                          */
4486                         if (prev_lentry != task_ledgers.sfi_wait_times[i] &&
4487                             KERN_SUCCESS == ledger_get_entries(task->ledger,
4488                             task_ledgers.sfi_wait_times[i], &val_credit, &val_debit)) {
4489                                 total_sfi_ledger_val += val_credit;
4490                         }
4491                         prev_lentry = task_ledgers.sfi_wait_times[i];
4492                 }
4493
4494 #endif /* CONFIG_SCHED_SFI */
4495                 wait_state_info->total_wait_sfi_state_time = total_sfi_ledger_val;
4496                 *task_info_count = TASK_WAIT_STATE_INFO_COUNT;
4497
4498                 break;
4499         }
4500         case TASK_VM_INFO_PURGEABLE_ACCOUNT:
4501         {
4502 #if DEVELOPMENT || DEBUG
4503                 pvm_account_info_t      acnt_info;
4504
4505                 if (*task_info_count < PVM_ACCOUNT_INFO_COUNT) {
4506                         error = KERN_INVALID_ARGUMENT;
4507                         break;
4508                 }
4509
4510                 if (task_info_out == NULL) {
4511                         error = KERN_INVALID_ARGUMENT;
4512                         break;
4513                 }
4514
4515                 acnt_info = (pvm_account_info_t) task_info_out;
4516
4517                 error = vm_purgeable_account(task, acnt_info);
4518
4519                 *task_info_count = PVM_ACCOUNT_INFO_COUNT;
4520
4521                 break;
4522 #else /* DEVELOPMENT || DEBUG */
4523                 error = KERN_NOT_SUPPORTED;
4524                 break;
4525 #endif /* DEVELOPMENT || DEBUG */
4526         }
4527         case TASK_FLAGS_INFO:
4528         {
4529                 task_flags_info_t               flags_info;
4530
4531                 if (*task_info_count < TASK_FLAGS_INFO_COUNT) {
4532                         error = KERN_INVALID_ARGUMENT;
4533                         break;
4534                 }
4535
4536                 flags_info = (task_flags_info_t)task_info_out;
4537
4538                 /* only publish the 64-bit flag of the task */
4539                 flags_info->flags = task->t_flags & (TF_64B_ADDR | TF_64B_DATA);
4540
4541                 *task_info_count = TASK_FLAGS_INFO_COUNT;
4542                 break;
4543         }
4544
4545         case TASK_DEBUG_INFO_INTERNAL:
4546         {
4547 #if DEVELOPMENT || DEBUG
4548                 task_debug_info_internal_t dbg_info;
4549                 if (*task_info_count < TASK_DEBUG_INFO_INTERNAL_COUNT) {
4550                         error = KERN_NOT_SUPPORTED;
4551                         break;
4552                 }
4553
4554                 if (task_info_out == NULL) {
4555                         error = KERN_INVALID_ARGUMENT;
4556                         break;
4557                 }
4558                 dbg_info = (task_debug_info_internal_t) task_info_out;
4559                 dbg_info->ipc_space_size = 0;
4560                 if (task->itk_space) {
4561                         dbg_info->ipc_space_size = task->itk_space->is_table_size;
4562                 }
4563
4564                 dbg_info->suspend_count = task->suspend_count;
4565
4566                 error = KERN_SUCCESS;
4567                 *task_info_count = TASK_DEBUG_INFO_INTERNAL_COUNT;
4568                 break;
4569 #else /* DEVELOPMENT || DEBUG */
4570                 error = KERN_NOT_SUPPORTED;
4571                 break;
4572 #endif /* DEVELOPMENT || DEBUG */
4573         }
4574         default:
4575                 error = KERN_INVALID_ARGUMENT;
4576         }
4577
4578         task_unlock(task);
4579         return error;
4580 }
4581
4582 /*
4583  * task_info_from_user
4584  *
4585  * When calling task_info from user space,
4586  * this function will be executed as mig server side
4587  * instead of calling directly into task_info.
4588  * This gives the possibility to perform more security
4589  * checks on task_port.
4590  *
4591  * In the case of TASK_DYLD_INFO, we require the more
4592  * privileged task_port not the less-privileged task_name_port.
4593  *
4594  */
4595 kern_return_t
4596 task_info_from_user(
4597         mach_port_t             task_port,
4598         task_flavor_t           flavor,
4599         task_info_t             task_info_out,
4600         mach_msg_type_number_t  *task_info_count)
4601 {
4602         task_t task;
4603         kern_return_t ret;
4604
4605         if (flavor == TASK_DYLD_INFO) {
4606                 task = convert_port_to_task(task_port);
4607         } else {
4608                 task = convert_port_to_task_name(task_port);
4609         }
4610
4611         ret = task_info(task, flavor, task_info_out, task_info_count);
4612
4613         task_deallocate(task);
4614
4615         return ret;
4616 }
4617
4618 /*
4619  *      task_power_info
4620  *
4621  *      Returns power stats for the task.
4622  *      Note: Called with task locked.
4623  */
4624 void
4625 task_power_info_locked(
4626         task_t                  task,
4627         task_power_info_t       info,
4628         gpu_energy_data_t       ginfo,
4629         task_power_info_v2_t    infov2)
4630 {
4631         thread_t                thread;
4632         ledger_amount_t         tmp;
4633
4634         task_lock_assert_owned(task);
4635
4636         ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
4637             (ledger_amount_t *)&info->task_interrupt_wakeups, &tmp);
4638         ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
4639             (ledger_amount_t *)&info->task_platform_idle_wakeups, &tmp);
4640
4641         info->task_timer_wakeups_bin_1 = task->task_timer_wakeups_bin_1;
4642         info->task_timer_wakeups_bin_2 = task->task_timer_wakeups_bin_2;
4643
4644         info->total_user = task->total_user_time;
4645         info->total_system = task->total_system_time;
4646
4647 #if CONFIG_EMBEDDED
4648         if (infov2) {
4649                 infov2->task_energy = task->task_energy;
4650         }
4651 #endif
4652
4653         if (ginfo) {
4654                 ginfo->task_gpu_utilisation = task->task_gpu_ns;
4655         }
4656
4657         if (infov2) {
4658                 infov2->task_ptime = task->total_ptime;
4659                 infov2->task_pset_switches = task->ps_switch;
4660         }
4661
4662         queue_iterate(&task->threads, thread, thread_t, task_threads) {
4663                 uint64_t        tval;
4664                 spl_t           x;
4665
4666                 if (thread->options & TH_OPT_IDLE_THREAD) {
4667                         continue;
4668                 }
4669
4670                 x = splsched();
4671                 thread_lock(thread);
4672
4673                 info->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
4674                 info->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
4675
4676 #if CONFIG_EMBEDDED
4677                 if (infov2) {
4678                         infov2->task_energy += ml_energy_stat(thread);
4679                 }
4680 #endif
4681
4682                 tval = timer_grab(&thread->user_timer);
4683                 info->total_user += tval;
4684
4685                 if (infov2) {
4686                         tval = timer_grab(&thread->ptime);
4687                         infov2->task_ptime += tval;
4688                         infov2->task_pset_switches += thread->ps_switch;
4689                 }
4690
4691                 tval = timer_grab(&thread->system_timer);
4692                 if (thread->precise_user_kernel_time) {
4693                         info->total_system += tval;
4694                 } else {
4695                         /* system_timer may represent either sys or user */
4696                         info->total_user += tval;
4697                 }
4698
4699                 if (ginfo) {
4700                         ginfo->task_gpu_utilisation += ml_gpu_stat(thread);
4701                 }
4702                 thread_unlock(thread);
4703                 splx(x);
4704         }
4705 }
4706
4707 /*
4708  *      task_gpu_utilisation
4709  *
4710  *      Returns the total gpu time used by the all the threads of the task
4711  *  (both dead and alive)
4712  */
4713 uint64_t
4714 task_gpu_utilisation(
4715         task_t  task)
4716 {
4717         uint64_t gpu_time = 0;
4718 #if !CONFIG_EMBEDDED
4719         thread_t thread;
4720
4721         task_lock(task);
4722         gpu_time += task->task_gpu_ns;
4723
4724         queue_iterate(&task->threads, thread, thread_t, task_threads) {
4725                 spl_t x;
4726                 x = splsched();
4727                 thread_lock(thread);
4728                 gpu_time += ml_gpu_stat(thread);
4729                 thread_unlock(thread);
4730                 splx(x);
4731         }
4732
4733         task_unlock(task);
4734 #else /* CONFIG_EMBEDDED */
4735         /* silence compiler warning */
4736         (void)task;
4737 #endif /* !CONFIG_EMBEDDED */
4738         return gpu_time;
4739 }
4740
4741 /*
4742  *      task_energy
4743  *
4744  *      Returns the total energy used by the all the threads of the task
4745  *  (both dead and alive)
4746  */
4747 uint64_t
4748 task_energy(
4749         task_t  task)
4750 {
4751         uint64_t energy = 0;
4752         thread_t thread;
4753
4754         task_lock(task);
4755         energy += task->task_energy;
4756
4757         queue_iterate(&task->threads, thread, thread_t, task_threads) {
4758                 spl_t x;
4759                 x = splsched();
4760                 thread_lock(thread);
4761                 energy += ml_energy_stat(thread);
4762                 thread_unlock(thread);
4763                 splx(x);
4764         }
4765
4766         task_unlock(task);
4767         return energy;
4768 }
4769
4770
4771 uint64_t
4772 task_cpu_ptime(
4773         __unused task_t  task)
4774 {
4775         return 0;
4776 }
4777
4778
4779 /* This function updates the cpu time in the arrays for each
4780  * effective and requested QoS class
4781  */
4782 void
4783 task_update_cpu_time_qos_stats(
4784         task_t  task,
4785         uint64_t *eqos_stats,
4786         uint64_t *rqos_stats)
4787 {
4788         if (!eqos_stats && !rqos_stats) {
4789                 return;
4790         }
4791
4792         task_lock(task);
4793         thread_t thread;
4794         queue_iterate(&task->threads, thread, thread_t, task_threads) {
4795                 if (thread->options & TH_OPT_IDLE_THREAD) {
4796                         continue;
4797                 }
4798
4799                 thread_update_qos_cpu_time(thread);
4800         }
4801
4802         if (eqos_stats) {
4803                 eqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_eqos_stats.cpu_time_qos_default;
4804                 eqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_eqos_stats.cpu_time_qos_maintenance;
4805                 eqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_eqos_stats.cpu_time_qos_background;
4806                 eqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_eqos_stats.cpu_time_qos_utility;
4807                 eqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_eqos_stats.cpu_time_qos_legacy;
4808                 eqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_eqos_stats.cpu_time_qos_user_initiated;
4809                 eqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_eqos_stats.cpu_time_qos_user_interactive;
4810         }
4811
4812         if (rqos_stats) {
4813                 rqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_rqos_stats.cpu_time_qos_default;
4814                 rqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_rqos_stats.cpu_time_qos_maintenance;
4815                 rqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_rqos_stats.cpu_time_qos_background;
4816                 rqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_rqos_stats.cpu_time_qos_utility;
4817                 rqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_rqos_stats.cpu_time_qos_legacy;
4818                 rqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_rqos_stats.cpu_time_qos_user_initiated;
4819                 rqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_rqos_stats.cpu_time_qos_user_interactive;
4820         }
4821
4822         task_unlock(task);
4823 }
4824
4825 kern_return_t
4826 task_purgable_info(
4827         task_t                  task,
4828         task_purgable_info_t    *stats)
4829 {
4830         if (task == TASK_NULL || stats == NULL) {
4831                 return KERN_INVALID_ARGUMENT;
4832         }
4833         /* Take task reference */
4834         task_reference(task);
4835         vm_purgeable_stats((vm_purgeable_info_t)stats, task);
4836         /* Drop task reference */
4837         task_deallocate(task);
4838         return KERN_SUCCESS;
4839 }
4840
4841 void
4842 task_vtimer_set(
4843         task_t          task,
4844         integer_t       which)
4845 {
4846         thread_t        thread;
4847         spl_t           x;
4848
4849         task_lock(task);
4850
4851         task->vtimers |= which;
4852
4853         switch (which) {
4854         case TASK_VTIMER_USER:
4855                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4856                         x = splsched();
4857                         thread_lock(thread);
4858                         if (thread->precise_user_kernel_time) {
4859                                 thread->vtimer_user_save = timer_grab(&thread->user_timer);
4860                         } else {
4861                                 thread->vtimer_user_save = timer_grab(&thread->system_timer);
4862                         }
4863                         thread_unlock(thread);
4864                         splx(x);
4865                 }
4866                 break;
4867
4868         case TASK_VTIMER_PROF:
4869                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4870                         x = splsched();
4871                         thread_lock(thread);
4872                         thread->vtimer_prof_save = timer_grab(&thread->user_timer);
4873                         thread->vtimer_prof_save += timer_grab(&thread->system_timer);
4874                         thread_unlock(thread);
4875                         splx(x);
4876                 }
4877                 break;
4878
4879         case TASK_VTIMER_RLIM:
4880                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4881                         x = splsched();
4882                         thread_lock(thread);
4883                         thread->vtimer_rlim_save = timer_grab(&thread->user_timer);
4884                         thread->vtimer_rlim_save += timer_grab(&thread->system_timer);
4885                         thread_unlock(thread);
4886                         splx(x);
4887                 }
4888                 break;
4889         }
4890
4891         task_unlock(task);
4892 }
4893
4894 void
4895 task_vtimer_clear(
4896         task_t          task,
4897         integer_t       which)
4898 {
4899         assert(task == current_task());
4900
4901         task_lock(task);
4902
4903         task->vtimers &= ~which;
4904
4905         task_unlock(task);
4906 }
4907
4908 void
4909 task_vtimer_update(
4910         __unused
4911         task_t          task,
4912         integer_t       which,
4913         uint32_t        *microsecs)
4914 {
4915         thread_t        thread = current_thread();
4916         uint32_t        tdelt = 0;
4917         clock_sec_t     secs = 0;
4918         uint64_t        tsum;
4919
4920         assert(task == current_task());
4921
4922         spl_t s = splsched();
4923         thread_lock(thread);
4924
4925         if ((task->vtimers & which) != (uint32_t)which) {
4926                 thread_unlock(thread);
4927                 splx(s);
4928                 return;
4929         }
4930
4931         switch (which) {
4932         case TASK_VTIMER_USER:
4933                 if (thread->precise_user_kernel_time) {
4934                         tdelt = (uint32_t)timer_delta(&thread->user_timer,
4935                             &thread->vtimer_user_save);
4936                 } else {
4937                         tdelt = (uint32_t)timer_delta(&thread->system_timer,
4938                             &thread->vtimer_user_save);
4939                 }
4940                 absolutetime_to_microtime(tdelt, &secs, microsecs);
4941                 break;
4942
4943         case TASK_VTIMER_PROF:
4944                 tsum = timer_grab(&thread->user_timer);
4945                 tsum += timer_grab(&thread->system_timer);
4946                 tdelt = (uint32_t)(tsum - thread->vtimer_prof_save);
4947                 absolutetime_to_microtime(tdelt, &secs, microsecs);
4948                 /* if the time delta is smaller than a usec, ignore */
4949                 if (*microsecs != 0) {
4950                         thread->vtimer_prof_save = tsum;
4951                 }
4952                 break;
4953
4954         case TASK_VTIMER_RLIM:
4955                 tsum = timer_grab(&thread->user_timer);
4956                 tsum += timer_grab(&thread->system_timer);
4957                 tdelt = (uint32_t)(tsum - thread->vtimer_rlim_save);
4958                 thread->vtimer_rlim_save = tsum;
4959                 absolutetime_to_microtime(tdelt, &secs, microsecs);
4960                 break;
4961         }
4962
4963         thread_unlock(thread);
4964         splx(s);
4965 }
4966
4967 /*
4968  *      task_assign:
4969  *
4970  *      Change the assigned processor set for the task
4971  */
4972 kern_return_t
4973 task_assign(
4974         __unused task_t         task,
4975         __unused processor_set_t        new_pset,
4976         __unused boolean_t      assign_threads)
4977 {
4978         return KERN_FAILURE;
4979 }
4980
4981 /*
4982  *      task_assign_default:
4983  *
4984  *      Version of task_assign to assign to default processor set.
4985  */
4986 kern_return_t
4987 task_assign_default(
4988         task_t          task,
4989         boolean_t       assign_threads)
4990 {
4991         return task_assign(task, &pset0, assign_threads);
4992 }
4993
4994 /*
4995  *      task_get_assignment
4996  *
4997  *      Return name of processor set that task is assigned to.
4998  */
4999 kern_return_t
5000 task_get_assignment(
5001         task_t          task,
5002         processor_set_t *pset)
5003 {
5004         if (!task || !task->active) {
5005                 return KERN_FAILURE;
5006         }
5007
5008         *pset = &pset0;
5009
5010         return KERN_SUCCESS;
5011 }
5012
5013 uint64_t
5014 get_task_dispatchqueue_offset(
5015         task_t          task)
5016 {
5017         return task->dispatchqueue_offset;
5018 }
5019
5020 /*
5021  *      task_policy
5022  *
5023  *      Set scheduling policy and parameters, both base and limit, for
5024  *      the given task. Policy must be a policy which is enabled for the
5025  *      processor set. Change contained threads if requested.
5026  */
5027 kern_return_t
5028 task_policy(
5029         __unused task_t                 task,
5030         __unused policy_t                       policy_id,
5031         __unused policy_base_t          base,
5032         __unused mach_msg_type_number_t count,
5033         __unused boolean_t                      set_limit,
5034         __unused boolean_t                      change)
5035 {
5036         return KERN_FAILURE;
5037 }
5038
5039 /*
5040  *      task_set_policy
5041  *
5042  *      Set scheduling policy and parameters, both base and limit, for
5043  *      the given task. Policy can be any policy implemented by the
5044  *      processor set, whether enabled or not. Change contained threads
5045  *      if requested.
5046  */
5047 kern_return_t
5048 task_set_policy(
5049         __unused task_t                 task,
5050         __unused processor_set_t                pset,
5051         __unused policy_t                       policy_id,
5052         __unused policy_base_t          base,
5053         __unused mach_msg_type_number_t base_count,
5054         __unused policy_limit_t         limit,
5055         __unused mach_msg_type_number_t limit_count,
5056         __unused boolean_t                      change)
5057 {
5058         return KERN_FAILURE;
5059 }
5060
5061 kern_return_t
5062 task_set_ras_pc(
5063         __unused task_t task,
5064         __unused vm_offset_t    pc,
5065         __unused vm_offset_t    endpc)
5066 {
5067         return KERN_FAILURE;
5068 }
5069
5070 void
5071 task_synchronizer_destroy_all(task_t task)
5072 {
5073         /*
5074          *  Destroy owned semaphores
5075          */
5076         semaphore_destroy_all(task);
5077 }
5078
5079 /*
5080  * Install default (machine-dependent) initial thread state
5081  * on the task.  Subsequent thread creation will have this initial
5082  * state set on the thread by machine_thread_inherit_taskwide().
5083  * Flavors and structures are exactly the same as those to thread_set_state()
5084  */
5085 kern_return_t
5086 task_set_state(
5087         task_t task,
5088         int flavor,
5089         thread_state_t state,
5090         mach_msg_type_number_t state_count)
5091 {
5092         kern_return_t ret;
5093
5094         if (task == TASK_NULL) {
5095                 return KERN_INVALID_ARGUMENT;
5096         }
5097
5098         task_lock(task);
5099
5100         if (!task->active) {
5101                 task_unlock(task);
5102                 return KERN_FAILURE;
5103         }
5104
5105         ret = machine_task_set_state(task, flavor, state, state_count);
5106
5107         task_unlock(task);
5108         return ret;
5109 }
5110
5111 /*
5112  * Examine the default (machine-dependent) initial thread state
5113  * on the task, as set by task_set_state().  Flavors and structures
5114  * are exactly the same as those passed to thread_get_state().
5115  */
5116 kern_return_t
5117 task_get_state(
5118         task_t  task,
5119         int     flavor,
5120         thread_state_t state,
5121         mach_msg_type_number_t *state_count)
5122 {
5123         kern_return_t ret;
5124
5125         if (task == TASK_NULL) {
5126                 return KERN_INVALID_ARGUMENT;
5127         }
5128
5129         task_lock(task);
5130
5131         if (!task->active) {
5132                 task_unlock(task);
5133                 return KERN_FAILURE;
5134         }
5135
5136         ret = machine_task_get_state(task, flavor, state, state_count);
5137
5138         task_unlock(task);
5139         return ret;
5140 }
5141
5142
5143 static kern_return_t __attribute__((noinline, not_tail_called))
5144 PROC_VIOLATED_GUARD__SEND_EXC_GUARD_AND_SUSPEND(
5145         mach_exception_code_t code,
5146         mach_exception_subcode_t subcode,
5147         void *reason)
5148 {
5149 #ifdef MACH_BSD
5150         if (1 == proc_selfpid()) {
5151                 return KERN_NOT_SUPPORTED;              // initproc is immune
5152         }
5153 #endif
5154         mach_exception_data_type_t codes[EXCEPTION_CODE_MAX] = {
5155                 [0] = code,
5156                 [1] = subcode,
5157         };
5158         task_t task = current_task();
5159         kern_return_t kr;
5160
5161         /* (See jetsam-related comments below) */
5162
5163         proc_memstat_terminated(task->bsd_info, TRUE);
5164         kr = task_enqueue_exception_with_corpse(task, EXC_GUARD, codes, 2, reason);
5165         proc_memstat_terminated(task->bsd_info, FALSE);
5166         return kr;
5167 }
5168
5169 kern_return_t
5170 task_violated_guard(
5171         mach_exception_code_t code,
5172         mach_exception_subcode_t subcode,
5173         void *reason)
5174 {
5175         return PROC_VIOLATED_GUARD__SEND_EXC_GUARD_AND_SUSPEND(code, subcode, reason);
5176 }
5177
5178
5179 #if CONFIG_MEMORYSTATUS
5180
5181 boolean_t
5182 task_get_memlimit_is_active(task_t task)
5183 {
5184         assert(task != NULL);
5185
5186         if (task->memlimit_is_active == 1) {
5187                 return TRUE;
5188         } else {
5189                 return FALSE;
5190         }
5191 }
5192
5193 void
5194 task_set_memlimit_is_active(task_t task, boolean_t memlimit_is_active)
5195 {
5196         assert(task != NULL);
5197
5198         if (memlimit_is_active) {
5199                 task->memlimit_is_active = 1;
5200         } else {
5201                 task->memlimit_is_active = 0;
5202         }
5203 }
5204
5205 boolean_t
5206 task_get_memlimit_is_fatal(task_t task)
5207 {
5208         assert(task != NULL);
5209
5210         if (task->memlimit_is_fatal == 1) {
5211                 return TRUE;
5212         } else {
5213                 return FALSE;
5214         }
5215 }
5216
5217 void
5218 task_set_memlimit_is_fatal(task_t task, boolean_t memlimit_is_fatal)
5219 {
5220         assert(task != NULL);
5221
5222         if (memlimit_is_fatal) {
5223                 task->memlimit_is_fatal = 1;
5224         } else {
5225                 task->memlimit_is_fatal = 0;
5226         }
5227 }
5228
5229 boolean_t
5230 task_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
5231 {
5232         boolean_t triggered = FALSE;
5233
5234         assert(task == current_task());
5235
5236         /*
5237          * Returns true, if task has already triggered an exc_resource exception.
5238          */
5239
5240         if (memlimit_is_active) {
5241                 triggered = (task->memlimit_active_exc_resource ? TRUE : FALSE);
5242         } else {
5243                 triggered = (task->memlimit_inactive_exc_resource ? TRUE : FALSE);
5244         }
5245
5246         return triggered;
5247 }
5248
5249 void
5250 task_mark_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
5251 {
5252         assert(task == current_task());
5253
5254         /*
5255          * We allow one exc_resource per process per active/inactive limit.
5256          * The limit's fatal attribute does not come into play.
5257          */
5258
5259         if (memlimit_is_active) {
5260                 task->memlimit_active_exc_resource = 1;
5261         } else {
5262                 task->memlimit_inactive_exc_resource = 1;
5263         }
5264 }
5265
5266 #define HWM_USERCORE_MINSPACE 250 // free space (in MB) required *after* core file creation
5267
/*
 * Deliver an EXC_RESOURCE (memory / high-watermark flavor) for the
 * current task after it crossed its footprint limit.
 *
 *   max_footprint_mb  limit, in MB, encoded into the exception code and
 *                     printed in the log messages.
 *   is_fatal          whether the violated limit is fatal; a fatal
 *                     violation never takes the corpse-fork path.
 *
 * Sequence, as implemented below:
 *   1. (MACH_BSD) pid 1 returns immediately; otherwise the process name
 *      and its synchronous-EXC_RESOURCE preference are looked up.
 *   2. (CONFIG_COREDUMP) if hwm_user_cores is set, a coredump is taken
 *      and the task is resumed afterwards.
 *   3. If disable_exc_resource is set, a message is printed and the
 *      routine returns without raising anything.
 *   4. The memstat "terminated" mark is set, the exception is delivered
 *      either synchronously (task suspended around exception_triage())
 *      or through a corpse, and the mark is cleared again.
 *
 * NOTE(review): `procname' falls back to "unknown" when bsd_info is
 * NULL, but the coredump path and proc_memstat_terminated() still pass
 * current_task()->bsd_info unchecked - confirm the callees tolerate NULL.
 */
void __attribute__((noinline))
PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, boolean_t is_fatal)
{
        task_t                                          task            = current_task();
        int                                                     pid         = 0;
        const char                                      *procname       = "unknown";
        mach_exception_data_type_t      code[EXCEPTION_CODE_MAX];
        boolean_t send_sync_exc_resource = FALSE;

#ifdef MACH_BSD
        pid = proc_selfpid();

        if (pid == 1) {
                /*
                 * Cannot have ReportCrash analyzing
                 * a suspended initproc.
                 */
                return;
        }

        if (task->bsd_info != NULL) {
                procname = proc_name_address(current_task()->bsd_info);
                send_sync_exc_resource = proc_send_synchronous_EXC_RESOURCE(current_task()->bsd_info);
        }
#endif
#if CONFIG_COREDUMP
        if (hwm_user_cores) {
                int                             error;
                uint64_t                starttime, end;
                clock_sec_t             secs = 0;
                uint32_t                microsecs = 0;

                starttime = mach_absolute_time();
                /*
                 * Trigger a coredump of this process. Don't proceed unless we know we won't
                 * be filling up the disk; and ignore the core size resource limit for this
                 * core file.
                 */
                if ((error = coredump(current_task()->bsd_info, HWM_USERCORE_MINSPACE, COREDUMP_IGNORE_ULIMIT)) != 0) {
                        printf("couldn't take coredump of %s[%d]: %d\n", procname, pid, error);
                }
                /*
                 * coredump() leaves the task suspended.
                 */
                task_resume_internal(current_task());

                /* Report how long the coredump (including the resume) took. */
                end = mach_absolute_time();
                absolutetime_to_microtime(end - starttime, &secs, &microsecs);
                printf("coredump of %s[%d] taken in %d secs %d microsecs\n",
                    proc_name_address(current_task()->bsd_info), pid, (int)secs, microsecs);
        }
#endif /* CONFIG_COREDUMP */

        /* Exceptions globally disabled: log the event and stop here. */
        if (disable_exc_resource) {
                printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
                    "supressed by a boot-arg.\n", procname, pid, max_footprint_mb);
                return;
        }

        /*
         * A task that has triggered an EXC_RESOURCE, should not be
         * jetsammed when the device is under memory pressure.  Here
         * we set the P_MEMSTAT_TERMINATED flag so that the process
         * will be skipped if the memorystatus_thread wakes up.
         */
        proc_memstat_terminated(current_task()->bsd_info, TRUE);

        /* Build the exception code: memory resource, high-watermark flavor, limit in MB. */
        code[0] = code[1] = 0;
        EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY);
        EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK);
        EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb);

        /*
         * Do not generate a corpse fork if the violation is a fatal one
         * or the process wants synchronous EXC_RESOURCE exceptions.
         */
        if (is_fatal || send_sync_exc_resource || exc_via_corpse_forking == 0) {
                /* Do not send a EXC_RESOURCE if corpse_for_fatal_memkill is set */
                if (send_sync_exc_resource || corpse_for_fatal_memkill == 0) {
                        /*
                         * Use the _internal_ variant so that no user-space
                         * process can resume our task from under us.
                         */
                        task_suspend_internal(task);
                        exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
                        task_resume_internal(task);
                }
        } else {
                /* Corpse path: skipped (with a log line) while audio_active is set. */
                if (audio_active) {
                        printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
                            "supressed due to audio playback.\n", procname, pid, max_footprint_mb);
                } else {
                        task_enqueue_exception_with_corpse(task, EXC_RESOURCE,
                            code, EXCEPTION_CODE_MAX, NULL);
                }
        }

        /*
         * After the EXC_RESOURCE has been handled, we must clear the
         * P_MEMSTAT_TERMINATED flag so that the process can again be
         * considered for jetsam if the memorystatus_thread wakes up.
         */
        proc_memstat_terminated(current_task()->bsd_info, FALSE);  /* clear the flag */
}
5372
5373 /*
5374  * Callback invoked when a task exceeds its physical footprint limit.
5375  */
5376 void
5377 task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1)
5378 {
5379         ledger_amount_t max_footprint, max_footprint_mb;
5380         task_t task;
5381         boolean_t is_warning;
5382         boolean_t memlimit_is_active;
5383         boolean_t memlimit_is_fatal;
5384
5385         if (warning == LEDGER_WARNING_DIPPED_BELOW) {
5386                 /*
5387                  * Task memory limits only provide a warning on the way up.
5388                  */
5389                 return;
5390         } else if (warning == LEDGER_WARNING_ROSE_ABOVE) {
5391                 /*
5392                  * This task is in danger of violating a memory limit,
5393                  * It has exceeded a percentage level of the limit.
5394                  */
5395                 is_warning = TRUE;
5396         } else {
5397                 /*
5398                  * The task has exceeded the physical footprint limit.
5399                  * This is not a warning but a true limit violation.
5400                  */
5401                 is_warning = FALSE;
5402         }
5403
5404         task = current_task();
5405
5406         ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &max_footprint);
5407         max_footprint_mb = max_footprint >> 20;
5408
5409         memlimit_is_active = task_get_memlimit_is_active(task);
5410         memlimit_is_fatal = task_get_memlimit_is_fatal(task);
5411
5412         /*
5413          * If this is an actual violation (not a warning), then generate EXC_RESOURCE exception.
5414          * We only generate the exception once per process per memlimit (active/inactive limit).
5415          * To enforce this, we monitor state based on the  memlimit's active/inactive attribute
5416          * and we disable it by marking that memlimit as exception triggered.
5417          */
5418         if ((is_warning == FALSE) && (!task_has_triggered_exc_resource(task, memlimit_is_active))) {
5419                 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND((int)max_footprint_mb, memlimit_is_fatal);
5420                 memorystatus_log_exception((int)max_footprint_mb, memlimit_is_active, memlimit_is_fatal);
5421                 task_mark_has_triggered_exc_resource(task, memlimit_is_active);
5422         }
5423
5424         memorystatus_on_ledger_footprint_exceeded(is_warning, memlimit_is_active, memlimit_is_fatal);
5425 }
5426
5427 extern int proc_check_footprint_priv(void);
5428
5429 kern_return_t
5430 task_set_phys_footprint_limit(
5431         task_t task,
5432         int new_limit_mb,
5433         int *old_limit_mb)
5434 {
5435         kern_return_t error;
5436
5437         boolean_t memlimit_is_active;
5438         boolean_t memlimit_is_fatal;
5439
5440         if ((error = proc_check_footprint_priv())) {
5441                 return KERN_NO_ACCESS;
5442         }
5443
5444         /*
5445          * This call should probably be obsoleted.
5446          * But for now, we default to current state.
5447          */
5448         memlimit_is_active = task_get_memlimit_is_active(task);
5449         memlimit_is_fatal = task_get_memlimit_is_fatal(task);
5450
5451         return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, memlimit_is_active, memlimit_is_fatal);
5452 }
5453
5454 kern_return_t
5455 task_convert_phys_footprint_limit(
5456         int limit_mb,
5457         int *converted_limit_mb)
5458 {
5459         if (limit_mb == -1) {
5460                 /*
5461                  * No limit
5462                  */
5463                 if (max_task_footprint != 0) {
5464                         *converted_limit_mb = (int)(max_task_footprint / 1024 / 1024);   /* bytes to MB */
5465                 } else {
5466                         *converted_limit_mb = (int)(LEDGER_LIMIT_INFINITY >> 20);
5467                 }
5468         } else {
5469                 /* nothing to convert */
5470                 *converted_limit_mb = limit_mb;
5471         }
5472         return KERN_SUCCESS;
5473 }
5474
5475
/*
 * Set the phys_footprint ledger limit of a task and record the
 * limit's active/fatal attributes on the task.
 *
 *   task                task whose limit is changed (dereferenced
 *                       without a null check).
 *   new_limit_mb        new limit in MB; -1 removes the limit, falling
 *                       back to max_task_footprint when that is set and
 *                       to LEDGER_LIMIT_INFINITY otherwise.
 *   old_limit_mb        optional out parameter; receives the previous
 *                       limit in MB.
 *   memlimit_is_active  attribute stored via task_set_memlimit_is_active().
 *   memlimit_is_fatal   attribute stored via task_set_memlimit_is_fatal().
 *
 * Always returns KERN_SUCCESS.
 */
kern_return_t
task_set_phys_footprint_limit_internal(
        task_t task,
        int new_limit_mb,
        int *old_limit_mb,
        boolean_t memlimit_is_active,
        boolean_t memlimit_is_fatal)
{
        ledger_amount_t old;

        ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &old);

        /*
         * Check that limit >> 20 will not give an "unexpected" 32-bit
         * result. There are, however, implicit assumptions that -1 mb limit
         * equates to LEDGER_LIMIT_INFINITY.
         */
        assert(((old & 0xFFF0000000000000LL) == 0) || (old == LEDGER_LIMIT_INFINITY));

        if (old_limit_mb) {
                *old_limit_mb = (int)(old >> 20);
        }

        if (new_limit_mb == -1) {
                /*
                 * Caller wishes to remove the limit.
                 */
                ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
                    max_task_footprint ? max_task_footprint : LEDGER_LIMIT_INFINITY,
                    max_task_footprint ? max_task_footprint_warning_level : 0);

                /* The attributes are updated under the task lock, after the ledger change. */
                task_lock(task);
                task_set_memlimit_is_active(task, memlimit_is_active);
                task_set_memlimit_is_fatal(task, memlimit_is_fatal);
                task_unlock(task);

                return KERN_SUCCESS;
        }

        /*
         * With CONFIG_NOMONITORS defined, only the "remove the limit"
         * request above has any effect; every other request returns here
         * and the remainder of the function is unreachable.
         */
#ifdef CONFIG_NOMONITORS
        return KERN_SUCCESS;
#endif /* CONFIG_NOMONITORS */

        task_lock(task);

        if ((memlimit_is_active == task_get_memlimit_is_active(task)) &&
            (memlimit_is_fatal == task_get_memlimit_is_fatal(task)) &&
            (((ledger_amount_t)new_limit_mb << 20) == old)) {
                /*
                 * memlimit state is not changing
                 */
                task_unlock(task);
                return KERN_SUCCESS;
        }

        task_set_memlimit_is_active(task, memlimit_is_active);
        task_set_memlimit_is_fatal(task, memlimit_is_fatal);

        /* MB to bytes; PHYS_FOOTPRINT_WARNING_LEVEL is passed as the warning level. */
        ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
            (ledger_amount_t)new_limit_mb << 20, PHYS_FOOTPRINT_WARNING_LEVEL);

        /*
         * When the caller changed its own limit, have the ledger check the
         * current balance against the new limit right away.
         */
        if (task == current_task()) {
                ledger_check_new_balance(current_thread(), task->ledger,
                    task_ledgers.phys_footprint);
        }

        task_unlock(task);

        return KERN_SUCCESS;
}
5546
5547 kern_return_t
5548 task_get_phys_footprint_limit(
5549         task_t task,
5550         int *limit_mb)
5551 {
5552         ledger_amount_t limit;
5553
5554         ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &limit);
5555         /*
5556          * Check that limit >> 20 will not give an "unexpected" signed, 32-bit
5557          * result. There are, however, implicit assumptions that -1 mb limit
5558          * equates to LEDGER_LIMIT_INFINITY.
5559          */
5560         assert(((limit & 0xFFF0000000000000LL) == 0) || (limit == LEDGER_LIMIT_INFINITY));
5561         *limit_mb = (int)(limit >> 20);
5562
5563         return KERN_SUCCESS;
5564 }
5565 #else /* CONFIG_MEMORYSTATUS */
5566 kern_return_t
5567 task_set_phys_footprint_limit(
5568         __unused task_t task,
5569         __unused int new_limit_mb,
5570         __unused int *old_limit_mb)
5571 {
5572         return KERN_FAILURE;
5573 }
5574
5575 kern_return_t
5576 task_get_phys_footprint_limit(
5577         __unused task_t task,
5578         __unused int *limit_mb)
5579 {
5580         return KERN_FAILURE;
5581 }
5582 #endif /* CONFIG_MEMORYSTATUS */
5583
5584 void
5585 task_set_thread_limit(task_t task, uint16_t thread_limit)
5586 {
5587         assert(task != kernel_task);
5588         if (thread_limit <= TASK_MAX_THREAD_LIMIT) {
5589                 task_lock(task);
5590                 task->task_thread_limit = thread_limit;
5591                 task_unlock(task);
5592         }
5593 }
5594
5595 /*
5596  * We need to export some functions to other components that
5597  * are currently implemented in macros within the osfmk
5598  * component.  Just export them as functions of the same name.
5599  */
5600 boolean_t
5601 is_kerneltask(task_t t)
5602 {
5603         if (t == kernel_task) {
5604                 return TRUE;
5605         }
5606
5607         return FALSE;
5608 }
5609
5610 boolean_t
5611 is_corpsetask(task_t t)
5612 {
5613         return task_is_a_corpse(t);
5614 }
5615
5616 #undef current_task
5617 task_t current_task(void);
5618 task_t
5619 current_task(void)
5620 {
5621         return current_task_fast();
5622 }
5623
5624 #undef task_reference
5625 void task_reference(task_t task);
5626 void
5627 task_reference(
5628         task_t          task)
5629 {
5630         if (task != TASK_NULL) {
5631                 task_reference_internal(task);
5632         }
5633 }
5634
5635 /* defined in bsd/kern/kern_prot.c */
5636 extern int get_audit_token_pid(audit_token_t *audit_token);
5637
5638 int
5639 task_pid(task_t task)
5640 {
5641         if (task) {
5642                 return get_audit_token_pid(&task->audit_token);
5643         }
5644         return -1;
5645 }
5646
5647
5648 /*
5649  * This routine finds a thread in a task by its unique id
5650  * Returns a referenced thread or THREAD_NULL if the thread was not found
5651  *
5652  * TODO: This is super inefficient - it's an O(threads in task) list walk!
5653  *       We should make a tid hash, or transition all tid clients to thread ports
5654  *
5655  * Precondition: No locks held (will take task lock)
5656  */
5657 thread_t
5658 task_findtid(task_t task, uint64_t tid)
5659 {
5660         thread_t self           = current_thread();
5661         thread_t found_thread   = THREAD_NULL;
5662         thread_t iter_thread    = THREAD_NULL;
5663
5664         /* Short-circuit the lookup if we're looking up ourselves */
5665         if (tid == self->thread_id || tid == TID_NULL) {
5666                 assert(self->task == task);
5667
5668                 thread_reference(self);
5669
5670                 return self;
5671         }
5672
5673         task_lock(task);
5674
5675         queue_iterate(&task->threads, iter_thread, thread_t, task_threads) {
5676                 if (iter_thread->thread_id == tid) {
5677                         found_thread = iter_thread;
5678                         thread_reference(found_thread);
5679                         break;
5680                 }
5681         }
5682
5683         task_unlock(task);
5684
5685         return found_thread;
5686 }
5687
5688 int
5689 pid_from_task(task_t task)
5690 {
5691         int pid = -1;
5692
5693         if (task->bsd_info) {
5694                 pid = proc_pid(task->bsd_info);
5695         } else {
5696                 pid = task_pid(task);
5697         }
5698
5699         return pid;
5700 }
5701
5702 /*
5703  * Control the CPU usage monitor for a task.
5704  */
5705 kern_return_t
5706 task_cpu_usage_monitor_ctl(task_t task, uint32_t *flags)
5707 {
5708         int error = KERN_SUCCESS;
5709
5710         if (*flags & CPUMON_MAKE_FATAL) {
5711                 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_CPUMON;
5712         } else {
5713                 error = KERN_INVALID_ARGUMENT;
5714         }
5715
5716         return error;
5717 }
5718
5719 /*
5720  * Control the wakeups monitor for a task.
5721  */
5722 kern_return_t
5723 task_wakeups_monitor_ctl(task_t task, uint32_t *flags, int32_t *rate_hz)
5724 {
5725         ledger_t ledger = task->ledger;
5726
5727         task_lock(task);
5728         if (*flags & WAKEMON_GET_PARAMS) {
5729                 ledger_amount_t limit;
5730                 uint64_t                period;
5731
5732                 ledger_get_limit(ledger, task_ledgers.interrupt_wakeups, &limit);
5733                 ledger_get_period(ledger, task_ledgers.interrupt_wakeups, &period);
5734
5735                 if (limit != LEDGER_LIMIT_INFINITY) {
5736                         /*
5737                          * An active limit means the wakeups monitor is enabled.
5738                          */
5739                         *rate_hz = (int32_t)(limit / (int64_t)(period / NSEC_PER_SEC));
5740                         *flags = WAKEMON_ENABLE;
5741                         if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
5742                                 *flags |= WAKEMON_MAKE_FATAL;
5743                         }
5744                 } else {
5745                         *flags = WAKEMON_DISABLE;
5746                         *rate_hz = -1;
5747                 }
5748
5749                 /*
5750                  * If WAKEMON_GET_PARAMS is present in flags, all other flags are ignored.
5751                  */
5752                 task_unlock(task);
5753                 return KERN_SUCCESS;
5754         }
5755
5756         if (*flags & WAKEMON_ENABLE) {
5757                 if (*flags & WAKEMON_SET_DEFAULTS) {
5758                         *rate_hz = task_wakeups_monitor_rate;
5759                 }
5760
5761 #ifndef CONFIG_NOMONITORS
5762                 if (*flags & WAKEMON_MAKE_FATAL) {
5763                         task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
5764                 }
5765 #endif /* CONFIG_NOMONITORS */
5766
5767                 if (*rate_hz <= 0) {
5768                         task_unlock(task);
5769                         return KERN_INVALID_ARGUMENT;
5770                 }
5771
5772 #ifndef CONFIG_NOMONITORS
5773                 ledger_set_limit(ledger, task_ledgers.interrupt_wakeups, *rate_hz * task_wakeups_monitor_interval,
5774                     task_wakeups_monitor_ustackshots_trigger_pct);
5775                 ledger_set_period(ledger, task_ledgers.interrupt_wakeups, task_wakeups_monitor_interval * NSEC_PER_SEC);
5776                 ledger_enable_callback(ledger, task_ledgers.interrupt_wakeups);
5777 #endif /* CONFIG_NOMONITORS */
5778         } else if (*flags & WAKEMON_DISABLE) {
5779                 /*
5780                  * Caller wishes to disable wakeups monitor on the task.
5781                  *
5782                  * Disable telemetry if it was triggered by the wakeups monitor, and
5783                  * remove the limit & callback on the wakeups ledger entry.
5784                  */
5785 #if CONFIG_TELEMETRY
5786                 telemetry_task_ctl_locked(task, TF_WAKEMON_WARNING, 0);
5787 #endif
5788                 ledger_disable_refill(ledger, task_ledgers.interrupt_wakeups);
5789                 ledger_disable_callback(ledger, task_ledgers.interrupt_wakeups);
5790         }
5791
5792         task_unlock(task);
5793         return KERN_SUCCESS;
5794 }
5795
5796 void
5797 task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1)
5798 {
5799         if (warning == LEDGER_WARNING_ROSE_ABOVE) {
5800 #if CONFIG_TELEMETRY
5801                 /*
5802                  * This task is in danger of violating the wakeups monitor. Enable telemetry on this task
5803                  * so there are micro-stackshots available if and when EXC_RESOURCE is triggered.
5804                  */
5805                 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 1);
5806 #endif
5807                 return;
5808         }
5809
5810 #if CONFIG_TELEMETRY
5811         /*
5812          * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
5813          * exceeded the limit, turn telemetry off for the task.
5814          */
5815         telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 0);
5816 #endif
5817
5818         if (warning == 0) {
5819                 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS();
5820         }
5821 }
5822
5823 void __attribute__((noinline))
5824 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void)
5825 {
5826         task_t                      task        = current_task();
5827         int                         pid         = 0;
5828         const char                  *procname   = "unknown";
5829         boolean_t                   fatal;
5830         kern_return_t               kr;
5831 #ifdef EXC_RESOURCE_MONITORS
5832         mach_exception_data_type_t  code[EXCEPTION_CODE_MAX];
5833 #endif /* EXC_RESOURCE_MONITORS */
5834         struct ledger_entry_info    lei;
5835
5836 #ifdef MACH_BSD
5837         pid = proc_selfpid();
5838         if (task->bsd_info != NULL) {
5839                 procname = proc_name_address(current_task()->bsd_info);
5840         }
5841 #endif
5842
5843         ledger_get_entry_info(task->ledger, task_ledgers.interrupt_wakeups, &lei);
5844
5845         /*
5846          * Disable the exception notification so we don't overwhelm
5847          * the listener with an endless stream of redundant exceptions.
5848          * TODO: detect whether another thread is already reporting the violation.
5849          */
5850         uint32_t flags = WAKEMON_DISABLE;
5851         task_wakeups_monitor_ctl(task, &flags, NULL);
5852
5853         fatal = task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
5854         trace_resource_violation(RMON_CPUWAKES_VIOLATED, &lei);
5855         os_log(OS_LOG_DEFAULT, "process %s[%d] caught waking the CPU %llu times "
5856             "over ~%llu seconds, averaging %llu wakes / second and "
5857             "violating a %slimit of %llu wakes over %llu seconds.\n",
5858             procname, pid,
5859             lei.lei_balance, lei.lei_last_refill / NSEC_PER_SEC,
5860             lei.lei_last_refill == 0 ? 0 :
5861             (NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill),
5862             fatal ? "FATAL " : "",
5863             lei.lei_limit, lei.lei_refill_period / NSEC_PER_SEC);
5864
5865         kr = send_resource_violation(send_cpu_wakes_violation, task, &lei,
5866             fatal ? kRNFatalLimitFlag : 0);
5867         if (kr) {
5868                 printf("send_resource_violation(CPU wakes, ...): error %#x\n", kr);
5869         }
5870
5871 #ifdef EXC_RESOURCE_MONITORS
5872         if (disable_exc_resource) {
5873                 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
5874                     "supressed by a boot-arg\n", procname, pid);
5875                 return;
5876         }
5877         if (audio_active) {
5878                 os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
5879                     "supressed due to audio playback\n", procname, pid);
5880                 return;
5881         }
5882         if (lei.lei_last_refill == 0) {
5883                 os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
5884                     "supressed due to lei.lei_last_refill = 0 \n", procname, pid);
5885         }
5886
5887         code[0] = code[1] = 0;
5888         EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_WAKEUPS);
5889         EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_WAKEUPS_MONITOR);
5890         EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_PERMITTED(code[0],
5891             NSEC_PER_SEC * lei.lei_limit / lei.lei_refill_period);
5892         EXC_RESOURCE_CPUMONITOR_ENCODE_OBSERVATION_INTERVAL(code[0],
5893             lei.lei_last_refill);
5894         EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_OBSERVED(code[1],
5895             NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill);
5896         exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
5897 #endif /* EXC_RESOURCE_MONITORS */
5898
5899         if (fatal) {
5900                 task_terminate_internal(task);
5901         }
5902 }
5903
5904 static boolean_t
5905 global_update_logical_writes(int64_t io_delta)
5906 {
5907         int64_t old_count, new_count;
5908         boolean_t needs_telemetry;
5909
5910         do {
5911                 new_count = old_count = global_logical_writes_count;
5912                 new_count += io_delta;
5913                 if (new_count >= io_telemetry_limit) {
5914                         new_count = 0;
5915                         needs_telemetry = TRUE;
5916                 } else {
5917                         needs_telemetry = FALSE;
5918                 }
5919         } while (!OSCompareAndSwap64(old_count, new_count, &global_logical_writes_count));
5920         return needs_telemetry;
5921 }
5922
5923 void
5924 task_update_logical_writes(task_t task, uint32_t io_size, int flags, void *vp)
5925 {
5926         int64_t io_delta = 0;
5927         boolean_t needs_telemetry = FALSE;
5928
5929         if ((!task) || (!io_size) || (!vp)) {
5930                 return;
5931         }
5932
5933         KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_DATA_WRITE)) | DBG_FUNC_NONE,
5934             task_pid(task), io_size, flags, (uintptr_t)VM_KERNEL_ADDRPERM(vp), 0);
5935         DTRACE_IO4(logical_writes, struct task *, task, uint32_t, io_size, int, flags, vnode *, vp);
5936         switch (flags) {
5937         case TASK_WRITE_IMMEDIATE:
5938                 OSAddAtomic64(io_size, (SInt64 *)&(task->task_immediate_writes));
5939                 ledger_credit(task->ledger, task_ledgers.logical_writes, io_size);
5940                 break;
5941         case TASK_WRITE_DEFERRED:
5942                 OSAddAtomic64(io_size, (SInt64 *)&(task->task_deferred_writes));
5943                 ledger_credit(task->ledger, task_ledgers.logical_writes, io_size);
5944                 break;
5945         case TASK_WRITE_INVALIDATED:
5946                 OSAddAtomic64(io_size, (SInt64 *)&(task->task_invalidated_writes));
5947                 ledger_debit(task->ledger, task_ledgers.logical_writes, io_size);
5948                 break;
5949         case TASK_WRITE_METADATA:
5950                 OSAddAtomic64(io_size, (SInt64 *)&(task->task_metadata_writes));
5951                 ledger_credit(task->ledger, task_ledgers.logical_writes, io_size);
5952                 break;
5953         }
5954
5955         io_delta = (flags == TASK_WRITE_INVALIDATED) ? ((int64_t)io_size * -1ll) : ((int64_t)io_size);
5956         if (io_telemetry_limit != 0) {
5957                 /* If io_telemetry_limit is 0, disable global updates and I/O telemetry */
5958                 needs_telemetry = global_update_logical_writes(io_delta);
5959                 if (needs_telemetry) {
5960                         act_set_io_telemetry_ast(current_thread());
5961                 }
5962         }
5963 }
5964
5965 /*
5966  * Control the I/O monitor for a task.
5967  */
5968 kern_return_t
5969 task_io_monitor_ctl(task_t task, uint32_t *flags)
5970 {
5971         ledger_t ledger = task->ledger;
5972
5973         task_lock(task);
5974         if (*flags & IOMON_ENABLE) {
5975                 /* Configure the physical I/O ledger */
5976                 ledger_set_limit(ledger, task_ledgers.physical_writes, (task_iomon_limit_mb * 1024 * 1024), 0);
5977                 ledger_set_period(ledger, task_ledgers.physical_writes, (task_iomon_interval_secs * NSEC_PER_SEC));
5978
5979                 /* Configure the logical I/O ledger */
5980                 ledger_set_limit(ledger, task_ledgers.logical_writes, (task_iomon_limit_mb * 1024 * 1024), 0);
5981                 ledger_set_period(ledger, task_ledgers.logical_writes, (task_iomon_interval_secs * NSEC_PER_SEC));
5982         } else if (*flags & IOMON_DISABLE) {
5983                 /*
5984                  * Caller wishes to disable I/O monitor on the task.
5985                  */
5986                 ledger_disable_refill(ledger, task_ledgers.physical_writes);
5987                 ledger_disable_callback(ledger, task_ledgers.physical_writes);
5988                 ledger_disable_refill(ledger, task_ledgers.logical_writes);
5989                 ledger_disable_callback(ledger, task_ledgers.logical_writes);
5990         }
5991
5992         task_unlock(task);
5993         return KERN_SUCCESS;
5994 }
5995
5996 void
5997 task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1)
5998 {
5999         if (warning == 0) {
6000                 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO((int)param0);
6001         }
6002 }
6003
6004 void __attribute__((noinline))
6005 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor)
6006 {
6007         int                             pid = 0;
6008         task_t                          task = current_task();
6009 #ifdef EXC_RESOURCE_MONITORS
6010         mach_exception_data_type_t      code[EXCEPTION_CODE_MAX];
6011 #endif /* EXC_RESOURCE_MONITORS */
6012         struct ledger_entry_info        lei;
6013         kern_return_t                   kr;
6014
6015 #ifdef MACH_BSD
6016         pid = proc_selfpid();
6017 #endif
6018         /*
6019          * Get the ledger entry info. We need to do this before disabling the exception
6020          * to get correct values for all fields.
6021          */
6022         switch (flavor) {
6023         case FLAVOR_IO_PHYSICAL_WRITES:
6024                 ledger_get_entry_info(task->ledger, task_ledgers.physical_writes, &lei);
6025                 break;
6026         case FLAVOR_IO_LOGICAL_WRITES:
6027                 ledger_get_entry_info(task->ledger, task_ledgers.logical_writes, &lei);
6028                 break;
6029         }
6030
6031
6032         /*
6033          * Disable the exception notification so we don't overwhelm
6034          * the listener with an endless stream of redundant exceptions.
6035          * TODO: detect whether another thread is already reporting the violation.
6036          */
6037         uint32_t flags = IOMON_DISABLE;
6038         task_io_monitor_ctl(task, &flags);
6039
6040         if (flavor == FLAVOR_IO_LOGICAL_WRITES) {
6041                 trace_resource_violation(RMON_LOGWRITES_VIOLATED, &lei);
6042         }
6043         os_log(OS_LOG_DEFAULT, "process [%d] caught causing excessive I/O (flavor: %d). Task I/O: %lld MB. [Limit : %lld MB per %lld secs]\n",
6044             pid, flavor, (lei.lei_balance / (1024 * 1024)), (lei.lei_limit / (1024 * 1024)), (lei.lei_refill_period / NSEC_PER_SEC));
6045
6046         kr = send_resource_violation(send_disk_writes_violation, task, &lei, kRNFlagsNone);
6047         if (kr) {
6048                 printf("send_resource_violation(disk_writes, ...): error %#x\n", kr);
6049         }
6050
6051 #ifdef EXC_RESOURCE_MONITORS
6052         code[0] = code[1] = 0;
6053         EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_IO);
6054         EXC_RESOURCE_ENCODE_FLAVOR(code[0], flavor);
6055         EXC_RESOURCE_IO_ENCODE_INTERVAL(code[0], (lei.lei_refill_period / NSEC_PER_SEC));
6056         EXC_RESOURCE_IO_ENCODE_LIMIT(code[0], (lei.lei_limit / (1024 * 1024)));
6057         EXC_RESOURCE_IO_ENCODE_OBSERVED(code[1], (lei.lei_balance / (1024 * 1024)));
6058         exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
6059 #endif /* EXC_RESOURCE_MONITORS */
6060 }
6061
6062 /* Placeholders for the task set/get voucher interfaces */
6063 kern_return_t
6064 task_get_mach_voucher(
6065         task_t                  task,
6066         mach_voucher_selector_t __unused which,
6067         ipc_voucher_t           *voucher)
6068 {
6069         if (TASK_NULL == task) {
6070                 return KERN_INVALID_TASK;
6071         }
6072
6073         *voucher = NULL;
6074         return KERN_SUCCESS;
6075 }
6076
6077 kern_return_t
6078 task_set_mach_voucher(
6079         task_t                  task,
6080         ipc_voucher_t           __unused voucher)
6081 {
6082         if (TASK_NULL == task) {
6083                 return KERN_INVALID_TASK;
6084         }
6085
6086         return KERN_SUCCESS;
6087 }
6088
6089 kern_return_t
6090 task_swap_mach_voucher(
6091         __unused task_t         task,
6092         __unused ipc_voucher_t  new_voucher,
6093         ipc_voucher_t          *in_out_old_voucher)
6094 {
6095         /*
6096          * Currently this function is only called from a MIG generated
6097          * routine which doesn't release the reference on the voucher
6098          * addressed by in_out_old_voucher. To avoid leaking this reference,
6099          * a call to release it has been added here.
6100          */
6101         ipc_voucher_release(*in_out_old_voucher);
6102         return KERN_NOT_SUPPORTED;
6103 }
6104
6105 void
6106 task_set_gpu_denied(task_t task, boolean_t denied)
6107 {
6108         task_lock(task);
6109
6110         if (denied) {
6111                 task->t_flags |= TF_GPU_DENIED;
6112         } else {
6113                 task->t_flags &= ~TF_GPU_DENIED;
6114         }
6115
6116         task_unlock(task);
6117 }
6118
6119 boolean_t
6120 task_is_gpu_denied(task_t task)
6121 {
6122         /* We don't need the lock to read this flag */
6123         return (task->t_flags & TF_GPU_DENIED) ? TRUE : FALSE;
6124 }
6125
6126
6127 uint64_t
6128 get_task_memory_region_count(task_t task)
6129 {
6130         vm_map_t map;
6131         map = (task == kernel_task) ? kernel_map: task->map;
6132         return (uint64_t)get_map_nentries(map);
6133 }
6134
6135 static void
6136 kdebug_trace_dyld_internal(uint32_t base_code,
6137     struct dyld_kernel_image_info *info)
6138 {
6139         static_assert(sizeof(info->uuid) >= 16);
6140
6141 #if defined(__LP64__)
6142         uint64_t *uuid = (uint64_t *)&(info->uuid);
6143
6144         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
6145             KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code), uuid[0],
6146             uuid[1], info->load_addr,
6147             (uint64_t)info->fsid.val[0] | ((uint64_t)info->fsid.val[1] << 32),
6148             0);
6149         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
6150             KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 1),
6151             (uint64_t)info->fsobjid.fid_objno |
6152             ((uint64_t)info->fsobjid.fid_generation << 32),
6153             0, 0, 0, 0);
6154 #else /* defined(__LP64__) */
6155         uint32_t *uuid = (uint32_t *)&(info->uuid);
6156
6157         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
6158             KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 2), uuid[0],
6159             uuid[1], uuid[2], uuid[3], 0);
6160         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
6161             KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 3),
6162             (uint32_t)info->load_addr, info->fsid.val[0], info->fsid.val[1],
6163             info->fsobjid.fid_objno, 0);
6164         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
6165             KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 4),
6166             info->fsobjid.fid_generation, 0, 0, 0, 0);
6167 #endif /* !defined(__LP64__) */
6168 }
6169
6170 static kern_return_t
6171 kdebug_trace_dyld(task_t task, uint32_t base_code,
6172     vm_map_copy_t infos_copy, mach_msg_type_number_t infos_len)
6173 {
6174         kern_return_t kr;
6175         dyld_kernel_image_info_array_t infos;
6176         vm_map_offset_t map_data;
6177         vm_offset_t data;
6178
6179         if (!infos_copy) {
6180                 return KERN_INVALID_ADDRESS;
6181         }
6182
6183         if (!kdebug_enable ||
6184             !kdebug_debugid_enabled(KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, 0))) {
6185                 vm_map_copy_discard(infos_copy);
6186                 return KERN_SUCCESS;
6187         }
6188
6189         if (task == NULL || task != current_task()) {
6190                 return KERN_INVALID_TASK;
6191         }
6192
6193         kr = vm_map_copyout(ipc_kernel_map, &map_data, (vm_map_copy_t)infos_copy);
6194         if (kr != KERN_SUCCESS) {
6195                 return kr;
6196         }
6197
6198         infos = CAST_DOWN(dyld_kernel_image_info_array_t, map_data);
6199
6200         for (mach_msg_type_number_t i = 0; i < infos_len; i++) {
6201                 kdebug_trace_dyld_internal(base_code, &(infos[i]));
6202         }
6203
6204         data = CAST_DOWN(vm_offset_t, map_data);
6205         mach_vm_deallocate(ipc_kernel_map, data, infos_len * sizeof(infos[0]));
6206         return KERN_SUCCESS;
6207 }
6208
6209 kern_return_t
6210 task_register_dyld_image_infos(task_t task,
6211     dyld_kernel_image_info_array_t infos_copy,
6212     mach_msg_type_number_t infos_len)
6213 {
6214         return kdebug_trace_dyld(task, DBG_DYLD_UUID_MAP_A,
6215                    (vm_map_copy_t)infos_copy, infos_len);
6216 }
6217
6218 kern_return_t
6219 task_unregister_dyld_image_infos(task_t task,
6220     dyld_kernel_image_info_array_t infos_copy,
6221     mach_msg_type_number_t infos_len)
6222 {
6223         return kdebug_trace_dyld(task, DBG_DYLD_UUID_UNMAP_A,
6224                    (vm_map_copy_t)infos_copy, infos_len);
6225 }
6226
/*
 * Stub: retrieving dyld image infos is not implemented here.
 * All parameters are ignored; always returns KERN_NOT_SUPPORTED.
 */
kern_return_t
task_get_dyld_image_infos(__unused task_t task,
    __unused dyld_kernel_image_info_array_t * dyld_images,
    __unused mach_msg_type_number_t * dyld_imagesCnt)
{
        return KERN_NOT_SUPPORTED;
}
6234
6235 kern_return_t
6236 task_register_dyld_shared_cache_image_info(task_t task,
6237     dyld_kernel_image_info_t cache_img,
6238     __unused boolean_t no_cache,
6239     __unused boolean_t private_cache)
6240 {
6241         if (task == NULL || task != current_task()) {
6242                 return KERN_INVALID_TASK;
6243         }
6244
6245         kdebug_trace_dyld_internal(DBG_DYLD_UUID_SHARED_CACHE_A, &cache_img);
6246         return KERN_SUCCESS;
6247 }
6248
/*
 * Stub: setting the dyld state is not implemented here.
 * Both parameters are ignored; always returns KERN_NOT_SUPPORTED.
 */
kern_return_t
task_register_dyld_set_dyld_state(__unused task_t task,
    __unused uint8_t dyld_state)
{
        return KERN_NOT_SUPPORTED;
}
6255
/*
 * Stub: querying the dyld process state is not implemented here.
 * Both parameters are ignored; always returns KERN_NOT_SUPPORTED.
 */
kern_return_t
task_register_dyld_get_process_state(__unused task_t task,
    __unused dyld_kernel_process_info_t * dyld_process_state)
{
        return KERN_NOT_SUPPORTED;
}
6262
6263 kern_return_t
6264 task_inspect(task_inspect_t task_insp, task_inspect_flavor_t flavor,
6265     task_inspect_info_t info_out, mach_msg_type_number_t *size_in_out)
6266 {
6267 #if MONOTONIC
6268         task_t task = (task_t)task_insp;
6269         kern_return_t kr = KERN_SUCCESS;
6270         mach_msg_type_number_t size;
6271
6272         if (task == TASK_NULL) {
6273                 return KERN_INVALID_ARGUMENT;
6274         }
6275
6276         size = *size_in_out;
6277
6278         switch (flavor) {
6279         case TASK_INSPECT_BASIC_COUNTS: {
6280                 struct task_inspect_basic_counts *bc;
6281                 uint64_t task_counts[MT_CORE_NFIXED] = { 0 };
6282
6283                 if (size < TASK_INSPECT_BASIC_COUNTS_COUNT) {
6284                         kr = KERN_INVALID_ARGUMENT;
6285                         break;
6286                 }
6287
6288                 mt_fixed_task_counts(task, task_counts);
6289                 bc = (struct task_inspect_basic_counts *)info_out;
6290 #ifdef MT_CORE_INSTRS
6291                 bc->instructions = task_counts[MT_CORE_INSTRS];
6292 #else /* defined(MT_CORE_INSTRS) */
6293                 bc->instructions = 0;
6294 #endif /* !defined(MT_CORE_INSTRS) */
6295                 bc->cycles = task_counts[MT_CORE_CYCLES];
6296                 size = TASK_INSPECT_BASIC_COUNTS_COUNT;
6297                 break;
6298         }
6299         default:
6300                 kr = KERN_INVALID_ARGUMENT;
6301                 break;
6302         }
6303
6304         if (kr == KERN_SUCCESS) {
6305                 *size_in_out = size;
6306         }
6307         return kr;
6308 #else /* MONOTONIC */
6309 #pragma unused(task_insp, flavor, info_out, size_in_out)
6310         return KERN_NOT_SUPPORTED;
6311 #endif /* !MONOTONIC */
6312 }
6313
6314 #if CONFIG_SECLUDED_MEMORY
6315 int num_tasks_can_use_secluded_mem = 0;
6316
6317 void
6318 task_set_can_use_secluded_mem(
6319         task_t          task,
6320         boolean_t       can_use_secluded_mem)
6321 {
6322         if (!task->task_could_use_secluded_mem) {
6323                 return;
6324         }
6325         task_lock(task);
6326         task_set_can_use_secluded_mem_locked(task, can_use_secluded_mem);
6327         task_unlock(task);
6328 }
6329
6330 void
6331 task_set_can_use_secluded_mem_locked(
6332         task_t          task,
6333         boolean_t       can_use_secluded_mem)
6334 {
6335         assert(task->task_could_use_secluded_mem);
6336         if (can_use_secluded_mem &&
6337             secluded_for_apps && /* global boot-arg */
6338             !task->task_can_use_secluded_mem) {
6339                 assert(num_tasks_can_use_secluded_mem >= 0);
6340                 OSAddAtomic(+1,
6341                     (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
6342                 task->task_can_use_secluded_mem = TRUE;
6343         } else if (!can_use_secluded_mem &&
6344             task->task_can_use_secluded_mem) {
6345                 assert(num_tasks_can_use_secluded_mem > 0);
6346                 OSAddAtomic(-1,
6347                     (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
6348                 task->task_can_use_secluded_mem = FALSE;
6349         }
6350 }
6351
/*
 * Store could_use_secluded_mem in the task's task_could_use_secluded_mem
 * field. No lock is taken here.
 */
void
task_set_could_use_secluded_mem(
        task_t          task,
        boolean_t       could_use_secluded_mem)
{
        task->task_could_use_secluded_mem = could_use_secluded_mem;
}
6359
/*
 * Store could_also_use_secluded_mem in the task's
 * task_could_also_use_secluded_mem field. No lock is taken here.
 */
void
task_set_could_also_use_secluded_mem(
        task_t          task,
        boolean_t       could_also_use_secluded_mem)
{
        task->task_could_also_use_secluded_mem = could_also_use_secluded_mem;
}
6367
6368 boolean_t
6369 task_can_use_secluded_mem(
6370         task_t          task,
6371         boolean_t       is_alloc)
6372 {
6373         if (task->task_can_use_secluded_mem) {
6374                 assert(task->task_could_use_secluded_mem);
6375                 assert(num_tasks_can_use_secluded_mem > 0);
6376                 return TRUE;
6377         }
6378         if (task->task_could_also_use_secluded_mem &&
6379             num_tasks_can_use_secluded_mem > 0) {
6380                 assert(num_tasks_can_use_secluded_mem > 0);
6381                 return TRUE;
6382         }
6383
6384         /*
6385          * If a single task is using more than some amount of
6386          * memory, allow it to dip into secluded and also begin
6387          * suppression of secluded memory until the tasks exits.
6388          */
6389         if (is_alloc && secluded_shutoff_trigger != 0) {
6390                 uint64_t phys_used = get_task_phys_footprint(task);
6391                 if (phys_used > secluded_shutoff_trigger) {
6392                         start_secluded_suppression(task);
6393                         return TRUE;
6394                 }
6395         }
6396
6397         return FALSE;
6398 }
6399
/*
 * Return the task's task_could_use_secluded_mem field.
 */
boolean_t
task_could_use_secluded_mem(
        task_t  task)
{
        return task->task_could_use_secluded_mem;
}
6406 #endif /* CONFIG_SECLUDED_MEMORY */
6407
/*
 * Return a pointer to the io_user_clients queue head embedded in the task.
 * The pointer refers into the task structure itself; nothing is copied.
 */
queue_head_t *
task_io_user_clients(task_t task)
{
        return &task->io_user_clients;
}
6413
/*
 * Copy task fields from src_task to dst_task.
 * Only the vtimers field is copied here.
 * NOTE(review): the name suggests this is used when a task is replaced
 * during exec; confirm against the callers.
 */
void
task_copy_fields_for_exec(task_t dst_task, task_t src_task)
{
        dst_task->vtimers = src_task->vtimers;
}
6419
6420 #if DEVELOPMENT || DEBUG
6421 int vm_region_footprint = 0;
6422 #endif /* DEVELOPMENT || DEBUG */
6423
6424 boolean_t
6425 task_self_region_footprint(void)
6426 {
6427 #if DEVELOPMENT || DEBUG
6428         if (vm_region_footprint) {
6429                 /* system-wide override */
6430                 return TRUE;
6431         }
6432 #endif /* DEVELOPMENT || DEBUG */
6433         return current_task()->task_region_footprint;
6434 }
6435
6436 void
6437 task_self_region_footprint_set(
6438         boolean_t newval)
6439 {
6440         task_t  curtask;
6441
6442         curtask = current_task();
6443         task_lock(curtask);
6444         if (newval) {
6445                 curtask->task_region_footprint = TRUE;
6446         } else {
6447                 curtask->task_region_footprint = FALSE;
6448         }
6449         task_unlock(curtask);
6450 }
6451
6452 void
6453 task_set_darkwake_mode(task_t task, boolean_t set_mode)
6454 {
6455         assert(task);
6456
6457         task_lock(task);
6458
6459         if (set_mode) {
6460                 task->t_flags |= TF_DARKWAKE_MODE;
6461         } else {
6462                 task->t_flags &= ~(TF_DARKWAKE_MODE);
6463         }
6464
6465         task_unlock(task);
6466 }
6467
6468 boolean_t
6469 task_get_darkwake_mode(task_t task)
6470 {
6471         assert(task);
6472         return (task->t_flags & TF_DARKWAKE_MODE) != 0;
6473 }
6474
6475 #if __arm64__
/*
 * Store new_val in the task's task_legacy_footprint field while holding
 * the task lock. Only compiled on arm64.
 */
void
task_set_legacy_footprint(
        task_t          task,
        boolean_t       new_val)
{
        task_lock(task);
        task->task_legacy_footprint = new_val;
        task_unlock(task);
}
6485 #endif /* __arm64__ */