osfmk/kern/task.c

   1 /*
   2  * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * @OSF_FREE_COPYRIGHT@
  30  */
  31 /*
  32  * Mach Operating System
  33  * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
  34  * All Rights Reserved.
  35  *
  36  * Permission to use, copy, modify and distribute this software and its
  37  * documentation is hereby granted, provided that both the copyright
  38  * notice and this permission notice appear in all copies of the
  39  * software, derivative works or modified versions, and any portions
  40  * thereof, and that both notices appear in supporting documentation.
  41  *
  42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
  44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  45  *
  46  * Carnegie Mellon requests users of this software to return to
  47  *
  48  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  49  *  School of Computer Science
  50  *  Carnegie Mellon University
  51  *  Pittsburgh PA 15213-3890
  52  *
  53  * any improvements or extensions that they make and grant Carnegie Mellon
  54  * the rights to redistribute these changes.
  55  */
  56 /*
  57  *      File:   kern/task.c
  58  *      Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub,
  59  *              David Black
  60  *
  61  *      Task management primitives implementation.
  62  */
  63 /*
  64  * Copyright (c) 1993 The University of Utah and
  65  * the Computer Systems Laboratory (CSL).  All rights reserved.
  66  *
  67  * Permission to use, copy, modify and distribute this software and its
  68  * documentation is hereby granted, provided that both the copyright
  69  * notice and this permission notice appear in all copies of the
  70  * software, derivative works or modified versions, and any portions
  71  * thereof, and that both notices appear in supporting documentation.
  72  *
  73  * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
  74  * IS" CONDITION.  THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
  75  * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  76  *
  77  * CSL requests users of this software to return to csl-dist@cs.utah.edu any
  78  * improvements that they make and grant CSL redistribution rights.
  79  *
  80  */
  81 /*
  82  * NOTICE: This file was modified by McAfee Research in 2004 to introduce
  83  * support for mandatory and extensible security protections.  This notice
  84  * is included in support of clause 2.2 (b) of the Apple Public License,
  85  * Version 2.0.
  86  * Copyright (c) 2005 SPARTA, Inc.
  87  */
  88
  89 #include <mach/mach_types.h>
  90 #include <mach/boolean.h>
  91 #include <mach/host_priv.h>
  92 #include <mach/machine/vm_types.h>
  93 #include <mach/vm_param.h>
  94 #include <mach/mach_vm.h>
  95 #include <mach/semaphore.h>
  96 #include <mach/task_info.h>
  97 #include <mach/task_inspect.h>
  98 #include <mach/task_special_ports.h>
  99 #include <mach/sdt.h>
 100
 101 #include <ipc/ipc_importance.h>
 102 #include <ipc/ipc_types.h>
 103 #include <ipc/ipc_space.h>
 104 #include <ipc/ipc_entry.h>
 105 #include <ipc/ipc_hash.h>
 106
 107 #include <kern/kern_types.h>
 108 #include <kern/mach_param.h>
 109 #include <kern/misc_protos.h>
 110 #include <kern/task.h>
 111 #include <kern/thread.h>
 112 #include <kern/coalition.h>
 113 #include <kern/zalloc.h>
 114 #include <kern/kalloc.h>
 115 #include <kern/kern_cdata.h>
 116 #include <kern/processor.h>
 117 #include <kern/sched_prim.h>    /* for thread_wakeup */
 118 #include <kern/ipc_tt.h>
 119 #include <kern/host.h>
 120 #include <kern/clock.h>
 121 #include <kern/timer.h>
 122 #include <kern/assert.h>
 123 #include <kern/sync_lock.h>
 124 #include <kern/affinity.h>
 125 #include <kern/exc_resource.h>
 126 #include <kern/machine.h>
 127 #include <kern/policy_internal.h>
 128 #include <kern/restartable.h>
 129
 130 #include <corpses/task_corpse.h>
 131 #if CONFIG_TELEMETRY
 132 #include <kern/telemetry.h>
 133 #endif
 134
 135 #if MONOTONIC
 136 #include <kern/monotonic.h>
 137 #include <machine/monotonic.h>
 138 #endif /* MONOTONIC */
 139
 140 #include <os/log.h>
 141
 142 #include <vm/pmap.h>
 143 #include <vm/vm_map.h>
 144 #include <vm/vm_kern.h>         /* for kernel_map, ipc_kernel_map */
 145 #include <vm/vm_pageout.h>
 146 #include <vm/vm_protos.h>
 147 #include <vm/vm_purgeable_internal.h>
 148 #include <vm/vm_compressor_pager.h>
 149
 150 #include <sys/resource.h>
 151 #include <sys/signalvar.h> /* for coredump */
 152 #include <sys/bsdtask_info.h>
 153 /*
 154  * Exported interfaces
 155  */
 156
 157 #include <mach/task_server.h>
 158 #include <mach/mach_host_server.h>
 159 #include <mach/host_security_server.h>
 160 #include <mach/mach_port_server.h>
 161
 162 #include <vm/vm_shared_region.h>
 163
 164 #include <libkern/OSDebug.h>
 165 #include <libkern/OSAtomic.h>
 166 #include <libkern/section_keywords.h>
 167
 168 #include <mach-o/loader.h>
 169
 170 #if CONFIG_ATM
 171 #include <atm/atm_internal.h>
 172 #endif
 173
 174 #include <kern/sfi.h>           /* picks up ledger.h */
 175
 176 #if CONFIG_MACF
 177 #include <security/mac_mach_internal.h>
 178 #endif
 179
 180 #if KPERF
 181 extern int kpc_force_all_ctrs(task_t, int);
 182 #endif
 183
 184 task_t                  kernel_task;
 185 zone_t                  task_zone;
 186 lck_attr_t      task_lck_attr;
 187 lck_grp_t       task_lck_grp;
 188 lck_grp_attr_t  task_lck_grp_attr;
 189
 190 extern int exc_via_corpse_forking;
 191 extern int corpse_for_fatal_memkill;
 192 extern boolean_t proc_send_synchronous_EXC_RESOURCE(void *p);
 193
 194 /* Flag set by core audio when audio is playing. Used to stifle EXC_RESOURCE generation when active. */
 195 int audio_active = 0;
 196
 197 zinfo_usage_store_t tasks_tkm_private;
 198 zinfo_usage_store_t tasks_tkm_shared;
 199
 200 /* A container to accumulate statistics for expired tasks */
 201 expired_task_statistics_t               dead_task_statistics;
 202 lck_spin_t              dead_task_statistics_lock;
 203
 204 ledger_template_t task_ledger_template = NULL;
 205
 206 SECURITY_READ_ONLY_LATE(struct _task_ledger_indices) task_ledgers __attribute__((used)) =
 207 {.cpu_time = -1,
 208  .tkm_private = -1,
 209  .tkm_shared = -1,
 210  .phys_mem = -1,
 211  .wired_mem = -1,
 212  .internal = -1,
 213  .iokit_mapped = -1,
 214  .alternate_accounting = -1,
 215  .alternate_accounting_compressed = -1,
 216  .page_table = -1,
 217  .phys_footprint = -1,
 218  .internal_compressed = -1,
 219  .purgeable_volatile = -1,
 220  .purgeable_nonvolatile = -1,
 221  .purgeable_volatile_compressed = -1,
 222  .purgeable_nonvolatile_compressed = -1,
 223  .tagged_nofootprint = -1,
 224  .tagged_footprint = -1,
 225  .tagged_nofootprint_compressed = -1,
 226  .tagged_footprint_compressed = -1,
 227  .network_volatile = -1,
 228  .network_nonvolatile = -1,
 229  .network_volatile_compressed = -1,
 230  .network_nonvolatile_compressed = -1,
 231  .media_nofootprint = -1,
 232  .media_footprint = -1,
 233  .media_nofootprint_compressed = -1,
 234  .media_footprint_compressed = -1,
 235  .graphics_nofootprint = -1,
 236  .graphics_footprint = -1,
 237  .graphics_nofootprint_compressed = -1,
 238  .graphics_footprint_compressed = -1,
 239  .neural_nofootprint = -1,
 240  .neural_footprint = -1,
 241  .neural_nofootprint_compressed = -1,
 242  .neural_footprint_compressed = -1,
 243  .platform_idle_wakeups = -1,
 244  .interrupt_wakeups = -1,
 245 #if !CONFIG_EMBEDDED
 246  .sfi_wait_times = { 0 /* initialized at runtime */},
 247 #endif /* !CONFIG_EMBEDDED */
 248  .cpu_time_billed_to_me = -1,
 249  .cpu_time_billed_to_others = -1,
 250  .physical_writes = -1,
 251  .logical_writes = -1,
 252  .logical_writes_to_external = -1,
 253 #if DEBUG || DEVELOPMENT
 254  .pages_grabbed = -1,
 255  .pages_grabbed_kern = -1,
 256  .pages_grabbed_iopl = -1,
 257  .pages_grabbed_upl = -1,
 258 #endif
 259  .energy_billed_to_me = -1,
 260  .energy_billed_to_others = -1};
 261
 262 /* System sleep state */
 263 boolean_t tasks_suspend_state;
 264
 265
 266 void init_task_ledgers(void);
 267 void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1);
 268 void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1);
 269 void task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1);
 270 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void);
 271 void __attribute__((noinline)) PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, boolean_t is_fatal);
 272 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor);
 273
 274 kern_return_t task_suspend_internal(task_t);
 275 kern_return_t task_resume_internal(task_t);
 276 static kern_return_t task_start_halt_locked(task_t task, boolean_t should_mark_corpse);
 277
 278 extern kern_return_t iokit_task_terminate(task_t task);
 279 extern void          iokit_task_app_suspended_changed(task_t task);
 280
 281 extern kern_return_t exception_deliver(thread_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t, struct exception_action *, lck_mtx_t *);
 282 extern void bsd_copythreadname(void *dst_uth, void *src_uth);
 283 extern kern_return_t thread_resume(thread_t thread);
 284
 285 // Warn tasks when they hit 80% of their memory limit.
 286 #define PHYS_FOOTPRINT_WARNING_LEVEL 80
 287
 288 #define TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT              150 /* wakeups per second */
 289 #define TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL   300 /* in seconds. */
 290
 291 /*
 292  * Level (in terms of percentage of the limit) at which the wakeups monitor triggers telemetry.
 293  *
 294  * (ie when the task's wakeups rate exceeds 70% of the limit, start taking user
 295  *  stacktraces, aka micro-stackshots)
 296  */
 297 #define TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER        70
 298
 299 int task_wakeups_monitor_interval; /* In seconds. Time period over which wakeups rate is observed */
 300 int task_wakeups_monitor_rate;     /* In hz. Maximum allowable wakeups per task before EXC_RESOURCE is sent */
 301
 302 int task_wakeups_monitor_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
 303
  304 int disable_exc_resource; /* Global override to suppress EXC_RESOURCE for resource monitor violations. */
 305
 306 ledger_amount_t max_task_footprint = 0;  /* Per-task limit on physical memory consumption in bytes     */
 307 int max_task_footprint_warning_level = 0;  /* Per-task limit warning percentage */
 308 int max_task_footprint_mb = 0;  /* Per-task limit on physical memory consumption in megabytes */
 309
 310 /* I/O Monitor Limits */
 311 #define IOMON_DEFAULT_LIMIT                     (20480ull)      /* MB of logical/physical I/O */
 312 #define IOMON_DEFAULT_INTERVAL                  (86400ull)      /* in seconds */
 313
 314 uint64_t task_iomon_limit_mb;           /* Per-task I/O monitor limit in MBs */
 315 uint64_t task_iomon_interval_secs;      /* Per-task I/O monitor interval in secs */
 316
 317 #define IO_TELEMETRY_DEFAULT_LIMIT              (10ll * 1024ll * 1024ll)
  318 int64_t io_telemetry_limit;                     /* Threshold to take a microstackshot (0 indicates that I/O telemetry is turned off) */
 319 int64_t global_logical_writes_count = 0;        /* Global count for logical writes */
 320 int64_t global_logical_writes_to_external_count = 0;        /* Global count for logical writes to external storage*/
 321 static boolean_t global_update_logical_writes(int64_t, int64_t*);
 322
 323 #define TASK_MAX_THREAD_LIMIT 256
 324
 325 #if MACH_ASSERT
 326 int pmap_ledgers_panic = 1;
 327 int pmap_ledgers_panic_leeway = 3;
 328 #endif /* MACH_ASSERT */
 329
 330 int task_max = CONFIG_TASK_MAX; /* Max number of tasks */
 331
 332 #if CONFIG_COREDUMP
 333 int hwm_user_cores = 0; /* high watermark violations generate user core files */
 334 #endif
 335
 336 #ifdef MACH_BSD
 337 extern uint32_t proc_platform(struct proc *);
 338 extern uint32_t proc_sdk(struct proc *);
 339 extern void     proc_getexecutableuuid(void *, unsigned char *, unsigned long);
 340 extern int      proc_pid(struct proc *p);
 341 extern int      proc_selfpid(void);
 342 extern struct proc *current_proc(void);
 343 extern char     *proc_name_address(struct proc *p);
 344 extern uint64_t get_dispatchqueue_offset_from_proc(void *);
 345 extern int kevent_proc_copy_uptrs(void *proc, uint64_t *buf, int bufsize);
 346 extern void workq_proc_suspended(struct proc *p);
 347 extern void workq_proc_resumed(struct proc *p);
 348
 349 #if CONFIG_MEMORYSTATUS
 350 extern void     proc_memstat_terminated(struct proc* p, boolean_t set);
 351 extern void     memorystatus_on_ledger_footprint_exceeded(int warning, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal);
 352 extern void     memorystatus_log_exception(const int max_footprint_mb, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal);
 353 extern boolean_t memorystatus_allowed_vm_map_fork(task_t task);
 354 extern uint64_t  memorystatus_available_memory_internal(proc_t p);
 355
 356 #if DEVELOPMENT || DEBUG
 357 extern void memorystatus_abort_vm_map_fork(task_t);
 358 #endif
 359
 360 #endif /* CONFIG_MEMORYSTATUS */
 361
 362 #endif /* MACH_BSD */
 363
 364 #if DEVELOPMENT || DEBUG
 365 int exc_resource_threads_enabled;
 366 #endif /* DEVELOPMENT || DEBUG */
 367
 368 #if (DEVELOPMENT || DEBUG)
 369 uint32_t task_exc_guard_default = TASK_EXC_GUARD_MP_DELIVER | TASK_EXC_GUARD_MP_ONCE | TASK_EXC_GUARD_MP_CORPSE |
 370     TASK_EXC_GUARD_VM_DELIVER | TASK_EXC_GUARD_VM_ONCE | TASK_EXC_GUARD_VM_CORPSE;
 371 #else
 372 uint32_t task_exc_guard_default = 0;
 373 #endif
 374
 375 /* Forwards */
 376
 377 static void task_hold_locked(task_t task);
 378 static void task_wait_locked(task_t task, boolean_t until_not_runnable);
 379 static void task_release_locked(task_t task);
 380
 381 static void task_synchronizer_destroy_all(task_t task);
 382 static os_ref_count_t
 383 task_add_turnstile_watchports_locked(
 384         task_t                      task,
 385         struct task_watchports      *watchports,
 386         struct task_watchport_elem  **previous_elem_array,
 387         ipc_port_t                  *portwatch_ports,
 388         uint32_t                    portwatch_count);
 389
 390 static os_ref_count_t
 391 task_remove_turnstile_watchports_locked(
 392         task_t                 task,
 393         struct task_watchports *watchports,
 394         ipc_port_t             *port_freelist);
 395
 396 static struct task_watchports *
 397 task_watchports_alloc_init(
 398         task_t        task,
 399         thread_t      thread,
 400         uint32_t      count);
 401
 402 static void
 403 task_watchports_deallocate(
 404         struct task_watchports *watchports);
 405
 406 void
 407 task_set_64bit(
 408         task_t task,
 409         boolean_t is_64bit,
 410         boolean_t is_64bit_data)
 411 {
 412 #if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
 413         thread_t thread;
 414 #endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */
 415
 416         task_lock(task);
 417
 418         /*
 419          * Switching to/from 64-bit address spaces
 420          */
 421         if (is_64bit) {
 422                 if (!task_has_64Bit_addr(task)) {
 423                         task_set_64Bit_addr(task);
 424                 }
 425         } else {
 426                 if (task_has_64Bit_addr(task)) {
 427                         task_clear_64Bit_addr(task);
 428                 }
 429         }
 430
 431         /*
 432          * Switching to/from 64-bit register state.
 433          */
 434         if (is_64bit_data) {
 435                 if (task_has_64Bit_data(task)) {
 436                         goto out;
 437                 }
 438
 439                 task_set_64Bit_data(task);
 440         } else {
 441                 if (!task_has_64Bit_data(task)) {
 442                         goto out;
 443                 }
 444
 445                 task_clear_64Bit_data(task);
 446         }
 447
 448         /* FIXME: On x86, the thread save state flavor can diverge from the
 449          * task's 64-bit feature flag due to the 32-bit/64-bit register save
 450          * state dichotomy. Since we can be pre-empted in this interval,
 451          * certain routines may observe the thread as being in an inconsistent
 452          * state with respect to its task's 64-bitness.
 453          */
 454
 455 #if defined(__x86_64__) || defined(__arm64__)
 456         queue_iterate(&task->threads, thread, thread_t, task_threads) {
 457                 thread_mtx_lock(thread);
 458                 machine_thread_switch_addrmode(thread);
 459                 thread_mtx_unlock(thread);
 460
 461 #if defined(__arm64__)
 462                 /* specifically, if running on H9 */
 463                 if (thread == current_thread()) {
 464                         uint64_t arg1, arg2;
 465                         int urgency;
 466                         spl_t spl = splsched();
 467                         /*
 468                          * This call tell that the current thread changed it's 32bitness.
 469                          * Other thread were no more on core when 32bitness was changed,
 470                          * but current_thread() is on core and the previous call to
 471                          * machine_thread_going_on_core() gave 32bitness which is now wrong.
 472                          *
 473                          * This is needed for bring-up, a different callback should be used
 474                          * in the future.
 475                          *
 476                          * TODO: Remove this callout when we no longer support 32-bit code on H9
 477                          */
 478                         thread_lock(thread);
 479                         urgency = thread_get_urgency(thread, &arg1, &arg2);
 480                         machine_thread_going_on_core(thread, urgency, 0, 0, mach_approximate_time());
 481                         thread_unlock(thread);
 482                         splx(spl);
 483                 }
 484 #endif /* defined(__arm64__) */
 485         }
 486 #endif /* defined(__x86_64__) || defined(__arm64__) */
 487
 488 out:
 489         task_unlock(task);
 490 }
 491
 492 boolean_t
 493 task_get_64bit_data(task_t task)
 494 {
 495         return task_has_64Bit_data(task);
 496 }
 497
 498 void
 499 task_set_platform_binary(
 500         task_t task,
 501         boolean_t is_platform)
 502 {
 503         task_lock(task);
 504         if (is_platform) {
 505                 task->t_flags |= TF_PLATFORM;
 506                 /* set exc guard default behavior for first-party code */
 507                 task->task_exc_guard = (task_exc_guard_default & TASK_EXC_GUARD_ALL);
 508         } else {
 509                 task->t_flags &= ~(TF_PLATFORM);
 510                 /* set exc guard default behavior for third-party code */
 511                 task->task_exc_guard = ((task_exc_guard_default >> TASK_EXC_GUARD_THIRD_PARTY_DEFAULT_SHIFT) & TASK_EXC_GUARD_ALL);
 512         }
 513         task_unlock(task);
 514 }
 515
 516 /*
 517  * Set or clear per-task TF_CA_CLIENT_WI flag according to specified argument.
 518  * Returns "false" if flag is already set, and "true" in other cases.
 519  */
 520 bool
 521 task_set_ca_client_wi(
 522         task_t task,
 523         boolean_t set_or_clear)
 524 {
 525         bool ret = true;
 526         task_lock(task);
 527         if (set_or_clear) {
 528                 /* Tasks can have only one CA_CLIENT work interval */
 529                 if (task->t_flags & TF_CA_CLIENT_WI) {
 530                         ret = false;
 531                 } else {
 532                         task->t_flags |= TF_CA_CLIENT_WI;
 533                 }
 534         } else {
 535                 task->t_flags &= ~TF_CA_CLIENT_WI;
 536         }
 537         task_unlock(task);
 538         return ret;
 539 }
 540
 541 void
 542 task_set_dyld_info(
 543         task_t task,
 544         mach_vm_address_t addr,
 545         mach_vm_size_t size)
 546 {
 547         task_lock(task);
 548         task->all_image_info_addr = addr;
 549         task->all_image_info_size = size;
 550         task_unlock(task);
 551 }
 552
 553 void
 554 task_set_mach_header_address(
 555         task_t task,
 556         mach_vm_address_t addr)
 557 {
 558         task_lock(task);
 559         task->mach_header_vm_address = addr;
 560         task_unlock(task);
 561 }
 562
 563 void
 564 task_atm_reset(__unused task_t task)
 565 {
 566 #if CONFIG_ATM
 567         if (task->atm_context != NULL) {
 568                 atm_task_descriptor_destroy(task->atm_context);
 569                 task->atm_context = NULL;
 570         }
 571 #endif
 572 }
 573
 574 void
 575 task_bank_reset(__unused task_t task)
 576 {
 577         if (task->bank_context != NULL) {
 578                 bank_task_destroy(task);
 579         }
 580 }
 581
 582 /*
 583  * NOTE: This should only be called when the P_LINTRANSIT
 584  *       flag is set (the proc_trans lock is held) on the
 585  *       proc associated with the task.
 586  */
 587 void
 588 task_bank_init(__unused task_t task)
 589 {
 590         if (task->bank_context != NULL) {
 591                 panic("Task bank init called with non null bank context for task: %p and bank_context: %p", task, task->bank_context);
 592         }
 593         bank_task_initialize(task);
 594 }
 595
 596 void
 597 task_set_did_exec_flag(task_t task)
 598 {
 599         task->t_procflags |= TPF_DID_EXEC;
 600 }
 601
 602 void
 603 task_clear_exec_copy_flag(task_t task)
 604 {
 605         task->t_procflags &= ~TPF_EXEC_COPY;
 606 }
 607
 608 event_t
 609 task_get_return_wait_event(task_t task)
 610 {
 611         return (event_t)&task->returnwait_inheritor;
 612 }
 613
 614 void
 615 task_clear_return_wait(task_t task, uint32_t flags)
 616 {
 617         if (flags & TCRW_CLEAR_INITIAL_WAIT) {
 618                 thread_wakeup(task_get_return_wait_event(task));
 619         }
 620
 621         if (flags & TCRW_CLEAR_FINAL_WAIT) {
 622                 is_write_lock(task->itk_space);
 623
 624                 task->t_returnwaitflags &= ~TRW_LRETURNWAIT;
 625                 task->returnwait_inheritor = NULL;
 626
 627                 if (task->t_returnwaitflags & TRW_LRETURNWAITER) {
 628                         struct turnstile *turnstile = turnstile_prepare((uintptr_t) task_get_return_wait_event(task),
 629                             NULL, TURNSTILE_NULL, TURNSTILE_ULOCK);
 630
 631                         waitq_wakeup64_all(&turnstile->ts_waitq,
 632                             CAST_EVENT64_T(task_get_return_wait_event(task)),
 633                             THREAD_AWAKENED, 0);
 634
 635                         turnstile_update_inheritor(turnstile, NULL,
 636                             TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD);
 637                         turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_HELD);
 638
 639                         turnstile_complete((uintptr_t) task_get_return_wait_event(task), NULL, NULL, TURNSTILE_ULOCK);
 640                         turnstile_cleanup();
 641                         task->t_returnwaitflags &= ~TRW_LRETURNWAITER;
 642                 }
 643                 is_write_unlock(task->itk_space);
 644         }
 645 }
 646
 647 void __attribute__((noreturn))
 648 task_wait_to_return(void)
 649 {
 650         task_t task = current_task();
 651
 652         is_write_lock(task->itk_space);
 653
 654         if (task->t_returnwaitflags & TRW_LRETURNWAIT) {
 655                 struct turnstile *turnstile = turnstile_prepare((uintptr_t) task_get_return_wait_event(task),
 656                     NULL, TURNSTILE_NULL, TURNSTILE_ULOCK);
 657
 658                 do {
 659                         task->t_returnwaitflags |= TRW_LRETURNWAITER;
 660                         turnstile_update_inheritor(turnstile, task->returnwait_inheritor,
 661                             (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));
 662
 663                         waitq_assert_wait64(&turnstile->ts_waitq,
 664                             CAST_EVENT64_T(task_get_return_wait_event(task)),
 665                             THREAD_UNINT, TIMEOUT_WAIT_FOREVER);
 666
 667                         is_write_unlock(task->itk_space);
 668
 669                         turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_NOT_HELD);
 670
 671                         thread_block(THREAD_CONTINUE_NULL);
 672
 673                         is_write_lock(task->itk_space);
 674                 } while (task->t_returnwaitflags & TRW_LRETURNWAIT);
 675
 676                 turnstile_complete((uintptr_t) task_get_return_wait_event(task), NULL, NULL, TURNSTILE_ULOCK);
 677         }
 678
 679         is_write_unlock(task->itk_space);
 680         turnstile_cleanup();
 681
 682
 683 #if CONFIG_MACF
 684         /*
 685          * Before jumping to userspace and allowing this process to execute any code,
 686          * notify any interested parties.
 687          */
 688         mac_proc_notify_exec_complete(current_proc());
 689 #endif
 690
 691         thread_bootstrap_return();
 692 }
 693
 694 #ifdef CONFIG_32BIT_TELEMETRY
 695 boolean_t
 696 task_consume_32bit_log_flag(task_t task)
 697 {
 698         if ((task->t_procflags & TPF_LOG_32BIT_TELEMETRY) != 0) {
 699                 task->t_procflags &= ~TPF_LOG_32BIT_TELEMETRY;
 700                 return TRUE;
 701         } else {
 702                 return FALSE;
 703         }
 704 }
 705
 706 void
 707 task_set_32bit_log_flag(task_t task)
 708 {
 709         task->t_procflags |= TPF_LOG_32BIT_TELEMETRY;
 710 }
 711 #endif /* CONFIG_32BIT_TELEMETRY */
 712
 713 boolean_t
 714 task_is_exec_copy(task_t task)
 715 {
 716         return task_is_exec_copy_internal(task);
 717 }
 718
 719 boolean_t
 720 task_did_exec(task_t task)
 721 {
 722         return task_did_exec_internal(task);
 723 }
 724
 725 boolean_t
 726 task_is_active(task_t task)
 727 {
 728         return task->active;
 729 }
 730
 731 boolean_t
 732 task_is_halting(task_t task)
 733 {
 734         return task->halting;
 735 }
 736
 737 #if TASK_REFERENCE_LEAK_DEBUG
 738 #include <kern/btlog.h>
 739
 740 static btlog_t *task_ref_btlog;
 741 #define TASK_REF_OP_INCR        0x1
 742 #define TASK_REF_OP_DECR        0x2
 743
 744 #define TASK_REF_NUM_RECORDS    100000
 745 #define TASK_REF_BTDEPTH        7
 746
 747 void
 748 task_reference_internal(task_t task)
 749 {
 750         void *       bt[TASK_REF_BTDEPTH];
 751         int             numsaved = 0;
 752
 753         os_ref_retain(&task->ref_count);
 754
 755         numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
 756         btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_INCR,
 757             bt, numsaved);
 758 }
 759
 760 os_ref_count_t
 761 task_deallocate_internal(task_t task)
 762 {
 763         void *       bt[TASK_REF_BTDEPTH];
 764         int             numsaved = 0;
 765
 766         numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
 767         btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_DECR,
 768             bt, numsaved);
 769
 770         return os_ref_release(&task->ref_count);
 771 }
 772
 773 #endif /* TASK_REFERENCE_LEAK_DEBUG */
 774
 775 void
 776 task_init(void)
 777 {
 778         lck_grp_attr_setdefault(&task_lck_grp_attr);
 779         lck_grp_init(&task_lck_grp, "task", &task_lck_grp_attr);
 780         lck_attr_setdefault(&task_lck_attr);
 781         lck_mtx_init(&tasks_threads_lock, &task_lck_grp, &task_lck_attr);
 782         lck_mtx_init(&tasks_corpse_lock, &task_lck_grp, &task_lck_attr);
 783
 784         task_zone = zinit(
 785                 sizeof(struct task),
 786                 task_max * sizeof(struct task),
 787                 TASK_CHUNK * sizeof(struct task),
 788                 "tasks");
 789
 790         zone_change(task_zone, Z_NOENCRYPT, TRUE);
 791
 792 #if CONFIG_EMBEDDED
 793         task_watch_init();
 794 #endif /* CONFIG_EMBEDDED */
 795
 796         /*
 797          * Configure per-task memory limit.
 798          * The boot-arg is interpreted as Megabytes,
 799          * and takes precedence over the device tree.
 800          * Setting the boot-arg to 0 disables task limits.
 801          */
 802         if (!PE_parse_boot_argn("max_task_pmem", &max_task_footprint_mb,
 803             sizeof(max_task_footprint_mb))) {
 804                 /*
 805                  * No limit was found in boot-args, so go look in the device tree.
 806                  */
 807                 if (!PE_get_default("kern.max_task_pmem", &max_task_footprint_mb,
 808                     sizeof(max_task_footprint_mb))) {
 809                         /*
 810                          * No limit was found in device tree.
 811                          */
 812                         max_task_footprint_mb = 0;
 813                 }
 814         }
 815
 816         if (max_task_footprint_mb != 0) {
 817 #if CONFIG_MEMORYSTATUS
 818                 if (max_task_footprint_mb < 50) {
 819                         printf("Warning: max_task_pmem %d below minimum.\n",
 820                             max_task_footprint_mb);
 821                         max_task_footprint_mb = 50;
 822                 }
 823                 printf("Limiting task physical memory footprint to %d MB\n",
 824                     max_task_footprint_mb);
 825
 826                 max_task_footprint = (ledger_amount_t)max_task_footprint_mb * 1024 * 1024; // Convert MB to bytes
 827
 828                 /*
 829                  * Configure the per-task memory limit warning level.
 830                  * This is computed as a percentage.
 831                  */
 832                 max_task_footprint_warning_level = 0;
 833
 834                 if (max_mem < 0x40000000) {
 835                         /*
 836                          * On devices with < 1GB of memory:
 837                          *    -- set warnings to 50MB below the per-task limit.
 838                          */
 839                         if (max_task_footprint_mb > 50) {
 840                                 max_task_footprint_warning_level = ((max_task_footprint_mb - 50) * 100) / max_task_footprint_mb;
 841                         }
 842                 } else {
 843                         /*
 844                          * On devices with >= 1GB of memory:
 845                          *    -- set warnings to 100MB below the per-task limit.
 846                          */
 847                         if (max_task_footprint_mb > 100) {
 848                                 max_task_footprint_warning_level = ((max_task_footprint_mb - 100) * 100) / max_task_footprint_mb;
 849                         }
 850                 }
 851
 852                 /*
 853                  * Never allow warning level to land below the default.
 854                  */
 855                 if (max_task_footprint_warning_level < PHYS_FOOTPRINT_WARNING_LEVEL) {
 856                         max_task_footprint_warning_level = PHYS_FOOTPRINT_WARNING_LEVEL;
 857                 }
 858
 859                 printf("Limiting task physical memory warning to %d%%\n", max_task_footprint_warning_level);
 860
 861 #else
 862                 printf("Warning: max_task_pmem specified, but jetsam not configured; ignoring.\n");
 863 #endif /* CONFIG_MEMORYSTATUS */
 864         }
 865
 866 #if DEVELOPMENT || DEBUG
 867         if (!PE_parse_boot_argn("exc_resource_threads",
 868             &exc_resource_threads_enabled,
 869             sizeof(exc_resource_threads_enabled))) {
 870                 exc_resource_threads_enabled = 1;
 871         }
 872         PE_parse_boot_argn("task_exc_guard_default",
 873             &task_exc_guard_default,
 874             sizeof(task_exc_guard_default));
 875 #endif /* DEVELOPMENT || DEBUG */
 876
 877 #if CONFIG_COREDUMP
 878         if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores,
 879             sizeof(hwm_user_cores))) {
 880                 hwm_user_cores = 0;
 881         }
 882 #endif
 883
 884         proc_init_cpumon_params();
 885
 886         if (!PE_parse_boot_argn("task_wakeups_monitor_rate", &task_wakeups_monitor_rate, sizeof(task_wakeups_monitor_rate))) {
 887                 task_wakeups_monitor_rate = TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT;
 888         }
 889
 890         if (!PE_parse_boot_argn("task_wakeups_monitor_interval", &task_wakeups_monitor_interval, sizeof(task_wakeups_monitor_interval))) {
 891                 task_wakeups_monitor_interval = TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL;
 892         }
 893
 894         if (!PE_parse_boot_argn("task_wakeups_monitor_ustackshots_trigger_pct", &task_wakeups_monitor_ustackshots_trigger_pct,
 895             sizeof(task_wakeups_monitor_ustackshots_trigger_pct))) {
 896                 task_wakeups_monitor_ustackshots_trigger_pct = TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER;
 897         }
 898
 899         if (!PE_parse_boot_argn("disable_exc_resource", &disable_exc_resource,
 900             sizeof(disable_exc_resource))) {
 901                 disable_exc_resource = 0;
 902         }
 903
 904         if (!PE_parse_boot_argn("task_iomon_limit_mb", &task_iomon_limit_mb, sizeof(task_iomon_limit_mb))) {
 905                 task_iomon_limit_mb = IOMON_DEFAULT_LIMIT;
 906         }
 907
 908         if (!PE_parse_boot_argn("task_iomon_interval_secs", &task_iomon_interval_secs, sizeof(task_iomon_interval_secs))) {
 909                 task_iomon_interval_secs = IOMON_DEFAULT_INTERVAL;
 910         }
 911
 912         if (!PE_parse_boot_argn("io_telemetry_limit", &io_telemetry_limit, sizeof(io_telemetry_limit))) {
 913                 io_telemetry_limit = IO_TELEMETRY_DEFAULT_LIMIT;
 914         }
 915
 916 /*
 917  * If we have coalitions, coalition_init() will call init_task_ledgers() as it
 918  * sets up the ledgers for the default coalition. If we don't have coalitions,
 919  * then we have to call it now.
 920  */
 921 #if CONFIG_COALITIONS
 922         assert(task_ledger_template);
 923 #else /* CONFIG_COALITIONS */
 924         init_task_ledgers();
 925 #endif /* CONFIG_COALITIONS */
 926
 927 #if TASK_REFERENCE_LEAK_DEBUG
 928         task_ref_btlog = btlog_create(TASK_REF_NUM_RECORDS, TASK_REF_BTDEPTH, TRUE /* caller_will_remove_entries_for_element? */);
 929         assert(task_ref_btlog);
 930 #endif
 931
 932         /*
 933          * Create the kernel task as the first task.
 934          */
 935 #ifdef __LP64__
 936         if (task_create_internal(TASK_NULL, NULL, FALSE, TRUE, TRUE, TF_NONE, TPF_NONE, TWF_NONE, &kernel_task) != KERN_SUCCESS)
 937 #else
 938         if (task_create_internal(TASK_NULL, NULL, FALSE, FALSE, FALSE, TF_NONE, TPF_NONE, TWF_NONE, &kernel_task) != KERN_SUCCESS)
 939 #endif
 940         { panic("task_init\n");}
 941
 942 #if defined(HAS_APPLE_PAC)
 943         kernel_task->rop_pid = KERNEL_ROP_ID;
 944         // kernel_task never runs at EL0, but machine_thread_state_convert_from/to_user() relies on
 945         // disable_user_jop to be false for kernel threads (e.g. in exception delivery on thread_exception_daemon)
 946         ml_task_set_disable_user_jop(kernel_task, FALSE);
 947 #endif
 948
 949         vm_map_deallocate(kernel_task->map);
 950         kernel_task->map = kernel_map;
 951         lck_spin_init(&dead_task_statistics_lock, &task_lck_grp, &task_lck_attr);
 952 }
 953
 954 /*
 955  * Create a task running in the kernel address space.  It may
 956  * have its own map of size mem_size and may have ipc privileges.
 957  */
 958 kern_return_t
 959 kernel_task_create(
 960         __unused task_t         parent_task,
 961         __unused vm_offset_t            map_base,
 962         __unused vm_size_t              map_size,
 963         __unused task_t         *child_task)
 964 {
 965         return KERN_INVALID_ARGUMENT;
 966 }
 967
 968 kern_return_t
 969 task_create(
 970         task_t                          parent_task,
 971         __unused ledger_port_array_t    ledger_ports,
 972         __unused mach_msg_type_number_t num_ledger_ports,
 973         __unused boolean_t              inherit_memory,
 974         __unused task_t                 *child_task)    /* OUT */
 975 {
 976         if (parent_task == TASK_NULL) {
 977                 return KERN_INVALID_ARGUMENT;
 978         }
 979
 980         /*
 981          * No longer supported: too many calls assume that a task has a valid
 982          * process attached.
 983          */
 984         return KERN_FAILURE;
 985 }
 986
 987 kern_return_t
 988 host_security_create_task_token(
 989         host_security_t                 host_security,
 990         task_t                          parent_task,
 991         __unused security_token_t       sec_token,
 992         __unused audit_token_t          audit_token,
 993         __unused host_priv_t            host_priv,
 994         __unused ledger_port_array_t    ledger_ports,
 995         __unused mach_msg_type_number_t num_ledger_ports,
 996         __unused boolean_t              inherit_memory,
 997         __unused task_t                 *child_task)    /* OUT */
 998 {
 999         if (parent_task == TASK_NULL) {
1000                 return KERN_INVALID_ARGUMENT;
1001         }
1002
1003         if (host_security == HOST_NULL) {
1004                 return KERN_INVALID_SECURITY;
1005         }
1006
1007         /*
1008          * No longer supported.
1009          */
1010         return KERN_FAILURE;
1011 }
1012
1013 /*
1014  * Task ledgers
1015  * ------------
1016  *
1017  * phys_footprint
1018  *   Physical footprint: This is the sum of:
1019  *     + (internal - alternate_accounting)
1020  *     + (internal_compressed - alternate_accounting_compressed)
1021  *     + iokit_mapped
1022  *     + purgeable_nonvolatile
1023  *     + purgeable_nonvolatile_compressed
1024  *     + page_table
1025  *
1026  * internal
1027  *   The task's anonymous memory, which on iOS is always resident.
1028  *
1029  * internal_compressed
1030  *   Amount of this task's internal memory which is held by the compressor.
1031  *   Such memory is no longer actually resident for the task [i.e., resident in its pmap],
1032  *   and could be either decompressed back into memory, or paged out to storage, depending
1033  *   on our implementation.
1034  *
1035  * iokit_mapped
1036  *   IOKit mappings: The total size of all IOKit mappings in this task, regardless of
1037  *    clean/dirty or internal/external state].
1038  *
1039  * alternate_accounting
1040  *   The number of internal dirty pages which are part of IOKit mappings. By definition, these pages
1041  *   are counted in both internal *and* iokit_mapped, so we must subtract them from the total to avoid
1042  *   double counting.
1043  *
1044  * pages_grabbed
1045  *   pages_grabbed counts all page grabs in a task.  It is also broken out into three subtypes
1046  *   which track UPL, IOPL and Kernel page grabs.
1047  */
1048 void
1049 init_task_ledgers(void)
1050 {
1051         ledger_template_t t;
1052
1053         assert(task_ledger_template == NULL);
1054         assert(kernel_task == TASK_NULL);
1055
1056 #if MACH_ASSERT
1057         PE_parse_boot_argn("pmap_ledgers_panic",
1058             &pmap_ledgers_panic,
1059             sizeof(pmap_ledgers_panic));
1060         PE_parse_boot_argn("pmap_ledgers_panic_leeway",
1061             &pmap_ledgers_panic_leeway,
1062             sizeof(pmap_ledgers_panic_leeway));
1063 #endif /* MACH_ASSERT */
1064
1065         if ((t = ledger_template_create("Per-task ledger")) == NULL) {
1066                 panic("couldn't create task ledger template");
1067         }
1068
1069         task_ledgers.cpu_time = ledger_entry_add(t, "cpu_time", "sched", "ns");
1070         task_ledgers.tkm_private = ledger_entry_add(t, "tkm_private",
1071             "physmem", "bytes");
1072         task_ledgers.tkm_shared = ledger_entry_add(t, "tkm_shared", "physmem",
1073             "bytes");
1074         task_ledgers.phys_mem = ledger_entry_add(t, "phys_mem", "physmem",
1075             "bytes");
1076         task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem",
1077             "bytes");
1078         task_ledgers.internal = ledger_entry_add(t, "internal", "physmem",
1079             "bytes");
1080         task_ledgers.iokit_mapped = ledger_entry_add(t, "iokit_mapped", "mappings",
1081             "bytes");
1082         task_ledgers.alternate_accounting = ledger_entry_add(t, "alternate_accounting", "physmem",
1083             "bytes");
1084         task_ledgers.alternate_accounting_compressed = ledger_entry_add(t, "alternate_accounting_compressed", "physmem",
1085             "bytes");
1086         task_ledgers.page_table = ledger_entry_add(t, "page_table", "physmem",
1087             "bytes");
1088         task_ledgers.phys_footprint = ledger_entry_add(t, "phys_footprint", "physmem",
1089             "bytes");
1090         task_ledgers.internal_compressed = ledger_entry_add(t, "internal_compressed", "physmem",
1091             "bytes");
1092         task_ledgers.purgeable_volatile = ledger_entry_add(t, "purgeable_volatile", "physmem", "bytes");
1093         task_ledgers.purgeable_nonvolatile = ledger_entry_add(t, "purgeable_nonvolatile", "physmem", "bytes");
1094         task_ledgers.purgeable_volatile_compressed = ledger_entry_add(t, "purgeable_volatile_compress", "physmem", "bytes");
1095         task_ledgers.purgeable_nonvolatile_compressed = ledger_entry_add(t, "purgeable_nonvolatile_compress", "physmem", "bytes");
1096 #if DEBUG || DEVELOPMENT
1097         task_ledgers.pages_grabbed = ledger_entry_add(t, "pages_grabbed", "physmem", "count");
1098         task_ledgers.pages_grabbed_kern = ledger_entry_add(t, "pages_grabbed_kern", "physmem", "count");
1099         task_ledgers.pages_grabbed_iopl = ledger_entry_add(t, "pages_grabbed_iopl", "physmem", "count");
1100         task_ledgers.pages_grabbed_upl = ledger_entry_add(t, "pages_grabbed_upl", "physmem", "count");
1101 #endif
1102         task_ledgers.tagged_nofootprint = ledger_entry_add(t, "tagged_nofootprint", "physmem", "bytes");
1103         task_ledgers.tagged_footprint = ledger_entry_add(t, "tagged_footprint", "physmem", "bytes");
1104         task_ledgers.tagged_nofootprint_compressed = ledger_entry_add(t, "tagged_nofootprint_compressed", "physmem", "bytes");
1105         task_ledgers.tagged_footprint_compressed = ledger_entry_add(t, "tagged_footprint_compressed", "physmem", "bytes");
1106         task_ledgers.network_volatile = ledger_entry_add(t, "network_volatile", "physmem", "bytes");
1107         task_ledgers.network_nonvolatile = ledger_entry_add(t, "network_nonvolatile", "physmem", "bytes");
1108         task_ledgers.network_volatile_compressed = ledger_entry_add(t, "network_volatile_compressed", "physmem", "bytes");
1109         task_ledgers.network_nonvolatile_compressed = ledger_entry_add(t, "network_nonvolatile_compressed", "physmem", "bytes");
1110         task_ledgers.media_nofootprint = ledger_entry_add(t, "media_nofootprint", "physmem", "bytes");
1111         task_ledgers.media_footprint = ledger_entry_add(t, "media_footprint", "physmem", "bytes");
1112         task_ledgers.media_nofootprint_compressed = ledger_entry_add(t, "media_nofootprint_compressed", "physmem", "bytes");
1113         task_ledgers.media_footprint_compressed = ledger_entry_add(t, "media_footprint_compressed", "physmem", "bytes");
1114         task_ledgers.graphics_nofootprint = ledger_entry_add(t, "graphics_nofootprint", "physmem", "bytes");
1115         task_ledgers.graphics_footprint = ledger_entry_add(t, "graphics_footprint", "physmem", "bytes");
1116         task_ledgers.graphics_nofootprint_compressed = ledger_entry_add(t, "graphics_nofootprint_compressed", "physmem", "bytes");
1117         task_ledgers.graphics_footprint_compressed = ledger_entry_add(t, "graphics_footprint_compressed", "physmem", "bytes");
1118         task_ledgers.neural_nofootprint = ledger_entry_add(t, "neural_nofootprint", "physmem", "bytes");
1119         task_ledgers.neural_footprint = ledger_entry_add(t, "neural_footprint", "physmem", "bytes");
1120         task_ledgers.neural_nofootprint_compressed = ledger_entry_add(t, "neural_nofootprint_compressed", "physmem", "bytes");
1121         task_ledgers.neural_footprint_compressed = ledger_entry_add(t, "neural_footprint_compressed", "physmem", "bytes");
1122
1123
1124         task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power",
1125             "count");
1126         task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power",
1127             "count");
1128
1129 #if CONFIG_SCHED_SFI
1130         sfi_class_id_t class_id, ledger_alias;
1131         for (class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
1132                 task_ledgers.sfi_wait_times[class_id] = -1;
1133         }
1134
1135         /* don't account for UNSPECIFIED */
1136         for (class_id = SFI_CLASS_UNSPECIFIED + 1; class_id < MAX_SFI_CLASS_ID; class_id++) {
1137                 ledger_alias = sfi_get_ledger_alias_for_class(class_id);
1138                 if (ledger_alias != SFI_CLASS_UNSPECIFIED) {
1139                         /* Check to see if alias has been registered yet */
1140                         if (task_ledgers.sfi_wait_times[ledger_alias] != -1) {
1141                                 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias];
1142                         } else {
1143                                 /* Otherwise, initialize it first */
1144                                 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias] = sfi_ledger_entry_add(t, ledger_alias);
1145                         }
1146                 } else {
1147                         task_ledgers.sfi_wait_times[class_id] = sfi_ledger_entry_add(t, class_id);
1148                 }
1149
1150                 if (task_ledgers.sfi_wait_times[class_id] < 0) {
1151                         panic("couldn't create entries for task ledger template for SFI class 0x%x", class_id);
1152                 }
1153         }
1154
1155         assert(task_ledgers.sfi_wait_times[MAX_SFI_CLASS_ID - 1] != -1);
1156 #endif /* CONFIG_SCHED_SFI */
1157
1158         task_ledgers.cpu_time_billed_to_me = ledger_entry_add(t, "cpu_time_billed_to_me", "sched", "ns");
1159         task_ledgers.cpu_time_billed_to_others = ledger_entry_add(t, "cpu_time_billed_to_others", "sched", "ns");
1160         task_ledgers.physical_writes = ledger_entry_add(t, "physical_writes", "res", "bytes");
1161         task_ledgers.logical_writes = ledger_entry_add(t, "logical_writes", "res", "bytes");
1162         task_ledgers.logical_writes_to_external = ledger_entry_add(t, "logical_writes_to_external", "res", "bytes");
1163         task_ledgers.energy_billed_to_me = ledger_entry_add(t, "energy_billed_to_me", "power", "nj");
1164         task_ledgers.energy_billed_to_others = ledger_entry_add(t, "energy_billed_to_others", "power", "nj");
1165
1166         if ((task_ledgers.cpu_time < 0) ||
1167             (task_ledgers.tkm_private < 0) ||
1168             (task_ledgers.tkm_shared < 0) ||
1169             (task_ledgers.phys_mem < 0) ||
1170             (task_ledgers.wired_mem < 0) ||
1171             (task_ledgers.internal < 0) ||
1172             (task_ledgers.iokit_mapped < 0) ||
1173             (task_ledgers.alternate_accounting < 0) ||
1174             (task_ledgers.alternate_accounting_compressed < 0) ||
1175             (task_ledgers.page_table < 0) ||
1176             (task_ledgers.phys_footprint < 0) ||
1177             (task_ledgers.internal_compressed < 0) ||
1178             (task_ledgers.purgeable_volatile < 0) ||
1179             (task_ledgers.purgeable_nonvolatile < 0) ||
1180             (task_ledgers.purgeable_volatile_compressed < 0) ||
1181             (task_ledgers.purgeable_nonvolatile_compressed < 0) ||
1182             (task_ledgers.tagged_nofootprint < 0) ||
1183             (task_ledgers.tagged_footprint < 0) ||
1184             (task_ledgers.tagged_nofootprint_compressed < 0) ||
1185             (task_ledgers.tagged_footprint_compressed < 0) ||
1186             (task_ledgers.network_volatile < 0) ||
1187             (task_ledgers.network_nonvolatile < 0) ||
1188             (task_ledgers.network_volatile_compressed < 0) ||
1189             (task_ledgers.network_nonvolatile_compressed < 0) ||
1190             (task_ledgers.media_nofootprint < 0) ||
1191             (task_ledgers.media_footprint < 0) ||
1192             (task_ledgers.media_nofootprint_compressed < 0) ||
1193             (task_ledgers.media_footprint_compressed < 0) ||
1194             (task_ledgers.graphics_nofootprint < 0) ||
1195             (task_ledgers.graphics_footprint < 0) ||
1196             (task_ledgers.graphics_nofootprint_compressed < 0) ||
1197             (task_ledgers.graphics_footprint_compressed < 0) ||
1198             (task_ledgers.neural_nofootprint < 0) ||
1199             (task_ledgers.neural_footprint < 0) ||
1200             (task_ledgers.neural_nofootprint_compressed < 0) ||
1201             (task_ledgers.neural_footprint_compressed < 0) ||
1202             (task_ledgers.platform_idle_wakeups < 0) ||
1203             (task_ledgers.interrupt_wakeups < 0) ||
1204             (task_ledgers.cpu_time_billed_to_me < 0) || (task_ledgers.cpu_time_billed_to_others < 0) ||
1205             (task_ledgers.physical_writes < 0) ||
1206             (task_ledgers.logical_writes < 0) ||
1207             (task_ledgers.logical_writes_to_external < 0) ||
1208             (task_ledgers.energy_billed_to_me < 0) ||
1209             (task_ledgers.energy_billed_to_others < 0)
1210             ) {
1211                 panic("couldn't create entries for task ledger template");
1212         }
1213
1214         ledger_track_credit_only(t, task_ledgers.phys_footprint);
1215         ledger_track_credit_only(t, task_ledgers.page_table);
1216         ledger_track_credit_only(t, task_ledgers.internal);
1217         ledger_track_credit_only(t, task_ledgers.internal_compressed);
1218         ledger_track_credit_only(t, task_ledgers.iokit_mapped);
1219         ledger_track_credit_only(t, task_ledgers.alternate_accounting);
1220         ledger_track_credit_only(t, task_ledgers.alternate_accounting_compressed);
1221         ledger_track_credit_only(t, task_ledgers.purgeable_volatile);
1222         ledger_track_credit_only(t, task_ledgers.purgeable_nonvolatile);
1223         ledger_track_credit_only(t, task_ledgers.purgeable_volatile_compressed);
1224         ledger_track_credit_only(t, task_ledgers.purgeable_nonvolatile_compressed);
1225 #if DEBUG || DEVELOPMENT
1226         ledger_track_credit_only(t, task_ledgers.pages_grabbed);
1227         ledger_track_credit_only(t, task_ledgers.pages_grabbed_kern);
1228         ledger_track_credit_only(t, task_ledgers.pages_grabbed_iopl);
1229         ledger_track_credit_only(t, task_ledgers.pages_grabbed_upl);
1230 #endif
1231         ledger_track_credit_only(t, task_ledgers.tagged_nofootprint);
1232         ledger_track_credit_only(t, task_ledgers.tagged_footprint);
1233         ledger_track_credit_only(t, task_ledgers.tagged_nofootprint_compressed);
1234         ledger_track_credit_only(t, task_ledgers.tagged_footprint_compressed);
1235         ledger_track_credit_only(t, task_ledgers.network_volatile);
1236         ledger_track_credit_only(t, task_ledgers.network_nonvolatile);
1237         ledger_track_credit_only(t, task_ledgers.network_volatile_compressed);
1238         ledger_track_credit_only(t, task_ledgers.network_nonvolatile_compressed);
1239         ledger_track_credit_only(t, task_ledgers.media_nofootprint);
1240         ledger_track_credit_only(t, task_ledgers.media_footprint);
1241         ledger_track_credit_only(t, task_ledgers.media_nofootprint_compressed);
1242         ledger_track_credit_only(t, task_ledgers.media_footprint_compressed);
1243         ledger_track_credit_only(t, task_ledgers.graphics_nofootprint);
1244         ledger_track_credit_only(t, task_ledgers.graphics_footprint);
1245         ledger_track_credit_only(t, task_ledgers.graphics_nofootprint_compressed);
1246         ledger_track_credit_only(t, task_ledgers.graphics_footprint_compressed);
1247         ledger_track_credit_only(t, task_ledgers.neural_nofootprint);
1248         ledger_track_credit_only(t, task_ledgers.neural_footprint);
1249         ledger_track_credit_only(t, task_ledgers.neural_nofootprint_compressed);
1250         ledger_track_credit_only(t, task_ledgers.neural_footprint_compressed);
1251
1252         ledger_track_maximum(t, task_ledgers.phys_footprint, 60);
1253 #if MACH_ASSERT
1254         if (pmap_ledgers_panic) {
1255                 ledger_panic_on_negative(t, task_ledgers.phys_footprint);
1256                 ledger_panic_on_negative(t, task_ledgers.page_table);
1257                 ledger_panic_on_negative(t, task_ledgers.internal);
1258                 ledger_panic_on_negative(t, task_ledgers.internal_compressed);
1259                 ledger_panic_on_negative(t, task_ledgers.iokit_mapped);
1260                 ledger_panic_on_negative(t, task_ledgers.alternate_accounting);
1261                 ledger_panic_on_negative(t, task_ledgers.alternate_accounting_compressed);
1262                 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile);
1263                 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile);
1264                 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile_compressed);
1265                 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile_compressed);
1266
1267                 ledger_panic_on_negative(t, task_ledgers.tagged_nofootprint);
1268                 ledger_panic_on_negative(t, task_ledgers.tagged_footprint);
1269                 ledger_panic_on_negative(t, task_ledgers.tagged_nofootprint_compressed);
1270                 ledger_panic_on_negative(t, task_ledgers.tagged_footprint_compressed);
1271                 ledger_panic_on_negative(t, task_ledgers.network_volatile);
1272                 ledger_panic_on_negative(t, task_ledgers.network_nonvolatile);
1273                 ledger_panic_on_negative(t, task_ledgers.network_volatile_compressed);
1274                 ledger_panic_on_negative(t, task_ledgers.network_nonvolatile_compressed);
1275                 ledger_panic_on_negative(t, task_ledgers.media_nofootprint);
1276                 ledger_panic_on_negative(t, task_ledgers.media_footprint);
1277                 ledger_panic_on_negative(t, task_ledgers.media_nofootprint_compressed);
1278                 ledger_panic_on_negative(t, task_ledgers.media_footprint_compressed);
1279                 ledger_panic_on_negative(t, task_ledgers.graphics_nofootprint);
1280                 ledger_panic_on_negative(t, task_ledgers.graphics_footprint);
1281                 ledger_panic_on_negative(t, task_ledgers.graphics_nofootprint_compressed);
1282                 ledger_panic_on_negative(t, task_ledgers.graphics_footprint_compressed);
1283                 ledger_panic_on_negative(t, task_ledgers.neural_nofootprint);
1284                 ledger_panic_on_negative(t, task_ledgers.neural_footprint);
1285                 ledger_panic_on_negative(t, task_ledgers.neural_nofootprint_compressed);
1286                 ledger_panic_on_negative(t, task_ledgers.neural_footprint_compressed);
1287         }
1288 #endif /* MACH_ASSERT */
1289
1290 #if CONFIG_MEMORYSTATUS
1291         ledger_set_callback(t, task_ledgers.phys_footprint, task_footprint_exceeded, NULL, NULL);
1292 #endif /* CONFIG_MEMORYSTATUS */
1293
1294         ledger_set_callback(t, task_ledgers.interrupt_wakeups,
1295             task_wakeups_rate_exceeded, NULL, NULL);
1296         ledger_set_callback(t, task_ledgers.physical_writes, task_io_rate_exceeded, (void *)FLAVOR_IO_PHYSICAL_WRITES, NULL);
1297
1298 #if XNU_MONITOR
1299         ledger_template_complete_secure_alloc(t);
1300 #else /* XNU_MONITOR */
1301         ledger_template_complete(t);
1302 #endif /* XNU_MONITOR */
1303         task_ledger_template = t;
1304 }
1305
/*
 * Declares the file-local reference group task_refgrp (labelled "task").
 * task_create_internal() passes it to os_ref_init_count() when it
 * initializes a new task's ref_count.
 */
os_refgrp_decl(static, task_refgrp, "task", NULL);
1307
1308 kern_return_t
1309 task_create_internal(
1310         task_t          parent_task,
1311         coalition_t     *parent_coalitions __unused,
1312         boolean_t       inherit_memory,
1313         __unused boolean_t      is_64bit,
1314         boolean_t is_64bit_data,
1315         uint32_t        t_flags,
1316         uint32_t        t_procflags,
1317         uint8_t         t_returnwaitflags,
1318         task_t          *child_task)            /* OUT */
1319 {
1320         task_t                  new_task;
1321         vm_shared_region_t      shared_region;
1322         ledger_t                ledger = NULL;
1323
1324         new_task = (task_t) zalloc(task_zone);
1325
1326         if (new_task == TASK_NULL) {
1327                 return KERN_RESOURCE_SHORTAGE;
1328         }
1329
1330         /* one ref for just being alive; one for our caller */
1331         os_ref_init_count(&new_task->ref_count, &task_refgrp, 2);
1332
1333         /* allocate with active entries */
1334         assert(task_ledger_template != NULL);
1335         if ((ledger = ledger_instantiate(task_ledger_template,
1336             LEDGER_CREATE_ACTIVE_ENTRIES)) == NULL) {
1337                 zfree(task_zone, new_task);
1338                 return KERN_RESOURCE_SHORTAGE;
1339         }
1340
1341 #if defined(HAS_APPLE_PAC)
1342         ml_task_set_rop_pid(new_task, parent_task, inherit_memory);
1343         ml_task_set_disable_user_jop(new_task, inherit_memory ? parent_task->disable_user_jop : FALSE);
1344 #endif
1345
1346         new_task->ledger = ledger;
1347
1348 #if defined(CONFIG_SCHED_MULTIQ)
1349         new_task->sched_group = sched_group_create();
1350 #endif
1351
1352         /* if inherit_memory is true, parent_task MUST not be NULL */
1353         if (!(t_flags & TF_CORPSE_FORK) && inherit_memory) {
1354                 new_task->map = vm_map_fork(ledger, parent_task->map, 0);
1355         } else {
1356                 unsigned int pmap_flags = is_64bit ? PMAP_CREATE_64BIT : 0;
1357                 new_task->map = vm_map_create(pmap_create_options(ledger, 0, pmap_flags),
1358                     (vm_map_offset_t)(VM_MIN_ADDRESS),
1359                     (vm_map_offset_t)(VM_MAX_ADDRESS), TRUE);
1360         }
1361
1362         /* Inherit memlock limit from parent */
1363         if (parent_task) {
1364                 vm_map_set_user_wire_limit(new_task->map, (vm_size_t)parent_task->map->user_wire_limit);
1365         }
1366
1367         lck_mtx_init(&new_task->lock, &task_lck_grp, &task_lck_attr);
1368         queue_init(&new_task->threads);
1369         new_task->suspend_count = 0;
1370         new_task->thread_count = 0;
1371         new_task->active_thread_count = 0;
1372         new_task->user_stop_count = 0;
1373         new_task->legacy_stop_count = 0;
1374         new_task->active = TRUE;
1375         new_task->halting = FALSE;
1376         new_task->priv_flags = 0;
1377         new_task->t_flags = t_flags;
1378         new_task->t_procflags = t_procflags;
1379         new_task->t_returnwaitflags = t_returnwaitflags;
1380         new_task->returnwait_inheritor = current_thread();
1381         new_task->importance = 0;
1382         new_task->crashed_thread_id = 0;
1383         new_task->exec_token = 0;
1384         new_task->watchports = NULL;
1385         new_task->restartable_ranges = NULL;
1386         new_task->task_exc_guard = 0;
1387
1388 #if CONFIG_ATM
1389         new_task->atm_context = NULL;
1390 #endif
1391         new_task->bank_context = NULL;
1392
1393 #ifdef MACH_BSD
1394         new_task->bsd_info = NULL;
1395         new_task->corpse_info = NULL;
1396 #endif /* MACH_BSD */
1397
1398 #if CONFIG_MACF
1399         new_task->crash_label = NULL;
1400 #endif
1401
1402 #if CONFIG_MEMORYSTATUS
1403         if (max_task_footprint != 0) {
1404                 ledger_set_limit(ledger, task_ledgers.phys_footprint, max_task_footprint, PHYS_FOOTPRINT_WARNING_LEVEL);
1405         }
1406 #endif /* CONFIG_MEMORYSTATUS */
1407
1408         if (task_wakeups_monitor_rate != 0) {
1409                 uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
1410                 int32_t  rate; // Ignored because of WAKEMON_SET_DEFAULTS
1411                 task_wakeups_monitor_ctl(new_task, &flags, &rate);
1412         }
1413
1414 #if CONFIG_IO_ACCOUNTING
1415         uint32_t flags = IOMON_ENABLE;
1416         task_io_monitor_ctl(new_task, &flags);
1417 #endif /* CONFIG_IO_ACCOUNTING */
1418
1419         machine_task_init(new_task, parent_task, inherit_memory);
1420
1421         new_task->task_debug = NULL;
1422
1423 #if DEVELOPMENT || DEBUG
1424         new_task->task_unnested = FALSE;
1425         new_task->task_disconnected_count = 0;
1426 #endif
1427         queue_init(&new_task->semaphore_list);
1428         new_task->semaphores_owned = 0;
1429
1430         ipc_task_init(new_task, parent_task);
1431
1432         new_task->vtimers = 0;
1433
1434         new_task->shared_region = NULL;
1435
1436         new_task->affinity_space = NULL;
1437
1438         new_task->t_kpc = 0;
1439
1440         new_task->pidsuspended = FALSE;
1441         new_task->frozen = FALSE;
1442         new_task->changing_freeze_state = FALSE;
1443         new_task->rusage_cpu_flags = 0;
1444         new_task->rusage_cpu_percentage = 0;
1445         new_task->rusage_cpu_interval = 0;
1446         new_task->rusage_cpu_deadline = 0;
1447         new_task->rusage_cpu_callt = NULL;
1448 #if MACH_ASSERT
1449         new_task->suspends_outstanding = 0;
1450 #endif
1451
1452 #if HYPERVISOR
1453         new_task->hv_task_target = NULL;
1454 #endif /* HYPERVISOR */
1455
1456 #if CONFIG_EMBEDDED
1457         queue_init(&new_task->task_watchers);
1458         new_task->num_taskwatchers  = 0;
1459         new_task->watchapplying  = 0;
1460 #endif /* CONFIG_EMBEDDED */
1461
1462         new_task->mem_notify_reserved = 0;
1463         new_task->memlimit_attrs_reserved = 0;
1464
1465         new_task->requested_policy = default_task_requested_policy;
1466         new_task->effective_policy = default_task_effective_policy;
1467
1468         task_importance_init_from_parent(new_task, parent_task);
1469
1470         if (parent_task != TASK_NULL) {
1471                 new_task->sec_token = parent_task->sec_token;
1472                 new_task->audit_token = parent_task->audit_token;
1473
1474                 /* inherit the parent's shared region */
1475                 shared_region = vm_shared_region_get(parent_task);
1476                 vm_shared_region_set(new_task, shared_region);
1477
1478                 if (task_has_64Bit_addr(parent_task)) {
1479                         task_set_64Bit_addr(new_task);
1480                 }
1481
1482                 if (task_has_64Bit_data(parent_task)) {
1483                         task_set_64Bit_data(new_task);
1484                 }
1485
1486                 new_task->all_image_info_addr = parent_task->all_image_info_addr;
1487                 new_task->all_image_info_size = parent_task->all_image_info_size;
1488                 new_task->mach_header_vm_address = 0;
1489
1490                 if (inherit_memory && parent_task->affinity_space) {
1491                         task_affinity_create(parent_task, new_task);
1492                 }
1493
1494                 new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task);
1495
1496 #if DEBUG || DEVELOPMENT
1497                 if (parent_task->t_flags & TF_NO_SMT) {
1498                         new_task->t_flags |= TF_NO_SMT;
1499                 }
1500 #endif
1501
1502                 new_task->priority = BASEPRI_DEFAULT;
1503                 new_task->max_priority = MAXPRI_USER;
1504
1505                 task_policy_create(new_task, parent_task);
1506         } else {
1507                 new_task->sec_token = KERNEL_SECURITY_TOKEN;
1508                 new_task->audit_token = KERNEL_AUDIT_TOKEN;
1509 #ifdef __LP64__
1510                 if (is_64bit) {
1511                         task_set_64Bit_addr(new_task);
1512                 }
1513 #endif
1514
1515                 if (is_64bit_data) {
1516                         task_set_64Bit_data(new_task);
1517                 }
1518
1519                 new_task->all_image_info_addr = (mach_vm_address_t)0;
1520                 new_task->all_image_info_size = (mach_vm_size_t)0;
1521
1522                 new_task->pset_hint = PROCESSOR_SET_NULL;
1523
1524                 if (kernel_task == TASK_NULL) {
1525                         new_task->priority = BASEPRI_KERNEL;
1526                         new_task->max_priority = MAXPRI_KERNEL;
1527                 } else {
1528                         new_task->priority = BASEPRI_DEFAULT;
1529                         new_task->max_priority = MAXPRI_USER;
1530                 }
1531         }
1532
1533         bzero(new_task->coalition, sizeof(new_task->coalition));
1534         for (int i = 0; i < COALITION_NUM_TYPES; i++) {
1535                 queue_chain_init(new_task->task_coalition[i]);
1536         }
1537
1538         /* Allocate I/O Statistics */
1539         new_task->task_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info));
1540         assert(new_task->task_io_stats != NULL);
1541         bzero(new_task->task_io_stats, sizeof(struct io_stat_info));
1542
1543         bzero(&(new_task->cpu_time_eqos_stats), sizeof(new_task->cpu_time_eqos_stats));
1544         bzero(&(new_task->cpu_time_rqos_stats), sizeof(new_task->cpu_time_rqos_stats));
1545
1546         bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics));
1547
1548         /* Copy resource accounting info from the parent for a corpse-forked task. */
1549         if (parent_task != NULL && (t_flags & TF_CORPSE_FORK)) {
1550                 task_rollup_accounting_info(new_task, parent_task);
1551         } else {
1552                 /* Initialize to zero for standard fork/spawn case */
1553                 new_task->total_user_time = 0;
1554                 new_task->total_system_time = 0;
1555                 new_task->total_ptime = 0;
1556                 new_task->total_runnable_time = 0;
1557                 new_task->faults = 0;
1558                 new_task->pageins = 0;
1559                 new_task->cow_faults = 0;
1560                 new_task->messages_sent = 0;
1561                 new_task->messages_received = 0;
1562                 new_task->syscalls_mach = 0;
1563                 new_task->syscalls_unix = 0;
1564                 new_task->c_switch = 0;
1565                 new_task->p_switch = 0;
1566                 new_task->ps_switch = 0;
1567                 new_task->decompressions = 0;
1568                 new_task->low_mem_notified_warn = 0;
1569                 new_task->low_mem_notified_critical = 0;
1570                 new_task->purged_memory_warn = 0;
1571                 new_task->purged_memory_critical = 0;
1572                 new_task->low_mem_privileged_listener = 0;
1573                 new_task->memlimit_is_active = 0;
1574                 new_task->memlimit_is_fatal = 0;
1575                 new_task->memlimit_active_exc_resource = 0;
1576                 new_task->memlimit_inactive_exc_resource = 0;
1577                 new_task->task_timer_wakeups_bin_1 = 0;
1578                 new_task->task_timer_wakeups_bin_2 = 0;
1579                 new_task->task_gpu_ns = 0;
1580                 new_task->task_writes_counters_internal.task_immediate_writes = 0;
1581                 new_task->task_writes_counters_internal.task_deferred_writes = 0;
1582                 new_task->task_writes_counters_internal.task_invalidated_writes = 0;
1583                 new_task->task_writes_counters_internal.task_metadata_writes = 0;
1584                 new_task->task_writes_counters_external.task_immediate_writes = 0;
1585                 new_task->task_writes_counters_external.task_deferred_writes = 0;
1586                 new_task->task_writes_counters_external.task_invalidated_writes = 0;
1587                 new_task->task_writes_counters_external.task_metadata_writes = 0;
1588
1589                 new_task->task_energy = 0;
1590 #if MONOTONIC
1591                 memset(&new_task->task_monotonic, 0, sizeof(new_task->task_monotonic));
1592 #endif /* MONOTONIC */
1593         }
1594
1595
1596 #if CONFIG_COALITIONS
1597         if (!(t_flags & TF_CORPSE_FORK)) {
1598                 /* TODO: there is no graceful failure path here... */
1599                 if (parent_coalitions && parent_coalitions[COALITION_TYPE_RESOURCE]) {
1600                         coalitions_adopt_task(parent_coalitions, new_task);
1601                 } else if (parent_task && parent_task->coalition[COALITION_TYPE_RESOURCE]) {
1602                         /*
1603                          * all tasks at least have a resource coalition, so
1604                          * if the parent has one then inherit all coalitions
1605                          * the parent is a part of
1606                          */
1607                         coalitions_adopt_task(parent_task->coalition, new_task);
1608                 } else {
1609                         /* TODO: assert that new_task will be PID 1 (launchd) */
1610                         coalitions_adopt_init_task(new_task);
1611                 }
1612                 /*
1613                  * on exec, we need to transfer the coalition roles from the
1614                  * parent task to the exec copy task.
1615                  */
1616                 if (parent_task && (t_procflags & TPF_EXEC_COPY)) {
1617                         int coal_roles[COALITION_NUM_TYPES];
1618                         task_coalition_roles(parent_task, coal_roles);
1619                         (void)coalitions_set_roles(new_task->coalition, new_task, coal_roles);
1620                 }
1621         } else {
1622                 coalitions_adopt_corpse_task(new_task);
1623         }
1624
1625         if (new_task->coalition[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
1626                 panic("created task is not a member of a resource coalition");
1627         }
1628 #endif /* CONFIG_COALITIONS */
1629
1630         new_task->dispatchqueue_offset = 0;
1631         if (parent_task != NULL) {
1632                 new_task->dispatchqueue_offset = parent_task->dispatchqueue_offset;
1633         }
1634
1635         new_task->task_can_transfer_memory_ownership = FALSE;
1636         new_task->task_volatile_objects = 0;
1637         new_task->task_nonvolatile_objects = 0;
1638         new_task->task_objects_disowning = FALSE;
1639         new_task->task_objects_disowned = FALSE;
1640         new_task->task_owned_objects = 0;
1641         queue_init(&new_task->task_objq);
1642         task_objq_lock_init(new_task);
1643
1644 #if __arm64__
1645         new_task->task_legacy_footprint = FALSE;
1646         new_task->task_extra_footprint_limit = FALSE;
1647         new_task->task_ios13extended_footprint_limit = FALSE;
1648 #endif /* __arm64__ */
1649         new_task->task_region_footprint = FALSE;
1650         new_task->task_has_crossed_thread_limit = FALSE;
1651         new_task->task_thread_limit = 0;
1652 #if CONFIG_SECLUDED_MEMORY
1653         new_task->task_can_use_secluded_mem = FALSE;
1654         new_task->task_could_use_secluded_mem = FALSE;
1655         new_task->task_could_also_use_secluded_mem = FALSE;
1656         new_task->task_suppressed_secluded = FALSE;
1657 #endif /* CONFIG_SECLUDED_MEMORY */
1658
1659         /*
1660          * t_flags is set up above. But since we don't
1661          * support darkwake mode being set that way
1662          * currently, we clear it out here explicitly.
1663          */
1664         new_task->t_flags &= ~(TF_DARKWAKE_MODE);
1665
1666         queue_init(&new_task->io_user_clients);
1667         new_task->loadTag = 0;
1668
1669         ipc_task_enable(new_task);
1670
1671         lck_mtx_lock(&tasks_threads_lock);
1672         queue_enter(&tasks, new_task, task_t, tasks);
1673         tasks_count++;
1674         if (tasks_suspend_state) {
1675                 task_suspend_internal(new_task);
1676         }
1677         lck_mtx_unlock(&tasks_threads_lock);
1678
1679         *child_task = new_task;
1680         return KERN_SUCCESS;
1681 }
1682
1683 /*
1684  *      task_rollup_accounting_info
1685  *
1686  *      Roll up accounting stats. Used to rollup stats
1687  *      for exec copy task and corpse fork.
1688  */
1689 void
1690 task_rollup_accounting_info(task_t to_task, task_t from_task)
1691 {
1692         assert(from_task != to_task);
1693
1694         to_task->total_user_time = from_task->total_user_time;
1695         to_task->total_system_time = from_task->total_system_time;
1696         to_task->total_ptime = from_task->total_ptime;
1697         to_task->total_runnable_time = from_task->total_runnable_time;
1698         to_task->faults = from_task->faults;
1699         to_task->pageins = from_task->pageins;
1700         to_task->cow_faults = from_task->cow_faults;
1701         to_task->decompressions = from_task->decompressions;
1702         to_task->messages_sent = from_task->messages_sent;
1703         to_task->messages_received = from_task->messages_received;
1704         to_task->syscalls_mach = from_task->syscalls_mach;
1705         to_task->syscalls_unix = from_task->syscalls_unix;
1706         to_task->c_switch = from_task->c_switch;
1707         to_task->p_switch = from_task->p_switch;
1708         to_task->ps_switch = from_task->ps_switch;
1709         to_task->extmod_statistics = from_task->extmod_statistics;
1710         to_task->low_mem_notified_warn = from_task->low_mem_notified_warn;
1711         to_task->low_mem_notified_critical = from_task->low_mem_notified_critical;
1712         to_task->purged_memory_warn = from_task->purged_memory_warn;
1713         to_task->purged_memory_critical = from_task->purged_memory_critical;
1714         to_task->low_mem_privileged_listener = from_task->low_mem_privileged_listener;
1715         *to_task->task_io_stats = *from_task->task_io_stats;
1716         to_task->cpu_time_eqos_stats = from_task->cpu_time_eqos_stats;
1717         to_task->cpu_time_rqos_stats = from_task->cpu_time_rqos_stats;
1718         to_task->task_timer_wakeups_bin_1 = from_task->task_timer_wakeups_bin_1;
1719         to_task->task_timer_wakeups_bin_2 = from_task->task_timer_wakeups_bin_2;
1720         to_task->task_gpu_ns = from_task->task_gpu_ns;
1721         to_task->task_writes_counters_internal.task_immediate_writes = from_task->task_writes_counters_internal.task_immediate_writes;
1722         to_task->task_writes_counters_internal.task_deferred_writes = from_task->task_writes_counters_internal.task_deferred_writes;
1723         to_task->task_writes_counters_internal.task_invalidated_writes = from_task->task_writes_counters_internal.task_invalidated_writes;
1724         to_task->task_writes_counters_internal.task_metadata_writes = from_task->task_writes_counters_internal.task_metadata_writes;
1725         to_task->task_writes_counters_external.task_immediate_writes = from_task->task_writes_counters_external.task_immediate_writes;
1726         to_task->task_writes_counters_external.task_deferred_writes = from_task->task_writes_counters_external.task_deferred_writes;
1727         to_task->task_writes_counters_external.task_invalidated_writes = from_task->task_writes_counters_external.task_invalidated_writes;
1728         to_task->task_writes_counters_external.task_metadata_writes = from_task->task_writes_counters_external.task_metadata_writes;
1729         to_task->task_energy = from_task->task_energy;
1730
1731         /* Skip ledger roll up for memory accounting entries */
1732         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time);
1733         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.platform_idle_wakeups);
1734         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.interrupt_wakeups);
1735 #if CONFIG_SCHED_SFI
1736         for (sfi_class_id_t class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
1737                 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.sfi_wait_times[class_id]);
1738         }
1739 #endif
1740         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_me);
1741         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_others);
1742         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.physical_writes);
1743         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.logical_writes);
1744         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_me);
1745         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_others);
1746 }
1747
1748 int task_dropped_imp_count = 0;
1749
1750 /*
1751  *      task_deallocate:
1752  *
1753  *      Drop a reference on a task.
1754  */
1755 void
1756 task_deallocate(
1757         task_t          task)
1758 {
1759         ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups;
1760         os_ref_count_t refs;
1761
1762         if (task == TASK_NULL) {
1763                 return;
1764         }
1765
1766         refs = task_deallocate_internal(task);
1767
1768 #if IMPORTANCE_INHERITANCE
1769         if (refs == 1) {
1770                 /*
1771                  * If last ref potentially comes from the task's importance,
1772                  * disconnect it.  But more task refs may be added before
1773                  * that completes, so wait for the reference to go to zero
1774                  * naturally (it may happen on a recursive task_deallocate()
1775                  * from the ipc_importance_disconnect_task() call).
1776                  */
1777                 if (IIT_NULL != task->task_imp_base) {
1778                         ipc_importance_disconnect_task(task);
1779                 }
1780                 return;
1781         }
1782 #endif /* IMPORTANCE_INHERITANCE */
1783
1784         if (refs > 0) {
1785                 return;
1786         }
1787
1788         /*
1789          * The task should be dead at this point. Ensure other resources
1790          * like threads, are gone before we trash the world.
1791          */
1792         assert(queue_empty(&task->threads));
1793         assert(task->bsd_info == NULL);
1794         assert(!is_active(task->itk_space));
1795         assert(!task->active);
1796         assert(task->active_thread_count == 0);
1797
1798         lck_mtx_lock(&tasks_threads_lock);
1799         assert(terminated_tasks_count > 0);
1800         queue_remove(&terminated_tasks, task, task_t, tasks);
1801         terminated_tasks_count--;
1802         lck_mtx_unlock(&tasks_threads_lock);
1803
1804         /*
1805          * remove the reference on atm descriptor
1806          */
1807         task_atm_reset(task);
1808
1809         /*
1810          * remove the reference on bank context
1811          */
1812         task_bank_reset(task);
1813
1814         if (task->task_io_stats) {
1815                 kfree(task->task_io_stats, sizeof(struct io_stat_info));
1816         }
1817
1818         /*
1819          *      Give the machine dependent code a chance
1820          *      to perform cleanup before ripping apart
1821          *      the task.
1822          */
1823         machine_task_terminate(task);
1824
1825         ipc_task_terminate(task);
1826
1827         /* let iokit know */
1828         iokit_task_terminate(task);
1829
1830         if (task->affinity_space) {
1831                 task_affinity_deallocate(task);
1832         }
1833
1834 #if MACH_ASSERT
1835         if (task->ledger != NULL &&
1836             task->map != NULL &&
1837             task->map->pmap != NULL &&
1838             task->map->pmap->ledger != NULL) {
1839                 assert(task->ledger == task->map->pmap->ledger);
1840         }
1841 #endif /* MACH_ASSERT */
1842
1843         vm_owned_objects_disown(task);
1844         assert(task->task_objects_disowned);
1845         if (task->task_volatile_objects != 0 ||
1846             task->task_nonvolatile_objects != 0 ||
1847             task->task_owned_objects != 0) {
1848                 panic("task_deallocate(%p): "
1849                     "volatile_objects=%d nonvolatile_objects=%d owned=%d\n",
1850                     task,
1851                     task->task_volatile_objects,
1852                     task->task_nonvolatile_objects,
1853                     task->task_owned_objects);
1854         }
1855
1856         vm_map_deallocate(task->map);
1857         is_release(task->itk_space);
1858         if (task->restartable_ranges) {
1859                 restartable_ranges_release(task->restartable_ranges);
1860         }
1861
1862         ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
1863             &interrupt_wakeups, &debit);
1864         ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
1865             &platform_idle_wakeups, &debit);
1866
1867 #if defined(CONFIG_SCHED_MULTIQ)
1868         sched_group_destroy(task->sched_group);
1869 #endif
1870
1871         /* Accumulate statistics for dead tasks */
1872         lck_spin_lock(&dead_task_statistics_lock);
1873         dead_task_statistics.total_user_time += task->total_user_time;
1874         dead_task_statistics.total_system_time += task->total_system_time;
1875
1876         dead_task_statistics.task_interrupt_wakeups += interrupt_wakeups;
1877         dead_task_statistics.task_platform_idle_wakeups += platform_idle_wakeups;
1878
1879         dead_task_statistics.task_timer_wakeups_bin_1 += task->task_timer_wakeups_bin_1;
1880         dead_task_statistics.task_timer_wakeups_bin_2 += task->task_timer_wakeups_bin_2;
1881         dead_task_statistics.total_ptime += task->total_ptime;
1882         dead_task_statistics.total_pset_switches += task->ps_switch;
1883         dead_task_statistics.task_gpu_ns += task->task_gpu_ns;
1884         dead_task_statistics.task_energy += task->task_energy;
1885
1886         lck_spin_unlock(&dead_task_statistics_lock);
1887         lck_mtx_destroy(&task->lock, &task_lck_grp);
1888
1889         if (!ledger_get_entries(task->ledger, task_ledgers.tkm_private, &credit,
1890             &debit)) {
1891                 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc);
1892                 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_private.free);
1893         }
1894         if (!ledger_get_entries(task->ledger, task_ledgers.tkm_shared, &credit,
1895             &debit)) {
1896                 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_shared.alloc);
1897                 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_shared.free);
1898         }
1899         ledger_dereference(task->ledger);
1900
1901 #if TASK_REFERENCE_LEAK_DEBUG
1902         btlog_remove_entries_for_element(task_ref_btlog, task);
1903 #endif
1904
1905 #if CONFIG_COALITIONS
1906         task_release_coalitions(task);
1907 #endif /* CONFIG_COALITIONS */
1908
1909         bzero(task->coalition, sizeof(task->coalition));
1910
1911 #if MACH_BSD
1912         /* clean up collected information since last reference to task is gone */
1913         if (task->corpse_info) {
1914                 void *corpse_info_kernel = kcdata_memory_get_begin_addr(task->corpse_info);
1915                 task_crashinfo_destroy(task->corpse_info);
1916                 task->corpse_info = NULL;
1917                 if (corpse_info_kernel) {
1918                         kfree(corpse_info_kernel, CORPSEINFO_ALLOCATION_SIZE);
1919                 }
1920         }
1921 #endif
1922
1923 #if CONFIG_MACF
1924         if (task->crash_label) {
1925                 mac_exc_free_label(task->crash_label);
1926                 task->crash_label = NULL;
1927         }
1928 #endif
1929
1930         assert(queue_empty(&task->task_objq));
1931
1932         zfree(task_zone, task);
1933 }
1934
1935 /*
1936  *      task_name_deallocate:
1937  *
1938  *      Drop a reference on a task name.
1939  */
1940 void
1941 task_name_deallocate(
1942         task_name_t             task_name)
1943 {
1944         return task_deallocate((task_t)task_name);
1945 }
1946
1947 /*
1948  *      task_inspect_deallocate:
1949  *
1950  *      Drop a task inspection reference.
1951  */
1952 void
1953 task_inspect_deallocate(
1954         task_inspect_t          task_inspect)
1955 {
1956         return task_deallocate((task_t)task_inspect);
1957 }
1958
1959 /*
1960  *      task_suspension_token_deallocate:
1961  *
1962  *      Drop a reference on a task suspension token.
1963  */
1964 void
1965 task_suspension_token_deallocate(
1966         task_suspension_token_t         token)
1967 {
1968         return task_deallocate((task_t)token);
1969 }
1970
1971
1972 /*
1973  * task_collect_crash_info:
1974  *
1975  * collect crash info from bsd and mach based data
1976  */
1977 kern_return_t
1978 task_collect_crash_info(
1979         task_t task,
1980 #ifdef CONFIG_MACF
1981         struct label *crash_label,
1982 #endif
1983         int is_corpse_fork)
1984 {
1985         kern_return_t kr = KERN_SUCCESS;
1986
1987         kcdata_descriptor_t crash_data = NULL;
1988         kcdata_descriptor_t crash_data_release = NULL;
1989         mach_msg_type_number_t size = CORPSEINFO_ALLOCATION_SIZE;
1990         mach_vm_offset_t crash_data_ptr = 0;
1991         void *crash_data_kernel = NULL;
1992         void *crash_data_kernel_release = NULL;
1993 #if CONFIG_MACF
1994         struct label *label, *free_label;
1995 #endif
1996
1997         if (!corpses_enabled()) {
1998                 return KERN_NOT_SUPPORTED;
1999         }
2000
2001 #if CONFIG_MACF
2002         free_label = label = mac_exc_create_label();
2003 #endif
2004
2005         task_lock(task);
2006
2007         assert(is_corpse_fork || task->bsd_info != NULL);
2008         if (task->corpse_info == NULL && (is_corpse_fork || task->bsd_info != NULL)) {
2009 #if CONFIG_MACF
2010                 /* Set the crash label, used by the exception delivery mac hook */
2011                 free_label = task->crash_label; // Most likely NULL.
2012                 task->crash_label = label;
2013                 mac_exc_update_task_crash_label(task, crash_label);
2014 #endif
2015                 task_unlock(task);
2016
2017                 crash_data_kernel = (void *) kalloc(CORPSEINFO_ALLOCATION_SIZE);
2018                 if (crash_data_kernel == NULL) {
2019                         kr = KERN_RESOURCE_SHORTAGE;
2020                         goto out_no_lock;
2021                 }
2022                 bzero(crash_data_kernel, CORPSEINFO_ALLOCATION_SIZE);
2023                 crash_data_ptr = (mach_vm_offset_t) crash_data_kernel;
2024
2025                 /* Do not get a corpse ref for corpse fork */
2026                 crash_data = task_crashinfo_alloc_init((mach_vm_address_t)crash_data_ptr, size,
2027                     is_corpse_fork ? 0 : CORPSE_CRASHINFO_HAS_REF,
2028                     KCFLAG_USE_MEMCOPY);
2029                 if (crash_data) {
2030                         task_lock(task);
2031                         crash_data_release = task->corpse_info;
2032                         crash_data_kernel_release = kcdata_memory_get_begin_addr(crash_data_release);
2033                         task->corpse_info = crash_data;
2034
2035                         task_unlock(task);
2036                         kr = KERN_SUCCESS;
2037                 } else {
2038                         kfree(crash_data_kernel, CORPSEINFO_ALLOCATION_SIZE);
2039                         kr = KERN_FAILURE;
2040                 }
2041
2042                 if (crash_data_release != NULL) {
2043                         task_crashinfo_destroy(crash_data_release);
2044                 }
2045                 if (crash_data_kernel_release != NULL) {
2046                         kfree(crash_data_kernel_release, CORPSEINFO_ALLOCATION_SIZE);
2047                 }
2048         } else {
2049                 task_unlock(task);
2050         }
2051
2052 out_no_lock:
2053 #if CONFIG_MACF
2054         if (free_label != NULL) {
2055                 mac_exc_free_label(free_label);
2056         }
2057 #endif
2058         return kr;
2059 }
2060
2061 /*
2062  * task_deliver_crash_notification:
2063  *
2064  * Makes outcall to registered host port for a corpse.
2065  */
2066 kern_return_t
2067 task_deliver_crash_notification(
2068         task_t task,
2069         thread_t thread,
2070         exception_type_t etype,
2071         mach_exception_subcode_t subcode)
2072 {
2073         kcdata_descriptor_t crash_info = task->corpse_info;
2074         thread_t th_iter = NULL;
2075         kern_return_t kr = KERN_SUCCESS;
2076         wait_interrupt_t wsave;
2077         mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
2078         ipc_port_t task_port, old_notify;
2079
2080         if (crash_info == NULL) {
2081                 return KERN_FAILURE;
2082         }
2083
2084         task_lock(task);
2085         if (task_is_a_corpse_fork(task)) {
2086                 /* Populate code with EXC_{RESOURCE,GUARD} for corpse fork */
2087                 code[0] = etype;
2088                 code[1] = subcode;
2089         } else {
2090                 /* Populate code with EXC_CRASH for corpses */
2091                 code[0] = EXC_CRASH;
2092                 code[1] = 0;
2093                 /* Update the code[1] if the boot-arg corpse_for_fatal_memkill is set */
2094                 if (corpse_for_fatal_memkill) {
2095                         code[1] = subcode;
2096                 }
2097         }
2098
2099         queue_iterate(&task->threads, th_iter, thread_t, task_threads)
2100         {
2101                 if (th_iter->corpse_dup == FALSE) {
2102                         ipc_thread_reset(th_iter);
2103                 }
2104         }
2105         task_unlock(task);
2106
2107         /* Arm the no-sender notification for taskport */
2108         task_reference(task);
2109         task_port = convert_task_to_port(task);
2110         ip_lock(task_port);
2111         require_ip_active(task_port);
2112         ipc_port_nsrequest(task_port, task_port->ip_mscount, ipc_port_make_sonce_locked(task_port), &old_notify);
2113         /* port unlocked */
2114         assert(IP_NULL == old_notify);
2115
2116         wsave = thread_interrupt_level(THREAD_UNINT);
2117         kr = exception_triage_thread(EXC_CORPSE_NOTIFY, code, EXCEPTION_CODE_MAX, thread);
2118         if (kr != KERN_SUCCESS) {
2119                 printf("Failed to send exception EXC_CORPSE_NOTIFY. error code: %d for pid %d\n", kr, task_pid(task));
2120         }
2121
2122         (void)thread_interrupt_level(wsave);
2123
2124         /*
2125          * Drop the send right on task port, will fire the
2126          * no-sender notification if exception deliver failed.
2127          */
2128         ipc_port_release_send(task_port);
2129         return kr;
2130 }
2131
2132 /*
2133  *      task_terminate:
2134  *
2135  *      Terminate the specified task.  See comments on thread_terminate
2136  *      (kern/thread.c) about problems with terminating the "current task."
2137  */
2138
2139 kern_return_t
2140 task_terminate(
2141         task_t          task)
2142 {
2143         if (task == TASK_NULL) {
2144                 return KERN_INVALID_ARGUMENT;
2145         }
2146
2147         if (task->bsd_info) {
2148                 return KERN_FAILURE;
2149         }
2150
2151         return task_terminate_internal(task);
2152 }
2153
2154 #if MACH_ASSERT
2155 extern int proc_pid(struct proc *);
2156 extern void proc_name_kdp(task_t t, char *buf, int size);
2157 #endif /* MACH_ASSERT */
2158
2159 #define VM_MAP_PARTIAL_REAP 0x54  /* 0x150 */
2160 static void
2161 __unused task_partial_reap(task_t task, __unused int pid)
2162 {
2163         unsigned int    reclaimed_resident = 0;
2164         unsigned int    reclaimed_compressed = 0;
2165         uint64_t        task_page_count;
2166
2167         task_page_count = (get_task_phys_footprint(task) / PAGE_SIZE_64);
2168
2169         KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_START),
2170             pid, task_page_count, 0, 0, 0);
2171
2172         vm_map_partial_reap(task->map, &reclaimed_resident, &reclaimed_compressed);
2173
2174         KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_END),
2175             pid, reclaimed_resident, reclaimed_compressed, 0, 0);
2176 }
2177
2178 kern_return_t
2179 task_mark_corpse(task_t task)
2180 {
2181         kern_return_t kr = KERN_SUCCESS;
2182         thread_t self_thread;
2183         (void) self_thread;
2184         wait_interrupt_t wsave;
2185 #if CONFIG_MACF
2186         struct label *crash_label = NULL;
2187 #endif
2188
2189         assert(task != kernel_task);
2190         assert(task == current_task());
2191         assert(!task_is_a_corpse(task));
2192
2193 #if CONFIG_MACF
2194         crash_label = mac_exc_create_label_for_proc((struct proc*)task->bsd_info);
2195 #endif
2196
2197         kr = task_collect_crash_info(task,
2198 #if CONFIG_MACF
2199             crash_label,
2200 #endif
2201             FALSE);
2202         if (kr != KERN_SUCCESS) {
2203                 goto out;
2204         }
2205
2206         self_thread = current_thread();
2207
2208         wsave = thread_interrupt_level(THREAD_UNINT);
2209         task_lock(task);
2210
2211         task_set_corpse_pending_report(task);
2212         task_set_corpse(task);
2213         task->crashed_thread_id = thread_tid(self_thread);
2214
2215         kr = task_start_halt_locked(task, TRUE);
2216         assert(kr == KERN_SUCCESS);
2217
2218         ipc_task_reset(task);
2219         /* Remove the naked send right for task port, needed to arm no sender notification */
2220         task_set_special_port_internal(task, TASK_KERNEL_PORT, IPC_PORT_NULL);
2221         ipc_task_enable(task);
2222
2223         task_unlock(task);
2224         /* terminate the ipc space */
2225         ipc_space_terminate(task->itk_space);
2226
2227         /* Add it to global corpse task list */
2228         task_add_to_corpse_task_list(task);
2229
2230         task_start_halt(task);
2231         thread_terminate_internal(self_thread);
2232
2233         (void) thread_interrupt_level(wsave);
2234         assert(task->halting == TRUE);
2235
2236 out:
2237 #if CONFIG_MACF
2238         mac_exc_free_label(crash_label);
2239 #endif
2240         return kr;
2241 }
2242
2243 /*
2244  *      task_clear_corpse
2245  *
2246  *      Clears the corpse pending bit on task.
2247  *      Removes inspection bit on the threads.
2248  */
2249 void
2250 task_clear_corpse(task_t task)
2251 {
2252         thread_t th_iter = NULL;
2253
2254         task_lock(task);
2255         queue_iterate(&task->threads, th_iter, thread_t, task_threads)
2256         {
2257                 thread_mtx_lock(th_iter);
2258                 th_iter->inspection = FALSE;
2259                 thread_mtx_unlock(th_iter);
2260         }
2261
2262         thread_terminate_crashed_threads();
2263         /* remove the pending corpse report flag */
2264         task_clear_corpse_pending_report(task);
2265
2266         task_unlock(task);
2267 }
2268
2269 /*
2270  *      task_port_notify
2271  *
2272  *      Called whenever the Mach port system detects no-senders on
2273  *      the task port of a corpse.
2274  *      Each notification that comes in should terminate the task (corpse).
2275  */
2276 void
2277 task_port_notify(mach_msg_header_t *msg)
2278 {
2279         mach_no_senders_notification_t *notification = (void *)msg;
2280         ipc_port_t port = notification->not_header.msgh_remote_port;
2281         task_t task;
2282
2283         require_ip_active(port);
2284         assert(IKOT_TASK == ip_kotype(port));
2285         task = (task_t) port->ip_kobject;
2286
2287         assert(task_is_a_corpse(task));
2288
2289         /* Remove the task from global corpse task list */
2290         task_remove_from_corpse_task_list(task);
2291
2292         task_clear_corpse(task);
2293         task_terminate_internal(task);
2294 }
2295
2296 /*
2297  *      task_wait_till_threads_terminate_locked
2298  *
2299  *      Wait till all the threads in the task are terminated.
2300  *      Might release the task lock and re-acquire it.
2301  */
2302 void
2303 task_wait_till_threads_terminate_locked(task_t task)
2304 {
2305         /* wait for all the threads in the task to terminate */
2306         while (task->active_thread_count != 0) {
2307                 assert_wait((event_t)&task->active_thread_count, THREAD_UNINT);
2308                 task_unlock(task);
2309                 thread_block(THREAD_CONTINUE_NULL);
2310
2311                 task_lock(task);
2312         }
2313 }
2314
2315 /*
2316  *      task_duplicate_map_and_threads
2317  *
2318  *      Copy vmmap of source task.
2319  *      Copy active threads from source task to destination task.
2320  *      Source task would be suspended during the copy.
2321  */
2322 kern_return_t
2323 task_duplicate_map_and_threads(
2324         task_t task,
2325         void *p,
2326         task_t new_task,
2327         thread_t *thread_ret,
2328         uint64_t **udata_buffer,
2329         int *size,
2330         int *num_udata)
2331 {
2332         kern_return_t kr = KERN_SUCCESS;
2333         int active;
2334         thread_t thread, self, thread_return = THREAD_NULL;
2335         thread_t new_thread = THREAD_NULL, first_thread = THREAD_NULL;
2336         thread_t *thread_array;
2337         uint32_t active_thread_count = 0, array_count = 0, i;
2338         vm_map_t oldmap;
2339         uint64_t *buffer = NULL;
2340         int buf_size = 0;
2341         int est_knotes = 0, num_knotes = 0;
2342
2343         self = current_thread();
2344
2345         /*
2346          * Suspend the task to copy thread state, use the internal
2347          * variant so that no user-space process can resume
2348          * the task from under us
2349          */
2350         kr = task_suspend_internal(task);
2351         if (kr != KERN_SUCCESS) {
2352                 return kr;
2353         }
2354
2355         if (task->map->disable_vmentry_reuse == TRUE) {
2356                 /*
2357                  * Quite likely GuardMalloc (or some debugging tool)
2358                  * is being used on this task. And it has gone through
2359                  * its limit. Making a corpse will likely encounter
2360                  * a lot of VM entries that will need COW.
2361                  *
2362                  * Skip it.
2363                  */
2364 #if DEVELOPMENT || DEBUG
2365                 memorystatus_abort_vm_map_fork(task);
2366 #endif
2367                 task_resume_internal(task);
2368                 return KERN_FAILURE;
2369         }
2370
2371         /* Check with VM if vm_map_fork is allowed for this task */
2372         if (memorystatus_allowed_vm_map_fork(task)) {
2373                 /* Setup new task's vmmap, switch from parent task's map to it COW map */
2374                 oldmap = new_task->map;
2375                 new_task->map = vm_map_fork(new_task->ledger,
2376                     task->map,
2377                     (VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
2378                     VM_MAP_FORK_PRESERVE_PURGEABLE |
2379                     VM_MAP_FORK_CORPSE_FOOTPRINT));
2380                 vm_map_deallocate(oldmap);
2381
2382                 /* copy ledgers that impact the memory footprint */
2383                 vm_map_copy_footprint_ledgers(task, new_task);
2384
2385                 /* Get all the udata pointers from kqueue */
2386                 est_knotes = kevent_proc_copy_uptrs(p, NULL, 0);
2387                 if (est_knotes > 0) {
2388                         buf_size = (est_knotes + 32) * sizeof(uint64_t);
2389                         buffer = (uint64_t *) kalloc(buf_size);
2390                         num_knotes = kevent_proc_copy_uptrs(p, buffer, buf_size);
2391                         if (num_knotes > est_knotes + 32) {
2392                                 num_knotes = est_knotes + 32;
2393                         }
2394                 }
2395         }
2396
2397         active_thread_count = task->active_thread_count;
2398         if (active_thread_count == 0) {
2399                 if (buffer != NULL) {
2400                         kfree(buffer, buf_size);
2401                 }
2402                 task_resume_internal(task);
2403                 return KERN_FAILURE;
2404         }
2405
2406         thread_array = (thread_t *) kalloc(sizeof(thread_t) * active_thread_count);
2407
2408         /* Iterate all the threads and drop the task lock before calling thread_create_with_continuation */
2409         task_lock(task);
2410         queue_iterate(&task->threads, thread, thread_t, task_threads) {
2411                 /* Skip inactive threads */
2412                 active = thread->active;
2413                 if (!active) {
2414                         continue;
2415                 }
2416
2417                 if (array_count >= active_thread_count) {
2418                         break;
2419                 }
2420
2421                 thread_array[array_count++] = thread;
2422                 thread_reference(thread);
2423         }
2424         task_unlock(task);
2425
2426         for (i = 0; i < array_count; i++) {
2427                 kr = thread_create_with_continuation(new_task, &new_thread, (thread_continue_t)thread_corpse_continue);
2428                 if (kr != KERN_SUCCESS) {
2429                         break;
2430                 }
2431
2432                 /* Equivalent of current thread in corpse */
2433                 if (thread_array[i] == self) {
2434                         thread_return = new_thread;
2435                         new_task->crashed_thread_id = thread_tid(new_thread);
2436                 } else if (first_thread == NULL) {
2437                         first_thread = new_thread;
2438                 } else {
2439                         /* drop the extra ref returned by thread_create_with_continuation */
2440                         thread_deallocate(new_thread);
2441                 }
2442
2443                 kr = thread_dup2(thread_array[i], new_thread);
2444                 if (kr != KERN_SUCCESS) {
2445                         thread_mtx_lock(new_thread);
2446                         new_thread->corpse_dup = TRUE;
2447                         thread_mtx_unlock(new_thread);
2448                         continue;
2449                 }
2450
2451                 /* Copy thread name */
2452                 bsd_copythreadname(new_thread->uthread, thread_array[i]->uthread);
2453                 new_thread->thread_tag = thread_array[i]->thread_tag;
2454                 thread_copy_resource_info(new_thread, thread_array[i]);
2455         }
2456
2457         /* return the first thread if we couldn't find the equivalent of current */
2458         if (thread_return == THREAD_NULL) {
2459                 thread_return = first_thread;
2460         } else if (first_thread != THREAD_NULL) {
2461                 /* drop the extra ref returned by thread_create_with_continuation */
2462                 thread_deallocate(first_thread);
2463         }
2464
2465         task_resume_internal(task);
2466
2467         for (i = 0; i < array_count; i++) {
2468                 thread_deallocate(thread_array[i]);
2469         }
2470         kfree(thread_array, sizeof(thread_t) * active_thread_count);
2471
2472         if (kr == KERN_SUCCESS) {
2473                 *thread_ret = thread_return;
2474                 *udata_buffer = buffer;
2475                 *size = buf_size;
2476                 *num_udata = num_knotes;
2477         } else {
2478                 if (thread_return != THREAD_NULL) {
2479                         thread_deallocate(thread_return);
2480                 }
2481                 if (buffer != NULL) {
2482                         kfree(buffer, buf_size);
2483                 }
2484         }
2485
2486         return kr;
2487 }
2488
#if CONFIG_SECLUDED_MEMORY
/* Prototype needed by task_terminate_internal() to revoke secluded memory use. */
extern void task_set_can_use_secluded_mem_locked(task_t task, boolean_t can_use_secluded_mem);
#endif /* CONFIG_SECLUDED_MEMORY */
2494
2495 kern_return_t
2496 task_terminate_internal(
2497         task_t                  task)
2498 {
2499         thread_t                        thread, self;
2500         task_t                          self_task;
2501         boolean_t                       interrupt_save;
2502         int                             pid = 0;
2503
2504         assert(task != kernel_task);
2505
2506         self = current_thread();
2507         self_task = self->task;
2508
2509         /*
2510          *      Get the task locked and make sure that we are not racing
2511          *      with someone else trying to terminate us.
2512          */
2513         if (task == self_task) {
2514                 task_lock(task);
2515         } else if (task < self_task) {
2516                 task_lock(task);
2517                 task_lock(self_task);
2518         } else {
2519                 task_lock(self_task);
2520                 task_lock(task);
2521         }
2522
2523 #if CONFIG_SECLUDED_MEMORY
2524         if (task->task_can_use_secluded_mem) {
2525                 task_set_can_use_secluded_mem_locked(task, FALSE);
2526         }
2527         task->task_could_use_secluded_mem = FALSE;
2528         task->task_could_also_use_secluded_mem = FALSE;
2529
2530         if (task->task_suppressed_secluded) {
2531                 stop_secluded_suppression(task);
2532         }
2533 #endif /* CONFIG_SECLUDED_MEMORY */
2534
2535         if (!task->active) {
2536                 /*
2537                  *      Task is already being terminated.
2538                  *      Just return an error. If we are dying, this will
2539                  *      just get us to our AST special handler and that
2540                  *      will get us to finalize the termination of ourselves.
2541                  */
2542                 task_unlock(task);
2543                 if (self_task != task) {
2544                         task_unlock(self_task);
2545                 }
2546
2547                 return KERN_FAILURE;
2548         }
2549
2550         if (task_corpse_pending_report(task)) {
2551                 /*
2552                  *      Task is marked for reporting as corpse.
2553                  *      Just return an error. This will
2554                  *      just get us to our AST special handler and that
2555                  *      will get us to finish the path to death
2556                  */
2557                 task_unlock(task);
2558                 if (self_task != task) {
2559                         task_unlock(self_task);
2560                 }
2561
2562                 return KERN_FAILURE;
2563         }
2564
2565         if (self_task != task) {
2566                 task_unlock(self_task);
2567         }
2568
2569         /*
2570          * Make sure the current thread does not get aborted out of
2571          * the waits inside these operations.
2572          */
2573         interrupt_save = thread_interrupt_level(THREAD_UNINT);
2574
2575         /*
2576          *      Indicate that we want all the threads to stop executing
2577          *      at user space by holding the task (we would have held
2578          *      each thread independently in thread_terminate_internal -
2579          *      but this way we may be more likely to already find it
2580          *      held there).  Mark the task inactive, and prevent
2581          *      further task operations via the task port.
2582          */
2583         task_hold_locked(task);
2584         task->active = FALSE;
2585         ipc_task_disable(task);
2586
2587 #if CONFIG_TELEMETRY
2588         /*
2589          * Notify telemetry that this task is going away.
2590          */
2591         telemetry_task_ctl_locked(task, TF_TELEMETRY, 0);
2592 #endif
2593
2594         /*
2595          *      Terminate each thread in the task.
2596          */
2597         queue_iterate(&task->threads, thread, thread_t, task_threads) {
2598                 thread_terminate_internal(thread);
2599         }
2600
2601 #ifdef MACH_BSD
2602         if (task->bsd_info != NULL && !task_is_exec_copy(task)) {
2603                 pid = proc_pid(task->bsd_info);
2604         }
2605 #endif /* MACH_BSD */
2606
2607         task_unlock(task);
2608
2609         proc_set_task_policy(task, TASK_POLICY_ATTRIBUTE,
2610             TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);
2611
2612         /* Early object reap phase */
2613
2614 // PR-17045188: Revisit implementation
2615 //        task_partial_reap(task, pid);
2616
2617 #if CONFIG_EMBEDDED
2618         /*
2619          * remove all task watchers
2620          */
2621         task_removewatchers(task);
2622
2623 #endif /* CONFIG_EMBEDDED */
2624
2625         /*
2626          *      Destroy all synchronizers owned by the task.
2627          */
2628         task_synchronizer_destroy_all(task);
2629
2630         /*
2631          *      Clear the watchport boost on the task.
2632          */
2633         task_remove_turnstile_watchports(task);
2634
2635         /*
2636          *      Destroy the IPC space, leaving just a reference for it.
2637          */
2638         ipc_space_terminate(task->itk_space);
2639
2640 #if 00
2641         /* if some ledgers go negative on tear-down again... */
2642         ledger_disable_panic_on_negative(task->map->pmap->ledger,
2643             task_ledgers.phys_footprint);
2644         ledger_disable_panic_on_negative(task->map->pmap->ledger,
2645             task_ledgers.internal);
2646         ledger_disable_panic_on_negative(task->map->pmap->ledger,
2647             task_ledgers.internal_compressed);
2648         ledger_disable_panic_on_negative(task->map->pmap->ledger,
2649             task_ledgers.iokit_mapped);
2650         ledger_disable_panic_on_negative(task->map->pmap->ledger,
2651             task_ledgers.alternate_accounting);
2652         ledger_disable_panic_on_negative(task->map->pmap->ledger,
2653             task_ledgers.alternate_accounting_compressed);
2654 #endif
2655
2656         /*
2657          * If the current thread is a member of the task
2658          * being terminated, then the last reference to
2659          * the task will not be dropped until the thread
2660          * is finally reaped.  To avoid incurring the
2661          * expense of removing the address space regions
2662          * at reap time, we do it explictly here.
2663          */
2664
2665         vm_map_lock(task->map);
2666         vm_map_disable_hole_optimization(task->map);
2667         vm_map_unlock(task->map);
2668
2669 #if MACH_ASSERT
2670         /*
2671          * Identify the pmap's process, in case the pmap ledgers drift
2672          * and we have to report it.
2673          */
2674         char procname[17];
2675         if (task->bsd_info && !task_is_exec_copy(task)) {
2676                 pid = proc_pid(task->bsd_info);
2677                 proc_name_kdp(task, procname, sizeof(procname));
2678         } else {
2679                 pid = 0;
2680                 strlcpy(procname, "<unknown>", sizeof(procname));
2681         }
2682         pmap_set_process(task->map->pmap, pid, procname);
2683 #endif /* MACH_ASSERT */
2684
2685         vm_map_remove(task->map,
2686             task->map->min_offset,
2687             task->map->max_offset,
2688             /*
2689              * Final cleanup:
2690              * + no unnesting
2691              * + remove immutable mappings
2692              * + allow gaps in range
2693              */
2694             (VM_MAP_REMOVE_NO_UNNESTING |
2695             VM_MAP_REMOVE_IMMUTABLE |
2696             VM_MAP_REMOVE_GAPS_OK));
2697
2698         /* release our shared region */
2699         vm_shared_region_set(task, NULL);
2700
2701
2702         lck_mtx_lock(&tasks_threads_lock);
2703         queue_remove(&tasks, task, task_t, tasks);
2704         queue_enter(&terminated_tasks, task, task_t, tasks);
2705         tasks_count--;
2706         terminated_tasks_count++;
2707         lck_mtx_unlock(&tasks_threads_lock);
2708
2709         /*
2710          * We no longer need to guard against being aborted, so restore
2711          * the previous interruptible state.
2712          */
2713         thread_interrupt_level(interrupt_save);
2714
2715 #if KPC
2716         /* force the task to release all ctrs */
2717         if (task->t_kpc & TASK_KPC_FORCED_ALL_CTRS) {
2718                 kpc_force_all_ctrs(task, 0);
2719         }
2720 #endif /* KPC */
2721
2722 #if CONFIG_COALITIONS
2723         /*
2724          * Leave our coalitions. (drop activation but not reference)
2725          */
2726         coalitions_remove_task(task);
2727 #endif
2728
2729         /*
2730          * Get rid of the task active reference on itself.
2731          */
2732         task_deallocate(task);
2733
2734         return KERN_SUCCESS;
2735 }
2736
2737 void
2738 tasks_system_suspend(boolean_t suspend)
2739 {
2740         task_t task;
2741
2742         lck_mtx_lock(&tasks_threads_lock);
2743         assert(tasks_suspend_state != suspend);
2744         tasks_suspend_state = suspend;
2745         queue_iterate(&tasks, task, task_t, tasks) {
2746                 if (task == kernel_task) {
2747                         continue;
2748                 }
2749                 suspend ? task_suspend_internal(task) : task_resume_internal(task);
2750         }
2751         lck_mtx_unlock(&tasks_threads_lock);
2752 }
2753
2754 /*
2755  * task_start_halt:
2756  *
2757  *      Shut the current task down (except for the current thread) in
2758  *      preparation for dramatic changes to the task (probably exec).
2759  *      We hold the task and mark all other threads in the task for
2760  *      termination.
2761  */
2762 kern_return_t
2763 task_start_halt(task_t task)
2764 {
2765         kern_return_t kr = KERN_SUCCESS;
2766         task_lock(task);
2767         kr = task_start_halt_locked(task, FALSE);
2768         task_unlock(task);
2769         return kr;
2770 }
2771
2772 static kern_return_t
2773 task_start_halt_locked(task_t task, boolean_t should_mark_corpse)
2774 {
2775         thread_t thread, self;
2776         uint64_t dispatchqueue_offset;
2777
2778         assert(task != kernel_task);
2779
2780         self = current_thread();
2781
2782         if (task != self->task && !task_is_a_corpse_fork(task)) {
2783                 return KERN_INVALID_ARGUMENT;
2784         }
2785
2786         if (task->halting || !task->active || !self->active) {
2787                 /*
2788                  * Task or current thread is already being terminated.
2789                  * Hurry up and return out of the current kernel context
2790                  * so that we run our AST special handler to terminate
2791                  * ourselves.
2792                  */
2793                 return KERN_FAILURE;
2794         }
2795
2796         task->halting = TRUE;
2797
2798         /*
2799          * Mark all the threads to keep them from starting any more
2800          * user-level execution.  The thread_terminate_internal code
2801          * would do this on a thread by thread basis anyway, but this
2802          * gives us a better chance of not having to wait there.
2803          */
2804         task_hold_locked(task);
2805         dispatchqueue_offset = get_dispatchqueue_offset_from_proc(task->bsd_info);
2806
2807         /*
2808          * Terminate all the other threads in the task.
2809          */
2810         queue_iterate(&task->threads, thread, thread_t, task_threads)
2811         {
2812                 if (should_mark_corpse) {
2813                         thread_mtx_lock(thread);
2814                         thread->inspection = TRUE;
2815                         thread_mtx_unlock(thread);
2816                 }
2817                 if (thread != self) {
2818                         thread_terminate_internal(thread);
2819                 }
2820         }
2821         task->dispatchqueue_offset = dispatchqueue_offset;
2822
2823         task_release_locked(task);
2824
2825         return KERN_SUCCESS;
2826 }
2827
2828
2829 /*
2830  * task_complete_halt:
2831  *
2832  *      Complete task halt by waiting for threads to terminate, then clean
2833  *      up task resources (VM, port namespace, etc...) and then let the
2834  *      current thread go in the (practically empty) task context.
2835  *
2836  *      Note: task->halting flag is not cleared in order to avoid creation
2837  *      of new thread in old exec'ed task.
2838  */
2839 void
2840 task_complete_halt(task_t task)
2841 {
2842         task_lock(task);
2843         assert(task->halting);
2844         assert(task == current_task());
2845
2846         /*
2847          *      Wait for the other threads to get shut down.
2848          *      When the last other thread is reaped, we'll be
2849          *      woken up.
2850          */
2851         if (task->thread_count > 1) {
2852                 assert_wait((event_t)&task->halting, THREAD_UNINT);
2853                 task_unlock(task);
2854                 thread_block(THREAD_CONTINUE_NULL);
2855         } else {
2856                 task_unlock(task);
2857         }
2858
2859         /*
2860          *      Give the machine dependent code a chance
2861          *      to perform cleanup of task-level resources
2862          *      associated with the current thread before
2863          *      ripping apart the task.
2864          */
2865         machine_task_terminate(task);
2866
2867         /*
2868          *      Destroy all synchronizers owned by the task.
2869          */
2870         task_synchronizer_destroy_all(task);
2871
2872         /*
2873          *      Destroy the contents of the IPC space, leaving just
2874          *      a reference for it.
2875          */
2876         ipc_space_clean(task->itk_space);
2877
2878         /*
2879          * Clean out the address space, as we are going to be
2880          * getting a new one.
2881          */
2882         vm_map_remove(task->map, task->map->min_offset,
2883             task->map->max_offset,
2884             /*
2885              * Final cleanup:
2886              * + no unnesting
2887              * + remove immutable mappings
2888              * + allow gaps in the range
2889              */
2890             (VM_MAP_REMOVE_NO_UNNESTING |
2891             VM_MAP_REMOVE_IMMUTABLE |
2892             VM_MAP_REMOVE_GAPS_OK));
2893
2894         /*
2895          * Kick out any IOKitUser handles to the task. At best they're stale,
2896          * at worst someone is racing a SUID exec.
2897          */
2898         iokit_task_terminate(task);
2899 }
2900
2901 /*
2902  *      task_hold_locked:
2903  *
2904  *      Suspend execution of the specified task.
2905  *      This is a recursive-style suspension of the task, a count of
2906  *      suspends is maintained.
2907  *
2908  *      CONDITIONS: the task is locked and active.
2909  */
2910 void
2911 task_hold_locked(
2912         task_t          task)
2913 {
2914         thread_t        thread;
2915
2916         assert(task->active);
2917
2918         if (task->suspend_count++ > 0) {
2919                 return;
2920         }
2921
2922         if (task->bsd_info) {
2923                 workq_proc_suspended(task->bsd_info);
2924         }
2925
2926         /*
2927          *      Iterate through all the threads and hold them.
2928          */
2929         queue_iterate(&task->threads, thread, thread_t, task_threads) {
2930                 thread_mtx_lock(thread);
2931                 thread_hold(thread);
2932                 thread_mtx_unlock(thread);
2933         }
2934 }
2935
2936 /*
2937  *      task_hold:
2938  *
2939  *      Same as the internal routine above, except that is must lock
2940  *      and verify that the task is active.  This differs from task_suspend
2941  *      in that it places a kernel hold on the task rather than just a
2942  *      user-level hold.  This keeps users from over resuming and setting
2943  *      it running out from under the kernel.
2944  *
2945  *      CONDITIONS: the caller holds a reference on the task
2946  */
2947 kern_return_t
2948 task_hold(
2949         task_t          task)
2950 {
2951         if (task == TASK_NULL) {
2952                 return KERN_INVALID_ARGUMENT;
2953         }
2954
2955         task_lock(task);
2956
2957         if (!task->active) {
2958                 task_unlock(task);
2959
2960                 return KERN_FAILURE;
2961         }
2962
2963         task_hold_locked(task);
2964         task_unlock(task);
2965
2966         return KERN_SUCCESS;
2967 }
2968
2969 kern_return_t
2970 task_wait(
2971         task_t          task,
2972         boolean_t       until_not_runnable)
2973 {
2974         if (task == TASK_NULL) {
2975                 return KERN_INVALID_ARGUMENT;
2976         }
2977
2978         task_lock(task);
2979
2980         if (!task->active) {
2981                 task_unlock(task);
2982
2983                 return KERN_FAILURE;
2984         }
2985
2986         task_wait_locked(task, until_not_runnable);
2987         task_unlock(task);
2988
2989         return KERN_SUCCESS;
2990 }
2991
2992 /*
2993  *      task_wait_locked:
2994  *
2995  *      Wait for all threads in task to stop.
2996  *
2997  * Conditions:
2998  *      Called with task locked, active, and held.
2999  */
3000 void
3001 task_wait_locked(
3002         task_t          task,
3003         boolean_t               until_not_runnable)
3004 {
3005         thread_t        thread, self;
3006
3007         assert(task->active);
3008         assert(task->suspend_count > 0);
3009
3010         self = current_thread();
3011
3012         /*
3013          *      Iterate through all the threads and wait for them to
3014          *      stop.  Do not wait for the current thread if it is within
3015          *      the task.
3016          */
3017         queue_iterate(&task->threads, thread, thread_t, task_threads) {
3018                 if (thread != self) {
3019                         thread_wait(thread, until_not_runnable);
3020                 }
3021         }
3022 }
3023
3024 boolean_t
3025 task_is_app_suspended(task_t task)
3026 {
3027         return task->pidsuspended;
3028 }
3029
3030 /*
3031  *      task_release_locked:
3032  *
3033  *      Release a kernel hold on a task.
3034  *
3035  *      CONDITIONS: the task is locked and active
3036  */
3037 void
3038 task_release_locked(
3039         task_t          task)
3040 {
3041         thread_t        thread;
3042
3043         assert(task->active);
3044         assert(task->suspend_count > 0);
3045
3046         if (--task->suspend_count > 0) {
3047                 return;
3048         }
3049
3050         if (task->bsd_info) {
3051                 workq_proc_resumed(task->bsd_info);
3052         }
3053
3054         queue_iterate(&task->threads, thread, thread_t, task_threads) {
3055                 thread_mtx_lock(thread);
3056                 thread_release(thread);
3057                 thread_mtx_unlock(thread);
3058         }
3059 }
3060
3061 /*
3062  *      task_release:
3063  *
3064  *      Same as the internal routine above, except that it must lock
3065  *      and verify that the task is active.
3066  *
3067  *      CONDITIONS: The caller holds a reference to the task
3068  */
3069 kern_return_t
3070 task_release(
3071         task_t          task)
3072 {
3073         if (task == TASK_NULL) {
3074                 return KERN_INVALID_ARGUMENT;
3075         }
3076
3077         task_lock(task);
3078
3079         if (!task->active) {
3080                 task_unlock(task);
3081
3082                 return KERN_FAILURE;
3083         }
3084
3085         task_release_locked(task);
3086         task_unlock(task);
3087
3088         return KERN_SUCCESS;
3089 }
3090
3091 kern_return_t
3092 task_threads(
3093         task_t                                  task,
3094         thread_act_array_t              *threads_out,
3095         mach_msg_type_number_t  *count)
3096 {
3097         mach_msg_type_number_t  actual;
3098         thread_t                                *thread_list;
3099         thread_t                                thread;
3100         vm_size_t                               size, size_needed;
3101         void                                    *addr;
3102         unsigned int                    i, j;
3103
3104         if (task == TASK_NULL) {
3105                 return KERN_INVALID_ARGUMENT;
3106         }
3107
3108         size = 0; addr = NULL;
3109
3110         for (;;) {
3111                 task_lock(task);
3112                 if (!task->active) {
3113                         task_unlock(task);
3114
3115                         if (size != 0) {
3116                                 kfree(addr, size);
3117                         }
3118
3119                         return KERN_FAILURE;
3120                 }
3121
3122                 actual = task->thread_count;
3123
3124                 /* do we have the memory we need? */
3125                 size_needed = actual * sizeof(mach_port_t);
3126                 if (size_needed <= size) {
3127                         break;
3128                 }
3129
3130                 /* unlock the task and allocate more memory */
3131                 task_unlock(task);
3132
3133                 if (size != 0) {
3134                         kfree(addr, size);
3135                 }
3136
3137                 assert(size_needed > 0);
3138                 size = size_needed;
3139
3140                 addr = kalloc(size);
3141                 if (addr == 0) {
3142                         return KERN_RESOURCE_SHORTAGE;
3143                 }
3144         }
3145
3146         /* OK, have memory and the task is locked & active */
3147         thread_list = (thread_t *)addr;
3148
3149         i = j = 0;
3150
3151         for (thread = (thread_t)queue_first(&task->threads); i < actual;
3152             ++i, thread = (thread_t)queue_next(&thread->task_threads)) {
3153                 thread_reference_internal(thread);
3154                 thread_list[j++] = thread;
3155         }
3156
3157         assert(queue_end(&task->threads, (queue_entry_t)thread));
3158
3159         actual = j;
3160         size_needed = actual * sizeof(mach_port_t);
3161
3162         /* can unlock task now that we've got the thread refs */
3163         task_unlock(task);
3164
3165         if (actual == 0) {
3166                 /* no threads, so return null pointer and deallocate memory */
3167
3168                 *threads_out = NULL;
3169                 *count = 0;
3170
3171                 if (size != 0) {
3172                         kfree(addr, size);
3173                 }
3174         } else {
3175                 /* if we allocated too much, must copy */
3176
3177                 if (size_needed < size) {
3178                         void *newaddr;
3179
3180                         newaddr = kalloc(size_needed);
3181                         if (newaddr == 0) {
3182                                 for (i = 0; i < actual; ++i) {
3183                                         thread_deallocate(thread_list[i]);
3184                                 }
3185                                 kfree(addr, size);
3186                                 return KERN_RESOURCE_SHORTAGE;
3187                         }
3188
3189                         bcopy(addr, newaddr, size_needed);
3190                         kfree(addr, size);
3191                         thread_list = (thread_t *)newaddr;
3192                 }
3193
3194                 *threads_out = thread_list;
3195                 *count = actual;
3196
3197                 /* do the conversion that Mig should handle */
3198
3199                 for (i = 0; i < actual; ++i) {
3200                         ((ipc_port_t *) thread_list)[i] = convert_thread_to_port(thread_list[i]);
3201                 }
3202         }
3203
3204         return KERN_SUCCESS;
3205 }
3206
/*
 * Hold modes understood by place_task_hold() and release_task_hold().
 */
#define TASK_HOLD_NORMAL     0  /* counted in user_stop_count only */
#define TASK_HOLD_PIDSUSPEND 1  /* paired with the task's pidsuspended flag */
#define TASK_HOLD_LEGACY     2  /* additionally counted in legacy_stop_count */
#define TASK_HOLD_LEGACY_ALL 3  /* release only: drop every outstanding legacy hold */
3211
3212 static kern_return_t
3213 place_task_hold(
3214         task_t task,
3215         int mode)
3216 {
3217         if (!task->active && !task_is_a_corpse(task)) {
3218                 return KERN_FAILURE;
3219         }
3220
3221         /* Return success for corpse task */
3222         if (task_is_a_corpse(task)) {
3223                 return KERN_SUCCESS;
3224         }
3225
3226         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
3227             MACHDBG_CODE(DBG_MACH_IPC, MACH_TASK_SUSPEND) | DBG_FUNC_NONE,
3228             task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
3229             task->user_stop_count, task->user_stop_count + 1, 0);
3230
3231 #if MACH_ASSERT
3232         current_task()->suspends_outstanding++;
3233 #endif
3234
3235         if (mode == TASK_HOLD_LEGACY) {
3236                 task->legacy_stop_count++;
3237         }
3238
3239         if (task->user_stop_count++ > 0) {
3240                 /*
3241                  *      If the stop count was positive, the task is
3242                  *      already stopped and we can exit.
3243                  */
3244                 return KERN_SUCCESS;
3245         }
3246
3247         /*
3248          * Put a kernel-level hold on the threads in the task (all
3249          * user-level task suspensions added together represent a
3250          * single kernel-level hold).  We then wait for the threads
3251          * to stop executing user code.
3252          */
3253         task_hold_locked(task);
3254         task_wait_locked(task, FALSE);
3255
3256         return KERN_SUCCESS;
3257 }
3258
3259 static kern_return_t
3260 release_task_hold(
3261         task_t          task,
3262         int                     mode)
3263 {
3264         boolean_t release = FALSE;
3265
3266         if (!task->active && !task_is_a_corpse(task)) {
3267                 return KERN_FAILURE;
3268         }
3269
3270         /* Return success for corpse task */
3271         if (task_is_a_corpse(task)) {
3272                 return KERN_SUCCESS;
3273         }
3274
3275         if (mode == TASK_HOLD_PIDSUSPEND) {
3276                 if (task->pidsuspended == FALSE) {
3277                         return KERN_FAILURE;
3278                 }
3279                 task->pidsuspended = FALSE;
3280         }
3281
3282         if (task->user_stop_count > (task->pidsuspended ? 1 : 0)) {
3283                 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
3284                     MACHDBG_CODE(DBG_MACH_IPC, MACH_TASK_RESUME) | DBG_FUNC_NONE,
3285                     task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
3286                     task->user_stop_count, mode, task->legacy_stop_count);
3287
3288 #if MACH_ASSERT
3289                 /*
3290                  * This is obviously not robust; if we suspend one task and then resume a different one,
3291                  * we'll fly under the radar. This is only meant to catch the common case of a crashed
3292                  * or buggy suspender.
3293                  */
3294                 current_task()->suspends_outstanding--;
3295 #endif
3296
3297                 if (mode == TASK_HOLD_LEGACY_ALL) {
3298                         if (task->legacy_stop_count >= task->user_stop_count) {
3299                                 task->user_stop_count = 0;
3300                                 release = TRUE;
3301                         } else {
3302                                 task->user_stop_count -= task->legacy_stop_count;
3303                         }
3304                         task->legacy_stop_count = 0;
3305                 } else {
3306                         if (mode == TASK_HOLD_LEGACY && task->legacy_stop_count > 0) {
3307                                 task->legacy_stop_count--;
3308                         }
3309                         if (--task->user_stop_count == 0) {
3310                                 release = TRUE;
3311                         }
3312                 }
3313         } else {
3314                 return KERN_FAILURE;
3315         }
3316
3317         /*
3318          *      Release the task if necessary.
3319          */
3320         if (release) {
3321                 task_release_locked(task);
3322         }
3323
3324         return KERN_SUCCESS;
3325 }
3326
3327 boolean_t
3328 get_task_suspended(task_t task)
3329 {
3330         return 0 != task->user_stop_count;
3331 }
3332
3333 /*
3334  *      task_suspend:
3335  *
3336  *      Implement an (old-fashioned) user-level suspension on a task.
3337  *
3338  *      Because the user isn't expecting to have to manage a suspension
3339  *      token, we'll track it for him in the kernel in the form of a naked
3340  *      send right to the task's resume port.  All such send rights
3341  *      account for a single suspension against the task (unlike task_suspend2()
3342  *      where each caller gets a unique suspension count represented by a
3343  *      unique send-once right).
3344  *
3345  * Conditions:
3346  *      The caller holds a reference to the task
3347  */
3348 kern_return_t
3349 task_suspend(
3350         task_t          task)
3351 {
3352         kern_return_t                   kr;
3353         mach_port_t                     port;
3354         mach_port_name_t                name;
3355
3356         if (task == TASK_NULL || task == kernel_task) {
3357                 return KERN_INVALID_ARGUMENT;
3358         }
3359
3360         task_lock(task);
3361
3362         /*
3363          * place a legacy hold on the task.
3364          */
3365         kr = place_task_hold(task, TASK_HOLD_LEGACY);
3366         if (kr != KERN_SUCCESS) {
3367                 task_unlock(task);
3368                 return kr;
3369         }
3370
3371         /*
3372          * Claim a send right on the task resume port, and request a no-senders
3373          * notification on that port (if none outstanding).
3374          */
3375         (void)ipc_kobject_make_send_lazy_alloc_port(&task->itk_resume,
3376             (ipc_kobject_t)task, IKOT_TASK_RESUME);
3377         port = task->itk_resume;
3378
3379         task_unlock(task);
3380
3381         /*
3382          * Copyout the send right into the calling task's IPC space.  It won't know it is there,
3383          * but we'll look it up when calling a traditional resume.  Any IPC operations that
3384          * deallocate the send right will auto-release the suspension.
3385          */
3386         if ((kr = ipc_kmsg_copyout_object(current_task()->itk_space, ip_to_object(port),
3387             MACH_MSG_TYPE_MOVE_SEND, NULL, NULL, &name)) != KERN_SUCCESS) {
3388                 printf("warning: %s(%d) failed to copyout suspension token for pid %d with error: %d\n",
3389                     proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
3390                     task_pid(task), kr);
3391                 return kr;
3392         }
3393
3394         return kr;
3395 }
3396
3397 /*
3398  *      task_resume:
3399  *              Release a user hold on a task.
3400  *
3401  * Conditions:
3402  *              The caller holds a reference to the task
3403  */
3404 kern_return_t
3405 task_resume(
3406         task_t  task)
3407 {
3408         kern_return_t    kr;
3409         mach_port_name_t resume_port_name;
3410         ipc_entry_t              resume_port_entry;
3411         ipc_space_t              space = current_task()->itk_space;
3412
3413         if (task == TASK_NULL || task == kernel_task) {
3414                 return KERN_INVALID_ARGUMENT;
3415         }
3416
3417         /* release a legacy task hold */
3418         task_lock(task);
3419         kr = release_task_hold(task, TASK_HOLD_LEGACY);
3420         task_unlock(task);
3421
3422         is_write_lock(space);
3423         if (is_active(space) && IP_VALID(task->itk_resume) &&
3424             ipc_hash_lookup(space, ip_to_object(task->itk_resume), &resume_port_name, &resume_port_entry) == TRUE) {
3425                 /*
3426                  * We found a suspension token in the caller's IPC space. Release a send right to indicate that
3427                  * we are holding one less legacy hold on the task from this caller.  If the release failed,
3428                  * go ahead and drop all the rights, as someone either already released our holds or the task
3429                  * is gone.
3430                  */
3431                 if (kr == KERN_SUCCESS) {
3432                         ipc_right_dealloc(space, resume_port_name, resume_port_entry);
3433                 } else {
3434                         ipc_right_destroy(space, resume_port_name, resume_port_entry, FALSE, 0);
3435                 }
3436                 /* space unlocked */
3437         } else {
3438                 is_write_unlock(space);
3439                 if (kr == KERN_SUCCESS) {
3440                         printf("warning: %s(%d) performed out-of-band resume on pid %d\n",
3441                             proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
3442                             task_pid(task));
3443                 }
3444         }
3445
3446         return kr;
3447 }
3448
3449 /*
3450  * Suspend the target task.
3451  * Making/holding a token/reference/port is the callers responsibility.
3452  */
3453 kern_return_t
3454 task_suspend_internal(task_t task)
3455 {
3456         kern_return_t    kr;
3457
3458         if (task == TASK_NULL || task == kernel_task) {
3459                 return KERN_INVALID_ARGUMENT;
3460         }
3461
3462         task_lock(task);
3463         kr = place_task_hold(task, TASK_HOLD_NORMAL);
3464         task_unlock(task);
3465         return kr;
3466 }
3467
3468 /*
3469  * Suspend the target task, and return a suspension token. The token
3470  * represents a reference on the suspended task.
3471  */
3472 kern_return_t
3473 task_suspend2(
3474         task_t                  task,
3475         task_suspension_token_t *suspend_token)
3476 {
3477         kern_return_t    kr;
3478
3479         kr = task_suspend_internal(task);
3480         if (kr != KERN_SUCCESS) {
3481                 *suspend_token = TASK_NULL;
3482                 return kr;
3483         }
3484
3485         /*
3486          * Take a reference on the target task and return that to the caller
3487          * as a "suspension token," which can be converted into an SO right to
3488          * the now-suspended task's resume port.
3489          */
3490         task_reference_internal(task);
3491         *suspend_token = task;
3492
3493         return KERN_SUCCESS;
3494 }
3495
3496 /*
3497  * Resume the task
3498  * (reference/token/port management is caller's responsibility).
3499  */
3500 kern_return_t
3501 task_resume_internal(
3502         task_suspension_token_t         task)
3503 {
3504         kern_return_t kr;
3505
3506         if (task == TASK_NULL || task == kernel_task) {
3507                 return KERN_INVALID_ARGUMENT;
3508         }
3509
3510         task_lock(task);
3511         kr = release_task_hold(task, TASK_HOLD_NORMAL);
3512         task_unlock(task);
3513         return kr;
3514 }
3515
3516 /*
3517  * Resume the task using a suspension token. Consumes the token's ref.
3518  */
3519 kern_return_t
3520 task_resume2(
3521         task_suspension_token_t         task)
3522 {
3523         kern_return_t kr;
3524
3525         kr = task_resume_internal(task);
3526         task_suspension_token_deallocate(task);
3527
3528         return kr;
3529 }
3530
3531 boolean_t
3532 task_suspension_notify(mach_msg_header_t *request_header)
3533 {
3534         ipc_port_t port = request_header->msgh_remote_port;
3535         task_t task = convert_port_to_task_suspension_token(port);
3536         mach_msg_type_number_t not_count;
3537
3538         if (task == TASK_NULL || task == kernel_task) {
3539                 return TRUE;  /* nothing to do */
3540         }
3541         switch (request_header->msgh_id) {
3542         case MACH_NOTIFY_SEND_ONCE:
3543                 /* release the hold held by this specific send-once right */
3544                 task_lock(task);
3545                 release_task_hold(task, TASK_HOLD_NORMAL);
3546                 task_unlock(task);
3547                 break;
3548
3549         case MACH_NOTIFY_NO_SENDERS:
3550                 not_count = ((mach_no_senders_notification_t *)request_header)->not_count;
3551
3552                 task_lock(task);
3553                 ip_lock(port);
3554                 if (port->ip_mscount == not_count) {
3555                         /* release all the [remaining] outstanding legacy holds */
3556                         assert(port->ip_nsrequest == IP_NULL);
3557                         ip_unlock(port);
3558                         release_task_hold(task, TASK_HOLD_LEGACY_ALL);
3559                         task_unlock(task);
3560                 } else if (port->ip_nsrequest == IP_NULL) {
3561                         ipc_port_t old_notify;
3562
3563                         task_unlock(task);
3564                         /* new send rights, re-arm notification at current make-send count */
3565                         ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
3566                         assert(old_notify == IP_NULL);
3567                         /* port unlocked */
3568                 } else {
3569                         ip_unlock(port);
3570                         task_unlock(task);
3571                 }
3572                 break;
3573
3574         default:
3575                 break;
3576         }
3577
3578         task_suspension_token_deallocate(task); /* drop token reference */
3579         return TRUE;
3580 }
3581
3582 static kern_return_t
3583 task_pidsuspend_locked(task_t task)
3584 {
3585         kern_return_t kr;
3586
3587         if (task->pidsuspended) {
3588                 kr = KERN_FAILURE;
3589                 goto out;
3590         }
3591
3592         task->pidsuspended = TRUE;
3593
3594         kr = place_task_hold(task, TASK_HOLD_PIDSUSPEND);
3595         if (kr != KERN_SUCCESS) {
3596                 task->pidsuspended = FALSE;
3597         }
3598 out:
3599         return kr;
3600 }
3601
3602
3603 /*
3604  *      task_pidsuspend:
3605  *
3606  *      Suspends a task by placing a hold on its threads.
3607  *
3608  * Conditions:
3609  *      The caller holds a reference to the task
3610  */
3611 kern_return_t
3612 task_pidsuspend(
3613         task_t          task)
3614 {
3615         kern_return_t    kr;
3616
3617         if (task == TASK_NULL || task == kernel_task) {
3618                 return KERN_INVALID_ARGUMENT;
3619         }
3620
3621         task_lock(task);
3622
3623         kr = task_pidsuspend_locked(task);
3624
3625         task_unlock(task);
3626
3627         if ((KERN_SUCCESS == kr) && task->message_app_suspended) {
3628                 iokit_task_app_suspended_changed(task);
3629         }
3630
3631         return kr;
3632 }
3633
3634 /*
3635  *      task_pidresume:
3636  *              Resumes a previously suspended task.
3637  *
3638  * Conditions:
3639  *              The caller holds a reference to the task
3640  */
3641 kern_return_t
3642 task_pidresume(
3643         task_t  task)
3644 {
3645         kern_return_t    kr;
3646
3647         if (task == TASK_NULL || task == kernel_task) {
3648                 return KERN_INVALID_ARGUMENT;
3649         }
3650
3651         task_lock(task);
3652
3653 #if CONFIG_FREEZE
3654
3655         while (task->changing_freeze_state) {
3656                 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
3657                 task_unlock(task);
3658                 thread_block(THREAD_CONTINUE_NULL);
3659
3660                 task_lock(task);
3661         }
3662         task->changing_freeze_state = TRUE;
3663 #endif
3664
3665         kr = release_task_hold(task, TASK_HOLD_PIDSUSPEND);
3666
3667         task_unlock(task);
3668
3669         if ((KERN_SUCCESS == kr) && task->message_app_suspended) {
3670                 iokit_task_app_suspended_changed(task);
3671         }
3672
3673 #if CONFIG_FREEZE
3674
3675         task_lock(task);
3676
3677         if (kr == KERN_SUCCESS) {
3678                 task->frozen = FALSE;
3679         }
3680         task->changing_freeze_state = FALSE;
3681         thread_wakeup(&task->changing_freeze_state);
3682
3683         task_unlock(task);
3684 #endif
3685
3686         return kr;
3687 }
3688
3689 os_refgrp_decl(static, task_watchports_refgrp, "task_watchports", NULL);
3690
3691 /*
3692  *      task_add_turnstile_watchports:
3693  *              Setup watchports to boost the main thread of the task.
3694  *
3695  *      Arguments:
3696  *              task: task being spawned
3697  *              thread: main thread of task
3698  *              portwatch_ports: array of watchports
3699  *              portwatch_count: number of watchports
3700  *
3701  *      Conditions:
3702  *              Nothing locked.
3703  */
3704 void
3705 task_add_turnstile_watchports(
3706         task_t          task,
3707         thread_t        thread,
3708         ipc_port_t      *portwatch_ports,
3709         uint32_t        portwatch_count)
3710 {
3711         struct task_watchports *watchports = NULL;
3712         struct task_watchport_elem *previous_elem_array[TASK_MAX_WATCHPORT_COUNT] = {};
3713         os_ref_count_t refs;
3714
3715         /* Check if the task has terminated */
3716         if (!task->active) {
3717                 return;
3718         }
3719
3720         assert(portwatch_count <= TASK_MAX_WATCHPORT_COUNT);
3721
3722         watchports = task_watchports_alloc_init(task, thread, portwatch_count);
3723
3724         /* Lock the ipc space */
3725         is_write_lock(task->itk_space);
3726
3727         /* Setup watchports to boost the main thread */
3728         refs = task_add_turnstile_watchports_locked(task,
3729             watchports, previous_elem_array, portwatch_ports,
3730             portwatch_count);
3731
3732         /* Drop the space lock */
3733         is_write_unlock(task->itk_space);
3734
3735         if (refs == 0) {
3736                 task_watchports_deallocate(watchports);
3737         }
3738
3739         /* Drop the ref on previous_elem_array */
3740         for (uint32_t i = 0; i < portwatch_count && previous_elem_array[i] != NULL; i++) {
3741                 task_watchport_elem_deallocate(previous_elem_array[i]);
3742         }
3743 }
3744
3745 /*
3746  *      task_remove_turnstile_watchports:
3747  *              Clear all turnstile boost on the task from watchports.
3748  *
3749  *      Arguments:
3750  *              task: task being terminated
3751  *
3752  *      Conditions:
3753  *              Nothing locked.
3754  */
3755 void
3756 task_remove_turnstile_watchports(
3757         task_t          task)
3758 {
3759         os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
3760         struct task_watchports *watchports = NULL;
3761         ipc_port_t port_freelist[TASK_MAX_WATCHPORT_COUNT] = {};
3762         uint32_t portwatch_count;
3763
3764         /* Lock the ipc space */
3765         is_write_lock(task->itk_space);
3766
3767         /* Check if watchport boost exist */
3768         if (task->watchports == NULL) {
3769                 is_write_unlock(task->itk_space);
3770                 return;
3771         }
3772         watchports = task->watchports;
3773         portwatch_count = watchports->tw_elem_array_count;
3774
3775         refs = task_remove_turnstile_watchports_locked(task, watchports,
3776             port_freelist);
3777
3778         is_write_unlock(task->itk_space);
3779
3780         /* Drop all the port references */
3781         for (uint32_t i = 0; i < portwatch_count && port_freelist[i] != NULL; i++) {
3782                 ip_release(port_freelist[i]);
3783         }
3784
3785         /* Clear the task and thread references for task_watchport */
3786         if (refs == 0) {
3787                 task_watchports_deallocate(watchports);
3788         }
3789 }
3790
3791 /*
3792  *      task_transfer_turnstile_watchports:
3793  *              Transfer all watchport turnstile boost from old task to new task.
3794  *
3795  *      Arguments:
3796  *              old_task: task calling exec
3797  *              new_task: new exec'ed task
3798  *              thread: main thread of new task
3799  *
3800  *      Conditions:
3801  *              Nothing locked.
3802  */
3803 void
3804 task_transfer_turnstile_watchports(
3805         task_t   old_task,
3806         task_t   new_task,
3807         thread_t new_thread)
3808 {
3809         struct task_watchports *old_watchports = NULL;
3810         struct task_watchports *new_watchports = NULL;
3811         os_ref_count_t old_refs = TASK_MAX_WATCHPORT_COUNT;
3812         os_ref_count_t new_refs = TASK_MAX_WATCHPORT_COUNT;
3813         uint32_t portwatch_count;
3814
3815         if (old_task->watchports == NULL || !new_task->active) {
3816                 return;
3817         }
3818
3819         /* Get the watch port count from the old task */
3820         is_write_lock(old_task->itk_space);
3821         if (old_task->watchports == NULL) {
3822                 is_write_unlock(old_task->itk_space);
3823                 return;
3824         }
3825
3826         portwatch_count = old_task->watchports->tw_elem_array_count;
3827         is_write_unlock(old_task->itk_space);
3828
3829         new_watchports = task_watchports_alloc_init(new_task, new_thread, portwatch_count);
3830
3831         /* Lock the ipc space for old task */
3832         is_write_lock(old_task->itk_space);
3833
3834         /* Lock the ipc space for new task */
3835         is_write_lock(new_task->itk_space);
3836
3837         /* Check if watchport boost exist */
3838         if (old_task->watchports == NULL || !new_task->active) {
3839                 is_write_unlock(new_task->itk_space);
3840                 is_write_unlock(old_task->itk_space);
3841                 (void)task_watchports_release(new_watchports);
3842                 task_watchports_deallocate(new_watchports);
3843                 return;
3844         }
3845
3846         old_watchports = old_task->watchports;
3847         assert(portwatch_count == old_task->watchports->tw_elem_array_count);
3848
3849         /* Setup new task watchports */
3850         new_task->watchports = new_watchports;
3851
3852         for (uint32_t i = 0; i < portwatch_count; i++) {
3853                 ipc_port_t port = old_watchports->tw_elem[i].twe_port;
3854
3855                 if (port == NULL) {
3856                         task_watchport_elem_clear(&new_watchports->tw_elem[i]);
3857                         continue;
3858                 }
3859
3860                 /* Lock the port and check if it has the entry */
3861                 ip_lock(port);
3862                 imq_lock(&port->ip_messages);
3863
3864                 task_watchport_elem_init(&new_watchports->tw_elem[i], new_task, port);
3865
3866                 if (ipc_port_replace_watchport_elem_conditional_locked(port,
3867                     &old_watchports->tw_elem[i], &new_watchports->tw_elem[i]) == KERN_SUCCESS) {
3868                         task_watchport_elem_clear(&old_watchports->tw_elem[i]);
3869
3870                         task_watchports_retain(new_watchports);
3871                         old_refs = task_watchports_release(old_watchports);
3872
3873                         /* Check if all ports are cleaned */
3874                         if (old_refs == 0) {
3875                                 old_task->watchports = NULL;
3876                         }
3877                 } else {
3878                         task_watchport_elem_clear(&new_watchports->tw_elem[i]);
3879                 }
3880                 /* mqueue and port unlocked by ipc_port_replace_watchport_elem_conditional_locked */
3881         }
3882
3883         /* Drop the reference on new task_watchports struct returned by task_watchports_alloc_init */
3884         new_refs = task_watchports_release(new_watchports);
3885         if (new_refs == 0) {
3886                 new_task->watchports = NULL;
3887         }
3888
3889         is_write_unlock(new_task->itk_space);
3890         is_write_unlock(old_task->itk_space);
3891
3892         /* Clear the task and thread references for old_watchport */
3893         if (old_refs == 0) {
3894                 task_watchports_deallocate(old_watchports);
3895         }
3896
3897         /* Clear the task and thread references for new_watchport */
3898         if (new_refs == 0) {
3899                 task_watchports_deallocate(new_watchports);
3900         }
3901 }
3902
3903 /*
3904  *      task_add_turnstile_watchports_locked:
3905  *              Setup watchports to boost the main thread of the task.
3906  *
3907  *      Arguments:
3908  *              task: task to boost
3909  *              watchports: watchport structure to be attached to the task
3910  *              previous_elem_array: an array of old watchport_elem to be returned to caller
3911  *              portwatch_ports: array of watchports
3912  *              portwatch_count: number of watchports
3913  *
3914  *      Conditions:
3915  *              ipc space of the task locked.
3916  *              returns array of old watchport_elem in previous_elem_array
3917  */
3918 static os_ref_count_t
3919 task_add_turnstile_watchports_locked(
3920         task_t                      task,
3921         struct task_watchports      *watchports,
3922         struct task_watchport_elem  **previous_elem_array,
3923         ipc_port_t                  *portwatch_ports,
3924         uint32_t                    portwatch_count)
3925 {
3926         os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
3927
3928         /* Check if the task is still active */
3929         if (!task->active) {
3930                 refs = task_watchports_release(watchports);
3931                 return refs;
3932         }
3933
3934         assert(task->watchports == NULL);
3935         task->watchports = watchports;
3936
3937         for (uint32_t i = 0, j = 0; i < portwatch_count; i++) {
3938                 ipc_port_t port = portwatch_ports[i];
3939
3940                 task_watchport_elem_init(&watchports->tw_elem[i], task, port);
3941                 if (port == NULL) {
3942                         task_watchport_elem_clear(&watchports->tw_elem[i]);
3943                         continue;
3944                 }
3945
3946                 ip_lock(port);
3947                 imq_lock(&port->ip_messages);
3948
3949                 /* Check if port is in valid state to be setup as watchport */
3950                 if (ipc_port_add_watchport_elem_locked(port, &watchports->tw_elem[i],
3951                     &previous_elem_array[j]) != KERN_SUCCESS) {
3952                         task_watchport_elem_clear(&watchports->tw_elem[i]);
3953                         continue;
3954                 }
3955                 /* port and mqueue unlocked on return */
3956
3957                 ip_reference(port);
3958                 task_watchports_retain(watchports);
3959                 if (previous_elem_array[j] != NULL) {
3960                         j++;
3961                 }
3962         }
3963
3964         /* Drop the reference on task_watchport struct returned by os_ref_init */
3965         refs = task_watchports_release(watchports);
3966         if (refs == 0) {
3967                 task->watchports = NULL;
3968         }
3969
3970         return refs;
3971 }
3972
3973 /*
3974  *      task_remove_turnstile_watchports_locked:
3975  *              Clear all turnstile boost on the task from watchports.
3976  *
3977  *      Arguments:
3978  *              task: task to remove watchports from
3979  *              watchports: watchports structure for the task
3980  *              port_freelist: array of ports returned with ref to caller
3981  *
3982  *
3983  *      Conditions:
3984  *              ipc space of the task locked.
3985  *              array of ports with refs are returned in port_freelist
3986  */
3987 static os_ref_count_t
3988 task_remove_turnstile_watchports_locked(
3989         task_t                 task,
3990         struct task_watchports *watchports,
3991         ipc_port_t             *port_freelist)
3992 {
3993         os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
3994
3995         for (uint32_t i = 0, j = 0; i < watchports->tw_elem_array_count; i++) {
3996                 ipc_port_t port = watchports->tw_elem[i].twe_port;
3997                 if (port == NULL) {
3998                         continue;
3999                 }
4000
4001                 /* Lock the port and check if it has the entry */
4002                 ip_lock(port);
4003                 imq_lock(&port->ip_messages);
4004                 if (ipc_port_clear_watchport_elem_internal_conditional_locked(port,
4005                     &watchports->tw_elem[i]) == KERN_SUCCESS) {
4006                         task_watchport_elem_clear(&watchports->tw_elem[i]);
4007                         port_freelist[j++] = port;
4008                         refs = task_watchports_release(watchports);
4009
4010                         /* Check if all ports are cleaned */
4011                         if (refs == 0) {
4012                                 task->watchports = NULL;
4013                                 break;
4014                         }
4015                 }
4016                 /* mqueue and port unlocked by ipc_port_clear_watchport_elem_internal_conditional_locked */
4017         }
4018         return refs;
4019 }
4020
4021 /*
4022  *      task_watchports_alloc_init:
4023  *              Allocate and initialize task watchport struct.
4024  *
4025  *      Conditions:
4026  *              Nothing locked.
4027  */
4028 static struct task_watchports *
4029 task_watchports_alloc_init(
4030         task_t        task,
4031         thread_t      thread,
4032         uint32_t      count)
4033 {
4034         struct task_watchports *watchports = kalloc(sizeof(struct task_watchports) +
4035             count * sizeof(struct task_watchport_elem));
4036
4037         task_reference(task);
4038         thread_reference(thread);
4039         watchports->tw_task = task;
4040         watchports->tw_thread = thread;
4041         watchports->tw_elem_array_count = count;
4042         os_ref_init(&watchports->tw_refcount, &task_watchports_refgrp);
4043
4044         return watchports;
4045 }
4046
4047 /*
4048  *      task_watchports_deallocate:
4049  *              Deallocate task watchport struct.
4050  *
4051  *      Conditions:
4052  *              Nothing locked.
4053  */
4054 static void
4055 task_watchports_deallocate(
4056         struct task_watchports *watchports)
4057 {
4058         uint32_t portwatch_count = watchports->tw_elem_array_count;
4059
4060         task_deallocate(watchports->tw_task);
4061         thread_deallocate(watchports->tw_thread);
4062         kfree(watchports, sizeof(struct task_watchports) + portwatch_count * sizeof(struct task_watchport_elem));
4063 }
4064
4065 /*
4066  *      task_watchport_elem_deallocate:
4067  *              Deallocate task watchport element and release its ref on task_watchport.
4068  *
4069  *      Conditions:
4070  *              Nothing locked.
4071  */
4072 void
4073 task_watchport_elem_deallocate(
4074         struct task_watchport_elem *watchport_elem)
4075 {
4076         os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
4077         task_t task = watchport_elem->twe_task;
4078         struct task_watchports *watchports = NULL;
4079         ipc_port_t port = NULL;
4080
4081         assert(task != NULL);
4082
4083         /* Take the space lock to modify the elememt */
4084         is_write_lock(task->itk_space);
4085
4086         watchports = task->watchports;
4087         assert(watchports != NULL);
4088
4089         port = watchport_elem->twe_port;
4090         assert(port != NULL);
4091
4092         task_watchport_elem_clear(watchport_elem);
4093         refs = task_watchports_release(watchports);
4094
4095         if (refs == 0) {
4096                 task->watchports = NULL;
4097         }
4098
4099         is_write_unlock(task->itk_space);
4100
4101         ip_release(port);
4102         if (refs == 0) {
4103                 task_watchports_deallocate(watchports);
4104         }
4105 }
4106
4107 /*
4108  *      task_has_watchports:
4109  *              Return TRUE if task has watchport boosts.
4110  *
4111  *      Conditions:
4112  *              Nothing locked.
4113  */
4114 boolean_t
4115 task_has_watchports(task_t task)
4116 {
4117         return task->watchports != NULL;
4118 }
4119
4120 #if DEVELOPMENT || DEBUG
4121
4122 extern void IOSleep(int);
4123
4124 kern_return_t
4125 task_disconnect_page_mappings(task_t task)
4126 {
4127         int     n;
4128
4129         if (task == TASK_NULL || task == kernel_task) {
4130                 return KERN_INVALID_ARGUMENT;
4131         }
4132
4133         /*
4134          * this function is used to strip all of the mappings from
4135          * the pmap for the specified task to force the task to
4136          * re-fault all of the pages it is actively using... this
4137          * allows us to approximate the true working set of the
4138          * specified task.  We only engage if at least 1 of the
4139          * threads in the task is runnable, but we want to continuously
4140          * sweep (at least for a while - I've arbitrarily set the limit at
4141          * 100 sweeps to be re-looked at as we gain experience) to get a better
4142          * view into what areas within a page are being visited (as opposed to only
4143          * seeing the first fault of a page after the task becomes
4144          * runnable)...  in the future I may
4145          * try to block until awakened by a thread in this task
4146          * being made runnable, but for now we'll periodically poll from the
4147          * user level debug tool driving the sysctl
4148          */
4149         for (n = 0; n < 100; n++) {
4150                 thread_t        thread;
4151                 boolean_t       runnable;
4152                 boolean_t       do_unnest;
4153                 int             page_count;
4154
4155                 runnable = FALSE;
4156                 do_unnest = FALSE;
4157
4158                 task_lock(task);
4159
4160                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4161                         if (thread->state & TH_RUN) {
4162                                 runnable = TRUE;
4163                                 break;
4164                         }
4165                 }
4166                 if (n == 0) {
4167                         task->task_disconnected_count++;
4168                 }
4169
4170                 if (task->task_unnested == FALSE) {
4171                         if (runnable == TRUE) {
4172                                 task->task_unnested = TRUE;
4173                                 do_unnest = TRUE;
4174                         }
4175                 }
4176                 task_unlock(task);
4177
4178                 if (runnable == FALSE) {
4179                         break;
4180                 }
4181
4182                 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_START,
4183                     task, do_unnest, task->task_disconnected_count, 0, 0);
4184
4185                 page_count = vm_map_disconnect_page_mappings(task->map, do_unnest);
4186
4187                 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_END,
4188                     task, page_count, 0, 0, 0);
4189
4190                 if ((n % 5) == 4) {
4191                         IOSleep(1);
4192                 }
4193         }
4194         return KERN_SUCCESS;
4195 }
4196
4197 #endif
4198
4199
4200 #if CONFIG_FREEZE
4201
4202 /*
4203  *      task_freeze:
4204  *
4205  *      Freeze a task.
4206  *
4207  * Conditions:
4208  *      The caller holds a reference to the task
4209  */
4210 extern void             vm_wake_compactor_swapper(void);
4211 extern queue_head_t     c_swapout_list_head;
4212
4213 kern_return_t
4214 task_freeze(
4215         task_t    task,
4216         uint32_t           *purgeable_count,
4217         uint32_t           *wired_count,
4218         uint32_t           *clean_count,
4219         uint32_t           *dirty_count,
4220         uint32_t           dirty_budget,
4221         uint32_t           *shared_count,
4222         int                *freezer_error_code,
4223         boolean_t          eval_only)
4224 {
4225         kern_return_t kr = KERN_SUCCESS;
4226
4227         if (task == TASK_NULL || task == kernel_task) {
4228                 return KERN_INVALID_ARGUMENT;
4229         }
4230
4231         task_lock(task);
4232
4233         while (task->changing_freeze_state) {
4234                 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
4235                 task_unlock(task);
4236                 thread_block(THREAD_CONTINUE_NULL);
4237
4238                 task_lock(task);
4239         }
4240         if (task->frozen) {
4241                 task_unlock(task);
4242                 return KERN_FAILURE;
4243         }
4244         task->changing_freeze_state = TRUE;
4245
4246         task_unlock(task);
4247
4248         kr = vm_map_freeze(task,
4249             purgeable_count,
4250             wired_count,
4251             clean_count,
4252             dirty_count,
4253             dirty_budget,
4254             shared_count,
4255             freezer_error_code,
4256             eval_only);
4257
4258         task_lock(task);
4259
4260         if ((kr == KERN_SUCCESS) && (eval_only == FALSE)) {
4261                 task->frozen = TRUE;
4262         }
4263
4264         task->changing_freeze_state = FALSE;
4265         thread_wakeup(&task->changing_freeze_state);
4266
4267         task_unlock(task);
4268
4269         if (VM_CONFIG_COMPRESSOR_IS_PRESENT &&
4270             (eval_only == FALSE)) {
4271                 vm_wake_compactor_swapper();
4272                 /*
4273                  * We do an explicit wakeup of the swapout thread here
4274                  * because the compact_and_swap routines don't have
4275                  * knowledge about these kind of "per-task packed c_segs"
4276                  * and so will not be evaluating whether we need to do
4277                  * a wakeup there.
4278                  */
4279                 thread_wakeup((event_t)&c_swapout_list_head);
4280         }
4281
4282         return kr;
4283 }
4284
4285 /*
4286  *      task_thaw:
4287  *
4288  *      Thaw a currently frozen task.
4289  *
4290  * Conditions:
4291  *      The caller holds a reference to the task
4292  */
4293 kern_return_t
4294 task_thaw(
4295         task_t          task)
4296 {
4297         if (task == TASK_NULL || task == kernel_task) {
4298                 return KERN_INVALID_ARGUMENT;
4299         }
4300
4301         task_lock(task);
4302
4303         while (task->changing_freeze_state) {
4304                 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
4305                 task_unlock(task);
4306                 thread_block(THREAD_CONTINUE_NULL);
4307
4308                 task_lock(task);
4309         }
4310         if (!task->frozen) {
4311                 task_unlock(task);
4312                 return KERN_FAILURE;
4313         }
4314         task->frozen = FALSE;
4315
4316         task_unlock(task);
4317
4318         return KERN_SUCCESS;
4319 }
4320
4321 #endif /* CONFIG_FREEZE */
4322
4323 kern_return_t
4324 host_security_set_task_token(
4325         host_security_t  host_security,
4326         task_t           task,
4327         security_token_t sec_token,
4328         audit_token_t    audit_token,
4329         host_priv_t      host_priv)
4330 {
4331         ipc_port_t       host_port;
4332         kern_return_t    kr;
4333
4334         if (task == TASK_NULL) {
4335                 return KERN_INVALID_ARGUMENT;
4336         }
4337
4338         if (host_security == HOST_NULL) {
4339                 return KERN_INVALID_SECURITY;
4340         }
4341
4342         task_lock(task);
4343         task->sec_token = sec_token;
4344         task->audit_token = audit_token;
4345         task_unlock(task);
4346
4347         if (host_priv != HOST_PRIV_NULL) {
4348                 kr = host_get_host_priv_port(host_priv, &host_port);
4349         } else {
4350                 kr = host_get_host_port(host_priv_self(), &host_port);
4351         }
4352         assert(kr == KERN_SUCCESS);
4353
4354         kr = task_set_special_port_internal(task, TASK_HOST_PORT, host_port);
4355         return kr;
4356 }
4357
4358 kern_return_t
4359 task_send_trace_memory(
4360         __unused task_t   target_task,
4361         __unused uint32_t pid,
4362         __unused uint64_t uniqueid)
4363 {
4364         return KERN_INVALID_ARGUMENT;
4365 }
4366
4367 /*
4368  * This routine was added, pretty much exclusively, for registering the
4369  * RPC glue vector for in-kernel short circuited tasks.  Rather than
4370  * removing it completely, I have only disabled that feature (which was
4371  * the only feature at the time).  It just appears that we are going to
4372  * want to add some user data to tasks in the future (i.e. bsd info,
4373  * task names, etc...), so I left it in the formal task interface.
4374  */
4375 kern_return_t
4376 task_set_info(
4377         task_t          task,
4378         task_flavor_t   flavor,
4379         __unused task_info_t    task_info_in,           /* pointer to IN array */
4380         __unused mach_msg_type_number_t task_info_count)
4381 {
4382         if (task == TASK_NULL) {
4383                 return KERN_INVALID_ARGUMENT;
4384         }
4385
4386         switch (flavor) {
4387 #if CONFIG_ATM
4388         case TASK_TRACE_MEMORY_INFO:
4389         {
4390                 if (task_info_count != TASK_TRACE_MEMORY_INFO_COUNT) {
4391                         return KERN_INVALID_ARGUMENT;
4392                 }
4393
4394                 assert(task_info_in != NULL);
4395                 task_trace_memory_info_t mem_info;
4396                 mem_info = (task_trace_memory_info_t) task_info_in;
4397                 kern_return_t kr = atm_register_trace_memory(task,
4398                     mem_info->user_memory_address,
4399                     mem_info->buffer_size);
4400                 return kr;
4401         }
4402
4403 #endif
4404         default:
4405                 return KERN_INVALID_ARGUMENT;
4406         }
4407         return KERN_SUCCESS;
4408 }
4409
4410 int radar_20146450 = 1;
4411 kern_return_t
4412 task_info(
4413         task_t                  task,
4414         task_flavor_t           flavor,
4415         task_info_t             task_info_out,
4416         mach_msg_type_number_t  *task_info_count)
4417 {
4418         kern_return_t error = KERN_SUCCESS;
4419         mach_msg_type_number_t  original_task_info_count;
4420
4421         if (task == TASK_NULL) {
4422                 return KERN_INVALID_ARGUMENT;
4423         }
4424
4425         original_task_info_count = *task_info_count;
4426         task_lock(task);
4427
4428         if ((task != current_task()) && (!task->active)) {
4429                 task_unlock(task);
4430                 return KERN_INVALID_ARGUMENT;
4431         }
4432
4433         switch (flavor) {
4434         case TASK_BASIC_INFO_32:
4435         case TASK_BASIC2_INFO_32:
4436 #if defined(__arm__) || defined(__arm64__)
4437         case TASK_BASIC_INFO_64:
4438 #endif
4439                 {
4440                         task_basic_info_32_t    basic_info;
4441                         vm_map_t                                map;
4442                         clock_sec_t                             secs;
4443                         clock_usec_t                    usecs;
4444
4445                         if (*task_info_count < TASK_BASIC_INFO_32_COUNT) {
4446                                 error = KERN_INVALID_ARGUMENT;
4447                                 break;
4448                         }
4449
4450                         basic_info = (task_basic_info_32_t)task_info_out;
4451
4452                         map = (task == kernel_task)? kernel_map: task->map;
4453                         basic_info->virtual_size = (typeof(basic_info->virtual_size))map->size;
4454                         if (flavor == TASK_BASIC2_INFO_32) {
4455                                 /*
4456                                  * The "BASIC2" flavor gets the maximum resident
4457                                  * size instead of the current resident size...
4458                                  */
4459                                 basic_info->resident_size = pmap_resident_max(map->pmap);
4460                         } else {
4461                                 basic_info->resident_size = pmap_resident_count(map->pmap);
4462                         }
4463                         basic_info->resident_size *= PAGE_SIZE;
4464
4465                         basic_info->policy = ((task != kernel_task)?
4466                             POLICY_TIMESHARE: POLICY_RR);
4467                         basic_info->suspend_count = task->user_stop_count;
4468
4469                         absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
4470                         basic_info->user_time.seconds =
4471                             (typeof(basic_info->user_time.seconds))secs;
4472                         basic_info->user_time.microseconds = usecs;
4473
4474                         absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
4475                         basic_info->system_time.seconds =
4476                             (typeof(basic_info->system_time.seconds))secs;
4477                         basic_info->system_time.microseconds = usecs;
4478
4479                         *task_info_count = TASK_BASIC_INFO_32_COUNT;
4480                         break;
4481                 }
4482
4483 #if defined(__arm__) || defined(__arm64__)
4484         case TASK_BASIC_INFO_64_2:
4485         {
4486                 task_basic_info_64_2_t  basic_info;
4487                 vm_map_t                                map;
4488                 clock_sec_t                             secs;
4489                 clock_usec_t                    usecs;
4490
4491                 if (*task_info_count < TASK_BASIC_INFO_64_2_COUNT) {
4492                         error = KERN_INVALID_ARGUMENT;
4493                         break;
4494                 }
4495
4496                 basic_info = (task_basic_info_64_2_t)task_info_out;
4497
4498                 map = (task == kernel_task)? kernel_map: task->map;
4499                 basic_info->virtual_size  = map->size;
4500                 basic_info->resident_size =
4501                     (mach_vm_size_t)(pmap_resident_count(map->pmap))
4502                     * PAGE_SIZE_64;
4503
4504                 basic_info->policy = ((task != kernel_task)?
4505                     POLICY_TIMESHARE: POLICY_RR);
4506                 basic_info->suspend_count = task->user_stop_count;
4507
4508                 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
4509                 basic_info->user_time.seconds =
4510                     (typeof(basic_info->user_time.seconds))secs;
4511                 basic_info->user_time.microseconds = usecs;
4512
4513                 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
4514                 basic_info->system_time.seconds =
4515                     (typeof(basic_info->system_time.seconds))secs;
4516                 basic_info->system_time.microseconds = usecs;
4517
4518                 *task_info_count = TASK_BASIC_INFO_64_2_COUNT;
4519                 break;
4520         }
4521
4522 #else /* defined(__arm__) || defined(__arm64__) */
4523         case TASK_BASIC_INFO_64:
4524         {
4525                 task_basic_info_64_t    basic_info;
4526                 vm_map_t                                map;
4527                 clock_sec_t                             secs;
4528                 clock_usec_t                    usecs;
4529
4530                 if (*task_info_count < TASK_BASIC_INFO_64_COUNT) {
4531                         error = KERN_INVALID_ARGUMENT;
4532                         break;
4533                 }
4534
4535                 basic_info = (task_basic_info_64_t)task_info_out;
4536
4537                 map = (task == kernel_task)? kernel_map: task->map;
4538                 basic_info->virtual_size  = map->size;
4539                 basic_info->resident_size =
4540                     (mach_vm_size_t)(pmap_resident_count(map->pmap))
4541                     * PAGE_SIZE_64;
4542
4543                 basic_info->policy = ((task != kernel_task)?
4544                     POLICY_TIMESHARE: POLICY_RR);
4545                 basic_info->suspend_count = task->user_stop_count;
4546
4547                 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
4548                 basic_info->user_time.seconds =
4549                     (typeof(basic_info->user_time.seconds))secs;
4550                 basic_info->user_time.microseconds = usecs;
4551
4552                 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
4553                 basic_info->system_time.seconds =
4554                     (typeof(basic_info->system_time.seconds))secs;
4555                 basic_info->system_time.microseconds = usecs;
4556
4557                 *task_info_count = TASK_BASIC_INFO_64_COUNT;
4558                 break;
4559         }
4560 #endif /* defined(__arm__) || defined(__arm64__) */
4561
4562         case MACH_TASK_BASIC_INFO:
4563         {
4564                 mach_task_basic_info_t  basic_info;
4565                 vm_map_t                map;
4566                 clock_sec_t             secs;
4567                 clock_usec_t            usecs;
4568
4569                 if (*task_info_count < MACH_TASK_BASIC_INFO_COUNT) {
4570                         error = KERN_INVALID_ARGUMENT;
4571                         break;
4572                 }
4573
4574                 basic_info = (mach_task_basic_info_t)task_info_out;
4575
4576                 map = (task == kernel_task) ? kernel_map : task->map;
4577
4578                 basic_info->virtual_size  = map->size;
4579
4580                 basic_info->resident_size =
4581                     (mach_vm_size_t)(pmap_resident_count(map->pmap));
4582                 basic_info->resident_size *= PAGE_SIZE_64;
4583
4584                 basic_info->resident_size_max =
4585                     (mach_vm_size_t)(pmap_resident_max(map->pmap));
4586                 basic_info->resident_size_max *= PAGE_SIZE_64;
4587
4588                 basic_info->policy = ((task != kernel_task) ?
4589                     POLICY_TIMESHARE : POLICY_RR);
4590
4591                 basic_info->suspend_count = task->user_stop_count;
4592
4593                 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
4594                 basic_info->user_time.seconds =
4595                     (typeof(basic_info->user_time.seconds))secs;
4596                 basic_info->user_time.microseconds = usecs;
4597
4598                 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
4599                 basic_info->system_time.seconds =
4600                     (typeof(basic_info->system_time.seconds))secs;
4601                 basic_info->system_time.microseconds = usecs;
4602
4603                 *task_info_count = MACH_TASK_BASIC_INFO_COUNT;
4604                 break;
4605         }
4606
4607         case TASK_THREAD_TIMES_INFO:
4608         {
4609                 task_thread_times_info_t        times_info;
4610                 thread_t                                        thread;
4611
4612                 if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) {
4613                         error = KERN_INVALID_ARGUMENT;
4614                         break;
4615                 }
4616
4617                 times_info = (task_thread_times_info_t) task_info_out;
4618                 times_info->user_time.seconds = 0;
4619                 times_info->user_time.microseconds = 0;
4620                 times_info->system_time.seconds = 0;
4621                 times_info->system_time.microseconds = 0;
4622
4623
4624                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4625                         time_value_t    user_time, system_time;
4626
4627                         if (thread->options & TH_OPT_IDLE_THREAD) {
4628                                 continue;
4629                         }
4630
4631                         thread_read_times(thread, &user_time, &system_time, NULL);
4632
4633                         time_value_add(&times_info->user_time, &user_time);
4634                         time_value_add(&times_info->system_time, &system_time);
4635                 }
4636
4637                 *task_info_count = TASK_THREAD_TIMES_INFO_COUNT;
4638                 break;
4639         }
4640
4641         case TASK_ABSOLUTETIME_INFO:
4642         {
4643                 task_absolutetime_info_t        info;
4644                 thread_t                        thread;
4645
4646                 if (*task_info_count < TASK_ABSOLUTETIME_INFO_COUNT) {
4647                         error = KERN_INVALID_ARGUMENT;
4648                         break;
4649                 }
4650
4651                 info = (task_absolutetime_info_t)task_info_out;
4652                 info->threads_user = info->threads_system = 0;
4653
4654
4655                 info->total_user = task->total_user_time;
4656                 info->total_system = task->total_system_time;
4657
4658                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4659                         uint64_t        tval;
4660                         spl_t           x;
4661
4662                         if (thread->options & TH_OPT_IDLE_THREAD) {
4663                                 continue;
4664                         }
4665
4666                         x = splsched();
4667                         thread_lock(thread);
4668
4669                         tval = timer_grab(&thread->user_timer);
4670                         info->threads_user += tval;
4671                         info->total_user += tval;
4672
4673                         tval = timer_grab(&thread->system_timer);
4674                         if (thread->precise_user_kernel_time) {
4675                                 info->threads_system += tval;
4676                                 info->total_system += tval;
4677                         } else {
4678                                 /* system_timer may represent either sys or user */
4679                                 info->threads_user += tval;
4680                                 info->total_user += tval;
4681                         }
4682
4683                         thread_unlock(thread);
4684                         splx(x);
4685                 }
4686
4687
4688                 *task_info_count = TASK_ABSOLUTETIME_INFO_COUNT;
4689                 break;
4690         }
4691
4692         case TASK_DYLD_INFO:
4693         {
4694                 task_dyld_info_t info;
4695
4696                 /*
4697                  * We added the format field to TASK_DYLD_INFO output.  For
4698                  * temporary backward compatibility, accept the fact that
4699                  * clients may ask for the old version - distinguished by the
4700                  * size of the expected result structure.
4701                  */
4702 #define TASK_LEGACY_DYLD_INFO_COUNT \
4703                 offsetof(struct task_dyld_info, all_image_info_format)/sizeof(natural_t)
4704
4705                 if (*task_info_count < TASK_LEGACY_DYLD_INFO_COUNT) {
4706                         error = KERN_INVALID_ARGUMENT;
4707                         break;
4708                 }
4709
4710                 info = (task_dyld_info_t)task_info_out;
4711                 info->all_image_info_addr = task->all_image_info_addr;
4712                 info->all_image_info_size = task->all_image_info_size;
4713
4714                 /* only set format on output for those expecting it */
4715                 if (*task_info_count >= TASK_DYLD_INFO_COUNT) {
4716                         info->all_image_info_format = task_has_64Bit_addr(task) ?
4717                             TASK_DYLD_ALL_IMAGE_INFO_64 :
4718                             TASK_DYLD_ALL_IMAGE_INFO_32;
4719                         *task_info_count = TASK_DYLD_INFO_COUNT;
4720                 } else {
4721                         *task_info_count = TASK_LEGACY_DYLD_INFO_COUNT;
4722                 }
4723                 break;
4724         }
4725
4726         case TASK_EXTMOD_INFO:
4727         {
4728                 task_extmod_info_t info;
4729                 void *p;
4730
4731                 if (*task_info_count < TASK_EXTMOD_INFO_COUNT) {
4732                         error = KERN_INVALID_ARGUMENT;
4733                         break;
4734                 }
4735
4736                 info = (task_extmod_info_t)task_info_out;
4737
4738                 p = get_bsdtask_info(task);
4739                 if (p) {
4740                         proc_getexecutableuuid(p, info->task_uuid, sizeof(info->task_uuid));
4741                 } else {
4742                         bzero(info->task_uuid, sizeof(info->task_uuid));
4743                 }
4744                 info->extmod_statistics = task->extmod_statistics;
4745                 *task_info_count = TASK_EXTMOD_INFO_COUNT;
4746
4747                 break;
4748         }
4749
4750         case TASK_KERNELMEMORY_INFO:
4751         {
4752                 task_kernelmemory_info_t        tkm_info;
4753                 ledger_amount_t                 credit, debit;
4754
4755                 if (*task_info_count < TASK_KERNELMEMORY_INFO_COUNT) {
4756                         error = KERN_INVALID_ARGUMENT;
4757                         break;
4758                 }
4759
4760                 tkm_info = (task_kernelmemory_info_t) task_info_out;
4761                 tkm_info->total_palloc = 0;
4762                 tkm_info->total_pfree = 0;
4763                 tkm_info->total_salloc = 0;
4764                 tkm_info->total_sfree = 0;
4765
4766                 if (task == kernel_task) {
4767                         /*
4768                          * All shared allocs/frees from other tasks count against
4769                          * the kernel private memory usage.  If we are looking up
4770                          * info for the kernel task, gather from everywhere.
4771                          */
4772                         task_unlock(task);
4773
4774                         /* start by accounting for all the terminated tasks against the kernel */
4775                         tkm_info->total_palloc = tasks_tkm_private.alloc + tasks_tkm_shared.alloc;
4776                         tkm_info->total_pfree = tasks_tkm_private.free + tasks_tkm_shared.free;
4777
4778                         /* count all other task/thread shared alloc/free against the kernel */
4779                         lck_mtx_lock(&tasks_threads_lock);
4780
4781                         /* XXX this really shouldn't be using the function parameter 'task' as a local var! */
4782                         queue_iterate(&tasks, task, task_t, tasks) {
4783                                 if (task == kernel_task) {
4784                                         if (ledger_get_entries(task->ledger,
4785                                             task_ledgers.tkm_private, &credit,
4786                                             &debit) == KERN_SUCCESS) {
4787                                                 tkm_info->total_palloc += credit;
4788                                                 tkm_info->total_pfree += debit;
4789                                         }
4790                                 }
4791                                 if (!ledger_get_entries(task->ledger,
4792                                     task_ledgers.tkm_shared, &credit, &debit)) {
4793                                         tkm_info->total_palloc += credit;
4794                                         tkm_info->total_pfree += debit;
4795                                 }
4796                         }
4797                         lck_mtx_unlock(&tasks_threads_lock);
4798                 } else {
4799                         if (!ledger_get_entries(task->ledger,
4800                             task_ledgers.tkm_private, &credit, &debit)) {
4801                                 tkm_info->total_palloc = credit;
4802                                 tkm_info->total_pfree = debit;
4803                         }
4804                         if (!ledger_get_entries(task->ledger,
4805                             task_ledgers.tkm_shared, &credit, &debit)) {
4806                                 tkm_info->total_salloc = credit;
4807                                 tkm_info->total_sfree = debit;
4808                         }
4809                         task_unlock(task);
4810                 }
4811
4812                 *task_info_count = TASK_KERNELMEMORY_INFO_COUNT;
4813                 return KERN_SUCCESS;
4814         }
4815
4816         /* OBSOLETE */
4817         case TASK_SCHED_FIFO_INFO:
4818         {
4819                 if (*task_info_count < POLICY_FIFO_BASE_COUNT) {
4820                         error = KERN_INVALID_ARGUMENT;
4821                         break;
4822                 }
4823
4824                 error = KERN_INVALID_POLICY;
4825                 break;
4826         }
4827
4828         /* OBSOLETE */
4829         case TASK_SCHED_RR_INFO:
4830         {
4831                 policy_rr_base_t        rr_base;
4832                 uint32_t quantum_time;
4833                 uint64_t quantum_ns;
4834
4835                 if (*task_info_count < POLICY_RR_BASE_COUNT) {
4836                         error = KERN_INVALID_ARGUMENT;
4837                         break;
4838                 }
4839
4840                 rr_base = (policy_rr_base_t) task_info_out;
4841
4842                 if (task != kernel_task) {
4843                         error = KERN_INVALID_POLICY;
4844                         break;
4845                 }
4846
4847                 rr_base->base_priority = task->priority;
4848
4849                 quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
4850                 absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
4851
4852                 rr_base->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
4853
4854                 *task_info_count = POLICY_RR_BASE_COUNT;
4855                 break;
4856         }
4857
4858         /* OBSOLETE */
4859         case TASK_SCHED_TIMESHARE_INFO:
4860         {
4861                 policy_timeshare_base_t ts_base;
4862
4863                 if (*task_info_count < POLICY_TIMESHARE_BASE_COUNT) {
4864                         error = KERN_INVALID_ARGUMENT;
4865                         break;
4866                 }
4867
4868                 ts_base = (policy_timeshare_base_t) task_info_out;
4869
4870                 if (task == kernel_task) {
4871                         error = KERN_INVALID_POLICY;
4872                         break;
4873                 }
4874
4875                 ts_base->base_priority = task->priority;
4876
4877                 *task_info_count = POLICY_TIMESHARE_BASE_COUNT;
4878                 break;
4879         }
4880
4881         case TASK_SECURITY_TOKEN:
4882         {
4883                 security_token_t        *sec_token_p;
4884
4885                 if (*task_info_count < TASK_SECURITY_TOKEN_COUNT) {
4886                         error = KERN_INVALID_ARGUMENT;
4887                         break;
4888                 }
4889
4890                 sec_token_p = (security_token_t *) task_info_out;
4891
4892                 *sec_token_p = task->sec_token;
4893
4894                 *task_info_count = TASK_SECURITY_TOKEN_COUNT;
4895                 break;
4896         }
4897
4898         case TASK_AUDIT_TOKEN:
4899         {
4900                 audit_token_t   *audit_token_p;
4901
4902                 if (*task_info_count < TASK_AUDIT_TOKEN_COUNT) {
4903                         error = KERN_INVALID_ARGUMENT;
4904                         break;
4905                 }
4906
4907                 audit_token_p = (audit_token_t *) task_info_out;
4908
4909                 *audit_token_p = task->audit_token;
4910
4911                 *task_info_count = TASK_AUDIT_TOKEN_COUNT;
4912                 break;
4913         }
4914
4915         case TASK_SCHED_INFO:
4916                 error = KERN_INVALID_ARGUMENT;
4917                 break;
4918
4919         case TASK_EVENTS_INFO:
4920         {
4921                 task_events_info_t      events_info;
4922                 thread_t                        thread;
4923
4924                 if (*task_info_count < TASK_EVENTS_INFO_COUNT) {
4925                         error = KERN_INVALID_ARGUMENT;
4926                         break;
4927                 }
4928
4929                 events_info = (task_events_info_t) task_info_out;
4930
4931
4932                 events_info->faults = task->faults;
4933                 events_info->pageins = task->pageins;
4934                 events_info->cow_faults = task->cow_faults;
4935                 events_info->messages_sent = task->messages_sent;
4936                 events_info->messages_received = task->messages_received;
4937                 events_info->syscalls_mach = task->syscalls_mach;
4938                 events_info->syscalls_unix = task->syscalls_unix;
4939
4940                 events_info->csw = task->c_switch;
4941
4942                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4943                         events_info->csw           += thread->c_switch;
4944                         events_info->syscalls_mach += thread->syscalls_mach;
4945                         events_info->syscalls_unix += thread->syscalls_unix;
4946                 }
4947
4948
4949                 *task_info_count = TASK_EVENTS_INFO_COUNT;
4950                 break;
4951         }
4952         case TASK_AFFINITY_TAG_INFO:
4953         {
4954                 if (*task_info_count < TASK_AFFINITY_TAG_INFO_COUNT) {
4955                         error = KERN_INVALID_ARGUMENT;
4956                         break;
4957                 }
4958
4959                 error = task_affinity_info(task, task_info_out, task_info_count);
4960                 break;
4961         }
4962         case TASK_POWER_INFO:
4963         {
4964                 if (*task_info_count < TASK_POWER_INFO_COUNT) {
4965                         error = KERN_INVALID_ARGUMENT;
4966                         break;
4967                 }
4968
4969                 task_power_info_locked(task, (task_power_info_t)task_info_out, NULL, NULL, NULL);
4970                 break;
4971         }
4972
4973         case TASK_POWER_INFO_V2:
4974         {
4975                 if (*task_info_count < TASK_POWER_INFO_V2_COUNT_OLD) {
4976                         error = KERN_INVALID_ARGUMENT;
4977                         break;
4978                 }
4979                 task_power_info_v2_t tpiv2 = (task_power_info_v2_t) task_info_out;
4980                 task_power_info_locked(task, &tpiv2->cpu_energy, &tpiv2->gpu_energy, tpiv2, NULL);
4981                 break;
4982         }
4983
4984         case TASK_VM_INFO:
4985         case TASK_VM_INFO_PURGEABLE:
4986         {
4987                 task_vm_info_t          vm_info;
4988                 vm_map_t                map;
4989
4990 #if __arm64__
4991                 struct proc *p;
4992                 uint32_t platform, sdk;
4993                 p = current_proc();
4994                 platform = proc_platform(p);
4995                 sdk = proc_sdk(p);
4996                 if (original_task_info_count > TASK_VM_INFO_REV2_COUNT &&
4997                     platform == PLATFORM_IOS &&
4998                     sdk != 0 &&
4999                     (sdk >> 16) <= 12) {
5000                         /*
5001                          * Some iOS apps pass an incorrect value for
5002                          * task_info_count, expressed in number of bytes
5003                          * instead of number of "natural_t" elements.
5004                          * For the sake of backwards binary compatibility
5005                          * for apps built with an iOS12 or older SDK and using
5006                          * the "rev2" data structure, let's fix task_info_count
5007                          * for them, to avoid stomping past the actual end
5008                          * of their buffer.
5009                          */
5010 #if DEVELOPMENT || DEBUG
5011                         printf("%s:%d %d[%s] rdar://49484582 task_info_count %d -> %d platform %d sdk %d.%d.%d\n", __FUNCTION__, __LINE__, proc_pid(p), proc_name_address(p), original_task_info_count, TASK_VM_INFO_REV2_COUNT, platform, (sdk >> 16), ((sdk >> 8) & 0xff), (sdk & 0xff));
5012 #endif /* DEVELOPMENT || DEBUG */
5013                         DTRACE_VM4(workaround_task_vm_info_count,
5014                             mach_msg_type_number_t, original_task_info_count,
5015                             mach_msg_type_number_t, TASK_VM_INFO_REV2_COUNT,
5016                             uint32_t, platform,
5017                             uint32_t, sdk);
5018                         original_task_info_count = TASK_VM_INFO_REV2_COUNT;
5019                         *task_info_count = original_task_info_count;
5020                 }
5021 #endif /* __arm64__ */
5022
5023                 if (*task_info_count < TASK_VM_INFO_REV0_COUNT) {
5024                         error = KERN_INVALID_ARGUMENT;
5025                         break;
5026                 }
5027
5028                 vm_info = (task_vm_info_t)task_info_out;
5029
5030                 if (task == kernel_task) {
5031                         map = kernel_map;
5032                         /* no lock */
5033                 } else {
5034                         map = task->map;
5035                         vm_map_lock_read(map);
5036                 }
5037
5038                 vm_info->virtual_size = (typeof(vm_info->virtual_size))map->size;
5039                 vm_info->region_count = map->hdr.nentries;
5040                 vm_info->page_size = vm_map_page_size(map);
5041
5042                 vm_info->resident_size = pmap_resident_count(map->pmap);
5043                 vm_info->resident_size *= PAGE_SIZE;
5044                 vm_info->resident_size_peak = pmap_resident_max(map->pmap);
5045                 vm_info->resident_size_peak *= PAGE_SIZE;
5046
5047 #define _VM_INFO(_name) \
5048         vm_info->_name = ((mach_vm_size_t) map->pmap->stats._name) * PAGE_SIZE
5049
5050                 _VM_INFO(device);
5051                 _VM_INFO(device_peak);
5052                 _VM_INFO(external);
5053                 _VM_INFO(external_peak);
5054                 _VM_INFO(internal);
5055                 _VM_INFO(internal_peak);
5056                 _VM_INFO(reusable);
5057                 _VM_INFO(reusable_peak);
5058                 _VM_INFO(compressed);
5059                 _VM_INFO(compressed_peak);
5060                 _VM_INFO(compressed_lifetime);
5061
5062                 vm_info->purgeable_volatile_pmap = 0;
5063                 vm_info->purgeable_volatile_resident = 0;
5064                 vm_info->purgeable_volatile_virtual = 0;
5065                 if (task == kernel_task) {
5066                         /*
5067                          * We do not maintain the detailed stats for the
5068                          * kernel_pmap, so just count everything as
5069                          * "internal"...
5070                          */
5071                         vm_info->internal = vm_info->resident_size;
5072                         /*
5073                          * ... but since the memory held by the VM compressor
5074                          * in the kernel address space ought to be attributed
5075                          * to user-space tasks, we subtract it from "internal"
5076                          * to give memory reporting tools a more accurate idea
5077                          * of what the kernel itself is actually using, instead
5078                          * of making it look like the kernel is leaking memory
5079                          * when the system is under memory pressure.
5080                          */
5081                         vm_info->internal -= (VM_PAGE_COMPRESSOR_COUNT *
5082                             PAGE_SIZE);
5083                 } else {
5084                         mach_vm_size_t  volatile_virtual_size;
5085                         mach_vm_size_t  volatile_resident_size;
5086                         mach_vm_size_t  volatile_compressed_size;
5087                         mach_vm_size_t  volatile_pmap_size;
5088                         mach_vm_size_t  volatile_compressed_pmap_size;
5089                         kern_return_t   kr;
5090
5091                         if (flavor == TASK_VM_INFO_PURGEABLE) {
5092                                 kr = vm_map_query_volatile(
5093                                         map,
5094                                         &volatile_virtual_size,
5095                                         &volatile_resident_size,
5096                                         &volatile_compressed_size,
5097                                         &volatile_pmap_size,
5098                                         &volatile_compressed_pmap_size);
5099                                 if (kr == KERN_SUCCESS) {
5100                                         vm_info->purgeable_volatile_pmap =
5101                                             volatile_pmap_size;
5102                                         if (radar_20146450) {
5103                                                 vm_info->compressed -=
5104                                                     volatile_compressed_pmap_size;
5105                                         }
5106                                         vm_info->purgeable_volatile_resident =
5107                                             volatile_resident_size;
5108                                         vm_info->purgeable_volatile_virtual =
5109                                             volatile_virtual_size;
5110                                 }
5111                         }
5112                 }
5113                 *task_info_count = TASK_VM_INFO_REV0_COUNT;
5114
5115                 if (original_task_info_count >= TASK_VM_INFO_REV1_COUNT) {
5116                         vm_info->phys_footprint =
5117                             (mach_vm_size_t) get_task_phys_footprint(task);
5118                         *task_info_count = TASK_VM_INFO_REV1_COUNT;
5119                 }
5120                 if (original_task_info_count >= TASK_VM_INFO_REV2_COUNT) {
5121                         vm_info->min_address = map->min_offset;
5122                         vm_info->max_address = map->max_offset;
5123                         *task_info_count = TASK_VM_INFO_REV2_COUNT;
5124                 }
5125                 if (original_task_info_count >= TASK_VM_INFO_REV3_COUNT) {
5126                         ledger_get_lifetime_max(task->ledger,
5127                             task_ledgers.phys_footprint,
5128                             &vm_info->ledger_phys_footprint_peak);
5129                         ledger_get_balance(task->ledger,
5130                             task_ledgers.purgeable_nonvolatile,
5131                             &vm_info->ledger_purgeable_nonvolatile);
5132                         ledger_get_balance(task->ledger,
5133                             task_ledgers.purgeable_nonvolatile_compressed,
5134                             &vm_info->ledger_purgeable_novolatile_compressed);
5135                         ledger_get_balance(task->ledger,
5136                             task_ledgers.purgeable_volatile,
5137                             &vm_info->ledger_purgeable_volatile);
5138                         ledger_get_balance(task->ledger,
5139                             task_ledgers.purgeable_volatile_compressed,
5140                             &vm_info->ledger_purgeable_volatile_compressed);
5141                         ledger_get_balance(task->ledger,
5142                             task_ledgers.network_nonvolatile,
5143                             &vm_info->ledger_tag_network_nonvolatile);
5144                         ledger_get_balance(task->ledger,
5145                             task_ledgers.network_nonvolatile_compressed,
5146                             &vm_info->ledger_tag_network_nonvolatile_compressed);
5147                         ledger_get_balance(task->ledger,
5148                             task_ledgers.network_volatile,
5149                             &vm_info->ledger_tag_network_volatile);
5150                         ledger_get_balance(task->ledger,
5151                             task_ledgers.network_volatile_compressed,
5152                             &vm_info->ledger_tag_network_volatile_compressed);
5153                         ledger_get_balance(task->ledger,
5154                             task_ledgers.media_footprint,
5155                             &vm_info->ledger_tag_media_footprint);
5156                         ledger_get_balance(task->ledger,
5157                             task_ledgers.media_footprint_compressed,
5158                             &vm_info->ledger_tag_media_footprint_compressed);
5159                         ledger_get_balance(task->ledger,
5160                             task_ledgers.media_nofootprint,
5161                             &vm_info->ledger_tag_media_nofootprint);
5162                         ledger_get_balance(task->ledger,
5163                             task_ledgers.media_nofootprint_compressed,
5164                             &vm_info->ledger_tag_media_nofootprint_compressed);
5165                         ledger_get_balance(task->ledger,
5166                             task_ledgers.graphics_footprint,
5167                             &vm_info->ledger_tag_graphics_footprint);
5168                         ledger_get_balance(task->ledger,
5169                             task_ledgers.graphics_footprint_compressed,
5170                             &vm_info->ledger_tag_graphics_footprint_compressed);
5171                         ledger_get_balance(task->ledger,
5172                             task_ledgers.graphics_nofootprint,
5173                             &vm_info->ledger_tag_graphics_nofootprint);
5174                         ledger_get_balance(task->ledger,
5175                             task_ledgers.graphics_nofootprint_compressed,
5176                             &vm_info->ledger_tag_graphics_nofootprint_compressed);
5177                         ledger_get_balance(task->ledger,
5178                             task_ledgers.neural_footprint,
5179                             &vm_info->ledger_tag_neural_footprint);
5180                         ledger_get_balance(task->ledger,
5181                             task_ledgers.neural_footprint_compressed,
5182                             &vm_info->ledger_tag_neural_footprint_compressed);
5183                         ledger_get_balance(task->ledger,
5184                             task_ledgers.neural_nofootprint,
5185                             &vm_info->ledger_tag_neural_nofootprint);
5186                         ledger_get_balance(task->ledger,
5187                             task_ledgers.neural_nofootprint_compressed,
5188                             &vm_info->ledger_tag_neural_nofootprint_compressed);
5189                         *task_info_count = TASK_VM_INFO_REV3_COUNT;
5190                 }
5191                 if (original_task_info_count >= TASK_VM_INFO_REV4_COUNT) {
5192                         if (task->bsd_info) {
5193                                 vm_info->limit_bytes_remaining =
5194                                     memorystatus_available_memory_internal(task->bsd_info);
5195                         } else {
5196                                 vm_info->limit_bytes_remaining = 0;
5197                         }
5198                         *task_info_count = TASK_VM_INFO_REV4_COUNT;
5199                 }
5200                 if (original_task_info_count >= TASK_VM_INFO_REV5_COUNT) {
5201                         thread_t thread;
5202                         integer_t total = task->decompressions;
5203                         queue_iterate(&task->threads, thread, thread_t, task_threads) {
5204                                 total += thread->decompressions;
5205                         }
5206                         vm_info->decompressions = total;
5207                         *task_info_count = TASK_VM_INFO_REV5_COUNT;
5208                 }
5209
5210                 if (task != kernel_task) {
5211                         vm_map_unlock_read(map);
5212                 }
5213
5214                 break;
5215         }
5216
5217         case TASK_WAIT_STATE_INFO:
5218         {
5219                 /*
5220                  * Deprecated flavor. Currently allowing some results until all users
5221                  * stop calling it. The results may not be accurate.
5222                  */
5223                 task_wait_state_info_t  wait_state_info;
5224                 uint64_t total_sfi_ledger_val = 0;
5225
5226                 if (*task_info_count < TASK_WAIT_STATE_INFO_COUNT) {
5227                         error = KERN_INVALID_ARGUMENT;
5228                         break;
5229                 }
5230
5231                 wait_state_info = (task_wait_state_info_t) task_info_out;
5232
5233                 wait_state_info->total_wait_state_time = 0;
5234                 bzero(wait_state_info->_reserved, sizeof(wait_state_info->_reserved));
5235
5236 #if CONFIG_SCHED_SFI
5237                 int i, prev_lentry = -1;
5238                 int64_t  val_credit, val_debit;
5239
5240                 for (i = 0; i < MAX_SFI_CLASS_ID; i++) {
5241                         val_credit = 0;
5242                         /*
5243                          * checking with prev_lentry != entry ensures adjacent classes
5244                          * which share the same ledger do not add wait times twice.
5245                          * Note: Use ledger() call to get data for each individual sfi class.
5246                          */
5247                         if (prev_lentry != task_ledgers.sfi_wait_times[i] &&
5248                             KERN_SUCCESS == ledger_get_entries(task->ledger,
5249                             task_ledgers.sfi_wait_times[i], &val_credit, &val_debit)) {
5250                                 total_sfi_ledger_val += val_credit;
5251                         }
5252                         prev_lentry = task_ledgers.sfi_wait_times[i];
5253                 }
5254
5255 #endif /* CONFIG_SCHED_SFI */
5256                 wait_state_info->total_wait_sfi_state_time = total_sfi_ledger_val;
5257                 *task_info_count = TASK_WAIT_STATE_INFO_COUNT;
5258
5259                 break;
5260         }
5261         case TASK_VM_INFO_PURGEABLE_ACCOUNT:
5262         {
5263 #if DEVELOPMENT || DEBUG
5264                 pvm_account_info_t      acnt_info;
5265
5266                 if (*task_info_count < PVM_ACCOUNT_INFO_COUNT) {
5267                         error = KERN_INVALID_ARGUMENT;
5268                         break;
5269                 }
5270
5271                 if (task_info_out == NULL) {
5272                         error = KERN_INVALID_ARGUMENT;
5273                         break;
5274                 }
5275
5276                 acnt_info = (pvm_account_info_t) task_info_out;
5277
5278                 error = vm_purgeable_account(task, acnt_info);
5279
5280                 *task_info_count = PVM_ACCOUNT_INFO_COUNT;
5281
5282                 break;
5283 #else /* DEVELOPMENT || DEBUG */
5284                 error = KERN_NOT_SUPPORTED;
5285                 break;
5286 #endif /* DEVELOPMENT || DEBUG */
5287         }
5288         case TASK_FLAGS_INFO:
5289         {
5290                 task_flags_info_t               flags_info;
5291
5292                 if (*task_info_count < TASK_FLAGS_INFO_COUNT) {
5293                         error = KERN_INVALID_ARGUMENT;
5294                         break;
5295                 }
5296
5297                 flags_info = (task_flags_info_t)task_info_out;
5298
5299                 /* only publish the 64-bit flag of the task */
5300                 flags_info->flags = task->t_flags & (TF_64B_ADDR | TF_64B_DATA);
5301
5302                 *task_info_count = TASK_FLAGS_INFO_COUNT;
5303                 break;
5304         }
5305
5306         case TASK_DEBUG_INFO_INTERNAL:
5307         {
5308 #if DEVELOPMENT || DEBUG
5309                 task_debug_info_internal_t dbg_info;
5310                 ipc_space_t space = task->itk_space;
5311                 if (*task_info_count < TASK_DEBUG_INFO_INTERNAL_COUNT) {
5312                         error = KERN_NOT_SUPPORTED;
5313                         break;
5314                 }
5315
5316                 if (task_info_out == NULL) {
5317                         error = KERN_INVALID_ARGUMENT;
5318                         break;
5319                 }
5320                 dbg_info = (task_debug_info_internal_t) task_info_out;
5321                 dbg_info->ipc_space_size = 0;
5322
5323                 if (space) {
5324                         is_read_lock(space);
5325                         dbg_info->ipc_space_size = space->is_table_size;
5326                         is_read_unlock(space);
5327                 }
5328
5329                 dbg_info->suspend_count = task->suspend_count;
5330
5331                 error = KERN_SUCCESS;
5332                 *task_info_count = TASK_DEBUG_INFO_INTERNAL_COUNT;
5333                 break;
5334 #else /* DEVELOPMENT || DEBUG */
5335                 error = KERN_NOT_SUPPORTED;
5336                 break;
5337 #endif /* DEVELOPMENT || DEBUG */
5338         }
5339         default:
5340                 error = KERN_INVALID_ARGUMENT;
5341         }
5342
5343         task_unlock(task);
5344         return error;
5345 }
5346
5347 /*
5348  * task_info_from_user
5349  *
5350  * When calling task_info from user space,
5351  * this function will be executed as mig server side
5352  * instead of calling directly into task_info.
5353  * This gives the possibility to perform more security
5354  * checks on task_port.
5355  *
5356  * In the case of TASK_DYLD_INFO, we require the more
5357  * privileged task_port not the less-privileged task_name_port.
5358  *
5359  */
5360 kern_return_t
5361 task_info_from_user(
5362         mach_port_t             task_port,
5363         task_flavor_t           flavor,
5364         task_info_t             task_info_out,
5365         mach_msg_type_number_t  *task_info_count)
5366 {
5367         task_t task;
5368         kern_return_t ret;
5369
5370         if (flavor == TASK_DYLD_INFO) {
5371                 task = convert_port_to_task(task_port);
5372         } else {
5373                 task = convert_port_to_task_name(task_port);
5374         }
5375
5376         ret = task_info(task, flavor, task_info_out, task_info_count);
5377
5378         task_deallocate(task);
5379
5380         return ret;
5381 }
5382
5383 /*
5384  *      task_power_info
5385  *
5386  *      Returns power stats for the task.
5387  *      Note: Called with task locked.
5388  */
5389 void
5390 task_power_info_locked(
5391         task_t                  task,
5392         task_power_info_t       info,
5393         gpu_energy_data_t       ginfo,
5394         task_power_info_v2_t    infov2,
5395         uint64_t                *runnable_time)
5396 {
5397         thread_t                thread;
5398         ledger_amount_t         tmp;
5399
5400         uint64_t                runnable_time_sum = 0;
5401
5402         task_lock_assert_owned(task);
5403
5404         ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
5405             (ledger_amount_t *)&info->task_interrupt_wakeups, &tmp);
5406         ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
5407             (ledger_amount_t *)&info->task_platform_idle_wakeups, &tmp);
5408
5409         info->task_timer_wakeups_bin_1 = task->task_timer_wakeups_bin_1;
5410         info->task_timer_wakeups_bin_2 = task->task_timer_wakeups_bin_2;
5411
5412         info->total_user = task->total_user_time;
5413         info->total_system = task->total_system_time;
5414         runnable_time_sum = task->total_runnable_time;
5415
5416 #if CONFIG_EMBEDDED
5417         if (infov2) {
5418                 infov2->task_energy = task->task_energy;
5419         }
5420 #endif
5421
5422         if (ginfo) {
5423                 ginfo->task_gpu_utilisation = task->task_gpu_ns;
5424         }
5425
5426         if (infov2) {
5427                 infov2->task_ptime = task->total_ptime;
5428                 infov2->task_pset_switches = task->ps_switch;
5429         }
5430
5431         queue_iterate(&task->threads, thread, thread_t, task_threads) {
5432                 uint64_t        tval;
5433                 spl_t           x;
5434
5435                 if (thread->options & TH_OPT_IDLE_THREAD) {
5436                         continue;
5437                 }
5438
5439                 x = splsched();
5440                 thread_lock(thread);
5441
5442                 info->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
5443                 info->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
5444
5445 #if CONFIG_EMBEDDED
5446                 if (infov2) {
5447                         infov2->task_energy += ml_energy_stat(thread);
5448                 }
5449 #endif
5450
5451                 tval = timer_grab(&thread->user_timer);
5452                 info->total_user += tval;
5453
5454                 if (infov2) {
5455                         tval = timer_grab(&thread->ptime);
5456                         infov2->task_ptime += tval;
5457                         infov2->task_pset_switches += thread->ps_switch;
5458                 }
5459
5460                 tval = timer_grab(&thread->system_timer);
5461                 if (thread->precise_user_kernel_time) {
5462                         info->total_system += tval;
5463                 } else {
5464                         /* system_timer may represent either sys or user */
5465                         info->total_user += tval;
5466                 }
5467
5468                 tval = timer_grab(&thread->runnable_timer);
5469
5470                 runnable_time_sum += tval;
5471
5472                 if (ginfo) {
5473                         ginfo->task_gpu_utilisation += ml_gpu_stat(thread);
5474                 }
5475                 thread_unlock(thread);
5476                 splx(x);
5477         }
5478
5479         if (runnable_time) {
5480                 *runnable_time = runnable_time_sum;
5481         }
5482 }
5483
5484 /*
5485  *      task_gpu_utilisation
5486  *
5487  *      Returns the total gpu time used by the all the threads of the task
5488  *  (both dead and alive)
5489  */
5490 uint64_t
5491 task_gpu_utilisation(
5492         task_t  task)
5493 {
5494         uint64_t gpu_time = 0;
5495 #if !CONFIG_EMBEDDED
5496         thread_t thread;
5497
5498         task_lock(task);
5499         gpu_time += task->task_gpu_ns;
5500
5501         queue_iterate(&task->threads, thread, thread_t, task_threads) {
5502                 spl_t x;
5503                 x = splsched();
5504                 thread_lock(thread);
5505                 gpu_time += ml_gpu_stat(thread);
5506                 thread_unlock(thread);
5507                 splx(x);
5508         }
5509
5510         task_unlock(task);
5511 #else /* CONFIG_EMBEDDED */
5512         /* silence compiler warning */
5513         (void)task;
5514 #endif /* !CONFIG_EMBEDDED */
5515         return gpu_time;
5516 }
5517
5518 /*
5519  *      task_energy
5520  *
5521  *      Returns the total energy used by the all the threads of the task
5522  *  (both dead and alive)
5523  */
5524 uint64_t
5525 task_energy(
5526         task_t  task)
5527 {
5528         uint64_t energy = 0;
5529         thread_t thread;
5530
5531         task_lock(task);
5532         energy += task->task_energy;
5533
5534         queue_iterate(&task->threads, thread, thread_t, task_threads) {
5535                 spl_t x;
5536                 x = splsched();
5537                 thread_lock(thread);
5538                 energy += ml_energy_stat(thread);
5539                 thread_unlock(thread);
5540                 splx(x);
5541         }
5542
5543         task_unlock(task);
5544         return energy;
5545 }
5546
5547 #if __AMP__
5548
5549 uint64_t
5550 task_cpu_ptime(
5551         task_t  task)
5552 {
5553         uint64_t cpu_ptime = 0;
5554         thread_t thread;
5555
5556         task_lock(task);
5557         cpu_ptime += task->total_ptime;
5558
5559         queue_iterate(&task->threads, thread, thread_t, task_threads) {
5560                 cpu_ptime += timer_grab(&thread->ptime);
5561         }
5562
5563         task_unlock(task);
5564         return cpu_ptime;
5565 }
5566
5567 #else /* __AMP__ */
5568
5569 uint64_t
5570 task_cpu_ptime(
5571         __unused task_t  task)
5572 {
5573         return 0;
5574 }
5575
5576 #endif /* __AMP__ */
5577
5578 /* This function updates the cpu time in the arrays for each
5579  * effective and requested QoS class
5580  */
5581 void
5582 task_update_cpu_time_qos_stats(
5583         task_t  task,
5584         uint64_t *eqos_stats,
5585         uint64_t *rqos_stats)
5586 {
5587         if (!eqos_stats && !rqos_stats) {
5588                 return;
5589         }
5590
5591         task_lock(task);
5592         thread_t thread;
5593         queue_iterate(&task->threads, thread, thread_t, task_threads) {
5594                 if (thread->options & TH_OPT_IDLE_THREAD) {
5595                         continue;
5596                 }
5597
5598                 thread_update_qos_cpu_time(thread);
5599         }
5600
5601         if (eqos_stats) {
5602                 eqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_eqos_stats.cpu_time_qos_default;
5603                 eqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_eqos_stats.cpu_time_qos_maintenance;
5604                 eqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_eqos_stats.cpu_time_qos_background;
5605                 eqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_eqos_stats.cpu_time_qos_utility;
5606                 eqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_eqos_stats.cpu_time_qos_legacy;
5607                 eqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_eqos_stats.cpu_time_qos_user_initiated;
5608                 eqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_eqos_stats.cpu_time_qos_user_interactive;
5609         }
5610
5611         if (rqos_stats) {
5612                 rqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_rqos_stats.cpu_time_qos_default;
5613                 rqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_rqos_stats.cpu_time_qos_maintenance;
5614                 rqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_rqos_stats.cpu_time_qos_background;
5615                 rqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_rqos_stats.cpu_time_qos_utility;
5616                 rqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_rqos_stats.cpu_time_qos_legacy;
5617                 rqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_rqos_stats.cpu_time_qos_user_initiated;
5618                 rqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_rqos_stats.cpu_time_qos_user_interactive;
5619         }
5620
5621         task_unlock(task);
5622 }
5623
5624 kern_return_t
5625 task_purgable_info(
5626         task_t                  task,
5627         task_purgable_info_t    *stats)
5628 {
5629         if (task == TASK_NULL || stats == NULL) {
5630                 return KERN_INVALID_ARGUMENT;
5631         }
5632         /* Take task reference */
5633         task_reference(task);
5634         vm_purgeable_stats((vm_purgeable_info_t)stats, task);
5635         /* Drop task reference */
5636         task_deallocate(task);
5637         return KERN_SUCCESS;
5638 }
5639
5640 void
5641 task_vtimer_set(
5642         task_t          task,
5643         integer_t       which)
5644 {
5645         thread_t        thread;
5646         spl_t           x;
5647
5648         task_lock(task);
5649
5650         task->vtimers |= which;
5651
5652         switch (which) {
5653         case TASK_VTIMER_USER:
5654                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5655                         x = splsched();
5656                         thread_lock(thread);
5657                         if (thread->precise_user_kernel_time) {
5658                                 thread->vtimer_user_save = timer_grab(&thread->user_timer);
5659                         } else {
5660                                 thread->vtimer_user_save = timer_grab(&thread->system_timer);
5661                         }
5662                         thread_unlock(thread);
5663                         splx(x);
5664                 }
5665                 break;
5666
5667         case TASK_VTIMER_PROF:
5668                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5669                         x = splsched();
5670                         thread_lock(thread);
5671                         thread->vtimer_prof_save = timer_grab(&thread->user_timer);
5672                         thread->vtimer_prof_save += timer_grab(&thread->system_timer);
5673                         thread_unlock(thread);
5674                         splx(x);
5675                 }
5676                 break;
5677
5678         case TASK_VTIMER_RLIM:
5679                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5680                         x = splsched();
5681                         thread_lock(thread);
5682                         thread->vtimer_rlim_save = timer_grab(&thread->user_timer);
5683                         thread->vtimer_rlim_save += timer_grab(&thread->system_timer);
5684                         thread_unlock(thread);
5685                         splx(x);
5686                 }
5687                 break;
5688         }
5689
5690         task_unlock(task);
5691 }
5692
5693 void
5694 task_vtimer_clear(
5695         task_t          task,
5696         integer_t       which)
5697 {
5698         assert(task == current_task());
5699
5700         task_lock(task);
5701
5702         task->vtimers &= ~which;
5703
5704         task_unlock(task);
5705 }
5706
5707 void
5708 task_vtimer_update(
5709         __unused
5710         task_t          task,
5711         integer_t       which,
5712         uint32_t        *microsecs)
5713 {
5714         thread_t        thread = current_thread();
5715         uint32_t        tdelt = 0;
5716         clock_sec_t     secs = 0;
5717         uint64_t        tsum;
5718
5719         assert(task == current_task());
5720
5721         spl_t s = splsched();
5722         thread_lock(thread);
5723
5724         if ((task->vtimers & which) != (uint32_t)which) {
5725                 thread_unlock(thread);
5726                 splx(s);
5727                 return;
5728         }
5729
5730         switch (which) {
5731         case TASK_VTIMER_USER:
5732                 if (thread->precise_user_kernel_time) {
5733                         tdelt = (uint32_t)timer_delta(&thread->user_timer,
5734                             &thread->vtimer_user_save);
5735                 } else {
5736                         tdelt = (uint32_t)timer_delta(&thread->system_timer,
5737                             &thread->vtimer_user_save);
5738                 }
5739                 absolutetime_to_microtime(tdelt, &secs, microsecs);
5740                 break;
5741
5742         case TASK_VTIMER_PROF:
5743                 tsum = timer_grab(&thread->user_timer);
5744                 tsum += timer_grab(&thread->system_timer);
5745                 tdelt = (uint32_t)(tsum - thread->vtimer_prof_save);
5746                 absolutetime_to_microtime(tdelt, &secs, microsecs);
5747                 /* if the time delta is smaller than a usec, ignore */
5748                 if (*microsecs != 0) {
5749                         thread->vtimer_prof_save = tsum;
5750                 }
5751                 break;
5752
5753         case TASK_VTIMER_RLIM:
5754                 tsum = timer_grab(&thread->user_timer);
5755                 tsum += timer_grab(&thread->system_timer);
5756                 tdelt = (uint32_t)(tsum - thread->vtimer_rlim_save);
5757                 thread->vtimer_rlim_save = tsum;
5758                 absolutetime_to_microtime(tdelt, &secs, microsecs);
5759                 break;
5760         }
5761
5762         thread_unlock(thread);
5763         splx(s);
5764 }
5765
5766 /*
5767  *      task_assign:
5768  *
5769  *      Change the assigned processor set for the task
5770  */
5771 kern_return_t
5772 task_assign(
5773         __unused task_t         task,
5774         __unused processor_set_t        new_pset,
5775         __unused boolean_t      assign_threads)
5776 {
5777         return KERN_FAILURE;
5778 }
5779
5780 /*
5781  *      task_assign_default:
5782  *
5783  *      Version of task_assign to assign to default processor set.
5784  */
5785 kern_return_t
5786 task_assign_default(
5787         task_t          task,
5788         boolean_t       assign_threads)
5789 {
5790         return task_assign(task, &pset0, assign_threads);
5791 }
5792
5793 /*
5794  *      task_get_assignment
5795  *
5796  *      Return name of processor set that task is assigned to.
5797  */
5798 kern_return_t
5799 task_get_assignment(
5800         task_t          task,
5801         processor_set_t *pset)
5802 {
5803         if (!task || !task->active) {
5804                 return KERN_FAILURE;
5805         }
5806
5807         *pset = &pset0;
5808
5809         return KERN_SUCCESS;
5810 }
5811
5812 uint64_t
5813 get_task_dispatchqueue_offset(
5814         task_t          task)
5815 {
5816         return task->dispatchqueue_offset;
5817 }
5818
5819 /*
5820  *      task_policy
5821  *
5822  *      Set scheduling policy and parameters, both base and limit, for
5823  *      the given task. Policy must be a policy which is enabled for the
5824  *      processor set. Change contained threads if requested.
5825  */
5826 kern_return_t
5827 task_policy(
5828         __unused task_t                 task,
5829         __unused policy_t                       policy_id,
5830         __unused policy_base_t          base,
5831         __unused mach_msg_type_number_t count,
5832         __unused boolean_t                      set_limit,
5833         __unused boolean_t                      change)
5834 {
5835         return KERN_FAILURE;
5836 }
5837
5838 /*
5839  *      task_set_policy
5840  *
5841  *      Set scheduling policy and parameters, both base and limit, for
5842  *      the given task. Policy can be any policy implemented by the
5843  *      processor set, whether enabled or not. Change contained threads
5844  *      if requested.
5845  */
5846 kern_return_t
5847 task_set_policy(
5848         __unused task_t                 task,
5849         __unused processor_set_t                pset,
5850         __unused policy_t                       policy_id,
5851         __unused policy_base_t          base,
5852         __unused mach_msg_type_number_t base_count,
5853         __unused policy_limit_t         limit,
5854         __unused mach_msg_type_number_t limit_count,
5855         __unused boolean_t                      change)
5856 {
5857         return KERN_FAILURE;
5858 }
5859
5860 kern_return_t
5861 task_set_ras_pc(
5862         __unused task_t task,
5863         __unused vm_offset_t    pc,
5864         __unused vm_offset_t    endpc)
5865 {
5866         return KERN_FAILURE;
5867 }
5868
5869 void
5870 task_synchronizer_destroy_all(task_t task)
5871 {
5872         /*
5873          *  Destroy owned semaphores
5874          */
5875         semaphore_destroy_all(task);
5876 }
5877
5878 /*
5879  * Install default (machine-dependent) initial thread state
5880  * on the task.  Subsequent thread creation will have this initial
5881  * state set on the thread by machine_thread_inherit_taskwide().
5882  * Flavors and structures are exactly the same as those to thread_set_state()
5883  */
5884 kern_return_t
5885 task_set_state(
5886         task_t task,
5887         int flavor,
5888         thread_state_t state,
5889         mach_msg_type_number_t state_count)
5890 {
5891         kern_return_t ret;
5892
5893         if (task == TASK_NULL) {
5894                 return KERN_INVALID_ARGUMENT;
5895         }
5896
5897         task_lock(task);
5898
5899         if (!task->active) {
5900                 task_unlock(task);
5901                 return KERN_FAILURE;
5902         }
5903
5904         ret = machine_task_set_state(task, flavor, state, state_count);
5905
5906         task_unlock(task);
5907         return ret;
5908 }
5909
5910 /*
5911  * Examine the default (machine-dependent) initial thread state
5912  * on the task, as set by task_set_state().  Flavors and structures
5913  * are exactly the same as those passed to thread_get_state().
5914  */
5915 kern_return_t
5916 task_get_state(
5917         task_t  task,
5918         int     flavor,
5919         thread_state_t state,
5920         mach_msg_type_number_t *state_count)
5921 {
5922         kern_return_t ret;
5923
5924         if (task == TASK_NULL) {
5925                 return KERN_INVALID_ARGUMENT;
5926         }
5927
5928         task_lock(task);
5929
5930         if (!task->active) {
5931                 task_unlock(task);
5932                 return KERN_FAILURE;
5933         }
5934
5935         ret = machine_task_get_state(task, flavor, state, state_count);
5936
5937         task_unlock(task);
5938         return ret;
5939 }
5940
5941
5942 static kern_return_t __attribute__((noinline, not_tail_called))
5943 PROC_VIOLATED_GUARD__SEND_EXC_GUARD_AND_SUSPEND(
5944         mach_exception_code_t code,
5945         mach_exception_subcode_t subcode,
5946         void *reason)
5947 {
5948 #ifdef MACH_BSD
5949         if (1 == proc_selfpid()) {
5950                 return KERN_NOT_SUPPORTED;              // initproc is immune
5951         }
5952 #endif
5953         mach_exception_data_type_t codes[EXCEPTION_CODE_MAX] = {
5954                 [0] = code,
5955                 [1] = subcode,
5956         };
5957         task_t task = current_task();
5958         kern_return_t kr;
5959
5960         /* (See jetsam-related comments below) */
5961
5962         proc_memstat_terminated(task->bsd_info, TRUE);
5963         kr = task_enqueue_exception_with_corpse(task, EXC_GUARD, codes, 2, reason);
5964         proc_memstat_terminated(task->bsd_info, FALSE);
5965         return kr;
5966 }
5967
5968 kern_return_t
5969 task_violated_guard(
5970         mach_exception_code_t code,
5971         mach_exception_subcode_t subcode,
5972         void *reason)
5973 {
5974         return PROC_VIOLATED_GUARD__SEND_EXC_GUARD_AND_SUSPEND(code, subcode, reason);
5975 }
5976
5977
5978 #if CONFIG_MEMORYSTATUS
5979
5980 boolean_t
5981 task_get_memlimit_is_active(task_t task)
5982 {
5983         assert(task != NULL);
5984
5985         if (task->memlimit_is_active == 1) {
5986                 return TRUE;
5987         } else {
5988                 return FALSE;
5989         }
5990 }
5991
5992 void
5993 task_set_memlimit_is_active(task_t task, boolean_t memlimit_is_active)
5994 {
5995         assert(task != NULL);
5996
5997         if (memlimit_is_active) {
5998                 task->memlimit_is_active = 1;
5999         } else {
6000                 task->memlimit_is_active = 0;
6001         }
6002 }
6003
6004 boolean_t
6005 task_get_memlimit_is_fatal(task_t task)
6006 {
6007         assert(task != NULL);
6008
6009         if (task->memlimit_is_fatal == 1) {
6010                 return TRUE;
6011         } else {
6012                 return FALSE;
6013         }
6014 }
6015
6016 void
6017 task_set_memlimit_is_fatal(task_t task, boolean_t memlimit_is_fatal)
6018 {
6019         assert(task != NULL);
6020
6021         if (memlimit_is_fatal) {
6022                 task->memlimit_is_fatal = 1;
6023         } else {
6024                 task->memlimit_is_fatal = 0;
6025         }
6026 }
6027
6028 boolean_t
6029 task_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
6030 {
6031         boolean_t triggered = FALSE;
6032
6033         assert(task == current_task());
6034
6035         /*
6036          * Returns true, if task has already triggered an exc_resource exception.
6037          */
6038
6039         if (memlimit_is_active) {
6040                 triggered = (task->memlimit_active_exc_resource ? TRUE : FALSE);
6041         } else {
6042                 triggered = (task->memlimit_inactive_exc_resource ? TRUE : FALSE);
6043         }
6044
6045         return triggered;
6046 }
6047
6048 void
6049 task_mark_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
6050 {
6051         assert(task == current_task());
6052
6053         /*
6054          * We allow one exc_resource per process per active/inactive limit.
6055          * The limit's fatal attribute does not come into play.
6056          */
6057
6058         if (memlimit_is_active) {
6059                 task->memlimit_active_exc_resource = 1;
6060         } else {
6061                 task->memlimit_inactive_exc_resource = 1;
6062         }
6063 }
6064
6065 #define HWM_USERCORE_MINSPACE 250 // free space (in MB) required *after* core file creation
6066
6067 void __attribute__((noinline))
6068 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, boolean_t is_fatal)
6069 {
6070         task_t                                          task            = current_task();
6071         int                                                     pid         = 0;
6072         const char                                      *procname       = "unknown";
6073         mach_exception_data_type_t      code[EXCEPTION_CODE_MAX];
6074         boolean_t send_sync_exc_resource = FALSE;
6075
6076 #ifdef MACH_BSD
6077         pid = proc_selfpid();
6078
6079         if (pid == 1) {
6080                 /*
6081                  * Cannot have ReportCrash analyzing
6082                  * a suspended initproc.
6083                  */
6084                 return;
6085         }
6086
6087         if (task->bsd_info != NULL) {
6088                 procname = proc_name_address(current_task()->bsd_info);
6089                 send_sync_exc_resource = proc_send_synchronous_EXC_RESOURCE(current_task()->bsd_info);
6090         }
6091 #endif
6092 #if CONFIG_COREDUMP
6093         if (hwm_user_cores) {
6094                 int                             error;
6095                 uint64_t                starttime, end;
6096                 clock_sec_t             secs = 0;
6097                 uint32_t                microsecs = 0;
6098
6099                 starttime = mach_absolute_time();
6100                 /*
6101                  * Trigger a coredump of this process. Don't proceed unless we know we won't
6102                  * be filling up the disk; and ignore the core size resource limit for this
6103                  * core file.
6104                  */
6105                 if ((error = coredump(current_task()->bsd_info, HWM_USERCORE_MINSPACE, COREDUMP_IGNORE_ULIMIT)) != 0) {
6106                         printf("couldn't take coredump of %s[%d]: %d\n", procname, pid, error);
6107                 }
6108                 /*
6109                  * coredump() leaves the task suspended.
6110                  */
6111                 task_resume_internal(current_task());
6112
6113                 end = mach_absolute_time();
6114                 absolutetime_to_microtime(end - starttime, &secs, &microsecs);
6115                 printf("coredump of %s[%d] taken in %d secs %d microsecs\n",
6116                     proc_name_address(current_task()->bsd_info), pid, (int)secs, microsecs);
6117         }
6118 #endif /* CONFIG_COREDUMP */
6119
6120         if (disable_exc_resource) {
6121                 printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
6122                     "supressed by a boot-arg.\n", procname, pid, max_footprint_mb);
6123                 return;
6124         }
6125
6126         /*
6127          * A task that has triggered an EXC_RESOURCE, should not be
6128          * jetsammed when the device is under memory pressure.  Here
6129          * we set the P_MEMSTAT_TERMINATED flag so that the process
6130          * will be skipped if the memorystatus_thread wakes up.
6131          */
6132         proc_memstat_terminated(current_task()->bsd_info, TRUE);
6133
6134         code[0] = code[1] = 0;
6135         EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY);
6136         EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK);
6137         EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb);
6138
6139         /*
6140          * Do not generate a corpse fork if the violation is a fatal one
6141          * or the process wants synchronous EXC_RESOURCE exceptions.
6142          */
6143         if (is_fatal || send_sync_exc_resource || exc_via_corpse_forking == 0) {
6144                 /* Do not send a EXC_RESOURCE if corpse_for_fatal_memkill is set */
6145                 if (send_sync_exc_resource || corpse_for_fatal_memkill == 0) {
6146                         /*
6147                          * Use the _internal_ variant so that no user-space
6148                          * process can resume our task from under us.
6149                          */
6150                         task_suspend_internal(task);
6151                         exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
6152                         task_resume_internal(task);
6153                 }
6154         } else {
6155                 if (audio_active) {
6156                         printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
6157                             "supressed due to audio playback.\n", procname, pid, max_footprint_mb);
6158                 } else {
6159                         task_enqueue_exception_with_corpse(task, EXC_RESOURCE,
6160                             code, EXCEPTION_CODE_MAX, NULL);
6161                 }
6162         }
6163
6164         /*
6165          * After the EXC_RESOURCE has been handled, we must clear the
6166          * P_MEMSTAT_TERMINATED flag so that the process can again be
6167          * considered for jetsam if the memorystatus_thread wakes up.
6168          */
6169         proc_memstat_terminated(current_task()->bsd_info, FALSE);  /* clear the flag */
6170 }
6171
6172 /*
6173  * Callback invoked when a task exceeds its physical footprint limit.
6174  */
6175 void
6176 task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1)
6177 {
6178         ledger_amount_t max_footprint, max_footprint_mb;
6179         task_t task;
6180         boolean_t is_warning;
6181         boolean_t memlimit_is_active;
6182         boolean_t memlimit_is_fatal;
6183
6184         if (warning == LEDGER_WARNING_DIPPED_BELOW) {
6185                 /*
6186                  * Task memory limits only provide a warning on the way up.
6187                  */
6188                 return;
6189         } else if (warning == LEDGER_WARNING_ROSE_ABOVE) {
6190                 /*
6191                  * This task is in danger of violating a memory limit,
6192                  * It has exceeded a percentage level of the limit.
6193                  */
6194                 is_warning = TRUE;
6195         } else {
6196                 /*
6197                  * The task has exceeded the physical footprint limit.
6198                  * This is not a warning but a true limit violation.
6199                  */
6200                 is_warning = FALSE;
6201         }
6202
6203         task = current_task();
6204
6205         ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &max_footprint);
6206         max_footprint_mb = max_footprint >> 20;
6207
6208         memlimit_is_active = task_get_memlimit_is_active(task);
6209         memlimit_is_fatal = task_get_memlimit_is_fatal(task);
6210
6211         /*
6212          * If this is an actual violation (not a warning), then generate EXC_RESOURCE exception.
6213          * We only generate the exception once per process per memlimit (active/inactive limit).
6214          * To enforce this, we monitor state based on the  memlimit's active/inactive attribute
6215          * and we disable it by marking that memlimit as exception triggered.
6216          */
6217         if ((is_warning == FALSE) && (!task_has_triggered_exc_resource(task, memlimit_is_active))) {
6218                 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND((int)max_footprint_mb, memlimit_is_fatal);
6219                 memorystatus_log_exception((int)max_footprint_mb, memlimit_is_active, memlimit_is_fatal);
6220                 task_mark_has_triggered_exc_resource(task, memlimit_is_active);
6221         }
6222
6223         memorystatus_on_ledger_footprint_exceeded(is_warning, memlimit_is_active, memlimit_is_fatal);
6224 }
6225
6226 extern int proc_check_footprint_priv(void);
6227
6228 kern_return_t
6229 task_set_phys_footprint_limit(
6230         task_t task,
6231         int new_limit_mb,
6232         int *old_limit_mb)
6233 {
6234         kern_return_t error;
6235
6236         boolean_t memlimit_is_active;
6237         boolean_t memlimit_is_fatal;
6238
6239         if ((error = proc_check_footprint_priv())) {
6240                 return KERN_NO_ACCESS;
6241         }
6242
6243         /*
6244          * This call should probably be obsoleted.
6245          * But for now, we default to current state.
6246          */
6247         memlimit_is_active = task_get_memlimit_is_active(task);
6248         memlimit_is_fatal = task_get_memlimit_is_fatal(task);
6249
6250         return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, memlimit_is_active, memlimit_is_fatal);
6251 }
6252
6253 kern_return_t
6254 task_convert_phys_footprint_limit(
6255         int limit_mb,
6256         int *converted_limit_mb)
6257 {
6258         if (limit_mb == -1) {
6259                 /*
6260                  * No limit
6261                  */
6262                 if (max_task_footprint != 0) {
6263                         *converted_limit_mb = (int)(max_task_footprint / 1024 / 1024);   /* bytes to MB */
6264                 } else {
6265                         *converted_limit_mb = (int)(LEDGER_LIMIT_INFINITY >> 20);
6266                 }
6267         } else {
6268                 /* nothing to convert */
6269                 *converted_limit_mb = limit_mb;
6270         }
6271         return KERN_SUCCESS;
6272 }
6273
6274
6275 kern_return_t
6276 task_set_phys_footprint_limit_internal(
6277         task_t task,
6278         int new_limit_mb,
6279         int *old_limit_mb,
6280         boolean_t memlimit_is_active,
6281         boolean_t memlimit_is_fatal)
6282 {
6283         ledger_amount_t old;
6284         kern_return_t ret;
6285
6286         ret = ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &old);
6287
6288         if (ret != KERN_SUCCESS) {
6289                 return ret;
6290         }
6291
6292         /*
6293          * Check that limit >> 20 will not give an "unexpected" 32-bit
6294          * result. There are, however, implicit assumptions that -1 mb limit
6295          * equates to LEDGER_LIMIT_INFINITY.
6296          */
6297         assert(((old & 0xFFF0000000000000LL) == 0) || (old == LEDGER_LIMIT_INFINITY));
6298
6299         if (old_limit_mb) {
6300                 *old_limit_mb = (int)(old >> 20);
6301         }
6302
6303         if (new_limit_mb == -1) {
6304                 /*
6305                  * Caller wishes to remove the limit.
6306                  */
6307                 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
6308                     max_task_footprint ? max_task_footprint : LEDGER_LIMIT_INFINITY,
6309                     max_task_footprint ? max_task_footprint_warning_level : 0);
6310
6311                 task_lock(task);
6312                 task_set_memlimit_is_active(task, memlimit_is_active);
6313                 task_set_memlimit_is_fatal(task, memlimit_is_fatal);
6314                 task_unlock(task);
6315
6316                 return KERN_SUCCESS;
6317         }
6318
6319 #ifdef CONFIG_NOMONITORS
6320         return KERN_SUCCESS;
6321 #endif /* CONFIG_NOMONITORS */
6322
6323         task_lock(task);
6324
6325         if ((memlimit_is_active == task_get_memlimit_is_active(task)) &&
6326             (memlimit_is_fatal == task_get_memlimit_is_fatal(task)) &&
6327             (((ledger_amount_t)new_limit_mb << 20) == old)) {
6328                 /*
6329                  * memlimit state is not changing
6330                  */
6331                 task_unlock(task);
6332                 return KERN_SUCCESS;
6333         }
6334
6335         task_set_memlimit_is_active(task, memlimit_is_active);
6336         task_set_memlimit_is_fatal(task, memlimit_is_fatal);
6337
6338         ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
6339             (ledger_amount_t)new_limit_mb << 20, PHYS_FOOTPRINT_WARNING_LEVEL);
6340
6341         if (task == current_task()) {
6342                 ledger_check_new_balance(current_thread(), task->ledger,
6343                     task_ledgers.phys_footprint);
6344         }
6345
6346         task_unlock(task);
6347
6348         return KERN_SUCCESS;
6349 }
6350
6351 kern_return_t
6352 task_get_phys_footprint_limit(
6353         task_t task,
6354         int *limit_mb)
6355 {
6356         ledger_amount_t limit;
6357         kern_return_t ret;
6358
6359         ret = ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &limit);
6360         if (ret != KERN_SUCCESS) {
6361                 return ret;
6362         }
6363
6364         /*
6365          * Check that limit >> 20 will not give an "unexpected" signed, 32-bit
6366          * result. There are, however, implicit assumptions that -1 mb limit
6367          * equates to LEDGER_LIMIT_INFINITY.
6368          */
6369         assert(((limit & 0xFFF0000000000000LL) == 0) || (limit == LEDGER_LIMIT_INFINITY));
6370         *limit_mb = (int)(limit >> 20);
6371
6372         return KERN_SUCCESS;
6373 }
6374 #else /* CONFIG_MEMORYSTATUS */
6375 kern_return_t
6376 task_set_phys_footprint_limit(
6377         __unused task_t task,
6378         __unused int new_limit_mb,
6379         __unused int *old_limit_mb)
6380 {
6381         return KERN_FAILURE;
6382 }
6383
6384 kern_return_t
6385 task_get_phys_footprint_limit(
6386         __unused task_t task,
6387         __unused int *limit_mb)
6388 {
6389         return KERN_FAILURE;
6390 }
6391 #endif /* CONFIG_MEMORYSTATUS */
6392
6393 void
6394 task_set_thread_limit(task_t task, uint16_t thread_limit)
6395 {
6396         assert(task != kernel_task);
6397         if (thread_limit <= TASK_MAX_THREAD_LIMIT) {
6398                 task_lock(task);
6399                 task->task_thread_limit = thread_limit;
6400                 task_unlock(task);
6401         }
6402 }
6403
6404 /*
6405  * We need to export some functions to other components that
6406  * are currently implemented in macros within the osfmk
6407  * component.  Just export them as functions of the same name.
6408  */
6409 boolean_t
6410 is_kerneltask(task_t t)
6411 {
6412         if (t == kernel_task) {
6413                 return TRUE;
6414         }
6415
6416         return FALSE;
6417 }
6418
6419 boolean_t
6420 is_corpsetask(task_t t)
6421 {
6422         return task_is_a_corpse(t);
6423 }
6424
6425 #undef current_task
6426 task_t current_task(void);
6427 task_t
6428 current_task(void)
6429 {
6430         return current_task_fast();
6431 }
6432
6433 #undef task_reference
6434 void task_reference(task_t task);
6435 void
6436 task_reference(
6437         task_t          task)
6438 {
6439         if (task != TASK_NULL) {
6440                 task_reference_internal(task);
6441         }
6442 }
6443
6444 /* defined in bsd/kern/kern_prot.c */
6445 extern int get_audit_token_pid(audit_token_t *audit_token);
6446
6447 int
6448 task_pid(task_t task)
6449 {
6450         if (task) {
6451                 return get_audit_token_pid(&task->audit_token);
6452         }
6453         return -1;
6454 }
6455
6456
6457 /*
6458  * This routine finds a thread in a task by its unique id
6459  * Returns a referenced thread or THREAD_NULL if the thread was not found
6460  *
6461  * TODO: This is super inefficient - it's an O(threads in task) list walk!
6462  *       We should make a tid hash, or transition all tid clients to thread ports
6463  *
6464  * Precondition: No locks held (will take task lock)
6465  */
6466 thread_t
6467 task_findtid(task_t task, uint64_t tid)
6468 {
6469         thread_t self           = current_thread();
6470         thread_t found_thread   = THREAD_NULL;
6471         thread_t iter_thread    = THREAD_NULL;
6472
6473         /* Short-circuit the lookup if we're looking up ourselves */
6474         if (tid == self->thread_id || tid == TID_NULL) {
6475                 assert(self->task == task);
6476
6477                 thread_reference(self);
6478
6479                 return self;
6480         }
6481
6482         task_lock(task);
6483
6484         queue_iterate(&task->threads, iter_thread, thread_t, task_threads) {
6485                 if (iter_thread->thread_id == tid) {
6486                         found_thread = iter_thread;
6487                         thread_reference(found_thread);
6488                         break;
6489                 }
6490         }
6491
6492         task_unlock(task);
6493
6494         return found_thread;
6495 }
6496
6497 int
6498 pid_from_task(task_t task)
6499 {
6500         int pid = -1;
6501
6502         if (task->bsd_info) {
6503                 pid = proc_pid(task->bsd_info);
6504         } else {
6505                 pid = task_pid(task);
6506         }
6507
6508         return pid;
6509 }
6510
6511 /*
6512  * Control the CPU usage monitor for a task.
6513  */
6514 kern_return_t
6515 task_cpu_usage_monitor_ctl(task_t task, uint32_t *flags)
6516 {
6517         int error = KERN_SUCCESS;
6518
6519         if (*flags & CPUMON_MAKE_FATAL) {
6520                 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_CPUMON;
6521         } else {
6522                 error = KERN_INVALID_ARGUMENT;
6523         }
6524
6525         return error;
6526 }
6527
6528 /*
6529  * Control the wakeups monitor for a task.
6530  */
6531 kern_return_t
6532 task_wakeups_monitor_ctl(task_t task, uint32_t *flags, int32_t *rate_hz)
6533 {
6534         ledger_t ledger = task->ledger;
6535
6536         task_lock(task);
6537         if (*flags & WAKEMON_GET_PARAMS) {
6538                 ledger_amount_t limit;
6539                 uint64_t                period;
6540
6541                 ledger_get_limit(ledger, task_ledgers.interrupt_wakeups, &limit);
6542                 ledger_get_period(ledger, task_ledgers.interrupt_wakeups, &period);
6543
6544                 if (limit != LEDGER_LIMIT_INFINITY) {
6545                         /*
6546                          * An active limit means the wakeups monitor is enabled.
6547                          */
6548                         *rate_hz = (int32_t)(limit / (int64_t)(period / NSEC_PER_SEC));
6549                         *flags = WAKEMON_ENABLE;
6550                         if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
6551                                 *flags |= WAKEMON_MAKE_FATAL;
6552                         }
6553                 } else {
6554                         *flags = WAKEMON_DISABLE;
6555                         *rate_hz = -1;
6556                 }
6557
6558                 /*
6559                  * If WAKEMON_GET_PARAMS is present in flags, all other flags are ignored.
6560                  */
6561                 task_unlock(task);
6562                 return KERN_SUCCESS;
6563         }
6564
6565         if (*flags & WAKEMON_ENABLE) {
6566                 if (*flags & WAKEMON_SET_DEFAULTS) {
6567                         *rate_hz = task_wakeups_monitor_rate;
6568                 }
6569
6570 #ifndef CONFIG_NOMONITORS
6571                 if (*flags & WAKEMON_MAKE_FATAL) {
6572                         task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
6573                 }
6574 #endif /* CONFIG_NOMONITORS */
6575
6576                 if (*rate_hz <= 0) {
6577                         task_unlock(task);
6578                         return KERN_INVALID_ARGUMENT;
6579                 }
6580
6581 #ifndef CONFIG_NOMONITORS
6582                 ledger_set_limit(ledger, task_ledgers.interrupt_wakeups, *rate_hz * task_wakeups_monitor_interval,
6583                     task_wakeups_monitor_ustackshots_trigger_pct);
6584                 ledger_set_period(ledger, task_ledgers.interrupt_wakeups, task_wakeups_monitor_interval * NSEC_PER_SEC);
6585                 ledger_enable_callback(ledger, task_ledgers.interrupt_wakeups);
6586 #endif /* CONFIG_NOMONITORS */
6587         } else if (*flags & WAKEMON_DISABLE) {
6588                 /*
6589                  * Caller wishes to disable wakeups monitor on the task.
6590                  *
6591                  * Disable telemetry if it was triggered by the wakeups monitor, and
6592                  * remove the limit & callback on the wakeups ledger entry.
6593                  */
6594 #if CONFIG_TELEMETRY
6595                 telemetry_task_ctl_locked(task, TF_WAKEMON_WARNING, 0);
6596 #endif
6597                 ledger_disable_refill(ledger, task_ledgers.interrupt_wakeups);
6598                 ledger_disable_callback(ledger, task_ledgers.interrupt_wakeups);
6599         }
6600
6601         task_unlock(task);
6602         return KERN_SUCCESS;
6603 }
6604
6605 void
6606 task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1)
6607 {
6608         if (warning == LEDGER_WARNING_ROSE_ABOVE) {
6609 #if CONFIG_TELEMETRY
6610                 /*
6611                  * This task is in danger of violating the wakeups monitor. Enable telemetry on this task
6612                  * so there are micro-stackshots available if and when EXC_RESOURCE is triggered.
6613                  */
6614                 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 1);
6615 #endif
6616                 return;
6617         }
6618
6619 #if CONFIG_TELEMETRY
6620         /*
6621          * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
6622          * exceeded the limit, turn telemetry off for the task.
6623          */
6624         telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 0);
6625 #endif
6626
6627         if (warning == 0) {
6628                 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS();
6629         }
6630 }
6631
6632 void __attribute__((noinline))
6633 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void)
6634 {
6635         task_t                      task        = current_task();
6636         int                         pid         = 0;
6637         const char                  *procname   = "unknown";
6638         boolean_t                   fatal;
6639         kern_return_t               kr;
6640 #ifdef EXC_RESOURCE_MONITORS
6641         mach_exception_data_type_t  code[EXCEPTION_CODE_MAX];
6642 #endif /* EXC_RESOURCE_MONITORS */
6643         struct ledger_entry_info    lei;
6644
6645 #ifdef MACH_BSD
6646         pid = proc_selfpid();
6647         if (task->bsd_info != NULL) {
6648                 procname = proc_name_address(current_task()->bsd_info);
6649         }
6650 #endif
6651
6652         ledger_get_entry_info(task->ledger, task_ledgers.interrupt_wakeups, &lei);
6653
6654         /*
6655          * Disable the exception notification so we don't overwhelm
6656          * the listener with an endless stream of redundant exceptions.
6657          * TODO: detect whether another thread is already reporting the violation.
6658          */
6659         uint32_t flags = WAKEMON_DISABLE;
6660         task_wakeups_monitor_ctl(task, &flags, NULL);
6661
6662         fatal = task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
6663         trace_resource_violation(RMON_CPUWAKES_VIOLATED, &lei);
6664         os_log(OS_LOG_DEFAULT, "process %s[%d] caught waking the CPU %llu times "
6665             "over ~%llu seconds, averaging %llu wakes / second and "
6666             "violating a %slimit of %llu wakes over %llu seconds.\n",
6667             procname, pid,
6668             lei.lei_balance, lei.lei_last_refill / NSEC_PER_SEC,
6669             lei.lei_last_refill == 0 ? 0 :
6670             (NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill),
6671             fatal ? "FATAL " : "",
6672             lei.lei_limit, lei.lei_refill_period / NSEC_PER_SEC);
6673
6674         kr = send_resource_violation(send_cpu_wakes_violation, task, &lei,
6675             fatal ? kRNFatalLimitFlag : 0);
6676         if (kr) {
6677                 printf("send_resource_violation(CPU wakes, ...): error %#x\n", kr);
6678         }
6679
6680 #ifdef EXC_RESOURCE_MONITORS
6681         if (disable_exc_resource) {
6682                 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
6683                     "supressed by a boot-arg\n", procname, pid);
6684                 return;
6685         }
6686         if (audio_active) {
6687                 os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
6688                     "supressed due to audio playback\n", procname, pid);
6689                 return;
6690         }
6691         if (lei.lei_last_refill == 0) {
6692                 os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
6693                     "supressed due to lei.lei_last_refill = 0 \n", procname, pid);
6694         }
6695
6696         code[0] = code[1] = 0;
6697         EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_WAKEUPS);
6698         EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_WAKEUPS_MONITOR);
6699         EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_PERMITTED(code[0],
6700             NSEC_PER_SEC * lei.lei_limit / lei.lei_refill_period);
6701         EXC_RESOURCE_CPUMONITOR_ENCODE_OBSERVATION_INTERVAL(code[0],
6702             lei.lei_last_refill);
6703         EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_OBSERVED(code[1],
6704             NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill);
6705         exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
6706 #endif /* EXC_RESOURCE_MONITORS */
6707
6708         if (fatal) {
6709                 task_terminate_internal(task);
6710         }
6711 }
6712
6713 static boolean_t
6714 global_update_logical_writes(int64_t io_delta, int64_t *global_write_count)
6715 {
6716         int64_t old_count, new_count;
6717         boolean_t needs_telemetry;
6718
6719         do {
6720                 new_count = old_count = *global_write_count;
6721                 new_count += io_delta;
6722                 if (new_count >= io_telemetry_limit) {
6723                         new_count = 0;
6724                         needs_telemetry = TRUE;
6725                 } else {
6726                         needs_telemetry = FALSE;
6727                 }
6728         } while (!OSCompareAndSwap64(old_count, new_count, global_write_count));
6729         return needs_telemetry;
6730 }
6731
6732 void
6733 task_update_logical_writes(task_t task, uint32_t io_size, int flags, void *vp)
6734 {
6735         int64_t io_delta = 0;
6736         int64_t * global_counter_to_update;
6737         boolean_t needs_telemetry = FALSE;
6738         int ledger_to_update = 0;
6739         struct task_writes_counters * writes_counters_to_update;
6740
6741         if ((!task) || (!io_size) || (!vp)) {
6742                 return;
6743         }
6744
6745         KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_DATA_WRITE)) | DBG_FUNC_NONE,
6746             task_pid(task), io_size, flags, (uintptr_t)VM_KERNEL_ADDRPERM(vp), 0);
6747         DTRACE_IO4(logical_writes, struct task *, task, uint32_t, io_size, int, flags, vnode *, vp);
6748
6749         // Is the drive backing this vnode internal or external to the system?
6750         if (vnode_isonexternalstorage(vp) == false) {
6751                 global_counter_to_update = &global_logical_writes_count;
6752                 ledger_to_update = task_ledgers.logical_writes;
6753                 writes_counters_to_update = &task->task_writes_counters_internal;
6754         } else {
6755                 global_counter_to_update = &global_logical_writes_to_external_count;
6756                 ledger_to_update = task_ledgers.logical_writes_to_external;
6757                 writes_counters_to_update = &task->task_writes_counters_external;
6758         }
6759
6760         switch (flags) {
6761         case TASK_WRITE_IMMEDIATE:
6762                 OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_immediate_writes));
6763                 ledger_credit(task->ledger, ledger_to_update, io_size);
6764                 coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
6765                 break;
6766         case TASK_WRITE_DEFERRED:
6767                 OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_deferred_writes));
6768                 ledger_credit(task->ledger, ledger_to_update, io_size);
6769                 coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
6770                 break;
6771         case TASK_WRITE_INVALIDATED:
6772                 OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_invalidated_writes));
6773                 ledger_debit(task->ledger, ledger_to_update, io_size);
6774                 coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, FALSE, io_size);
6775                 break;
6776         case TASK_WRITE_METADATA:
6777                 OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_metadata_writes));
6778                 ledger_credit(task->ledger, ledger_to_update, io_size);
6779                 coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
6780                 break;
6781         }
6782
6783         io_delta = (flags == TASK_WRITE_INVALIDATED) ? ((int64_t)io_size * -1ll) : ((int64_t)io_size);
6784         if (io_telemetry_limit != 0) {
6785                 /* If io_telemetry_limit is 0, disable global updates and I/O telemetry */
6786                 needs_telemetry = global_update_logical_writes(io_delta, global_counter_to_update);
6787                 if (needs_telemetry) {
6788                         act_set_io_telemetry_ast(current_thread());
6789                 }
6790         }
6791 }
6792
6793 /*
6794  * Control the I/O monitor for a task.
6795  */
6796 kern_return_t
6797 task_io_monitor_ctl(task_t task, uint32_t *flags)
6798 {
6799         ledger_t ledger = task->ledger;
6800
6801         task_lock(task);
6802         if (*flags & IOMON_ENABLE) {
6803                 /* Configure the physical I/O ledger */
6804                 ledger_set_limit(ledger, task_ledgers.physical_writes, (task_iomon_limit_mb * 1024 * 1024), 0);
6805                 ledger_set_period(ledger, task_ledgers.physical_writes, (task_iomon_interval_secs * NSEC_PER_SEC));
6806         } else if (*flags & IOMON_DISABLE) {
6807                 /*
6808                  * Caller wishes to disable I/O monitor on the task.
6809                  */
6810                 ledger_disable_refill(ledger, task_ledgers.physical_writes);
6811                 ledger_disable_callback(ledger, task_ledgers.physical_writes);
6812         }
6813
6814         task_unlock(task);
6815         return KERN_SUCCESS;
6816 }
6817
6818 void
6819 task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1)
6820 {
6821         if (warning == 0) {
6822                 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO((int)param0);
6823         }
6824 }
6825
6826 void __attribute__((noinline))
6827 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor)
6828 {
6829         int                             pid = 0;
6830         task_t                          task = current_task();
6831 #ifdef EXC_RESOURCE_MONITORS
6832         mach_exception_data_type_t      code[EXCEPTION_CODE_MAX];
6833 #endif /* EXC_RESOURCE_MONITORS */
6834         struct ledger_entry_info        lei;
6835         kern_return_t                   kr;
6836
6837 #ifdef MACH_BSD
6838         pid = proc_selfpid();
6839 #endif
6840         /*
6841          * Get the ledger entry info. We need to do this before disabling the exception
6842          * to get correct values for all fields.
6843          */
6844         switch (flavor) {
6845         case FLAVOR_IO_PHYSICAL_WRITES:
6846                 ledger_get_entry_info(task->ledger, task_ledgers.physical_writes, &lei);
6847                 break;
6848         }
6849
6850
6851         /*
6852          * Disable the exception notification so we don't overwhelm
6853          * the listener with an endless stream of redundant exceptions.
6854          * TODO: detect whether another thread is already reporting the violation.
6855          */
6856         uint32_t flags = IOMON_DISABLE;
6857         task_io_monitor_ctl(task, &flags);
6858
6859         if (flavor == FLAVOR_IO_LOGICAL_WRITES) {
6860                 trace_resource_violation(RMON_LOGWRITES_VIOLATED, &lei);
6861         }
6862         os_log(OS_LOG_DEFAULT, "process [%d] caught causing excessive I/O (flavor: %d). Task I/O: %lld MB. [Limit : %lld MB per %lld secs]\n",
6863             pid, flavor, (lei.lei_balance / (1024 * 1024)), (lei.lei_limit / (1024 * 1024)), (lei.lei_refill_period / NSEC_PER_SEC));
6864
6865         kr = send_resource_violation(send_disk_writes_violation, task, &lei, kRNFlagsNone);
6866         if (kr) {
6867                 printf("send_resource_violation(disk_writes, ...): error %#x\n", kr);
6868         }
6869
6870 #ifdef EXC_RESOURCE_MONITORS
6871         code[0] = code[1] = 0;
6872         EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_IO);
6873         EXC_RESOURCE_ENCODE_FLAVOR(code[0], flavor);
6874         EXC_RESOURCE_IO_ENCODE_INTERVAL(code[0], (lei.lei_refill_period / NSEC_PER_SEC));
6875         EXC_RESOURCE_IO_ENCODE_LIMIT(code[0], (lei.lei_limit / (1024 * 1024)));
6876         EXC_RESOURCE_IO_ENCODE_OBSERVED(code[1], (lei.lei_balance / (1024 * 1024)));
6877         exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
6878 #endif /* EXC_RESOURCE_MONITORS */
6879 }
6880
6881 /* Placeholders for the task set/get voucher interfaces */
6882 kern_return_t
6883 task_get_mach_voucher(
6884         task_t                  task,
6885         mach_voucher_selector_t __unused which,
6886         ipc_voucher_t           *voucher)
6887 {
6888         if (TASK_NULL == task) {
6889                 return KERN_INVALID_TASK;
6890         }
6891
6892         *voucher = NULL;
6893         return KERN_SUCCESS;
6894 }
6895
6896 kern_return_t
6897 task_set_mach_voucher(
6898         task_t                  task,
6899         ipc_voucher_t           __unused voucher)
6900 {
6901         if (TASK_NULL == task) {
6902                 return KERN_INVALID_TASK;
6903         }
6904
6905         return KERN_SUCCESS;
6906 }
6907
6908 kern_return_t
6909 task_swap_mach_voucher(
6910         __unused task_t         task,
6911         __unused ipc_voucher_t  new_voucher,
6912         ipc_voucher_t          *in_out_old_voucher)
6913 {
6914         /*
6915          * Currently this function is only called from a MIG generated
6916          * routine which doesn't release the reference on the voucher
6917          * addressed by in_out_old_voucher. To avoid leaking this reference,
6918          * a call to release it has been added here.
6919          */
6920         ipc_voucher_release(*in_out_old_voucher);
6921         return KERN_NOT_SUPPORTED;
6922 }
6923
6924 void
6925 task_set_gpu_denied(task_t task, boolean_t denied)
6926 {
6927         task_lock(task);
6928
6929         if (denied) {
6930                 task->t_flags |= TF_GPU_DENIED;
6931         } else {
6932                 task->t_flags &= ~TF_GPU_DENIED;
6933         }
6934
6935         task_unlock(task);
6936 }
6937
6938 boolean_t
6939 task_is_gpu_denied(task_t task)
6940 {
6941         /* We don't need the lock to read this flag */
6942         return (task->t_flags & TF_GPU_DENIED) ? TRUE : FALSE;
6943 }
6944
6945
6946 uint64_t
6947 get_task_memory_region_count(task_t task)
6948 {
6949         vm_map_t map;
6950         map = (task == kernel_task) ? kernel_map: task->map;
6951         return (uint64_t)get_map_nentries(map);
6952 }
6953
6954 static void
6955 kdebug_trace_dyld_internal(uint32_t base_code,
6956     struct dyld_kernel_image_info *info)
6957 {
6958         static_assert(sizeof(info->uuid) >= 16);
6959
6960 #if defined(__LP64__)
6961         uint64_t *uuid = (uint64_t *)&(info->uuid);
6962
6963         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
6964             KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code), uuid[0],
6965             uuid[1], info->load_addr,
6966             (uint64_t)info->fsid.val[0] | ((uint64_t)info->fsid.val[1] << 32),
6967             0);
6968         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
6969             KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 1),
6970             (uint64_t)info->fsobjid.fid_objno |
6971             ((uint64_t)info->fsobjid.fid_generation << 32),
6972             0, 0, 0, 0);
6973 #else /* defined(__LP64__) */
6974         uint32_t *uuid = (uint32_t *)&(info->uuid);
6975
6976         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
6977             KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 2), uuid[0],
6978             uuid[1], uuid[2], uuid[3], 0);
6979         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
6980             KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 3),
6981             (uint32_t)info->load_addr, info->fsid.val[0], info->fsid.val[1],
6982             info->fsobjid.fid_objno, 0);
6983         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
6984             KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 4),
6985             info->fsobjid.fid_generation, 0, 0, 0, 0);
6986 #endif /* !defined(__LP64__) */
6987 }
6988
6989 static kern_return_t
6990 kdebug_trace_dyld(task_t task, uint32_t base_code,
6991     vm_map_copy_t infos_copy, mach_msg_type_number_t infos_len)
6992 {
6993         kern_return_t kr;
6994         dyld_kernel_image_info_array_t infos;
6995         vm_map_offset_t map_data;
6996         vm_offset_t data;
6997
6998         if (!infos_copy) {
6999                 return KERN_INVALID_ADDRESS;
7000         }
7001
7002         if (!kdebug_enable ||
7003             !kdebug_debugid_enabled(KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, 0))) {
7004                 vm_map_copy_discard(infos_copy);
7005                 return KERN_SUCCESS;
7006         }
7007
7008         if (task == NULL || task != current_task()) {
7009                 return KERN_INVALID_TASK;
7010         }
7011
7012         kr = vm_map_copyout(ipc_kernel_map, &map_data, (vm_map_copy_t)infos_copy);
7013         if (kr != KERN_SUCCESS) {
7014                 return kr;
7015         }
7016
7017         infos = CAST_DOWN(dyld_kernel_image_info_array_t, map_data);
7018
7019         for (mach_msg_type_number_t i = 0; i < infos_len; i++) {
7020                 kdebug_trace_dyld_internal(base_code, &(infos[i]));
7021         }
7022
7023         data = CAST_DOWN(vm_offset_t, map_data);
7024         mach_vm_deallocate(ipc_kernel_map, data, infos_len * sizeof(infos[0]));
7025         return KERN_SUCCESS;
7026 }
7027
7028 kern_return_t
7029 task_register_dyld_image_infos(task_t task,
7030     dyld_kernel_image_info_array_t infos_copy,
7031     mach_msg_type_number_t infos_len)
7032 {
7033         return kdebug_trace_dyld(task, DBG_DYLD_UUID_MAP_A,
7034                    (vm_map_copy_t)infos_copy, infos_len);
7035 }
7036
7037 kern_return_t
7038 task_unregister_dyld_image_infos(task_t task,
7039     dyld_kernel_image_info_array_t infos_copy,
7040     mach_msg_type_number_t infos_len)
7041 {
7042         return kdebug_trace_dyld(task, DBG_DYLD_UUID_UNMAP_A,
7043                    (vm_map_copy_t)infos_copy, infos_len);
7044 }
7045
7046 kern_return_t
7047 task_get_dyld_image_infos(__unused task_t task,
7048     __unused dyld_kernel_image_info_array_t * dyld_images,
7049     __unused mach_msg_type_number_t * dyld_imagesCnt)
7050 {
7051         return KERN_NOT_SUPPORTED;
7052 }
7053
7054 kern_return_t
7055 task_register_dyld_shared_cache_image_info(task_t task,
7056     dyld_kernel_image_info_t cache_img,
7057     __unused boolean_t no_cache,
7058     __unused boolean_t private_cache)
7059 {
7060         if (task == NULL || task != current_task()) {
7061                 return KERN_INVALID_TASK;
7062         }
7063
7064         kdebug_trace_dyld_internal(DBG_DYLD_UUID_SHARED_CACHE_A, &cache_img);
7065         return KERN_SUCCESS;
7066 }
7067
7068 kern_return_t
7069 task_register_dyld_set_dyld_state(__unused task_t task,
7070     __unused uint8_t dyld_state)
7071 {
7072         return KERN_NOT_SUPPORTED;
7073 }
7074
7075 kern_return_t
7076 task_register_dyld_get_process_state(__unused task_t task,
7077     __unused dyld_kernel_process_info_t * dyld_process_state)
7078 {
7079         return KERN_NOT_SUPPORTED;
7080 }
7081
7082 kern_return_t
7083 task_inspect(task_inspect_t task_insp, task_inspect_flavor_t flavor,
7084     task_inspect_info_t info_out, mach_msg_type_number_t *size_in_out)
7085 {
7086 #if MONOTONIC
7087         task_t task = (task_t)task_insp;
7088         kern_return_t kr = KERN_SUCCESS;
7089         mach_msg_type_number_t size;
7090
7091         if (task == TASK_NULL) {
7092                 return KERN_INVALID_ARGUMENT;
7093         }
7094
7095         size = *size_in_out;
7096
7097         switch (flavor) {
7098         case TASK_INSPECT_BASIC_COUNTS: {
7099                 struct task_inspect_basic_counts *bc;
7100                 uint64_t task_counts[MT_CORE_NFIXED] = { 0 };
7101
7102                 if (size < TASK_INSPECT_BASIC_COUNTS_COUNT) {
7103                         kr = KERN_INVALID_ARGUMENT;
7104                         break;
7105                 }
7106
7107                 mt_fixed_task_counts(task, task_counts);
7108                 bc = (struct task_inspect_basic_counts *)info_out;
7109 #ifdef MT_CORE_INSTRS
7110                 bc->instructions = task_counts[MT_CORE_INSTRS];
7111 #else /* defined(MT_CORE_INSTRS) */
7112                 bc->instructions = 0;
7113 #endif /* !defined(MT_CORE_INSTRS) */
7114                 bc->cycles = task_counts[MT_CORE_CYCLES];
7115                 size = TASK_INSPECT_BASIC_COUNTS_COUNT;
7116                 break;
7117         }
7118         default:
7119                 kr = KERN_INVALID_ARGUMENT;
7120                 break;
7121         }
7122
7123         if (kr == KERN_SUCCESS) {
7124                 *size_in_out = size;
7125         }
7126         return kr;
7127 #else /* MONOTONIC */
7128 #pragma unused(task_insp, flavor, info_out, size_in_out)
7129         return KERN_NOT_SUPPORTED;
7130 #endif /* !MONOTONIC */
7131 }
7132
7133 #if CONFIG_SECLUDED_MEMORY
7134 int num_tasks_can_use_secluded_mem = 0;
7135
7136 void
7137 task_set_can_use_secluded_mem(
7138         task_t          task,
7139         boolean_t       can_use_secluded_mem)
7140 {
7141         if (!task->task_could_use_secluded_mem) {
7142                 return;
7143         }
7144         task_lock(task);
7145         task_set_can_use_secluded_mem_locked(task, can_use_secluded_mem);
7146         task_unlock(task);
7147 }
7148
7149 void
7150 task_set_can_use_secluded_mem_locked(
7151         task_t          task,
7152         boolean_t       can_use_secluded_mem)
7153 {
7154         assert(task->task_could_use_secluded_mem);
7155         if (can_use_secluded_mem &&
7156             secluded_for_apps && /* global boot-arg */
7157             !task->task_can_use_secluded_mem) {
7158                 assert(num_tasks_can_use_secluded_mem >= 0);
7159                 OSAddAtomic(+1,
7160                     (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
7161                 task->task_can_use_secluded_mem = TRUE;
7162         } else if (!can_use_secluded_mem &&
7163             task->task_can_use_secluded_mem) {
7164                 assert(num_tasks_can_use_secluded_mem > 0);
7165                 OSAddAtomic(-1,
7166                     (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
7167                 task->task_can_use_secluded_mem = FALSE;
7168         }
7169 }
7170
7171 void
7172 task_set_could_use_secluded_mem(
7173         task_t          task,
7174         boolean_t       could_use_secluded_mem)
7175 {
7176         task->task_could_use_secluded_mem = could_use_secluded_mem;
7177 }
7178
7179 void
7180 task_set_could_also_use_secluded_mem(
7181         task_t          task,
7182         boolean_t       could_also_use_secluded_mem)
7183 {
7184         task->task_could_also_use_secluded_mem = could_also_use_secluded_mem;
7185 }
7186
7187 boolean_t
7188 task_can_use_secluded_mem(
7189         task_t          task,
7190         boolean_t       is_alloc)
7191 {
7192         if (task->task_can_use_secluded_mem) {
7193                 assert(task->task_could_use_secluded_mem);
7194                 assert(num_tasks_can_use_secluded_mem > 0);
7195                 return TRUE;
7196         }
7197         if (task->task_could_also_use_secluded_mem &&
7198             num_tasks_can_use_secluded_mem > 0) {
7199                 assert(num_tasks_can_use_secluded_mem > 0);
7200                 return TRUE;
7201         }
7202
7203         /*
7204          * If a single task is using more than some amount of
7205          * memory, allow it to dip into secluded and also begin
7206          * suppression of secluded memory until the tasks exits.
7207          */
7208         if (is_alloc && secluded_shutoff_trigger != 0) {
7209                 uint64_t phys_used = get_task_phys_footprint(task);
7210                 if (phys_used > secluded_shutoff_trigger) {
7211                         start_secluded_suppression(task);
7212                         return TRUE;
7213                 }
7214         }
7215
7216         return FALSE;
7217 }
7218
7219 boolean_t
7220 task_could_use_secluded_mem(
7221         task_t  task)
7222 {
7223         return task->task_could_use_secluded_mem;
7224 }
7225
7226 boolean_t
7227 task_could_also_use_secluded_mem(
7228         task_t  task)
7229 {
7230         return task->task_could_also_use_secluded_mem;
7231 }
7232 #endif /* CONFIG_SECLUDED_MEMORY */
7233
7234 queue_head_t *
7235 task_io_user_clients(task_t task)
7236 {
7237         return &task->io_user_clients;
7238 }
7239
7240 void
7241 task_set_message_app_suspended(task_t task, boolean_t enable)
7242 {
7243         task->message_app_suspended = enable;
7244 }
7245
7246 void
7247 task_copy_fields_for_exec(task_t dst_task, task_t src_task)
7248 {
7249         dst_task->vtimers = src_task->vtimers;
7250 }
7251
7252 #if DEVELOPMENT || DEBUG
7253 int vm_region_footprint = 0;
7254 #endif /* DEVELOPMENT || DEBUG */
7255
7256 boolean_t
7257 task_self_region_footprint(void)
7258 {
7259 #if DEVELOPMENT || DEBUG
7260         if (vm_region_footprint) {
7261                 /* system-wide override */
7262                 return TRUE;
7263         }
7264 #endif /* DEVELOPMENT || DEBUG */
7265         return current_task()->task_region_footprint;
7266 }
7267
7268 void
7269 task_self_region_footprint_set(
7270         boolean_t newval)
7271 {
7272         task_t  curtask;
7273
7274         curtask = current_task();
7275         task_lock(curtask);
7276         if (newval) {
7277                 curtask->task_region_footprint = TRUE;
7278         } else {
7279                 curtask->task_region_footprint = FALSE;
7280         }
7281         task_unlock(curtask);
7282 }
7283
7284 void
7285 task_set_darkwake_mode(task_t task, boolean_t set_mode)
7286 {
7287         assert(task);
7288
7289         task_lock(task);
7290
7291         if (set_mode) {
7292                 task->t_flags |= TF_DARKWAKE_MODE;
7293         } else {
7294                 task->t_flags &= ~(TF_DARKWAKE_MODE);
7295         }
7296
7297         task_unlock(task);
7298 }
7299
7300 boolean_t
7301 task_get_darkwake_mode(task_t task)
7302 {
7303         assert(task);
7304         return (task->t_flags & TF_DARKWAKE_MODE) != 0;
7305 }
7306
7307 kern_return_t
7308 task_get_exc_guard_behavior(
7309         task_t task,
7310         task_exc_guard_behavior_t *behaviorp)
7311 {
7312         if (task == TASK_NULL) {
7313                 return KERN_INVALID_TASK;
7314         }
7315         *behaviorp = task->task_exc_guard;
7316         return KERN_SUCCESS;
7317 }
7318
7319 #ifndef TASK_EXC_GUARD_ALL
7320 /* Temporary define until two branches are merged */
7321 #define TASK_EXC_GUARD_ALL (TASK_EXC_GUARD_VM_ALL | 0xf0)
7322 #endif
7323
7324 kern_return_t
7325 task_set_exc_guard_behavior(
7326         task_t task,
7327         task_exc_guard_behavior_t behavior)
7328 {
7329         if (task == TASK_NULL) {
7330                 return KERN_INVALID_TASK;
7331         }
7332         if (behavior & ~TASK_EXC_GUARD_ALL) {
7333                 return KERN_INVALID_VALUE;
7334         }
7335         task->task_exc_guard = behavior;
7336         return KERN_SUCCESS;
7337 }
7338
7339 #if __arm64__
7340 extern int legacy_footprint_entitlement_mode;
7341 extern void memorystatus_act_on_legacy_footprint_entitlement(proc_t, boolean_t);
7342 extern void memorystatus_act_on_ios13extended_footprint_entitlement(proc_t);
7343
7344 void
7345 task_set_legacy_footprint(
7346         task_t task)
7347 {
7348         task_lock(task);
7349         task->task_legacy_footprint = TRUE;
7350         task_unlock(task);
7351 }
7352
7353 void
7354 task_set_extra_footprint_limit(
7355         task_t task)
7356 {
7357         if (task->task_extra_footprint_limit) {
7358                 return;
7359         }
7360         task_lock(task);
7361         if (task->task_extra_footprint_limit) {
7362                 task_unlock(task);
7363                 return;
7364         }
7365         task->task_extra_footprint_limit = TRUE;
7366         task_unlock(task);
7367         memorystatus_act_on_legacy_footprint_entitlement(task->bsd_info, TRUE);
7368 }
7369
7370 void
7371 task_set_ios13extended_footprint_limit(
7372         task_t task)
7373 {
7374         if (task->task_ios13extended_footprint_limit) {
7375                 return;
7376         }
7377         task_lock(task);
7378         if (task->task_ios13extended_footprint_limit) {
7379                 task_unlock(task);
7380                 return;
7381         }
7382         task->task_ios13extended_footprint_limit = TRUE;
7383         task_unlock(task);
7384         memorystatus_act_on_ios13extended_footprint_entitlement(task->bsd_info);
7385 }
7386 #endif /* __arm64__ */
7387
7388 static inline ledger_amount_t
7389 task_ledger_get_balance(
7390         ledger_t        ledger,
7391         int             ledger_idx)
7392 {
7393         ledger_amount_t amount;
7394         amount = 0;
7395         ledger_get_balance(ledger, ledger_idx, &amount);
7396         return amount;
7397 }
7398
7399 /*
7400  * Gather the amount of memory counted in a task's footprint due to
7401  * being in a specific set of ledgers.
7402  */
7403 void
7404 task_ledgers_footprint(
7405         ledger_t        ledger,
7406         ledger_amount_t *ledger_resident,
7407         ledger_amount_t *ledger_compressed)
7408 {
7409         *ledger_resident = 0;
7410         *ledger_compressed = 0;
7411
7412         /* purgeable non-volatile memory */
7413         *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.purgeable_nonvolatile);
7414         *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.purgeable_nonvolatile_compressed);
7415
7416         /* "default" tagged memory */
7417         *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.tagged_footprint);
7418         *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.tagged_footprint_compressed);
7419
7420         /* "network" currently never counts in the footprint... */
7421
7422         /* "media" tagged memory */
7423         *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.media_footprint);
7424         *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.media_footprint_compressed);
7425
7426         /* "graphics" tagged memory */
7427         *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.graphics_footprint);
7428         *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.graphics_footprint_compressed);
7429
7430         /* "neural" tagged memory */
7431         *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.neural_footprint);
7432         *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.neural_footprint_compressed);
7433 }
7434
7435 void
7436 task_set_memory_ownership_transfer(
7437         task_t    task,
7438         boolean_t value)
7439 {
7440         task_lock(task);
7441         task->task_can_transfer_memory_ownership = value;
7442         task_unlock(task);
7443 }
7444
7445 void
7446 task_copy_vmobjects(task_t task, vm_object_query_t query, int len, int64_t* num)
7447 {
7448         vm_object_t find_vmo;
7449         int64_t size = 0;
7450
7451         task_objq_lock(task);
7452         if (query != NULL) {
7453                 queue_iterate(&task->task_objq, find_vmo, vm_object_t, task_objq)
7454                 {
7455                         int byte_size;
7456                         vm_object_query_t p = &query[size++];
7457
7458                         p->object_id = (vm_object_id_t) VM_KERNEL_ADDRPERM(find_vmo);
7459                         p->virtual_size = find_vmo->internal ? find_vmo->vo_size : 0;
7460                         p->resident_size = find_vmo->resident_page_count * PAGE_SIZE;
7461                         p->wired_size = find_vmo->wired_page_count * PAGE_SIZE;
7462                         p->reusable_size = find_vmo->reusable_page_count * PAGE_SIZE;
7463                         p->vo_no_footprint = find_vmo->vo_no_footprint;
7464                         p->vo_ledger_tag = find_vmo->vo_ledger_tag;
7465                         p->purgable = find_vmo->purgable;
7466
7467                         if (find_vmo->internal && find_vmo->pager_created && find_vmo->pager != NULL) {
7468                                 p->compressed_size = vm_compressor_pager_get_count(find_vmo->pager) * PAGE_SIZE;
7469                         } else {
7470                                 p->compressed_size = 0;
7471                         }
7472
7473                         /* make sure to not overrun */
7474                         byte_size = (int) size * sizeof(vm_object_query_data_t);
7475                         if ((int)(byte_size + sizeof(vm_object_query_data_t)) > len) {
7476                                 break;
7477                         }
7478                 }
7479         } else {
7480                 size = task->task_owned_objects;
7481         }
7482         task_objq_unlock(task);
7483
7484         *num = size;
7485 }