osfmk/kern/task.c

   1 /*
   2  * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * @OSF_FREE_COPYRIGHT@
  30  */
  31 /*
  32  * Mach Operating System
  33  * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
  34  * All Rights Reserved.
  35  *
  36  * Permission to use, copy, modify and distribute this software and its
  37  * documentation is hereby granted, provided that both the copyright
  38  * notice and this permission notice appear in all copies of the
  39  * software, derivative works or modified versions, and any portions
  40  * thereof, and that both notices appear in supporting documentation.
  41  *
  42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
  44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  45  *
  46  * Carnegie Mellon requests users of this software to return to
  47  *
  48  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  49  *  School of Computer Science
  50  *  Carnegie Mellon University
  51  *  Pittsburgh PA 15213-3890
  52  *
  53  * any improvements or extensions that they make and grant Carnegie Mellon
  54  * the rights to redistribute these changes.
  55  */
  56 /*
  57  *      File:   kern/task.c
  58  *      Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub,
  59  *              David Black
  60  *
  61  *      Task management primitives implementation.
  62  */
  63 /*
  64  * Copyright (c) 1993 The University of Utah and
  65  * the Computer Systems Laboratory (CSL).  All rights reserved.
  66  *
  67  * Permission to use, copy, modify and distribute this software and its
  68  * documentation is hereby granted, provided that both the copyright
  69  * notice and this permission notice appear in all copies of the
  70  * software, derivative works or modified versions, and any portions
  71  * thereof, and that both notices appear in supporting documentation.
  72  *
  73  * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
  74  * IS" CONDITION.  THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
  75  * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  76  *
  77  * CSL requests users of this software to return to csl-dist@cs.utah.edu any
  78  * improvements that they make and grant CSL redistribution rights.
  79  *
  80  */
  81 /*
  82  * NOTICE: This file was modified by McAfee Research in 2004 to introduce
  83  * support for mandatory and extensible security protections.  This notice
  84  * is included in support of clause 2.2 (b) of the Apple Public License,
  85  * Version 2.0.
  86  * Copyright (c) 2005 SPARTA, Inc.
  87  */
  88
  89 #include <mach/mach_types.h>
  90 #include <mach/boolean.h>
  91 #include <mach/host_priv.h>
  92 #include <mach/machine/vm_types.h>
  93 #include <mach/vm_param.h>
  94 #include <mach/mach_vm.h>
  95 #include <mach/semaphore.h>
  96 #include <mach/task_info.h>
  97 #include <mach/task_inspect.h>
  98 #include <mach/task_special_ports.h>
  99 #include <mach/sdt.h>
 100
 101 #include <ipc/ipc_importance.h>
 102 #include <ipc/ipc_types.h>
 103 #include <ipc/ipc_space.h>
 104 #include <ipc/ipc_entry.h>
 105 #include <ipc/ipc_hash.h>
 106
 107 #include <kern/kern_types.h>
 108 #include <kern/mach_param.h>
 109 #include <kern/misc_protos.h>
 110 #include <kern/task.h>
 111 #include <kern/thread.h>
 112 #include <kern/coalition.h>
 113 #include <kern/zalloc.h>
 114 #include <kern/kalloc.h>
 115 #include <kern/kern_cdata.h>
 116 #include <kern/processor.h>
 117 #include <kern/sched_prim.h>    /* for thread_wakeup */
 118 #include <kern/ipc_tt.h>
 119 #include <kern/host.h>
 120 #include <kern/clock.h>
 121 #include <kern/timer.h>
 122 #include <kern/assert.h>
 123 #include <kern/sync_lock.h>
 124 #include <kern/affinity.h>
 125 #include <kern/exc_resource.h>
 126 #include <kern/machine.h>
 127 #include <kern/policy_internal.h>
 128 #include <kern/restartable.h>
 129
 130 #include <corpses/task_corpse.h>
 131 #if CONFIG_TELEMETRY
 132 #include <kern/telemetry.h>
 133 #endif
 134
 135 #if MONOTONIC
 136 #include <kern/monotonic.h>
 137 #include <machine/monotonic.h>
 138 #endif /* MONOTONIC */
 139
 140 #include <os/log.h>
 141
 142 #include <vm/pmap.h>
 143 #include <vm/vm_map.h>
 144 #include <vm/vm_kern.h>         /* for kernel_map, ipc_kernel_map */
 145 #include <vm/vm_pageout.h>
 146 #include <vm/vm_protos.h>
 147 #include <vm/vm_purgeable_internal.h>
 148 #include <vm/vm_compressor_pager.h>
 149
 150 #include <sys/resource.h>
 151 #include <sys/signalvar.h> /* for coredump */
 152 #include <sys/bsdtask_info.h>
 153 /*
 154  * Exported interfaces
 155  */
 156
 157 #include <mach/task_server.h>
 158 #include <mach/mach_host_server.h>
 159 #include <mach/host_security_server.h>
 160 #include <mach/mach_port_server.h>
 161
 162 #include <vm/vm_shared_region.h>
 163
 164 #include <libkern/OSDebug.h>
 165 #include <libkern/OSAtomic.h>
 166 #include <libkern/section_keywords.h>
 167
 168 #include <mach-o/loader.h>
 169
 170 #if CONFIG_ATM
 171 #include <atm/atm_internal.h>
 172 #endif
 173
 174 #include <kern/sfi.h>           /* picks up ledger.h */
 175
 176 #if CONFIG_MACF
 177 #include <security/mac_mach_internal.h>
 178 #endif
 179
 180 #if KPERF
 181 extern int kpc_force_all_ctrs(task_t, int); /* declared only when KPERF is configured */
 182 #endif
 183
 /*
  * File-scope task state.  task_zone and the three lock objects below are
  * set up in task_init().
  */
 184 task_t                  kernel_task;
 185 zone_t                  task_zone;      /* zone of struct task, created by zinit() in task_init() */
 186 lck_attr_t      task_lck_attr;          /* lock attributes, defaulted in task_init() */
 187 lck_grp_t       task_lck_grp;           /* lock group named "task" */
 188 lck_grp_attr_t  task_lck_grp_attr;      /* lock group attributes, defaulted in task_init() */
 189
 190 extern int exc_via_corpse_forking;
 191 extern int corpse_for_fatal_memkill;
 192 extern boolean_t proc_send_synchronous_EXC_RESOURCE(void *p);
 193
 194 /* Flag set by core audio when audio is playing. Used to stifle EXC_RESOURCE generation when active. */
 195 int audio_active = 0;
 196
 197 zinfo_usage_store_t tasks_tkm_private;
 198 zinfo_usage_store_t tasks_tkm_shared;
 199
 200 /* A container to accumulate statistics for expired tasks, and the spin lock declared alongside it */
 201 expired_task_statistics_t               dead_task_statistics;
 202 lck_spin_t              dead_task_statistics_lock;
 203
 204 ledger_template_t task_ledger_template = NULL; /* starts out NULL; NOTE(review): presumably created by init_task_ledgers() -- confirm */
 205
 /*
  * Indices of the per-task ledger entries.  Every scalar index starts out
  * as -1; sfi_wait_times (absent on CONFIG_EMBEDDED) is filled in at
  * runtime.  The pages_grabbed* entries exist only on DEBUG/DEVELOPMENT
  * builds.
  * NOTE(review): presumably init_task_ledgers() replaces the -1
  * placeholders with real ledger indices -- confirm against its definition.
  */
 206 SECURITY_READ_ONLY_LATE(struct _task_ledger_indices) task_ledgers __attribute__((used)) =
 207 {.cpu_time = -1,
 208  .tkm_private = -1,
 209  .tkm_shared = -1,
 210  .phys_mem = -1,
 211  .wired_mem = -1,
 212  .internal = -1,
 213  .iokit_mapped = -1,
 214  .alternate_accounting = -1,
 215  .alternate_accounting_compressed = -1,
 216  .page_table = -1,
 217  .phys_footprint = -1,
 218  .internal_compressed = -1,
 219  .purgeable_volatile = -1,
 220  .purgeable_nonvolatile = -1,
 221  .purgeable_volatile_compressed = -1,
 222  .purgeable_nonvolatile_compressed = -1,
 223  .tagged_nofootprint = -1,
 224  .tagged_footprint = -1,
 225  .tagged_nofootprint_compressed = -1,
 226  .tagged_footprint_compressed = -1,
 227  .network_volatile = -1,
 228  .network_nonvolatile = -1,
 229  .network_volatile_compressed = -1,
 230  .network_nonvolatile_compressed = -1,
 231  .media_nofootprint = -1,
 232  .media_footprint = -1,
 233  .media_nofootprint_compressed = -1,
 234  .media_footprint_compressed = -1,
 235  .graphics_nofootprint = -1,
 236  .graphics_footprint = -1,
 237  .graphics_nofootprint_compressed = -1,
 238  .graphics_footprint_compressed = -1,
 239  .neural_nofootprint = -1,
 240  .neural_footprint = -1,
 241  .neural_nofootprint_compressed = -1,
 242  .neural_footprint_compressed = -1,
 243  .platform_idle_wakeups = -1,
 244  .interrupt_wakeups = -1,
 245 #if !CONFIG_EMBEDDED
 246  .sfi_wait_times = { 0 /* initialized at runtime */},
 247 #endif /* !CONFIG_EMBEDDED */
 248  .cpu_time_billed_to_me = -1,
 249  .cpu_time_billed_to_others = -1,
 250  .physical_writes = -1,
 251  .logical_writes = -1,
 252  .logical_writes_to_external = -1,
 253 #if DEBUG || DEVELOPMENT
 254  .pages_grabbed = -1,
 255  .pages_grabbed_kern = -1,
 256  .pages_grabbed_iopl = -1,
 257  .pages_grabbed_upl = -1,
 258 #endif
 259  .energy_billed_to_me = -1,
 260  .energy_billed_to_others = -1};
 261
 262 /* System sleep state */
 263 boolean_t tasks_suspend_state;
 264
 265
 /*
  * Prototypes for routines whose definitions are not in this part of the
  * file.  The three upper-case notification routines are explicitly marked
  * noinline.
  * NOTE(review): presumably noinline keeps their descriptive names visible
  * as separate frames in backtraces -- confirm.
  */
 266 void init_task_ledgers(void);
 267 void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1);
 268 void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1);
 269 void task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1);
 270 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void);
 271 void __attribute__((noinline)) PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, boolean_t is_fatal);
 272 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor);
 273
 274 kern_return_t task_suspend_internal(task_t);
 275 kern_return_t task_resume_internal(task_t);
 276 static kern_return_t task_start_halt_locked(task_t task, boolean_t should_mark_corpse);
 277
 /* Routines implemented outside this file, declared locally instead of through a header. */
 278 extern kern_return_t iokit_task_terminate(task_t task);
 279 extern void          iokit_task_app_suspended_changed(task_t task);
 280
 281 extern kern_return_t exception_deliver(thread_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t, struct exception_action *, lck_mtx_t *);
 282 extern void bsd_copythreadname(void *dst_uth, void *src_uth);
 283 extern kern_return_t thread_resume(thread_t thread);
 284
 285 // Warn tasks when they hit 80% of their memory limit.
 286 #define PHYS_FOOTPRINT_WARNING_LEVEL 80
 287
 288 #define TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT              150 /* wakeups per second */
 289 #define TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL   300 /* in seconds. */
 290
 291 /*
 292  * Level (in terms of percentage of the limit) at which the wakeups monitor triggers telemetry.
 293  *
 294  * (i.e. when the task's wakeups rate exceeds 70% of the limit, start taking user
 295  *  stacktraces, aka micro-stackshots)
 296  */
 297 #define TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER        70
 298
 299 int task_wakeups_monitor_interval; /* In seconds. Time period over which wakeups rate is observed */
 300 int task_wakeups_monitor_rate;     /* In hz. Maximum allowable wakeups per task before EXC_RESOURCE is sent */
 301
 302 int task_wakeups_monitor_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
 303
 304 int disable_exc_resource; /* Global override to suppress EXC_RESOURCE for resource monitor violations. */
 305
 /*
  * The three max_task_footprint* values below are computed in task_init()
  * from the "max_task_pmem" boot-arg or the "kern.max_task_pmem" default;
  * 0 means no per-task limit is configured.
  */
 306 ledger_amount_t max_task_footprint = 0;  /* Per-task limit on physical memory consumption in bytes     */
 307 int max_task_footprint_warning_level = 0;  /* Per-task limit warning percentage */
 308 int max_task_footprint_mb = 0;  /* Per-task limit on physical memory consumption in megabytes */
 309
 310 /* I/O Monitor Limits */
 311 #define IOMON_DEFAULT_LIMIT                     (20480ull)      /* MB of logical/physical I/O */
 312 #define IOMON_DEFAULT_INTERVAL                  (86400ull)      /* in seconds */
 313
 314 uint64_t task_iomon_limit_mb;           /* Per-task I/O monitor limit in MBs */
 315 uint64_t task_iomon_interval_secs;      /* Per-task I/O monitor interval in secs */
 316
 317 #define IO_TELEMETRY_DEFAULT_LIMIT              (10ll * 1024ll * 1024ll)
 318 int64_t io_telemetry_limit;                     /* Threshold to take a microstackshot (0 indicates I/O telemetry is turned off) */
 319 int64_t global_logical_writes_count = 0;        /* Global count for logical writes */
 320 int64_t global_logical_writes_to_external_count = 0;        /* Global count for logical writes to external storage */
 321 static boolean_t global_update_logical_writes(int64_t, int64_t*);
 322
 323 #define TASK_MAX_THREAD_LIMIT 256
 324
 325 #if MACH_ASSERT
 326 int pmap_ledgers_panic = 1;
 327 int pmap_ledgers_panic_leeway = 3;
 328 #endif /* MACH_ASSERT */
 329
 330 int task_max = CONFIG_TASK_MAX; /* Max number of tasks; sizes task_zone in task_init() */
 331
 332 #if CONFIG_COREDUMP
 333 int hwm_user_cores = 0; /* high watermark violations generate user core files; set from the "hwm_user_cores" boot-arg in task_init() */
 334 #endif
 335
 /*
  * Routines declared extern here rather than through a header; this whole
  * group is compiled only when MACH_BSD is defined, and the memorystatus
  * subset only when CONFIG_MEMORYSTATUS is set.
  */
 336 #ifdef MACH_BSD
 337 extern uint32_t proc_platform(struct proc *);
 338 extern uint32_t proc_sdk(struct proc *);
 339 extern void     proc_getexecutableuuid(void *, unsigned char *, unsigned long);
 340 extern int      proc_pid(struct proc *p);
 341 extern int      proc_selfpid(void);
 342 extern struct proc *current_proc(void);
 343 extern char     *proc_name_address(struct proc *p);
 344 extern uint64_t get_dispatchqueue_offset_from_proc(void *);
 345 extern int kevent_proc_copy_uptrs(void *proc, uint64_t *buf, int bufsize);
 346 extern void workq_proc_suspended(struct proc *p);
 347 extern void workq_proc_resumed(struct proc *p);
 348
 349 #if CONFIG_MEMORYSTATUS
 350 extern void     proc_memstat_terminated(struct proc* p, boolean_t set);
 351 extern void     memorystatus_on_ledger_footprint_exceeded(int warning, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal);
 352 extern void     memorystatus_log_exception(const int max_footprint_mb, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal);
 353 extern boolean_t memorystatus_allowed_vm_map_fork(task_t task);
 354 extern uint64_t  memorystatus_available_memory_internal(proc_t p);
 355
 356 #if DEVELOPMENT || DEBUG
 357 extern void memorystatus_abort_vm_map_fork(task_t);
 358 #endif
 359
 360 #endif /* CONFIG_MEMORYSTATUS */
 361
 362 #endif /* MACH_BSD */
 363
 364 #if DEVELOPMENT || DEBUG
 /* Set from the "exc_resource_threads" boot-arg in task_init(); defaults to 1 when the boot-arg is absent. */
 365 int exc_resource_threads_enabled;
 366 #endif /* DEVELOPMENT || DEBUG */
 367
 /*
  * Default EXC_GUARD behavior bits, consumed by task_set_platform_binary().
  * DEVELOPMENT/DEBUG kernels start with the DELIVER, ONCE and CORPSE bits
  * set for both the MP and VM groups and may override the value with the
  * "task_exc_guard_default" boot-arg (see task_init()); other kernels
  * start with 0.
  */
 368 #if (DEVELOPMENT || DEBUG)
 369 uint32_t task_exc_guard_default = TASK_EXC_GUARD_MP_DELIVER | TASK_EXC_GUARD_MP_ONCE | TASK_EXC_GUARD_MP_CORPSE |
 370     TASK_EXC_GUARD_VM_DELIVER | TASK_EXC_GUARD_VM_ONCE | TASK_EXC_GUARD_VM_CORPSE;
 371 #else
 372 uint32_t task_exc_guard_default = 0;
 373 #endif
 374
 375 /* Forward declarations of file-local (static) helpers; their definitions are not in this part of the file. */
 376
 377 static void task_hold_locked(task_t task);
 378 static void task_wait_locked(task_t task, boolean_t until_not_runnable);
 379 static void task_release_locked(task_t task);
 380
 381 static void task_synchronizer_destroy_all(task_t task);
 /* Turnstile watchport helpers; the add/remove variants return an os_ref_count_t. */
 382 static os_ref_count_t
 383 task_add_turnstile_watchports_locked(
 384         task_t                      task,
 385         struct task_watchports      *watchports,
 386         struct task_watchport_elem  **previous_elem_array,
 387         ipc_port_t                  *portwatch_ports,
 388         uint32_t                    portwatch_count);
 389
 390 static os_ref_count_t
 391 task_remove_turnstile_watchports_locked(
 392         task_t                 task,
 393         struct task_watchports *watchports,
 394         ipc_port_t             *port_freelist);
 395
 396 static struct task_watchports *
 397 task_watchports_alloc_init(
 398         task_t        task,
 399         thread_t      thread,
 400         uint32_t      count);
 401
 402 static void
 403 task_watchports_deallocate(
 404         struct task_watchports *watchports);
 405
 406 void
 407 task_set_64bit(
 408         task_t task,
 409         boolean_t is_64bit,
 410         boolean_t is_64bit_data)
 411 {
 412 #if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
 413         thread_t thread;
 414 #endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */
 415
 416         task_lock(task);
 417
 418         /*
 419          * Switching to/from 64-bit address spaces
 420          */
 421         if (is_64bit) {
 422                 if (!task_has_64Bit_addr(task)) {
 423                         task_set_64Bit_addr(task);
 424                 }
 425         } else {
 426                 if (task_has_64Bit_addr(task)) {
 427                         task_clear_64Bit_addr(task);
 428                 }
 429         }
 430
 431         /*
 432          * Switching to/from 64-bit register state.
 433          */
 434         if (is_64bit_data) {
 435                 if (task_has_64Bit_data(task)) {
 436                         goto out;
 437                 }
 438
 439                 task_set_64Bit_data(task);
 440         } else {
 441                 if (!task_has_64Bit_data(task)) {
 442                         goto out;
 443                 }
 444
 445                 task_clear_64Bit_data(task);
 446         }
 447
 448         /* FIXME: On x86, the thread save state flavor can diverge from the
 449          * task's 64-bit feature flag due to the 32-bit/64-bit register save
 450          * state dichotomy. Since we can be pre-empted in this interval,
 451          * certain routines may observe the thread as being in an inconsistent
 452          * state with respect to its task's 64-bitness.
 453          */
 454
 455 #if defined(__x86_64__) || defined(__arm64__)
 456         queue_iterate(&task->threads, thread, thread_t, task_threads) {
 457                 thread_mtx_lock(thread);
 458                 machine_thread_switch_addrmode(thread);
 459                 thread_mtx_unlock(thread);
 460
 461 #if defined(__arm64__)
 462                 /* specifically, if running on H9 */
 463                 if (thread == current_thread()) {
 464                         uint64_t arg1, arg2;
 465                         int urgency;
 466                         spl_t spl = splsched();
 467                         /*
 468                          * This call tell that the current thread changed it's 32bitness.
 469                          * Other thread were no more on core when 32bitness was changed,
 470                          * but current_thread() is on core and the previous call to
 471                          * machine_thread_going_on_core() gave 32bitness which is now wrong.
 472                          *
 473                          * This is needed for bring-up, a different callback should be used
 474                          * in the future.
 475                          *
 476                          * TODO: Remove this callout when we no longer support 32-bit code on H9
 477                          */
 478                         thread_lock(thread);
 479                         urgency = thread_get_urgency(thread, &arg1, &arg2);
 480                         machine_thread_going_on_core(thread, urgency, 0, 0, mach_approximate_time());
 481                         thread_unlock(thread);
 482                         splx(spl);
 483                 }
 484 #endif /* defined(__arm64__) */
 485         }
 486 #endif /* defined(__x86_64__) || defined(__arm64__) */
 487
 488 out:
 489         task_unlock(task);
 490 }
 491
 492 boolean_t
 493 task_get_64bit_data(task_t task)
 494 {
 495         return task_has_64Bit_data(task);
 496 }
 497
 498 void
 499 task_set_platform_binary(
 500         task_t task,
 501         boolean_t is_platform)
 502 {
 503         task_lock(task);
 504         if (is_platform) {
 505                 task->t_flags |= TF_PLATFORM;
 506                 /* set exc guard default behavior for first-party code */
 507                 task->task_exc_guard = (task_exc_guard_default & TASK_EXC_GUARD_ALL);
 508         } else {
 509                 task->t_flags &= ~(TF_PLATFORM);
 510                 /* set exc guard default behavior for third-party code */
 511                 task->task_exc_guard = ((task_exc_guard_default >> TASK_EXC_GUARD_THIRD_PARTY_DEFAULT_SHIFT) & TASK_EXC_GUARD_ALL);
 512         }
 513         task_unlock(task);
 514 }
 515
 516 /*
 517  * Set or clear per-task TF_CA_CLIENT_WI flag according to specified argument.
 518  * Returns "false" if flag is already set, and "true" in other cases.
 519  */
 520 bool
 521 task_set_ca_client_wi(
 522         task_t task,
 523         boolean_t set_or_clear)
 524 {
 525         bool ret = true;
 526         task_lock(task);
 527         if (set_or_clear) {
 528                 /* Tasks can have only one CA_CLIENT work interval */
 529                 if (task->t_flags & TF_CA_CLIENT_WI) {
 530                         ret = false;
 531                 } else {
 532                         task->t_flags |= TF_CA_CLIENT_WI;
 533                 }
 534         } else {
 535                 task->t_flags &= ~TF_CA_CLIENT_WI;
 536         }
 537         task_unlock(task);
 538         return ret;
 539 }
 540
 541 void
 542 task_set_dyld_info(
 543         task_t task,
 544         mach_vm_address_t addr,
 545         mach_vm_size_t size)
 546 {
 547         task_lock(task);
 548         task->all_image_info_addr = addr;
 549         task->all_image_info_size = size;
 550         task_unlock(task);
 551 }
 552
 553 void
 554 task_set_mach_header_address(
 555         task_t task,
 556         mach_vm_address_t addr)
 557 {
 558         task_lock(task);
 559         task->mach_header_vm_address = addr;
 560         task_unlock(task);
 561 }
 562
 563 void
 564 task_atm_reset(__unused task_t task)
 565 {
 566 #if CONFIG_ATM
 567         if (task->atm_context != NULL) {
 568                 atm_task_descriptor_destroy(task->atm_context);
 569                 task->atm_context = NULL;
 570         }
 571 #endif
 572 }
 573
 574 void
 575 task_bank_reset(__unused task_t task)
 576 {
 577         if (task->bank_context != NULL) {
 578                 bank_task_destroy(task);
 579         }
 580 }
 581
 582 /*
 583  * NOTE: This should only be called when the P_LINTRANSIT
 584  *       flag is set (the proc_trans lock is held) on the
 585  *       proc associated with the task.
 586  */
 587 void
 588 task_bank_init(__unused task_t task)
 589 {
 590         if (task->bank_context != NULL) {
 591                 panic("Task bank init called with non null bank context for task: %p and bank_context: %p", task, task->bank_context);
 592         }
 593         bank_task_initialize(task);
 594 }
 595
 596 void
 597 task_set_did_exec_flag(task_t task)
 598 {
 599         task->t_procflags |= TPF_DID_EXEC;
 600 }
 601
 602 void
 603 task_clear_exec_copy_flag(task_t task)
 604 {
 605         task->t_procflags &= ~TPF_EXEC_COPY;
 606 }
 607
 608 event_t
 609 task_get_return_wait_event(task_t task)
 610 {
 611         return (event_t)&task->returnwait_inheritor;
 612 }
 613
 614 void
 615 task_clear_return_wait(task_t task, uint32_t flags)
 616 {
 617         if (flags & TCRW_CLEAR_INITIAL_WAIT) {
 618                 thread_wakeup(task_get_return_wait_event(task));
 619         }
 620
 621         if (flags & TCRW_CLEAR_FINAL_WAIT) {
 622                 is_write_lock(task->itk_space);
 623
 624                 task->t_returnwaitflags &= ~TRW_LRETURNWAIT;
 625                 task->returnwait_inheritor = NULL;
 626
 627                 if (task->t_returnwaitflags & TRW_LRETURNWAITER) {
 628                         struct turnstile *turnstile = turnstile_prepare((uintptr_t) task_get_return_wait_event(task),
 629                             NULL, TURNSTILE_NULL, TURNSTILE_ULOCK);
 630
 631                         waitq_wakeup64_all(&turnstile->ts_waitq,
 632                             CAST_EVENT64_T(task_get_return_wait_event(task)),
 633                             THREAD_AWAKENED, 0);
 634
 635                         turnstile_update_inheritor(turnstile, NULL,
 636                             TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD);
 637                         turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_HELD);
 638
 639                         turnstile_complete((uintptr_t) task_get_return_wait_event(task), NULL, NULL, TURNSTILE_ULOCK);
 640                         turnstile_cleanup();
 641                         task->t_returnwaitflags &= ~TRW_LRETURNWAITER;
 642                 }
 643                 is_write_unlock(task->itk_space);
 644         }
 645 }
 646
 647 void __attribute__((noreturn))
 648 task_wait_to_return(void)
 649 {
 650         task_t task = current_task();
 651
 652         is_write_lock(task->itk_space);
 653
 654         if (task->t_returnwaitflags & TRW_LRETURNWAIT) {
 655                 struct turnstile *turnstile = turnstile_prepare((uintptr_t) task_get_return_wait_event(task),
 656                     NULL, TURNSTILE_NULL, TURNSTILE_ULOCK);
 657
 658                 do {
 659                         task->t_returnwaitflags |= TRW_LRETURNWAITER;
 660                         turnstile_update_inheritor(turnstile, task->returnwait_inheritor,
 661                             (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));
 662
 663                         waitq_assert_wait64(&turnstile->ts_waitq,
 664                             CAST_EVENT64_T(task_get_return_wait_event(task)),
 665                             THREAD_UNINT, TIMEOUT_WAIT_FOREVER);
 666
 667                         is_write_unlock(task->itk_space);
 668
 669                         turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_NOT_HELD);
 670
 671                         thread_block(THREAD_CONTINUE_NULL);
 672
 673                         is_write_lock(task->itk_space);
 674                 } while (task->t_returnwaitflags & TRW_LRETURNWAIT);
 675
 676                 turnstile_complete((uintptr_t) task_get_return_wait_event(task), NULL, NULL, TURNSTILE_ULOCK);
 677         }
 678
 679         is_write_unlock(task->itk_space);
 680         turnstile_cleanup();
 681
 682
 683 #if CONFIG_MACF
 684         /*
 685          * Before jumping to userspace and allowing this process to execute any code,
 686          * notify any interested parties.
 687          */
 688         mac_proc_notify_exec_complete(current_proc());
 689 #endif
 690
 691         thread_bootstrap_return();
 692 }
 693
 694 #ifdef CONFIG_32BIT_TELEMETRY
 695 boolean_t
 696 task_consume_32bit_log_flag(task_t task)
 697 {
 698         if ((task->t_procflags & TPF_LOG_32BIT_TELEMETRY) != 0) {
 699                 task->t_procflags &= ~TPF_LOG_32BIT_TELEMETRY;
 700                 return TRUE;
 701         } else {
 702                 return FALSE;
 703         }
 704 }
 705
 706 void
 707 task_set_32bit_log_flag(task_t task)
 708 {
 709         task->t_procflags |= TPF_LOG_32BIT_TELEMETRY;
 710 }
 711 #endif /* CONFIG_32BIT_TELEMETRY */
 712
 713 boolean_t
 714 task_is_exec_copy(task_t task)
 715 {
 716         return task_is_exec_copy_internal(task);
 717 }
 718
 719 boolean_t
 720 task_did_exec(task_t task)
 721 {
 722         return task_did_exec_internal(task);
 723 }
 724
 725 boolean_t
 726 task_is_active(task_t task)
 727 {
 728         return task->active;
 729 }
 730
 731 boolean_t
 732 task_is_halting(task_t task)
 733 {
 734         return task->halting;
 735 }
 736
 737 #if TASK_REFERENCE_LEAK_DEBUG
 738 #include <kern/btlog.h>
 739
 /* Backtrace log that task_reference_internal()/task_deallocate_internal() append to. */
 740 static btlog_t *task_ref_btlog;
 741 #define TASK_REF_OP_INCR        0x1 /* operation tag logged on a reference take */
 742 #define TASK_REF_OP_DECR        0x2 /* operation tag logged on a reference drop */
 743
 744 #define TASK_REF_NUM_RECORDS    100000 /* NOTE(review): presumably the btlog capacity -- confirm where the log is created */
 745 #define TASK_REF_BTDEPTH        7 /* frames captured per logged backtrace */
 746
 747 void
 748 task_reference_internal(task_t task)
 749 {
 750         void *       bt[TASK_REF_BTDEPTH];
 751         int             numsaved = 0;
 752
 753         os_ref_retain(&task->ref_count);
 754
 755         numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
 756         btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_INCR,
 757             bt, numsaved);
 758 }
 759
 760 os_ref_count_t
 761 task_deallocate_internal(task_t task)
 762 {
 763         void *       bt[TASK_REF_BTDEPTH];
 764         int             numsaved = 0;
 765
 766         numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
 767         btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_DECR,
 768             bt, numsaved);
 769
 770         return os_ref_release(&task->ref_count);
 771 }
 772
 773 #endif /* TASK_REFERENCE_LEAK_DEBUG */
 774
 775 void
 776 task_init(void)
 777 {
 778         lck_grp_attr_setdefault(&task_lck_grp_attr);
 779         lck_grp_init(&task_lck_grp, "task", &task_lck_grp_attr);
 780         lck_attr_setdefault(&task_lck_attr);
 781         lck_mtx_init(&tasks_threads_lock, &task_lck_grp, &task_lck_attr);
 782         lck_mtx_init(&tasks_corpse_lock, &task_lck_grp, &task_lck_attr);
 783
 784         task_zone = zinit(
 785                 sizeof(struct task),
 786                 task_max * sizeof(struct task),
 787                 TASK_CHUNK * sizeof(struct task),
 788                 "tasks");
 789
 790         zone_change(task_zone, Z_NOENCRYPT, TRUE);
 791
 792 #if CONFIG_EMBEDDED
 793         task_watch_init();
 794 #endif /* CONFIG_EMBEDDED */
 795
 796         /*
 797          * Configure per-task memory limit.
 798          * The boot-arg is interpreted as Megabytes,
 799          * and takes precedence over the device tree.
 800          * Setting the boot-arg to 0 disables task limits.
 801          */
 802         if (!PE_parse_boot_argn("max_task_pmem", &max_task_footprint_mb,
 803             sizeof(max_task_footprint_mb))) {
 804                 /*
 805                  * No limit was found in boot-args, so go look in the device tree.
 806                  */
 807                 if (!PE_get_default("kern.max_task_pmem", &max_task_footprint_mb,
 808                     sizeof(max_task_footprint_mb))) {
 809                         /*
 810                          * No limit was found in device tree.
 811                          */
 812                         max_task_footprint_mb = 0;
 813                 }
 814         }
 815
 816         if (max_task_footprint_mb != 0) {
 817 #if CONFIG_MEMORYSTATUS
 818                 if (max_task_footprint_mb < 50) {
 819                         printf("Warning: max_task_pmem %d below minimum.\n",
 820                             max_task_footprint_mb);
 821                         max_task_footprint_mb = 50;
 822                 }
 823                 printf("Limiting task physical memory footprint to %d MB\n",
 824                     max_task_footprint_mb);
 825
 826                 max_task_footprint = (ledger_amount_t)max_task_footprint_mb * 1024 * 1024; // Convert MB to bytes
 827
 828                 /*
 829                  * Configure the per-task memory limit warning level.
 830                  * This is computed as a percentage.
 831                  */
 832                 max_task_footprint_warning_level = 0;
 833
 834                 if (max_mem < 0x40000000) {
 835                         /*
 836                          * On devices with < 1GB of memory:
 837                          *    -- set warnings to 50MB below the per-task limit.
 838                          */
 839                         if (max_task_footprint_mb > 50) {
 840                                 max_task_footprint_warning_level = ((max_task_footprint_mb - 50) * 100) / max_task_footprint_mb;
 841                         }
 842                 } else {
 843                         /*
 844                          * On devices with >= 1GB of memory:
 845                          *    -- set warnings to 100MB below the per-task limit.
 846                          */
 847                         if (max_task_footprint_mb > 100) {
 848                                 max_task_footprint_warning_level = ((max_task_footprint_mb - 100) * 100) / max_task_footprint_mb;
 849                         }
 850                 }
 851
 852                 /*
 853                  * Never allow warning level to land below the default.
 854                  */
 855                 if (max_task_footprint_warning_level < PHYS_FOOTPRINT_WARNING_LEVEL) {
 856                         max_task_footprint_warning_level = PHYS_FOOTPRINT_WARNING_LEVEL;
 857                 }
 858
 859                 printf("Limiting task physical memory warning to %d%%\n", max_task_footprint_warning_level);
 860
 861 #else
 862                 printf("Warning: max_task_pmem specified, but jetsam not configured; ignoring.\n");
 863 #endif /* CONFIG_MEMORYSTATUS */
 864         }
 865
 866 #if DEVELOPMENT || DEBUG
 867         if (!PE_parse_boot_argn("exc_resource_threads",
 868             &exc_resource_threads_enabled,
 869             sizeof(exc_resource_threads_enabled))) {
 870                 exc_resource_threads_enabled = 1;
 871         }
 872         PE_parse_boot_argn("task_exc_guard_default",
 873             &task_exc_guard_default,
 874             sizeof(task_exc_guard_default));
 875 #endif /* DEVELOPMENT || DEBUG */
 876
 877 #if CONFIG_COREDUMP
 878         if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores,
 879             sizeof(hwm_user_cores))) {
 880                 hwm_user_cores = 0;
 881         }
 882 #endif
 883
 884         proc_init_cpumon_params();
 885
 886         if (!PE_parse_boot_argn("task_wakeups_monitor_rate", &task_wakeups_monitor_rate, sizeof(task_wakeups_monitor_rate))) {
 887                 task_wakeups_monitor_rate = TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT;
 888         }
 889
 890         if (!PE_parse_boot_argn("task_wakeups_monitor_interval", &task_wakeups_monitor_interval, sizeof(task_wakeups_monitor_interval))) {
 891                 task_wakeups_monitor_interval = TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL;
 892         }
 893
 894         if (!PE_parse_boot_argn("task_wakeups_monitor_ustackshots_trigger_pct", &task_wakeups_monitor_ustackshots_trigger_pct,
 895             sizeof(task_wakeups_monitor_ustackshots_trigger_pct))) {
 896                 task_wakeups_monitor_ustackshots_trigger_pct = TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER;
 897         }
 898
 899         if (!PE_parse_boot_argn("disable_exc_resource", &disable_exc_resource,
 900             sizeof(disable_exc_resource))) {
 901                 disable_exc_resource = 0;
 902         }
 903
 904         if (!PE_parse_boot_argn("task_iomon_limit_mb", &task_iomon_limit_mb, sizeof(task_iomon_limit_mb))) {
 905                 task_iomon_limit_mb = IOMON_DEFAULT_LIMIT;
 906         }
 907
 908         if (!PE_parse_boot_argn("task_iomon_interval_secs", &task_iomon_interval_secs, sizeof(task_iomon_interval_secs))) {
 909                 task_iomon_interval_secs = IOMON_DEFAULT_INTERVAL;
 910         }
 911
 912         if (!PE_parse_boot_argn("io_telemetry_limit", &io_telemetry_limit, sizeof(io_telemetry_limit))) {
 913                 io_telemetry_limit = IO_TELEMETRY_DEFAULT_LIMIT;
 914         }
 915
 916 /*
 917  * If we have coalitions, coalition_init() will call init_task_ledgers() as it
 918  * sets up the ledgers for the default coalition. If we don't have coalitions,
 919  * then we have to call it now.
 920  */
 921 #if CONFIG_COALITIONS
 922         assert(task_ledger_template);
 923 #else /* CONFIG_COALITIONS */
 924         init_task_ledgers();
 925 #endif /* CONFIG_COALITIONS */
 926
 927 #if TASK_REFERENCE_LEAK_DEBUG
 928         task_ref_btlog = btlog_create(TASK_REF_NUM_RECORDS, TASK_REF_BTDEPTH, TRUE /* caller_will_remove_entries_for_element? */);
 929         assert(task_ref_btlog);
 930 #endif
 931
 932         /*
 933          * Create the kernel task as the first task.
 934          */
 935 #ifdef __LP64__
 936         if (task_create_internal(TASK_NULL, NULL, FALSE, TRUE, TRUE, TF_NONE, TPF_NONE, TWF_NONE, &kernel_task) != KERN_SUCCESS)
 937 #else
 938         if (task_create_internal(TASK_NULL, NULL, FALSE, FALSE, FALSE, TF_NONE, TPF_NONE, TWF_NONE, &kernel_task) != KERN_SUCCESS)
 939 #endif
 940         { panic("task_init\n");}
 941
 942 #if defined(HAS_APPLE_PAC)
 943         kernel_task->rop_pid = KERNEL_ROP_ID;
 944         // kernel_task never runs at EL0, but machine_thread_state_convert_from/to_user() relies on
 945         // disable_user_jop to be false for kernel threads (e.g. in exception delivery on thread_exception_daemon)
 946         ml_task_set_disable_user_jop(kernel_task, FALSE);
 947 #endif
 948
 949         vm_map_deallocate(kernel_task->map);
 950         kernel_task->map = kernel_map;
 951         lck_spin_init(&dead_task_statistics_lock, &task_lck_grp, &task_lck_attr);
 952 }
 953
 954 /*
 955  * Create a task running in the kernel address space.  It may
 956  * have its own map of size mem_size and may have ipc privileges.
 957  */
 958 kern_return_t
 959 kernel_task_create(
 960         __unused task_t         parent_task,
 961         __unused vm_offset_t            map_base,
 962         __unused vm_size_t              map_size,
 963         __unused task_t         *child_task)
 964 {
 965         return KERN_INVALID_ARGUMENT;
 966 }
 967
 968 kern_return_t
 969 task_create(
 970         task_t                          parent_task,
 971         __unused ledger_port_array_t    ledger_ports,
 972         __unused mach_msg_type_number_t num_ledger_ports,
 973         __unused boolean_t              inherit_memory,
 974         __unused task_t                 *child_task)    /* OUT */
 975 {
 976         if (parent_task == TASK_NULL) {
 977                 return KERN_INVALID_ARGUMENT;
 978         }
 979
 980         /*
 981          * No longer supported: too many calls assume that a task has a valid
 982          * process attached.
 983          */
 984         return KERN_FAILURE;
 985 }
 986
 987 kern_return_t
 988 host_security_create_task_token(
 989         host_security_t                 host_security,
 990         task_t                          parent_task,
 991         __unused security_token_t       sec_token,
 992         __unused audit_token_t          audit_token,
 993         __unused host_priv_t            host_priv,
 994         __unused ledger_port_array_t    ledger_ports,
 995         __unused mach_msg_type_number_t num_ledger_ports,
 996         __unused boolean_t              inherit_memory,
 997         __unused task_t                 *child_task)    /* OUT */
 998 {
 999         if (parent_task == TASK_NULL) {
1000                 return KERN_INVALID_ARGUMENT;
1001         }
1002
1003         if (host_security == HOST_NULL) {
1004                 return KERN_INVALID_SECURITY;
1005         }
1006
1007         /*
1008          * No longer supported.
1009          */
1010         return KERN_FAILURE;
1011 }
1012
1013 /*
1014  * Task ledgers
1015  * ------------
1016  *
1017  * phys_footprint
1018  *   Physical footprint: This is the sum of:
1019  *     + (internal - alternate_accounting)
1020  *     + (internal_compressed - alternate_accounting_compressed)
1021  *     + iokit_mapped
1022  *     + purgeable_nonvolatile
1023  *     + purgeable_nonvolatile_compressed
1024  *     + page_table
1025  *
1026  * internal
1027  *   The task's anonymous memory, which on iOS is always resident.
1028  *
1029  * internal_compressed
1030  *   Amount of this task's internal memory which is held by the compressor.
1031  *   Such memory is no longer actually resident for the task [i.e., resident in its pmap],
1032  *   and could be either decompressed back into memory, or paged out to storage, depending
1033  *   on our implementation.
1034  *
1035  * iokit_mapped
1036  *   IOKit mappings: The total size of all IOKit mappings in this task, regardless of
1037  *    clean/dirty or internal/external state].
1038  *
1039  * alternate_accounting
1040  *   The number of internal dirty pages which are part of IOKit mappings. By definition, these pages
1041  *   are counted in both internal *and* iokit_mapped, so we must subtract them from the total to avoid
1042  *   double counting.
1043  *
1044  * pages_grabbed
1045  *   pages_grabbed counts all page grabs in a task.  It is also broken out into three subtypes
1046  *   which track UPL, IOPL and Kernel page grabs.
1047  */
1048 void
1049 init_task_ledgers(void)
1050 {
1051         ledger_template_t t;
1052
1053         assert(task_ledger_template == NULL);
1054         assert(kernel_task == TASK_NULL);
1055
1056 #if MACH_ASSERT
1057         PE_parse_boot_argn("pmap_ledgers_panic",
1058             &pmap_ledgers_panic,
1059             sizeof(pmap_ledgers_panic));
1060         PE_parse_boot_argn("pmap_ledgers_panic_leeway",
1061             &pmap_ledgers_panic_leeway,
1062             sizeof(pmap_ledgers_panic_leeway));
1063 #endif /* MACH_ASSERT */
1064
1065         if ((t = ledger_template_create("Per-task ledger")) == NULL) {
1066                 panic("couldn't create task ledger template");
1067         }
1068
1069         task_ledgers.cpu_time = ledger_entry_add(t, "cpu_time", "sched", "ns");
1070         task_ledgers.tkm_private = ledger_entry_add(t, "tkm_private",
1071             "physmem", "bytes");
1072         task_ledgers.tkm_shared = ledger_entry_add(t, "tkm_shared", "physmem",
1073             "bytes");
1074         task_ledgers.phys_mem = ledger_entry_add(t, "phys_mem", "physmem",
1075             "bytes");
1076         task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem",
1077             "bytes");
1078         task_ledgers.internal = ledger_entry_add(t, "internal", "physmem",
1079             "bytes");
1080         task_ledgers.iokit_mapped = ledger_entry_add(t, "iokit_mapped", "mappings",
1081             "bytes");
1082         task_ledgers.alternate_accounting = ledger_entry_add(t, "alternate_accounting", "physmem",
1083             "bytes");
1084         task_ledgers.alternate_accounting_compressed = ledger_entry_add(t, "alternate_accounting_compressed", "physmem",
1085             "bytes");
1086         task_ledgers.page_table = ledger_entry_add(t, "page_table", "physmem",
1087             "bytes");
1088         task_ledgers.phys_footprint = ledger_entry_add(t, "phys_footprint", "physmem",
1089             "bytes");
1090         task_ledgers.internal_compressed = ledger_entry_add(t, "internal_compressed", "physmem",
1091             "bytes");
1092         task_ledgers.purgeable_volatile = ledger_entry_add(t, "purgeable_volatile", "physmem", "bytes");
1093         task_ledgers.purgeable_nonvolatile = ledger_entry_add(t, "purgeable_nonvolatile", "physmem", "bytes");
1094         task_ledgers.purgeable_volatile_compressed = ledger_entry_add(t, "purgeable_volatile_compress", "physmem", "bytes");
1095         task_ledgers.purgeable_nonvolatile_compressed = ledger_entry_add(t, "purgeable_nonvolatile_compress", "physmem", "bytes");
1096 #if DEBUG || DEVELOPMENT
1097         task_ledgers.pages_grabbed = ledger_entry_add(t, "pages_grabbed", "physmem", "count");
1098         task_ledgers.pages_grabbed_kern = ledger_entry_add(t, "pages_grabbed_kern", "physmem", "count");
1099         task_ledgers.pages_grabbed_iopl = ledger_entry_add(t, "pages_grabbed_iopl", "physmem", "count");
1100         task_ledgers.pages_grabbed_upl = ledger_entry_add(t, "pages_grabbed_upl", "physmem", "count");
1101 #endif
1102         task_ledgers.tagged_nofootprint = ledger_entry_add(t, "tagged_nofootprint", "physmem", "bytes");
1103         task_ledgers.tagged_footprint = ledger_entry_add(t, "tagged_footprint", "physmem", "bytes");
1104         task_ledgers.tagged_nofootprint_compressed = ledger_entry_add(t, "tagged_nofootprint_compressed", "physmem", "bytes");
1105         task_ledgers.tagged_footprint_compressed = ledger_entry_add(t, "tagged_footprint_compressed", "physmem", "bytes");
1106         task_ledgers.network_volatile = ledger_entry_add(t, "network_volatile", "physmem", "bytes");
1107         task_ledgers.network_nonvolatile = ledger_entry_add(t, "network_nonvolatile", "physmem", "bytes");
1108         task_ledgers.network_volatile_compressed = ledger_entry_add(t, "network_volatile_compressed", "physmem", "bytes");
1109         task_ledgers.network_nonvolatile_compressed = ledger_entry_add(t, "network_nonvolatile_compressed", "physmem", "bytes");
1110         task_ledgers.media_nofootprint = ledger_entry_add(t, "media_nofootprint", "physmem", "bytes");
1111         task_ledgers.media_footprint = ledger_entry_add(t, "media_footprint", "physmem", "bytes");
1112         task_ledgers.media_nofootprint_compressed = ledger_entry_add(t, "media_nofootprint_compressed", "physmem", "bytes");
1113         task_ledgers.media_footprint_compressed = ledger_entry_add(t, "media_footprint_compressed", "physmem", "bytes");
1114         task_ledgers.graphics_nofootprint = ledger_entry_add(t, "graphics_nofootprint", "physmem", "bytes");
1115         task_ledgers.graphics_footprint = ledger_entry_add(t, "graphics_footprint", "physmem", "bytes");
1116         task_ledgers.graphics_nofootprint_compressed = ledger_entry_add(t, "graphics_nofootprint_compressed", "physmem", "bytes");
1117         task_ledgers.graphics_footprint_compressed = ledger_entry_add(t, "graphics_footprint_compressed", "physmem", "bytes");
1118         task_ledgers.neural_nofootprint = ledger_entry_add(t, "neural_nofootprint", "physmem", "bytes");
1119         task_ledgers.neural_footprint = ledger_entry_add(t, "neural_footprint", "physmem", "bytes");
1120         task_ledgers.neural_nofootprint_compressed = ledger_entry_add(t, "neural_nofootprint_compressed", "physmem", "bytes");
1121         task_ledgers.neural_footprint_compressed = ledger_entry_add(t, "neural_footprint_compressed", "physmem", "bytes");
1122
1123
1124         task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power",
1125             "count");
1126         task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power",
1127             "count");
1128
1129 #if CONFIG_SCHED_SFI
1130         sfi_class_id_t class_id, ledger_alias;
1131         for (class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
1132                 task_ledgers.sfi_wait_times[class_id] = -1;
1133         }
1134
1135         /* don't account for UNSPECIFIED */
1136         for (class_id = SFI_CLASS_UNSPECIFIED + 1; class_id < MAX_SFI_CLASS_ID; class_id++) {
1137                 ledger_alias = sfi_get_ledger_alias_for_class(class_id);
1138                 if (ledger_alias != SFI_CLASS_UNSPECIFIED) {
1139                         /* Check to see if alias has been registered yet */
1140                         if (task_ledgers.sfi_wait_times[ledger_alias] != -1) {
1141                                 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias];
1142                         } else {
1143                                 /* Otherwise, initialize it first */
1144                                 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias] = sfi_ledger_entry_add(t, ledger_alias);
1145                         }
1146                 } else {
1147                         task_ledgers.sfi_wait_times[class_id] = sfi_ledger_entry_add(t, class_id);
1148                 }
1149
1150                 if (task_ledgers.sfi_wait_times[class_id] < 0) {
1151                         panic("couldn't create entries for task ledger template for SFI class 0x%x", class_id);
1152                 }
1153         }
1154
1155         assert(task_ledgers.sfi_wait_times[MAX_SFI_CLASS_ID - 1] != -1);
1156 #endif /* CONFIG_SCHED_SFI */
1157
1158         task_ledgers.cpu_time_billed_to_me = ledger_entry_add(t, "cpu_time_billed_to_me", "sched", "ns");
1159         task_ledgers.cpu_time_billed_to_others = ledger_entry_add(t, "cpu_time_billed_to_others", "sched", "ns");
1160         task_ledgers.physical_writes = ledger_entry_add(t, "physical_writes", "res", "bytes");
1161         task_ledgers.logical_writes = ledger_entry_add(t, "logical_writes", "res", "bytes");
1162         task_ledgers.logical_writes_to_external = ledger_entry_add(t, "logical_writes_to_external", "res", "bytes");
1163         task_ledgers.energy_billed_to_me = ledger_entry_add(t, "energy_billed_to_me", "power", "nj");
1164         task_ledgers.energy_billed_to_others = ledger_entry_add(t, "energy_billed_to_others", "power", "nj");
1165
1166         if ((task_ledgers.cpu_time < 0) ||
1167             (task_ledgers.tkm_private < 0) ||
1168             (task_ledgers.tkm_shared < 0) ||
1169             (task_ledgers.phys_mem < 0) ||
1170             (task_ledgers.wired_mem < 0) ||
1171             (task_ledgers.internal < 0) ||
1172             (task_ledgers.iokit_mapped < 0) ||
1173             (task_ledgers.alternate_accounting < 0) ||
1174             (task_ledgers.alternate_accounting_compressed < 0) ||
1175             (task_ledgers.page_table < 0) ||
1176             (task_ledgers.phys_footprint < 0) ||
1177             (task_ledgers.internal_compressed < 0) ||
1178             (task_ledgers.purgeable_volatile < 0) ||
1179             (task_ledgers.purgeable_nonvolatile < 0) ||
1180             (task_ledgers.purgeable_volatile_compressed < 0) ||
1181             (task_ledgers.purgeable_nonvolatile_compressed < 0) ||
1182             (task_ledgers.tagged_nofootprint < 0) ||
1183             (task_ledgers.tagged_footprint < 0) ||
1184             (task_ledgers.tagged_nofootprint_compressed < 0) ||
1185             (task_ledgers.tagged_footprint_compressed < 0) ||
1186             (task_ledgers.network_volatile < 0) ||
1187             (task_ledgers.network_nonvolatile < 0) ||
1188             (task_ledgers.network_volatile_compressed < 0) ||
1189             (task_ledgers.network_nonvolatile_compressed < 0) ||
1190             (task_ledgers.media_nofootprint < 0) ||
1191             (task_ledgers.media_footprint < 0) ||
1192             (task_ledgers.media_nofootprint_compressed < 0) ||
1193             (task_ledgers.media_footprint_compressed < 0) ||
1194             (task_ledgers.graphics_nofootprint < 0) ||
1195             (task_ledgers.graphics_footprint < 0) ||
1196             (task_ledgers.graphics_nofootprint_compressed < 0) ||
1197             (task_ledgers.graphics_footprint_compressed < 0) ||
1198             (task_ledgers.neural_nofootprint < 0) ||
1199             (task_ledgers.neural_footprint < 0) ||
1200             (task_ledgers.neural_nofootprint_compressed < 0) ||
1201             (task_ledgers.neural_footprint_compressed < 0) ||
1202             (task_ledgers.platform_idle_wakeups < 0) ||
1203             (task_ledgers.interrupt_wakeups < 0) ||
1204             (task_ledgers.cpu_time_billed_to_me < 0) || (task_ledgers.cpu_time_billed_to_others < 0) ||
1205             (task_ledgers.physical_writes < 0) ||
1206             (task_ledgers.logical_writes < 0) ||
1207             (task_ledgers.logical_writes_to_external < 0) ||
1208             (task_ledgers.energy_billed_to_me < 0) ||
1209             (task_ledgers.energy_billed_to_others < 0)
1210             ) {
1211                 panic("couldn't create entries for task ledger template");
1212         }
1213
1214         ledger_track_credit_only(t, task_ledgers.phys_footprint);
1215         ledger_track_credit_only(t, task_ledgers.page_table);
1216         ledger_track_credit_only(t, task_ledgers.internal);
1217         ledger_track_credit_only(t, task_ledgers.internal_compressed);
1218         ledger_track_credit_only(t, task_ledgers.iokit_mapped);
1219         ledger_track_credit_only(t, task_ledgers.alternate_accounting);
1220         ledger_track_credit_only(t, task_ledgers.alternate_accounting_compressed);
1221         ledger_track_credit_only(t, task_ledgers.purgeable_volatile);
1222         ledger_track_credit_only(t, task_ledgers.purgeable_nonvolatile);
1223         ledger_track_credit_only(t, task_ledgers.purgeable_volatile_compressed);
1224         ledger_track_credit_only(t, task_ledgers.purgeable_nonvolatile_compressed);
1225 #if DEBUG || DEVELOPMENT
1226         ledger_track_credit_only(t, task_ledgers.pages_grabbed);
1227         ledger_track_credit_only(t, task_ledgers.pages_grabbed_kern);
1228         ledger_track_credit_only(t, task_ledgers.pages_grabbed_iopl);
1229         ledger_track_credit_only(t, task_ledgers.pages_grabbed_upl);
1230 #endif
1231         ledger_track_credit_only(t, task_ledgers.tagged_nofootprint);
1232         ledger_track_credit_only(t, task_ledgers.tagged_footprint);
1233         ledger_track_credit_only(t, task_ledgers.tagged_nofootprint_compressed);
1234         ledger_track_credit_only(t, task_ledgers.tagged_footprint_compressed);
1235         ledger_track_credit_only(t, task_ledgers.network_volatile);
1236         ledger_track_credit_only(t, task_ledgers.network_nonvolatile);
1237         ledger_track_credit_only(t, task_ledgers.network_volatile_compressed);
1238         ledger_track_credit_only(t, task_ledgers.network_nonvolatile_compressed);
1239         ledger_track_credit_only(t, task_ledgers.media_nofootprint);
1240         ledger_track_credit_only(t, task_ledgers.media_footprint);
1241         ledger_track_credit_only(t, task_ledgers.media_nofootprint_compressed);
1242         ledger_track_credit_only(t, task_ledgers.media_footprint_compressed);
1243         ledger_track_credit_only(t, task_ledgers.graphics_nofootprint);
1244         ledger_track_credit_only(t, task_ledgers.graphics_footprint);
1245         ledger_track_credit_only(t, task_ledgers.graphics_nofootprint_compressed);
1246         ledger_track_credit_only(t, task_ledgers.graphics_footprint_compressed);
1247         ledger_track_credit_only(t, task_ledgers.neural_nofootprint);
1248         ledger_track_credit_only(t, task_ledgers.neural_footprint);
1249         ledger_track_credit_only(t, task_ledgers.neural_nofootprint_compressed);
1250         ledger_track_credit_only(t, task_ledgers.neural_footprint_compressed);
1251
1252         ledger_track_maximum(t, task_ledgers.phys_footprint, 60);
1253 #if MACH_ASSERT
1254         if (pmap_ledgers_panic) {
1255                 ledger_panic_on_negative(t, task_ledgers.phys_footprint);
1256                 ledger_panic_on_negative(t, task_ledgers.page_table);
1257                 ledger_panic_on_negative(t, task_ledgers.internal);
1258                 ledger_panic_on_negative(t, task_ledgers.internal_compressed);
1259                 ledger_panic_on_negative(t, task_ledgers.iokit_mapped);
1260                 ledger_panic_on_negative(t, task_ledgers.alternate_accounting);
1261                 ledger_panic_on_negative(t, task_ledgers.alternate_accounting_compressed);
1262                 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile);
1263                 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile);
1264                 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile_compressed);
1265                 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile_compressed);
1266
1267                 ledger_panic_on_negative(t, task_ledgers.tagged_nofootprint);
1268                 ledger_panic_on_negative(t, task_ledgers.tagged_footprint);
1269                 ledger_panic_on_negative(t, task_ledgers.tagged_nofootprint_compressed);
1270                 ledger_panic_on_negative(t, task_ledgers.tagged_footprint_compressed);
1271                 ledger_panic_on_negative(t, task_ledgers.network_volatile);
1272                 ledger_panic_on_negative(t, task_ledgers.network_nonvolatile);
1273                 ledger_panic_on_negative(t, task_ledgers.network_volatile_compressed);
1274                 ledger_panic_on_negative(t, task_ledgers.network_nonvolatile_compressed);
1275                 ledger_panic_on_negative(t, task_ledgers.media_nofootprint);
1276                 ledger_panic_on_negative(t, task_ledgers.media_footprint);
1277                 ledger_panic_on_negative(t, task_ledgers.media_nofootprint_compressed);
1278                 ledger_panic_on_negative(t, task_ledgers.media_footprint_compressed);
1279                 ledger_panic_on_negative(t, task_ledgers.graphics_nofootprint);
1280                 ledger_panic_on_negative(t, task_ledgers.graphics_footprint);
1281                 ledger_panic_on_negative(t, task_ledgers.graphics_nofootprint_compressed);
1282                 ledger_panic_on_negative(t, task_ledgers.graphics_footprint_compressed);
1283                 ledger_panic_on_negative(t, task_ledgers.neural_nofootprint);
1284                 ledger_panic_on_negative(t, task_ledgers.neural_footprint);
1285                 ledger_panic_on_negative(t, task_ledgers.neural_nofootprint_compressed);
1286                 ledger_panic_on_negative(t, task_ledgers.neural_footprint_compressed);
1287         }
1288 #endif /* MACH_ASSERT */
1289
1290 #if CONFIG_MEMORYSTATUS
1291         ledger_set_callback(t, task_ledgers.phys_footprint, task_footprint_exceeded, NULL, NULL);
1292 #endif /* CONFIG_MEMORYSTATUS */
1293
1294         ledger_set_callback(t, task_ledgers.interrupt_wakeups,
1295             task_wakeups_rate_exceeded, NULL, NULL);
1296         ledger_set_callback(t, task_ledgers.physical_writes, task_io_rate_exceeded, (void *)FLAVOR_IO_PHYSICAL_WRITES, NULL);
1297
1298         ledger_template_complete(t);
1299         task_ledger_template = t;
1300 }
1301
/*
 * Reference group named "task" (declared static, with a NULL parent argument).
 * It is passed to os_ref_init_count() when a new task's ref_count is
 * initialized in task_create_internal().
 */
1302 os_refgrp_decl(static, task_refgrp, "task", NULL);
1303
1304 kern_return_t
1305 task_create_internal(
1306         task_t          parent_task,
1307         coalition_t     *parent_coalitions __unused,
1308         boolean_t       inherit_memory,
1309         __unused boolean_t      is_64bit,
1310         boolean_t is_64bit_data,
1311         uint32_t        t_flags,
1312         uint32_t        t_procflags,
1313         uint8_t         t_returnwaitflags,
1314         task_t          *child_task)            /* OUT */
1315 {
1316         task_t                  new_task;
1317         vm_shared_region_t      shared_region;
1318         ledger_t                ledger = NULL;
1319
1320         new_task = (task_t) zalloc(task_zone);
1321
1322         if (new_task == TASK_NULL) {
1323                 return KERN_RESOURCE_SHORTAGE;
1324         }
1325
1326         /* one ref for just being alive; one for our caller */
1327         os_ref_init_count(&new_task->ref_count, &task_refgrp, 2);
1328
1329         /* allocate with active entries */
1330         assert(task_ledger_template != NULL);
1331         if ((ledger = ledger_instantiate(task_ledger_template,
1332             LEDGER_CREATE_ACTIVE_ENTRIES)) == NULL) {
1333                 zfree(task_zone, new_task);
1334                 return KERN_RESOURCE_SHORTAGE;
1335         }
1336
1337 #if defined(HAS_APPLE_PAC)
1338         ml_task_set_rop_pid(new_task, parent_task, inherit_memory);
1339         ml_task_set_disable_user_jop(new_task, inherit_memory ? parent_task->disable_user_jop : FALSE);
1340 #endif
1341
1342         new_task->ledger = ledger;
1343
1344 #if defined(CONFIG_SCHED_MULTIQ)
1345         new_task->sched_group = sched_group_create();
1346 #endif
1347
1348         /* if inherit_memory is true, parent_task MUST not be NULL */
1349         if (!(t_flags & TF_CORPSE_FORK) && inherit_memory) {
1350                 new_task->map = vm_map_fork(ledger, parent_task->map, 0);
1351         } else {
1352                 unsigned int pmap_flags = is_64bit ? PMAP_CREATE_64BIT : 0;
1353                 new_task->map = vm_map_create(pmap_create_options(ledger, 0, pmap_flags),
1354                     (vm_map_offset_t)(VM_MIN_ADDRESS),
1355                     (vm_map_offset_t)(VM_MAX_ADDRESS), TRUE);
1356         }
1357
1358         /* Inherit memlock limit from parent */
1359         if (parent_task) {
1360                 vm_map_set_user_wire_limit(new_task->map, (vm_size_t)parent_task->map->user_wire_limit);
1361         }
1362
1363         lck_mtx_init(&new_task->lock, &task_lck_grp, &task_lck_attr);
1364         queue_init(&new_task->threads);
1365         new_task->suspend_count = 0;
1366         new_task->thread_count = 0;
1367         new_task->active_thread_count = 0;
1368         new_task->user_stop_count = 0;
1369         new_task->legacy_stop_count = 0;
1370         new_task->active = TRUE;
1371         new_task->halting = FALSE;
1372         new_task->priv_flags = 0;
1373         new_task->t_flags = t_flags;
1374         new_task->t_procflags = t_procflags;
1375         new_task->t_returnwaitflags = t_returnwaitflags;
1376         new_task->returnwait_inheritor = current_thread();
1377         new_task->importance = 0;
1378         new_task->crashed_thread_id = 0;
1379         new_task->exec_token = 0;
1380         new_task->watchports = NULL;
1381         new_task->restartable_ranges = NULL;
1382         new_task->task_exc_guard = 0;
1383
1384 #if CONFIG_ATM
1385         new_task->atm_context = NULL;
1386 #endif
1387         new_task->bank_context = NULL;
1388
1389 #ifdef MACH_BSD
1390         new_task->bsd_info = NULL;
1391         new_task->corpse_info = NULL;
1392 #endif /* MACH_BSD */
1393
1394 #if CONFIG_MACF
1395         new_task->crash_label = NULL;
1396 #endif
1397
1398 #if CONFIG_MEMORYSTATUS
1399         if (max_task_footprint != 0) {
1400                 ledger_set_limit(ledger, task_ledgers.phys_footprint, max_task_footprint, PHYS_FOOTPRINT_WARNING_LEVEL);
1401         }
1402 #endif /* CONFIG_MEMORYSTATUS */
1403
1404         if (task_wakeups_monitor_rate != 0) {
1405                 uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
1406                 int32_t  rate; // Ignored because of WAKEMON_SET_DEFAULTS
1407                 task_wakeups_monitor_ctl(new_task, &flags, &rate);
1408         }
1409
1410 #if CONFIG_IO_ACCOUNTING
1411         uint32_t flags = IOMON_ENABLE;
1412         task_io_monitor_ctl(new_task, &flags);
1413 #endif /* CONFIG_IO_ACCOUNTING */
1414
1415         machine_task_init(new_task, parent_task, inherit_memory);
1416
1417         new_task->task_debug = NULL;
1418
1419 #if DEVELOPMENT || DEBUG
1420         new_task->task_unnested = FALSE;
1421         new_task->task_disconnected_count = 0;
1422 #endif
1423         queue_init(&new_task->semaphore_list);
1424         new_task->semaphores_owned = 0;
1425
1426         ipc_task_init(new_task, parent_task);
1427
1428         new_task->vtimers = 0;
1429
1430         new_task->shared_region = NULL;
1431
1432         new_task->affinity_space = NULL;
1433
1434         new_task->t_kpc = 0;
1435
1436         new_task->pidsuspended = FALSE;
1437         new_task->frozen = FALSE;
1438         new_task->changing_freeze_state = FALSE;
1439         new_task->rusage_cpu_flags = 0;
1440         new_task->rusage_cpu_percentage = 0;
1441         new_task->rusage_cpu_interval = 0;
1442         new_task->rusage_cpu_deadline = 0;
1443         new_task->rusage_cpu_callt = NULL;
1444 #if MACH_ASSERT
1445         new_task->suspends_outstanding = 0;
1446 #endif
1447
1448 #if HYPERVISOR
1449         new_task->hv_task_target = NULL;
1450 #endif /* HYPERVISOR */
1451
1452 #if CONFIG_EMBEDDED
1453         queue_init(&new_task->task_watchers);
1454         new_task->num_taskwatchers  = 0;
1455         new_task->watchapplying  = 0;
1456 #endif /* CONFIG_EMBEDDED */
1457
1458         new_task->mem_notify_reserved = 0;
1459         new_task->memlimit_attrs_reserved = 0;
1460
1461         new_task->requested_policy = default_task_requested_policy;
1462         new_task->effective_policy = default_task_effective_policy;
1463
1464         task_importance_init_from_parent(new_task, parent_task);
1465
1466         if (parent_task != TASK_NULL) {
1467                 new_task->sec_token = parent_task->sec_token;
1468                 new_task->audit_token = parent_task->audit_token;
1469
1470                 /* inherit the parent's shared region */
1471                 shared_region = vm_shared_region_get(parent_task);
1472                 vm_shared_region_set(new_task, shared_region);
1473
1474                 if (task_has_64Bit_addr(parent_task)) {
1475                         task_set_64Bit_addr(new_task);
1476                 }
1477
1478                 if (task_has_64Bit_data(parent_task)) {
1479                         task_set_64Bit_data(new_task);
1480                 }
1481
1482                 new_task->all_image_info_addr = parent_task->all_image_info_addr;
1483                 new_task->all_image_info_size = parent_task->all_image_info_size;
1484                 new_task->mach_header_vm_address = 0;
1485
1486                 if (inherit_memory && parent_task->affinity_space) {
1487                         task_affinity_create(parent_task, new_task);
1488                 }
1489
1490                 new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task);
1491
1492 #if DEBUG || DEVELOPMENT
1493                 if (parent_task->t_flags & TF_NO_SMT) {
1494                         new_task->t_flags |= TF_NO_SMT;
1495                 }
1496 #endif
1497
1498                 new_task->priority = BASEPRI_DEFAULT;
1499                 new_task->max_priority = MAXPRI_USER;
1500
1501                 task_policy_create(new_task, parent_task);
1502         } else {
1503                 new_task->sec_token = KERNEL_SECURITY_TOKEN;
1504                 new_task->audit_token = KERNEL_AUDIT_TOKEN;
1505 #ifdef __LP64__
1506                 if (is_64bit) {
1507                         task_set_64Bit_addr(new_task);
1508                 }
1509 #endif
1510
1511                 if (is_64bit_data) {
1512                         task_set_64Bit_data(new_task);
1513                 }
1514
1515                 new_task->all_image_info_addr = (mach_vm_address_t)0;
1516                 new_task->all_image_info_size = (mach_vm_size_t)0;
1517
1518                 new_task->pset_hint = PROCESSOR_SET_NULL;
1519
1520                 if (kernel_task == TASK_NULL) {
1521                         new_task->priority = BASEPRI_KERNEL;
1522                         new_task->max_priority = MAXPRI_KERNEL;
1523                 } else {
1524                         new_task->priority = BASEPRI_DEFAULT;
1525                         new_task->max_priority = MAXPRI_USER;
1526                 }
1527         }
1528
1529         bzero(new_task->coalition, sizeof(new_task->coalition));
1530         for (int i = 0; i < COALITION_NUM_TYPES; i++) {
1531                 queue_chain_init(new_task->task_coalition[i]);
1532         }
1533
1534         /* Allocate I/O Statistics */
1535         new_task->task_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info));
1536         assert(new_task->task_io_stats != NULL);
1537         bzero(new_task->task_io_stats, sizeof(struct io_stat_info));
1538
1539         bzero(&(new_task->cpu_time_eqos_stats), sizeof(new_task->cpu_time_eqos_stats));
1540         bzero(&(new_task->cpu_time_rqos_stats), sizeof(new_task->cpu_time_rqos_stats));
1541
1542         bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics));
1543
1544         /* Copy resource accounting info from the parent for a corpse-forked task. */
1545         if (parent_task != NULL && (t_flags & TF_CORPSE_FORK)) {
1546                 task_rollup_accounting_info(new_task, parent_task);
1547         } else {
1548                 /* Initialize to zero for standard fork/spawn case */
1549                 new_task->total_user_time = 0;
1550                 new_task->total_system_time = 0;
1551                 new_task->total_ptime = 0;
1552                 new_task->total_runnable_time = 0;
1553                 new_task->faults = 0;
1554                 new_task->pageins = 0;
1555                 new_task->cow_faults = 0;
1556                 new_task->messages_sent = 0;
1557                 new_task->messages_received = 0;
1558                 new_task->syscalls_mach = 0;
1559                 new_task->syscalls_unix = 0;
1560                 new_task->c_switch = 0;
1561                 new_task->p_switch = 0;
1562                 new_task->ps_switch = 0;
1563                 new_task->decompressions = 0;
1564                 new_task->low_mem_notified_warn = 0;
1565                 new_task->low_mem_notified_critical = 0;
1566                 new_task->purged_memory_warn = 0;
1567                 new_task->purged_memory_critical = 0;
1568                 new_task->low_mem_privileged_listener = 0;
1569                 new_task->memlimit_is_active = 0;
1570                 new_task->memlimit_is_fatal = 0;
1571                 new_task->memlimit_active_exc_resource = 0;
1572                 new_task->memlimit_inactive_exc_resource = 0;
1573                 new_task->task_timer_wakeups_bin_1 = 0;
1574                 new_task->task_timer_wakeups_bin_2 = 0;
1575                 new_task->task_gpu_ns = 0;
1576                 new_task->task_writes_counters_internal.task_immediate_writes = 0;
1577                 new_task->task_writes_counters_internal.task_deferred_writes = 0;
1578                 new_task->task_writes_counters_internal.task_invalidated_writes = 0;
1579                 new_task->task_writes_counters_internal.task_metadata_writes = 0;
1580                 new_task->task_writes_counters_external.task_immediate_writes = 0;
1581                 new_task->task_writes_counters_external.task_deferred_writes = 0;
1582                 new_task->task_writes_counters_external.task_invalidated_writes = 0;
1583                 new_task->task_writes_counters_external.task_metadata_writes = 0;
1584
1585                 new_task->task_energy = 0;
1586 #if MONOTONIC
1587                 memset(&new_task->task_monotonic, 0, sizeof(new_task->task_monotonic));
1588 #endif /* MONOTONIC */
1589         }
1590
1591
1592 #if CONFIG_COALITIONS
1593         if (!(t_flags & TF_CORPSE_FORK)) {
1594                 /* TODO: there is no graceful failure path here... */
1595                 if (parent_coalitions && parent_coalitions[COALITION_TYPE_RESOURCE]) {
1596                         coalitions_adopt_task(parent_coalitions, new_task);
1597                 } else if (parent_task && parent_task->coalition[COALITION_TYPE_RESOURCE]) {
1598                         /*
1599                          * all tasks at least have a resource coalition, so
1600                          * if the parent has one then inherit all coalitions
1601                          * the parent is a part of
1602                          */
1603                         coalitions_adopt_task(parent_task->coalition, new_task);
1604                 } else {
1605                         /* TODO: assert that new_task will be PID 1 (launchd) */
1606                         coalitions_adopt_init_task(new_task);
1607                 }
1608                 /*
1609                  * on exec, we need to transfer the coalition roles from the
1610                  * parent task to the exec copy task.
1611                  */
1612                 if (parent_task && (t_procflags & TPF_EXEC_COPY)) {
1613                         int coal_roles[COALITION_NUM_TYPES];
1614                         task_coalition_roles(parent_task, coal_roles);
1615                         (void)coalitions_set_roles(new_task->coalition, new_task, coal_roles);
1616                 }
1617         } else {
1618                 coalitions_adopt_corpse_task(new_task);
1619         }
1620
1621         if (new_task->coalition[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
1622                 panic("created task is not a member of a resource coalition");
1623         }
1624 #endif /* CONFIG_COALITIONS */
1625
1626         new_task->dispatchqueue_offset = 0;
1627         if (parent_task != NULL) {
1628                 new_task->dispatchqueue_offset = parent_task->dispatchqueue_offset;
1629         }
1630
1631         new_task->task_can_transfer_memory_ownership = FALSE;
1632         new_task->task_volatile_objects = 0;
1633         new_task->task_nonvolatile_objects = 0;
1634         new_task->task_objects_disowning = FALSE;
1635         new_task->task_objects_disowned = FALSE;
1636         new_task->task_owned_objects = 0;
1637         queue_init(&new_task->task_objq);
1638         task_objq_lock_init(new_task);
1639
1640 #if __arm64__
1641         new_task->task_legacy_footprint = FALSE;
1642         new_task->task_extra_footprint_limit = FALSE;
1643         new_task->task_ios13extended_footprint_limit = FALSE;
1644 #endif /* __arm64__ */
1645         new_task->task_region_footprint = FALSE;
1646         new_task->task_has_crossed_thread_limit = FALSE;
1647         new_task->task_thread_limit = 0;
1648 #if CONFIG_SECLUDED_MEMORY
1649         new_task->task_can_use_secluded_mem = FALSE;
1650         new_task->task_could_use_secluded_mem = FALSE;
1651         new_task->task_could_also_use_secluded_mem = FALSE;
1652         new_task->task_suppressed_secluded = FALSE;
1653 #endif /* CONFIG_SECLUDED_MEMORY */
1654
1655         /*
1656          * t_flags is set up above. But since we don't
1657          * support darkwake mode being set that way
1658          * currently, we clear it out here explicitly.
1659          */
1660         new_task->t_flags &= ~(TF_DARKWAKE_MODE);
1661
1662         queue_init(&new_task->io_user_clients);
1663         new_task->loadTag = 0;
1664
1665         ipc_task_enable(new_task);
1666
1667         lck_mtx_lock(&tasks_threads_lock);
1668         queue_enter(&tasks, new_task, task_t, tasks);
1669         tasks_count++;
1670         if (tasks_suspend_state) {
1671                 task_suspend_internal(new_task);
1672         }
1673         lck_mtx_unlock(&tasks_threads_lock);
1674
1675         *child_task = new_task;
1676         return KERN_SUCCESS;
1677 }
1678
1679 /*
1680  *      task_rollup_accounting_info
1681  *
1682  *      Roll up accounting stats. Used to roll up stats
1683  *      for exec copy task and corpse fork.
      *
      *      to_task:   task whose counters are overwritten and whose ledger
      *                 receives the rolled-up entries.
      *      from_task: task the values are read from; asserted to differ
      *                 from to_task.
      *
      *      The per-task counters and statistics structs are copied by plain
      *      assignment, so whatever to_task held in those fields before the
      *      call is replaced.  Both tasks' task_io_stats pointers are
      *      dereferenced without a NULL check.  This function takes no locks
      *      itself.
      *
      *      NOTE(review): the memlimit_* flags and task_monotonic, which the
      *      task creation code earlier in this file zeroes on the ordinary
      *      fork/spawn path, are not copied here -- confirm that omission is
      *      intentional.
1684  */
1685 void
1686 task_rollup_accounting_info(task_t to_task, task_t from_task)
1687 {
1688         assert(from_task != to_task);
1689
             /* CPU time, fault, message, syscall and context-switch counters. */
1690         to_task->total_user_time = from_task->total_user_time;
1691         to_task->total_system_time = from_task->total_system_time;
1692         to_task->total_ptime = from_task->total_ptime;
1693         to_task->total_runnable_time = from_task->total_runnable_time;
1694         to_task->faults = from_task->faults;
1695         to_task->pageins = from_task->pageins;
1696         to_task->cow_faults = from_task->cow_faults;
1697         to_task->decompressions = from_task->decompressions;
1698         to_task->messages_sent = from_task->messages_sent;
1699         to_task->messages_received = from_task->messages_received;
1700         to_task->syscalls_mach = from_task->syscalls_mach;
1701         to_task->syscalls_unix = from_task->syscalls_unix;
1702         to_task->c_switch = from_task->c_switch;
1703         to_task->p_switch = from_task->p_switch;
1704         to_task->ps_switch = from_task->ps_switch;
1705         to_task->extmod_statistics = from_task->extmod_statistics;
             /* Low-memory notification / purge state. */
1706         to_task->low_mem_notified_warn = from_task->low_mem_notified_warn;
1707         to_task->low_mem_notified_critical = from_task->low_mem_notified_critical;
1708         to_task->purged_memory_warn = from_task->purged_memory_warn;
1709         to_task->purged_memory_critical = from_task->purged_memory_critical;
1710         to_task->low_mem_privileged_listener = from_task->low_mem_privileged_listener;
             /* Copies the pointed-to structure, not the pointer. */
1711         *to_task->task_io_stats = *from_task->task_io_stats;
1712         to_task->cpu_time_eqos_stats = from_task->cpu_time_eqos_stats;
1713         to_task->cpu_time_rqos_stats = from_task->cpu_time_rqos_stats;
1714         to_task->task_timer_wakeups_bin_1 = from_task->task_timer_wakeups_bin_1;
1715         to_task->task_timer_wakeups_bin_2 = from_task->task_timer_wakeups_bin_2;
1716         to_task->task_gpu_ns = from_task->task_gpu_ns;
             /* Write counters are copied member by member. */
1717         to_task->task_writes_counters_internal.task_immediate_writes = from_task->task_writes_counters_internal.task_immediate_writes;
1718         to_task->task_writes_counters_internal.task_deferred_writes = from_task->task_writes_counters_internal.task_deferred_writes;
1719         to_task->task_writes_counters_internal.task_invalidated_writes = from_task->task_writes_counters_internal.task_invalidated_writes;
1720         to_task->task_writes_counters_internal.task_metadata_writes = from_task->task_writes_counters_internal.task_metadata_writes;
1721         to_task->task_writes_counters_external.task_immediate_writes = from_task->task_writes_counters_external.task_immediate_writes;
1722         to_task->task_writes_counters_external.task_deferred_writes = from_task->task_writes_counters_external.task_deferred_writes;
1723         to_task->task_writes_counters_external.task_invalidated_writes = from_task->task_writes_counters_external.task_invalidated_writes;
1724         to_task->task_writes_counters_external.task_metadata_writes = from_task->task_writes_counters_external.task_metadata_writes;
1725         to_task->task_energy = from_task->task_energy;
1726
1727         /* Skip ledger roll up for memory accounting entries */
1728         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time);
1729         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.platform_idle_wakeups);
1730         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.interrupt_wakeups);
1731 #if CONFIG_SCHED_SFI
             /* One wait-time ledger entry per SFI class. */
1732         for (sfi_class_id_t class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
1733                 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.sfi_wait_times[class_id]);
1734         }
1735 #endif
1736         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_me);
1737         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_others);
1738         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.physical_writes);
1739         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.logical_writes);
1740         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_me);
1741         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_others);
1742 }
1743
1744 int task_dropped_imp_count = 0; /* NOTE(review): not referenced in this part of the file; presumably counts dropped importance references -- confirm */
1745
1746 /*
1747  *      task_deallocate:
1748  *
1749  *      Drop a reference on a task.
      *
      *      A TASK_NULL argument is ignored.  The reference is dropped with
      *      task_deallocate_internal(); the task is torn down and returned to
      *      task_zone only when that call reports 0.
      *
      *      With IMPORTANCE_INHERITANCE, a result of 1 triggers
      *      ipc_importance_disconnect_task() if the task still has a
      *      task_imp_base, and the function then returns without tearing
      *      anything down (see the comment in the body).
      *
      *      Final teardown expects the task to be inactive with no threads,
      *      no bsd_info and an inactive IPC space (all asserted), and to be
      *      on the terminated_tasks queue.
1750  */
1751 void
1752 task_deallocate(
1753         task_t          task)
1754 {
             /*
              * The wakeup totals start at zero: the ledger_get_entries() calls
              * that fill them in below are not checked, so a failing call must
              * not leave stack garbage to be added to dead_task_statistics.
              */
1755         ledger_amount_t credit, debit, interrupt_wakeups = 0, platform_idle_wakeups = 0;
1756         os_ref_count_t refs;
1757
1758         if (task == TASK_NULL) {
1759                 return;
1760         }
1761
1762         refs = task_deallocate_internal(task);
1763
1764 #if IMPORTANCE_INHERITANCE
1765         if (refs == 1) {
1766                 /*
1767                  * If last ref potentially comes from the task's importance,
1768                  * disconnect it.  But more task refs may be added before
1769                  * that completes, so wait for the reference to go to zero
1770                  * naturally (it may happen on a recursive task_deallocate()
1771                  * from the ipc_importance_disconnect_task() call).
1772                  */
1773                 if (IIT_NULL != task->task_imp_base) {
1774                         ipc_importance_disconnect_task(task);
1775                 }
1776                 return;
1777         }
1778 #endif /* IMPORTANCE_INHERITANCE */
1779
             /* Other references remain: nothing more to do. */
1780         if (refs > 0) {
1781                 return;
1782         }
1783
1784         /*
1785          * The task should be dead at this point. Ensure other resources
1786          * like threads, are gone before we trash the world.
1787          */
1788         assert(queue_empty(&task->threads));
1789         assert(task->bsd_info == NULL);
1790         assert(!is_active(task->itk_space));
1791         assert(!task->active);
1792         assert(task->active_thread_count == 0);
1793
             /* Unlink from the global list of terminated tasks. */
1794         lck_mtx_lock(&tasks_threads_lock);
1795         assert(terminated_tasks_count > 0);
1796         queue_remove(&terminated_tasks, task, task_t, tasks);
1797         terminated_tasks_count--;
1798         lck_mtx_unlock(&tasks_threads_lock);
1799
1800         /*
1801          * remove the reference on atm descriptor
1802          */
1803         task_atm_reset(task);
1804
1805         /*
1806          * remove the reference on bank context
1807          */
1808         task_bank_reset(task);
1809
1810         if (task->task_io_stats) {
1811                 kfree(task->task_io_stats, sizeof(struct io_stat_info));
1812         }
1813
1814         /*
1815          *      Give the machine dependent code a chance
1816          *      to perform cleanup before ripping apart
1817          *      the task.
1818          */
1819         machine_task_terminate(task);
1820
1821         ipc_task_terminate(task);
1822
1823         /* let iokit know */
1824         iokit_task_terminate(task);
1825
1826         if (task->affinity_space) {
1827                 task_affinity_deallocate(task);
1828         }
1829
1830 #if MACH_ASSERT
             /* When both exist, the task and its pmap must share one ledger. */
1831         if (task->ledger != NULL &&
1832             task->map != NULL &&
1833             task->map->pmap != NULL &&
1834             task->map->pmap->ledger != NULL) {
1835                 assert(task->ledger == task->map->pmap->ledger);
1836         }
1837 #endif /* MACH_ASSERT */
1838
             /* Any object still counted as owned after disowning is fatal. */
1839         vm_owned_objects_disown(task);
1840         assert(task->task_objects_disowned);
1841         if (task->task_volatile_objects != 0 ||
1842             task->task_nonvolatile_objects != 0 ||
1843             task->task_owned_objects != 0) {
1844                 panic("task_deallocate(%p): "
1845                     "volatile_objects=%d nonvolatile_objects=%d owned=%d\n",
1846                     task,
1847                     task->task_volatile_objects,
1848                     task->task_nonvolatile_objects,
1849                     task->task_owned_objects);
1850         }
1851
1852         vm_map_deallocate(task->map);
1853         is_release(task->itk_space);
1854         if (task->restartable_ranges) {
1855                 restartable_ranges_release(task->restartable_ranges);
1856         }
1857
             /*
              * Only the first output of each call is used; 'debit' is a
              * scratch destination here.
              */
1858         ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
1859             &interrupt_wakeups, &debit);
1860         ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
1861             &platform_idle_wakeups, &debit);
1862
1863 #if defined(CONFIG_SCHED_MULTIQ)
1864         sched_group_destroy(task->sched_group);
1865 #endif
1866
1867         /* Accumulate statistics for dead tasks */
1868         lck_spin_lock(&dead_task_statistics_lock);
1869         dead_task_statistics.total_user_time += task->total_user_time;
1870         dead_task_statistics.total_system_time += task->total_system_time;
1871
1872         dead_task_statistics.task_interrupt_wakeups += interrupt_wakeups;
1873         dead_task_statistics.task_platform_idle_wakeups += platform_idle_wakeups;
1874
1875         dead_task_statistics.task_timer_wakeups_bin_1 += task->task_timer_wakeups_bin_1;
1876         dead_task_statistics.task_timer_wakeups_bin_2 += task->task_timer_wakeups_bin_2;
1877         dead_task_statistics.total_ptime += task->total_ptime;
1878         dead_task_statistics.total_pset_switches += task->ps_switch;
1879         dead_task_statistics.task_gpu_ns += task->task_gpu_ns;
1880         dead_task_statistics.task_energy += task->task_energy;
1881
1882         lck_spin_unlock(&dead_task_statistics_lock);
1883         lck_mtx_destroy(&task->lock, &task_lck_grp);
1884
             /*
              * Fold the tkm ledger entries into the global totals; a zero
              * return from ledger_get_entries() is treated as success.
              */
1885         if (!ledger_get_entries(task->ledger, task_ledgers.tkm_private, &credit,
1886             &debit)) {
1887                 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc);
1888                 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_private.free);
1889         }
1890         if (!ledger_get_entries(task->ledger, task_ledgers.tkm_shared, &credit,
1891             &debit)) {
1892                 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_shared.alloc);
1893                 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_shared.free);
1894         }
1895         ledger_dereference(task->ledger);
1896
1897 #if TASK_REFERENCE_LEAK_DEBUG
1898         btlog_remove_entries_for_element(task_ref_btlog, task);
1899 #endif
1900
1901 #if CONFIG_COALITIONS
1902         task_release_coalitions(task);
1903 #endif /* CONFIG_COALITIONS */
1904
1905         bzero(task->coalition, sizeof(task->coalition));
1906
1907 #if MACH_BSD
1908         /* clean up collected information since last reference to task is gone */
1909         if (task->corpse_info) {
                     /* Fetch the backing buffer before the descriptor is destroyed. */
1910                 void *corpse_info_kernel = kcdata_memory_get_begin_addr(task->corpse_info);
1911                 task_crashinfo_destroy(task->corpse_info);
1912                 task->corpse_info = NULL;
1913                 if (corpse_info_kernel) {
1914                         kfree(corpse_info_kernel, CORPSEINFO_ALLOCATION_SIZE);
1915                 }
1916         }
1917 #endif
1918
1919 #if CONFIG_MACF
1920         if (task->crash_label) {
1921                 mac_exc_free_label(task->crash_label);
1922                 task->crash_label = NULL;
1923         }
1924 #endif
1925
1926         assert(queue_empty(&task->task_objq));
1927
1928         zfree(task_zone, task);
1929 }
1930
1931 /*
1932  *      task_name_deallocate:
1933  *
1934  *      Drop a reference on a task name.
      *
      *      The task_name_t handle is cast to task_t and released through
      *      task_deallocate(), which ignores TASK_NULL.
1935  */
1936 void
1937 task_name_deallocate(
1938         task_name_t             task_name)
1939 {
             /* Plain call: a void function must not return an expression. */
1940         task_deallocate((task_t)task_name);
1941 }
1942
1943 /*
1944  *      task_inspect_deallocate:
1945  *
1946  *      Drop a task inspection reference.
      *
      *      The task_inspect_t handle is cast to task_t and released through
      *      task_deallocate(), which ignores TASK_NULL.
1947  */
1948 void
1949 task_inspect_deallocate(
1950         task_inspect_t          task_inspect)
1951 {
             /* Plain call: a void function must not return an expression. */
1952         task_deallocate((task_t)task_inspect);
1953 }
1954
1955 /*
1956  *      task_suspension_token_deallocate:
1957  *
1958  *      Drop a reference on a task suspension token.
      *
      *      The token is cast to task_t and released through
      *      task_deallocate(), which ignores TASK_NULL.
1959  */
1960 void
1961 task_suspension_token_deallocate(
1962         task_suspension_token_t         token)
1963 {
             /* Plain call: a void function must not return an expression. */
1964         task_deallocate((task_t)token);
1965 }
1966
1967
1968 /*
1969  * task_collect_crash_info:
1970  *
1971  * collect crash info from bsd and mach based data
      *
      * As written here, this allocates a zeroed CORPSEINFO_ALLOCATION_SIZE
      * buffer, wraps it in a kcdata descriptor via task_crashinfo_alloc_init()
      * and publishes that descriptor as task->corpse_info.  With CONFIG_MACF it
      * also installs a freshly created label as task->crash_label and updates
      * it from the caller's crash_label.
      *
      * task:           task to attach the crash info to.
      * crash_label:    (MACF only) label passed on to
      *                 mac_exc_update_task_crash_label().
      * is_corpse_fork: non-zero for a corpse fork; the descriptor is then
      *                 created without CORPSE_CRASHINFO_HAS_REF and the task
      *                 need not have bsd_info.
      *
      * Returns:
      *   KERN_NOT_SUPPORTED      corpses_enabled() is false.
      *   KERN_RESOURCE_SHORTAGE  the buffer could not be allocated.
      *   KERN_FAILURE            task_crashinfo_alloc_init() returned NULL.
      *   KERN_SUCCESS            otherwise, including the case where the
      *                           task already had corpse_info and nothing
      *                           was changed.
      *
      * The task lock is dropped while allocating and re-taken to publish the
      * descriptor; a corpse_info found in place at publish time is destroyed
      * and its buffer freed.
      *
      * NOTE(review): the parameter list is guarded with #ifdef CONFIG_MACF
      * while the body uses #if CONFIG_MACF -- confirm both agree for every
      * configuration that defines the macro.
1972  */
1973 kern_return_t
1974 task_collect_crash_info(
1975         task_t task,
1976 #ifdef CONFIG_MACF
1977         struct label *crash_label,
1978 #endif
1979         int is_corpse_fork)
1980 {
1981         kern_return_t kr = KERN_SUCCESS;
1982
1983         kcdata_descriptor_t crash_data = NULL;
1984         kcdata_descriptor_t crash_data_release = NULL;
1985         mach_msg_type_number_t size = CORPSEINFO_ALLOCATION_SIZE;
1986         mach_vm_offset_t crash_data_ptr = 0;
1987         void *crash_data_kernel = NULL;
1988         void *crash_data_kernel_release = NULL;
1989 #if CONFIG_MACF
1990         struct label *label, *free_label;
1991 #endif
1992
1993         if (!corpses_enabled()) {
1994                 return KERN_NOT_SUPPORTED;
1995         }
1996
1997 #if CONFIG_MACF
             /*
              * free_label is whichever label has to be released on exit.  Until
              * the new label is installed below, that is the new label itself.
              */
1998         free_label = label = mac_exc_create_label();
1999 #endif
2000
2001         task_lock(task);
2002
             /* The 'if' repeats the asserted condition, so it is still enforced without asserts. */
2003         assert(is_corpse_fork || task->bsd_info != NULL);
2004         if (task->corpse_info == NULL && (is_corpse_fork || task->bsd_info != NULL)) {
2005 #if CONFIG_MACF
2006                 /* Set the crash label, used by the exception delivery mac hook */
2007                 free_label = task->crash_label; // Most likely NULL.
2008                 task->crash_label = label;
2009                 mac_exc_update_task_crash_label(task, crash_label);
2010 #endif
                     /* Allocation happens with the task lock dropped. */
2011                 task_unlock(task);
2012
2013                 crash_data_kernel = (void *) kalloc(CORPSEINFO_ALLOCATION_SIZE);
2014                 if (crash_data_kernel == NULL) {
                             /* Under MACF the new label stays installed on this path. */
2015                         kr = KERN_RESOURCE_SHORTAGE;
2016                         goto out_no_lock;
2017                 }
2018                 bzero(crash_data_kernel, CORPSEINFO_ALLOCATION_SIZE);
2019                 crash_data_ptr = (mach_vm_offset_t) crash_data_kernel;
2020
2021                 /* Do not get a corpse ref for corpse fork */
2022                 crash_data = task_crashinfo_alloc_init((mach_vm_address_t)crash_data_ptr, size,
2023                     is_corpse_fork ? 0 : CORPSE_CRASHINFO_HAS_REF,
2024                     KCFLAG_USE_MEMCOPY);
2025                 if (crash_data) {
                             /*
                              * Publish under the lock.  Whatever corpse_info is found
                              * in place (the lock was dropped above) is captured here
                              * and released after unlocking.
                              */
2026                         task_lock(task);
2027                         crash_data_release = task->corpse_info;
2028                         crash_data_kernel_release = kcdata_memory_get_begin_addr(crash_data_release);
2029                         task->corpse_info = crash_data;
2030
2031                         task_unlock(task);
2032                         kr = KERN_SUCCESS;
2033                 } else {
                             /* No descriptor: give the raw buffer back. */
2034                         kfree(crash_data_kernel, CORPSEINFO_ALLOCATION_SIZE);
2035                         kr = KERN_FAILURE;
2036                 }
2037
2038                 if (crash_data_release != NULL) {
2039                         task_crashinfo_destroy(crash_data_release);
2040                 }
2041                 if (crash_data_kernel_release != NULL) {
2042                         kfree(crash_data_kernel_release, CORPSEINFO_ALLOCATION_SIZE);
2043                 }
2044         } else {
                     /* Nothing to do; kr is still KERN_SUCCESS. */
2045                 task_unlock(task);
2046         }
2047
2048 out_no_lock:
2049 #if CONFIG_MACF
             /* Either the displaced task label or the never-installed new one. */
2050         if (free_label != NULL) {
2051                 mac_exc_free_label(free_label);
2052         }
2053 #endif
2054         return kr;
2055 }
2056
2057 /*
2058  * task_deliver_crash_notification:
2059  *
2060  * Makes outcall to registered host port for a corpse.
      *
      * task:    corpse (or corpse fork) being reported; must already have
      *          corpse_info, otherwise KERN_FAILURE is returned at once.
      * thread:  thread handed to exception_triage_thread().
      * etype:   exception type, used as code[0] only for a corpse fork.
      * subcode: used as code[1] for a corpse fork, or for a regular corpse
      *          when corpse_for_fatal_memkill is set.
      *
      * Under the task lock the exception codes are chosen and
      * ipc_thread_reset() is applied to every thread whose corpse_dup is
      * FALSE.  A no-senders request is then armed on the task port,
      * EXC_CORPSE_NOTIFY is raised at THREAD_UNINT interrupt level, and the
      * send right obtained for the task port is released.
      *
      * Returns the result of exception_triage_thread(); a failure is also
      * logged with printf().
2061  */
2062 kern_return_t
2063 task_deliver_crash_notification(
2064         task_t task,
2065         thread_t thread,
2066         exception_type_t etype,
2067         mach_exception_subcode_t subcode)
2068 {
             /* Sampled before the task lock is taken; used only for the NULL check. */
2069         kcdata_descriptor_t crash_info = task->corpse_info;
2070         thread_t th_iter = NULL;
2071         kern_return_t kr = KERN_SUCCESS;
2072         wait_interrupt_t wsave;
2073         mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
2074         ipc_port_t task_port, old_notify;
2075
2076         if (crash_info == NULL) {
2077                 return KERN_FAILURE;
2078         }
2079
2080         task_lock(task);
2081         if (task_is_a_corpse_fork(task)) {
2082                 /* Populate code with EXC_{RESOURCE,GUARD} for corpse fork */
2083                 code[0] = etype;
2084                 code[1] = subcode;
2085         } else {
2086                 /* Populate code with EXC_CRASH for corpses */
2087                 code[0] = EXC_CRASH;
2088                 code[1] = 0;
2089                 /* Update the code[1] if the boot-arg corpse_for_fatal_memkill is set */
2090                 if (corpse_for_fatal_memkill) {
2091                         code[1] = subcode;
2092                 }
2093         }
2094
             /* Threads flagged corpse_dup are left alone. */
2095         queue_iterate(&task->threads, th_iter, thread_t, task_threads)
2096         {
2097                 if (th_iter->corpse_dup == FALSE) {
2098                         ipc_thread_reset(th_iter);
2099                 }
2100         }
2101         task_unlock(task);
2102
2103         /* Arm the no-sender notification for taskport */
             /* NOTE(review): this reference is presumably consumed by convert_task_to_port() -- confirm. */
2104         task_reference(task);
2105         task_port = convert_task_to_port(task);
2106         ip_lock(task_port);
2107         require_ip_active(task_port);
2108         ipc_port_nsrequest(task_port, task_port->ip_mscount, ipc_port_make_sonce_locked(task_port), &old_notify);
2109         /* port unlocked */
2110         assert(IP_NULL == old_notify);
2111
             /* Deliver at THREAD_UNINT; the previous level is restored afterwards. */
2112         wsave = thread_interrupt_level(THREAD_UNINT);
2113         kr = exception_triage_thread(EXC_CORPSE_NOTIFY, code, EXCEPTION_CODE_MAX, thread);
2114         if (kr != KERN_SUCCESS) {
2115                 printf("Failed to send exception EXC_CORPSE_NOTIFY. error code: %d for pid %d\n", kr, task_pid(task));
2116         }
2117
2118         (void)thread_interrupt_level(wsave);
2119
2120         /*
2121          * Drop the send right on task port, will fire the
2122          * no-sender notification if exception deliver failed.
2123          */
2124         ipc_port_release_send(task_port);
2125         return kr;
2126 }
2127
2128 /*
2129  *      task_terminate:
2130  *
2131  *      Terminate the specified task.  The comments on thread_terminate
2132  *      (kern/thread.c) describe the problems with terminating the
      *      "current task."
      *
      *      Returns KERN_INVALID_ARGUMENT for TASK_NULL, KERN_FAILURE for a
      *      task that has bsd_info attached, and otherwise whatever
      *      task_terminate_internal() returns.
2133  */
2134
2135 kern_return_t
2136 task_terminate(
2137         task_t          task)
2138 {
2139         kern_return_t   result = KERN_INVALID_ARGUMENT;
2140
2141         if (task != TASK_NULL) {
2142                 /* A task with bsd_info attached is refused here. */
2143                 result = task->bsd_info ? KERN_FAILURE :
2144                     task_terminate_internal(task);
2145         }
2146
2147         return result;
2148 }
2149
2150 #if MACH_ASSERT
     /*
      * Prototypes for two helpers defined outside this file, declared only
      * when MACH_ASSERT is enabled.  NOTE(review): their callers are not
      * visible in this part of the file -- confirm they are still needed.
      */
2151 extern int proc_pid(struct proc *);
2152 extern void proc_name_kdp(task_t t, char *buf, int size);
2153 #endif /* MACH_ASSERT */
2154
2155 #define VM_MAP_PARTIAL_REAP 0x54  /* 0x150 */
     /*
      *      task_partial_reap:
      *
      *      Run vm_map_partial_reap() on the task's map, bracketed by a
      *      DBG_FUNC_START / DBG_FUNC_END trace pair.  The start event
      *      carries pid and the task's physical footprint in pages; the end
      *      event carries pid and the two counts filled in by
      *      vm_map_partial_reap().
      */
2156 static void
2157 __unused task_partial_reap(task_t task, __unused int pid)
2158 {
2159         unsigned int    resident_reclaimed = 0;
2160         unsigned int    compressed_reclaimed = 0;
2161         uint64_t        footprint_pages = (get_task_phys_footprint(task) / PAGE_SIZE_64);
2162
2163         KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_START),
2164             pid, footprint_pages, 0, 0, 0);
2165
2166         vm_map_partial_reap(task->map, &resident_reclaimed, &compressed_reclaimed);
2167
2168         KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_END),
2169             pid, resident_reclaimed, compressed_reclaimed, 0, 0);
2170 }
2173
2174 kern_return_t
2175 task_mark_corpse(task_t task)
2176 {
             /*
              * Turn the calling task into a corpse.
              *
              * Asserted preconditions: task is not kernel_task, is the current
              * task, and is not already a corpse.
              *
              * Steps: collect crash info (not as a corpse fork); then, at
              * THREAD_UNINT and under the task lock, flag the task as a corpse
              * with a pending report, record the calling thread as the crashed
              * thread, start halting, and reset and re-enable the task's IPC
              * state with the kernel port cleared.  After unlocking, the IPC
              * space is terminated, the task is added to the corpse list, and
              * the calling thread is terminated via thread_terminate_internal().
              *
              * Returns the task_collect_crash_info() error if that step fails;
              * otherwise the result of task_start_halt_locked(), which is
              * asserted to be KERN_SUCCESS.
              */
2177         kern_return_t kr = KERN_SUCCESS;
2178         thread_t self_thread;
             /* NOTE(review): presumably silences an unused-variable warning -- confirm it is still needed. */
2179         (void) self_thread;
2180         wait_interrupt_t wsave;
2181 #if CONFIG_MACF
2182         struct label *crash_label = NULL;
2183 #endif
2184
2185         assert(task != kernel_task);
2186         assert(task == current_task());
2187         assert(!task_is_a_corpse(task));
2188
2189 #if CONFIG_MACF
2190         crash_label = mac_exc_create_label_for_proc((struct proc*)task->bsd_info);
2191 #endif
2192
2193         kr = task_collect_crash_info(task,
2194 #if CONFIG_MACF
2195             crash_label,
2196 #endif
2197             FALSE);
2198         if (kr != KERN_SUCCESS) {
2199                 goto out;
2200         }
2201
2202         self_thread = current_thread();
2203
2204         wsave = thread_interrupt_level(THREAD_UNINT);
2205         task_lock(task);
2206
2207         task_set_corpse_pending_report(task);
2208         task_set_corpse(task);
2209         task->crashed_thread_id = thread_tid(self_thread);
2210
             /* kr is overwritten here and is what the function returns. */
2211         kr = task_start_halt_locked(task, TRUE);
2212         assert(kr == KERN_SUCCESS);
2213
2214         ipc_task_reset(task);
2215         /* Remove the naked send right for task port, needed to arm no sender notification */
2216         task_set_special_port(task, TASK_KERNEL_PORT, IPC_PORT_NULL);
2217         ipc_task_enable(task);
2218
2219         task_unlock(task);
2220         /* terminate the ipc space */
2221         ipc_space_terminate(task->itk_space);
2222
2223         /* Add it to global corpse task list */
2224         task_add_to_corpse_task_list(task);
2225
             /* Called without the task lock, after task_start_halt_locked() above. */
2226         task_start_halt(task);
2227         thread_terminate_internal(self_thread);
2228
2229         (void) thread_interrupt_level(wsave);
2230         assert(task->halting == TRUE);
2231
2232 out:
2233 #if CONFIG_MACF
             /* Reached on both the success and the failure path. */
2234         mac_exc_free_label(crash_label);
2235 #endif
2236         return kr;
2237 }
2238
2239 /*
2240  *      task_clear_corpse
2241  *
2242  *      Clears the corpse pending bit on task.
2243  *      Removes inspection bit on the threads.
2244  */
2245 void
2246 task_clear_corpse(task_t task)
2247 {
2248         thread_t th_iter = NULL;
2249
2250         task_lock(task);
2251         queue_iterate(&task->threads, th_iter, thread_t, task_threads)
2252         {
2253                 thread_mtx_lock(th_iter);
2254                 th_iter->inspection = FALSE;
2255                 thread_mtx_unlock(th_iter);
2256         }
2257
2258         thread_terminate_crashed_threads();
2259         /* remove the pending corpse report flag */
2260         task_clear_corpse_pending_report(task);
2261
2262         task_unlock(task);
2263 }
2264
2265 /*
2266  *      task_port_notify
2267  *
2268  *      Called whenever the Mach port system detects no-senders on
2269  *      the task port of a corpse.
2270  *      Each notification that comes in should terminate the task (corpse).
2271  */
2272 void
2273 task_port_notify(mach_msg_header_t *msg)
2274 {
2275         mach_no_senders_notification_t *notification = (void *)msg;
2276         ipc_port_t port = notification->not_header.msgh_remote_port;
2277         task_t task;
2278
2279         require_ip_active(port);
2280         assert(IKOT_TASK == ip_kotype(port));
2281         task = (task_t) port->ip_kobject;
2282
2283         assert(task_is_a_corpse(task));
2284
2285         /* Remove the task from global corpse task list */
2286         task_remove_from_corpse_task_list(task);
2287
2288         task_clear_corpse(task);
2289         task_terminate_internal(task);
2290 }
2291
2292 /*
2293  *      task_wait_till_threads_terminate_locked
2294  *
2295  *      Wait till all the threads in the task are terminated.
2296  *      Might release the task lock and re-acquire it.
2297  */
2298 void
2299 task_wait_till_threads_terminate_locked(task_t task)
2300 {
2301         /* wait for all the threads in the task to terminate */
2302         while (task->active_thread_count != 0) {
2303                 assert_wait((event_t)&task->active_thread_count, THREAD_UNINT);
2304                 task_unlock(task);
2305                 thread_block(THREAD_CONTINUE_NULL);
2306
2307                 task_lock(task);
2308         }
2309 }
2310
2311 /*
2312  *      task_duplicate_map_and_threads
2313  *
2314  *      Copy vmmap of source task.
2315  *      Copy active threads from source task to destination task.
2316  *      Source task would be suspended during the copy.
2317  */
2318 kern_return_t
2319 task_duplicate_map_and_threads(
2320         task_t task,
2321         void *p,
2322         task_t new_task,
2323         thread_t *thread_ret,
2324         uint64_t **udata_buffer,
2325         int *size,
2326         int *num_udata)
2327 {
2328         kern_return_t kr = KERN_SUCCESS;
2329         int active;
2330         thread_t thread, self, thread_return = THREAD_NULL;
2331         thread_t new_thread = THREAD_NULL, first_thread = THREAD_NULL;
2332         thread_t *thread_array;
2333         uint32_t active_thread_count = 0, array_count = 0, i;
2334         vm_map_t oldmap;
2335         uint64_t *buffer = NULL;
2336         int buf_size = 0;
2337         int est_knotes = 0, num_knotes = 0;
2338
2339         self = current_thread();
2340
2341         /*
2342          * Suspend the task to copy thread state, use the internal
2343          * variant so that no user-space process can resume
2344          * the task from under us
2345          */
2346         kr = task_suspend_internal(task);
2347         if (kr != KERN_SUCCESS) {
2348                 return kr;
2349         }
2350
2351         if (task->map->disable_vmentry_reuse == TRUE) {
2352                 /*
2353                  * Quite likely GuardMalloc (or some debugging tool)
2354                  * is being used on this task. And it has gone through
2355                  * its limit. Making a corpse will likely encounter
2356                  * a lot of VM entries that will need COW.
2357                  *
2358                  * Skip it.
2359                  */
2360 #if DEVELOPMENT || DEBUG
2361                 memorystatus_abort_vm_map_fork(task);
2362 #endif
2363                 task_resume_internal(task);
2364                 return KERN_FAILURE;
2365         }
2366
2367         /* Check with VM if vm_map_fork is allowed for this task */
2368         if (memorystatus_allowed_vm_map_fork(task)) {
2369                 /* Setup new task's vmmap, switch from parent task's map to it COW map */
2370                 oldmap = new_task->map;
2371                 new_task->map = vm_map_fork(new_task->ledger,
2372                     task->map,
2373                     (VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
2374                     VM_MAP_FORK_PRESERVE_PURGEABLE |
2375                     VM_MAP_FORK_CORPSE_FOOTPRINT));
2376                 vm_map_deallocate(oldmap);
2377
2378                 /* copy ledgers that impact the memory footprint */
2379                 vm_map_copy_footprint_ledgers(task, new_task);
2380
2381                 /* Get all the udata pointers from kqueue */
2382                 est_knotes = kevent_proc_copy_uptrs(p, NULL, 0);
2383                 if (est_knotes > 0) {
2384                         buf_size = (est_knotes + 32) * sizeof(uint64_t);
2385                         buffer = (uint64_t *) kalloc(buf_size);
2386                         num_knotes = kevent_proc_copy_uptrs(p, buffer, buf_size);
2387                         if (num_knotes > est_knotes + 32) {
2388                                 num_knotes = est_knotes + 32;
2389                         }
2390                 }
2391         }
2392
2393         active_thread_count = task->active_thread_count;
2394         if (active_thread_count == 0) {
2395                 if (buffer != NULL) {
2396                         kfree(buffer, buf_size);
2397                 }
2398                 task_resume_internal(task);
2399                 return KERN_FAILURE;
2400         }
2401
2402         thread_array = (thread_t *) kalloc(sizeof(thread_t) * active_thread_count);
2403
2404         /* Iterate all the threads and drop the task lock before calling thread_create_with_continuation */
2405         task_lock(task);
2406         queue_iterate(&task->threads, thread, thread_t, task_threads) {
2407                 /* Skip inactive threads */
2408                 active = thread->active;
2409                 if (!active) {
2410                         continue;
2411                 }
2412
2413                 if (array_count >= active_thread_count) {
2414                         break;
2415                 }
2416
2417                 thread_array[array_count++] = thread;
2418                 thread_reference(thread);
2419         }
2420         task_unlock(task);
2421
2422         for (i = 0; i < array_count; i++) {
2423                 kr = thread_create_with_continuation(new_task, &new_thread, (thread_continue_t)thread_corpse_continue);
2424                 if (kr != KERN_SUCCESS) {
2425                         break;
2426                 }
2427
2428                 /* Equivalent of current thread in corpse */
2429                 if (thread_array[i] == self) {
2430                         thread_return = new_thread;
2431                         new_task->crashed_thread_id = thread_tid(new_thread);
2432                 } else if (first_thread == NULL) {
2433                         first_thread = new_thread;
2434                 } else {
2435                         /* drop the extra ref returned by thread_create_with_continuation */
2436                         thread_deallocate(new_thread);
2437                 }
2438
2439                 kr = thread_dup2(thread_array[i], new_thread);
2440                 if (kr != KERN_SUCCESS) {
2441                         thread_mtx_lock(new_thread);
2442                         new_thread->corpse_dup = TRUE;
2443                         thread_mtx_unlock(new_thread);
2444                         continue;
2445                 }
2446
2447                 /* Copy thread name */
2448                 bsd_copythreadname(new_thread->uthread, thread_array[i]->uthread);
2449                 new_thread->thread_tag = thread_array[i]->thread_tag;
2450                 thread_copy_resource_info(new_thread, thread_array[i]);
2451         }
2452
2453         /* return the first thread if we couldn't find the equivalent of current */
2454         if (thread_return == THREAD_NULL) {
2455                 thread_return = first_thread;
2456         } else if (first_thread != THREAD_NULL) {
2457                 /* drop the extra ref returned by thread_create_with_continuation */
2458                 thread_deallocate(first_thread);
2459         }
2460
2461         task_resume_internal(task);
2462
2463         for (i = 0; i < array_count; i++) {
2464                 thread_deallocate(thread_array[i]);
2465         }
2466         kfree(thread_array, sizeof(thread_t) * active_thread_count);
2467
2468         if (kr == KERN_SUCCESS) {
2469                 *thread_ret = thread_return;
2470                 *udata_buffer = buffer;
2471                 *size = buf_size;
2472                 *num_udata = num_knotes;
2473         } else {
2474                 if (thread_return != THREAD_NULL) {
2475                         thread_deallocate(thread_return);
2476                 }
2477                 if (buffer != NULL) {
2478                         kfree(buffer, buf_size);
2479                 }
2480         }
2481
2482         return kr;
2483 }
2484
2485 #if CONFIG_SECLUDED_MEMORY
2486 extern void task_set_can_use_secluded_mem_locked(
2487         task_t          task,
2488         boolean_t       can_use_secluded_mem);
2489 #endif /* CONFIG_SECLUDED_MEMORY */
2490
2491 kern_return_t
2492 task_terminate_internal(
2493         task_t                  task)
2494 {
2495         thread_t                        thread, self;
2496         task_t                          self_task;
2497         boolean_t                       interrupt_save;
2498         int                             pid = 0;
2499
2500         assert(task != kernel_task);
2501
2502         self = current_thread();
2503         self_task = self->task;
2504
2505         /*
2506          *      Get the task locked and make sure that we are not racing
2507          *      with someone else trying to terminate us.
2508          */
2509         if (task == self_task) {
2510                 task_lock(task);
2511         } else if (task < self_task) {
2512                 task_lock(task);
2513                 task_lock(self_task);
2514         } else {
2515                 task_lock(self_task);
2516                 task_lock(task);
2517         }
2518
2519 #if CONFIG_SECLUDED_MEMORY
2520         if (task->task_can_use_secluded_mem) {
2521                 task_set_can_use_secluded_mem_locked(task, FALSE);
2522         }
2523         task->task_could_use_secluded_mem = FALSE;
2524         task->task_could_also_use_secluded_mem = FALSE;
2525
2526         if (task->task_suppressed_secluded) {
2527                 stop_secluded_suppression(task);
2528         }
2529 #endif /* CONFIG_SECLUDED_MEMORY */
2530
2531         if (!task->active) {
2532                 /*
2533                  *      Task is already being terminated.
2534                  *      Just return an error. If we are dying, this will
2535                  *      just get us to our AST special handler and that
2536                  *      will get us to finalize the termination of ourselves.
2537                  */
2538                 task_unlock(task);
2539                 if (self_task != task) {
2540                         task_unlock(self_task);
2541                 }
2542
2543                 return KERN_FAILURE;
2544         }
2545
2546         if (task_corpse_pending_report(task)) {
2547                 /*
2548                  *      Task is marked for reporting as corpse.
2549                  *      Just return an error. This will
2550                  *      just get us to our AST special handler and that
2551                  *      will get us to finish the path to death
2552                  */
2553                 task_unlock(task);
2554                 if (self_task != task) {
2555                         task_unlock(self_task);
2556                 }
2557
2558                 return KERN_FAILURE;
2559         }
2560
2561         if (self_task != task) {
2562                 task_unlock(self_task);
2563         }
2564
2565         /*
2566          * Make sure the current thread does not get aborted out of
2567          * the waits inside these operations.
2568          */
2569         interrupt_save = thread_interrupt_level(THREAD_UNINT);
2570
2571         /*
2572          *      Indicate that we want all the threads to stop executing
2573          *      at user space by holding the task (we would have held
2574          *      each thread independently in thread_terminate_internal -
2575          *      but this way we may be more likely to already find it
2576          *      held there).  Mark the task inactive, and prevent
2577          *      further task operations via the task port.
2578          */
2579         task_hold_locked(task);
2580         task->active = FALSE;
2581         ipc_task_disable(task);
2582
2583 #if CONFIG_TELEMETRY
2584         /*
2585          * Notify telemetry that this task is going away.
2586          */
2587         telemetry_task_ctl_locked(task, TF_TELEMETRY, 0);
2588 #endif
2589
2590         /*
2591          *      Terminate each thread in the task.
2592          */
2593         queue_iterate(&task->threads, thread, thread_t, task_threads) {
2594                 thread_terminate_internal(thread);
2595         }
2596
2597 #ifdef MACH_BSD
2598         if (task->bsd_info != NULL && !task_is_exec_copy(task)) {
2599                 pid = proc_pid(task->bsd_info);
2600         }
2601 #endif /* MACH_BSD */
2602
2603         task_unlock(task);
2604
2605         proc_set_task_policy(task, TASK_POLICY_ATTRIBUTE,
2606             TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);
2607
2608         /* Early object reap phase */
2609
2610 // PR-17045188: Revisit implementation
2611 //        task_partial_reap(task, pid);
2612
2613 #if CONFIG_EMBEDDED
2614         /*
2615          * remove all task watchers
2616          */
2617         task_removewatchers(task);
2618
2619 #endif /* CONFIG_EMBEDDED */
2620
2621         /*
2622          *      Destroy all synchronizers owned by the task.
2623          */
2624         task_synchronizer_destroy_all(task);
2625
2626         /*
2627          *      Clear the watchport boost on the task.
2628          */
2629         task_remove_turnstile_watchports(task);
2630
2631         /*
2632          *      Destroy the IPC space, leaving just a reference for it.
2633          */
2634         ipc_space_terminate(task->itk_space);
2635
2636 #if 00
2637         /* if some ledgers go negative on tear-down again... */
2638         ledger_disable_panic_on_negative(task->map->pmap->ledger,
2639             task_ledgers.phys_footprint);
2640         ledger_disable_panic_on_negative(task->map->pmap->ledger,
2641             task_ledgers.internal);
2642         ledger_disable_panic_on_negative(task->map->pmap->ledger,
2643             task_ledgers.internal_compressed);
2644         ledger_disable_panic_on_negative(task->map->pmap->ledger,
2645             task_ledgers.iokit_mapped);
2646         ledger_disable_panic_on_negative(task->map->pmap->ledger,
2647             task_ledgers.alternate_accounting);
2648         ledger_disable_panic_on_negative(task->map->pmap->ledger,
2649             task_ledgers.alternate_accounting_compressed);
2650 #endif
2651
2652         /*
2653          * If the current thread is a member of the task
2654          * being terminated, then the last reference to
2655          * the task will not be dropped until the thread
2656          * is finally reaped.  To avoid incurring the
2657          * expense of removing the address space regions
2658          * at reap time, we do it explictly here.
2659          */
2660
2661         vm_map_lock(task->map);
2662         vm_map_disable_hole_optimization(task->map);
2663         vm_map_unlock(task->map);
2664
2665 #if MACH_ASSERT
2666         /*
2667          * Identify the pmap's process, in case the pmap ledgers drift
2668          * and we have to report it.
2669          */
2670         char procname[17];
2671         if (task->bsd_info && !task_is_exec_copy(task)) {
2672                 pid = proc_pid(task->bsd_info);
2673                 proc_name_kdp(task, procname, sizeof(procname));
2674         } else {
2675                 pid = 0;
2676                 strlcpy(procname, "<unknown>", sizeof(procname));
2677         }
2678         pmap_set_process(task->map->pmap, pid, procname);
2679 #endif /* MACH_ASSERT */
2680
2681         vm_map_remove(task->map,
2682             task->map->min_offset,
2683             task->map->max_offset,
2684             /*
2685              * Final cleanup:
2686              * + no unnesting
2687              * + remove immutable mappings
2688              * + allow gaps in range
2689              */
2690             (VM_MAP_REMOVE_NO_UNNESTING |
2691             VM_MAP_REMOVE_IMMUTABLE |
2692             VM_MAP_REMOVE_GAPS_OK));
2693
2694         /* release our shared region */
2695         vm_shared_region_set(task, NULL);
2696
2697
2698         lck_mtx_lock(&tasks_threads_lock);
2699         queue_remove(&tasks, task, task_t, tasks);
2700         queue_enter(&terminated_tasks, task, task_t, tasks);
2701         tasks_count--;
2702         terminated_tasks_count++;
2703         lck_mtx_unlock(&tasks_threads_lock);
2704
2705         /*
2706          * We no longer need to guard against being aborted, so restore
2707          * the previous interruptible state.
2708          */
2709         thread_interrupt_level(interrupt_save);
2710
2711 #if KPC
2712         /* force the task to release all ctrs */
2713         if (task->t_kpc & TASK_KPC_FORCED_ALL_CTRS) {
2714                 kpc_force_all_ctrs(task, 0);
2715         }
2716 #endif /* KPC */
2717
2718 #if CONFIG_COALITIONS
2719         /*
2720          * Leave our coalitions. (drop activation but not reference)
2721          */
2722         coalitions_remove_task(task);
2723 #endif
2724
2725         /*
2726          * Get rid of the task active reference on itself.
2727          */
2728         task_deallocate(task);
2729
2730         return KERN_SUCCESS;
2731 }
2732
2733 void
2734 tasks_system_suspend(boolean_t suspend)
2735 {
2736         task_t task;
2737
2738         lck_mtx_lock(&tasks_threads_lock);
2739         assert(tasks_suspend_state != suspend);
2740         tasks_suspend_state = suspend;
2741         queue_iterate(&tasks, task, task_t, tasks) {
2742                 if (task == kernel_task) {
2743                         continue;
2744                 }
2745                 suspend ? task_suspend_internal(task) : task_resume_internal(task);
2746         }
2747         lck_mtx_unlock(&tasks_threads_lock);
2748 }
2749
2750 /*
2751  * task_start_halt:
2752  *
2753  *      Shut the current task down (except for the current thread) in
2754  *      preparation for dramatic changes to the task (probably exec).
2755  *      We hold the task and mark all other threads in the task for
2756  *      termination.
2757  */
2758 kern_return_t
2759 task_start_halt(task_t task)
2760 {
2761         kern_return_t kr = KERN_SUCCESS;
2762         task_lock(task);
2763         kr = task_start_halt_locked(task, FALSE);
2764         task_unlock(task);
2765         return kr;
2766 }
2767
2768 static kern_return_t
2769 task_start_halt_locked(task_t task, boolean_t should_mark_corpse)
2770 {
2771         thread_t thread, self;
2772         uint64_t dispatchqueue_offset;
2773
2774         assert(task != kernel_task);
2775
2776         self = current_thread();
2777
2778         if (task != self->task && !task_is_a_corpse_fork(task)) {
2779                 return KERN_INVALID_ARGUMENT;
2780         }
2781
2782         if (task->halting || !task->active || !self->active) {
2783                 /*
2784                  * Task or current thread is already being terminated.
2785                  * Hurry up and return out of the current kernel context
2786                  * so that we run our AST special handler to terminate
2787                  * ourselves.
2788                  */
2789                 return KERN_FAILURE;
2790         }
2791
2792         task->halting = TRUE;
2793
2794         /*
2795          * Mark all the threads to keep them from starting any more
2796          * user-level execution.  The thread_terminate_internal code
2797          * would do this on a thread by thread basis anyway, but this
2798          * gives us a better chance of not having to wait there.
2799          */
2800         task_hold_locked(task);
2801         dispatchqueue_offset = get_dispatchqueue_offset_from_proc(task->bsd_info);
2802
2803         /*
2804          * Terminate all the other threads in the task.
2805          */
2806         queue_iterate(&task->threads, thread, thread_t, task_threads)
2807         {
2808                 if (should_mark_corpse) {
2809                         thread_mtx_lock(thread);
2810                         thread->inspection = TRUE;
2811                         thread_mtx_unlock(thread);
2812                 }
2813                 if (thread != self) {
2814                         thread_terminate_internal(thread);
2815                 }
2816         }
2817         task->dispatchqueue_offset = dispatchqueue_offset;
2818
2819         task_release_locked(task);
2820
2821         return KERN_SUCCESS;
2822 }
2823
2824
2825 /*
2826  * task_complete_halt:
2827  *
2828  *      Complete task halt by waiting for threads to terminate, then clean
2829  *      up task resources (VM, port namespace, etc...) and then let the
2830  *      current thread go in the (practically empty) task context.
2831  *
2832  *      Note: task->halting flag is not cleared in order to avoid creation
2833  *      of new thread in old exec'ed task.
2834  */
2835 void
2836 task_complete_halt(task_t task)
2837 {
2838         task_lock(task);
2839         assert(task->halting);
2840         assert(task == current_task());
2841
2842         /*
2843          *      Wait for the other threads to get shut down.
2844          *      When the last other thread is reaped, we'll be
2845          *      woken up.
2846          */
2847         if (task->thread_count > 1) {
2848                 assert_wait((event_t)&task->halting, THREAD_UNINT);
2849                 task_unlock(task);
2850                 thread_block(THREAD_CONTINUE_NULL);
2851         } else {
2852                 task_unlock(task);
2853         }
2854
2855         /*
2856          *      Give the machine dependent code a chance
2857          *      to perform cleanup of task-level resources
2858          *      associated with the current thread before
2859          *      ripping apart the task.
2860          */
2861         machine_task_terminate(task);
2862
2863         /*
2864          *      Destroy all synchronizers owned by the task.
2865          */
2866         task_synchronizer_destroy_all(task);
2867
2868         /*
2869          *      Destroy the contents of the IPC space, leaving just
2870          *      a reference for it.
2871          */
2872         ipc_space_clean(task->itk_space);
2873
2874         /*
2875          * Clean out the address space, as we are going to be
2876          * getting a new one.
2877          */
2878         vm_map_remove(task->map, task->map->min_offset,
2879             task->map->max_offset,
2880             /*
2881              * Final cleanup:
2882              * + no unnesting
2883              * + remove immutable mappings
2884              * + allow gaps in the range
2885              */
2886             (VM_MAP_REMOVE_NO_UNNESTING |
2887             VM_MAP_REMOVE_IMMUTABLE |
2888             VM_MAP_REMOVE_GAPS_OK));
2889
2890         /*
2891          * Kick out any IOKitUser handles to the task. At best they're stale,
2892          * at worst someone is racing a SUID exec.
2893          */
2894         iokit_task_terminate(task);
2895 }
2896
2897 /*
2898  *      task_hold_locked:
2899  *
2900  *      Suspend execution of the specified task.
2901  *      This is a recursive-style suspension of the task, a count of
2902  *      suspends is maintained.
2903  *
2904  *      CONDITIONS: the task is locked and active.
2905  */
2906 void
2907 task_hold_locked(
2908         task_t          task)
2909 {
2910         thread_t        thread;
2911
2912         assert(task->active);
2913
2914         if (task->suspend_count++ > 0) {
2915                 return;
2916         }
2917
2918         if (task->bsd_info) {
2919                 workq_proc_suspended(task->bsd_info);
2920         }
2921
2922         /*
2923          *      Iterate through all the threads and hold them.
2924          */
2925         queue_iterate(&task->threads, thread, thread_t, task_threads) {
2926                 thread_mtx_lock(thread);
2927                 thread_hold(thread);
2928                 thread_mtx_unlock(thread);
2929         }
2930 }
2931
2932 /*
2933  *      task_hold:
2934  *
2935  *      Same as the internal routine above, except that is must lock
2936  *      and verify that the task is active.  This differs from task_suspend
2937  *      in that it places a kernel hold on the task rather than just a
2938  *      user-level hold.  This keeps users from over resuming and setting
2939  *      it running out from under the kernel.
2940  *
2941  *      CONDITIONS: the caller holds a reference on the task
2942  */
2943 kern_return_t
2944 task_hold(
2945         task_t          task)
2946 {
2947         if (task == TASK_NULL) {
2948                 return KERN_INVALID_ARGUMENT;
2949         }
2950
2951         task_lock(task);
2952
2953         if (!task->active) {
2954                 task_unlock(task);
2955
2956                 return KERN_FAILURE;
2957         }
2958
2959         task_hold_locked(task);
2960         task_unlock(task);
2961
2962         return KERN_SUCCESS;
2963 }
2964
2965 kern_return_t
2966 task_wait(
2967         task_t          task,
2968         boolean_t       until_not_runnable)
2969 {
2970         if (task == TASK_NULL) {
2971                 return KERN_INVALID_ARGUMENT;
2972         }
2973
2974         task_lock(task);
2975
2976         if (!task->active) {
2977                 task_unlock(task);
2978
2979                 return KERN_FAILURE;
2980         }
2981
2982         task_wait_locked(task, until_not_runnable);
2983         task_unlock(task);
2984
2985         return KERN_SUCCESS;
2986 }
2987
2988 /*
2989  *      task_wait_locked:
2990  *
2991  *      Wait for all threads in task to stop.
2992  *
2993  * Conditions:
2994  *      Called with task locked, active, and held.
2995  */
2996 void
2997 task_wait_locked(
2998         task_t          task,
2999         boolean_t               until_not_runnable)
3000 {
3001         thread_t        thread, self;
3002
3003         assert(task->active);
3004         assert(task->suspend_count > 0);
3005
3006         self = current_thread();
3007
3008         /*
3009          *      Iterate through all the threads and wait for them to
3010          *      stop.  Do not wait for the current thread if it is within
3011          *      the task.
3012          */
3013         queue_iterate(&task->threads, thread, thread_t, task_threads) {
3014                 if (thread != self) {
3015                         thread_wait(thread, until_not_runnable);
3016                 }
3017         }
3018 }
3019
3020 boolean_t
3021 task_is_app_suspended(task_t task)
3022 {
3023         return task->pidsuspended;
3024 }
3025
3026 /*
3027  *      task_release_locked:
3028  *
3029  *      Release a kernel hold on a task.
3030  *
3031  *      CONDITIONS: the task is locked and active
3032  */
3033 void
3034 task_release_locked(
3035         task_t          task)
3036 {
3037         thread_t        thread;
3038
3039         assert(task->active);
3040         assert(task->suspend_count > 0);
3041
3042         if (--task->suspend_count > 0) {
3043                 return;
3044         }
3045
3046         if (task->bsd_info) {
3047                 workq_proc_resumed(task->bsd_info);
3048         }
3049
3050         queue_iterate(&task->threads, thread, thread_t, task_threads) {
3051                 thread_mtx_lock(thread);
3052                 thread_release(thread);
3053                 thread_mtx_unlock(thread);
3054         }
3055 }
3056
3057 /*
3058  *      task_release:
3059  *
3060  *      Same as the internal routine above, except that it must lock
3061  *      and verify that the task is active.
3062  *
3063  *      CONDITIONS: The caller holds a reference to the task
3064  */
3065 kern_return_t
3066 task_release(
3067         task_t          task)
3068 {
3069         if (task == TASK_NULL) {
3070                 return KERN_INVALID_ARGUMENT;
3071         }
3072
3073         task_lock(task);
3074
3075         if (!task->active) {
3076                 task_unlock(task);
3077
3078                 return KERN_FAILURE;
3079         }
3080
3081         task_release_locked(task);
3082         task_unlock(task);
3083
3084         return KERN_SUCCESS;
3085 }
3086
3087 kern_return_t
3088 task_threads(
3089         task_t                                  task,
3090         thread_act_array_t              *threads_out,
3091         mach_msg_type_number_t  *count)
3092 {
3093         mach_msg_type_number_t  actual;
3094         thread_t                                *thread_list;
3095         thread_t                                thread;
3096         vm_size_t                               size, size_needed;
3097         void                                    *addr;
3098         unsigned int                    i, j;
3099
3100         if (task == TASK_NULL) {
3101                 return KERN_INVALID_ARGUMENT;
3102         }
3103
3104         size = 0; addr = NULL;
3105
3106         for (;;) {
3107                 task_lock(task);
3108                 if (!task->active) {
3109                         task_unlock(task);
3110
3111                         if (size != 0) {
3112                                 kfree(addr, size);
3113                         }
3114
3115                         return KERN_FAILURE;
3116                 }
3117
3118                 actual = task->thread_count;
3119
3120                 /* do we have the memory we need? */
3121                 size_needed = actual * sizeof(mach_port_t);
3122                 if (size_needed <= size) {
3123                         break;
3124                 }
3125
3126                 /* unlock the task and allocate more memory */
3127                 task_unlock(task);
3128
3129                 if (size != 0) {
3130                         kfree(addr, size);
3131                 }
3132
3133                 assert(size_needed > 0);
3134                 size = size_needed;
3135
3136                 addr = kalloc(size);
3137                 if (addr == 0) {
3138                         return KERN_RESOURCE_SHORTAGE;
3139                 }
3140         }
3141
3142         /* OK, have memory and the task is locked & active */
3143         thread_list = (thread_t *)addr;
3144
3145         i = j = 0;
3146
3147         for (thread = (thread_t)queue_first(&task->threads); i < actual;
3148             ++i, thread = (thread_t)queue_next(&thread->task_threads)) {
3149                 thread_reference_internal(thread);
3150                 thread_list[j++] = thread;
3151         }
3152
3153         assert(queue_end(&task->threads, (queue_entry_t)thread));
3154
3155         actual = j;
3156         size_needed = actual * sizeof(mach_port_t);
3157
3158         /* can unlock task now that we've got the thread refs */
3159         task_unlock(task);
3160
3161         if (actual == 0) {
3162                 /* no threads, so return null pointer and deallocate memory */
3163
3164                 *threads_out = NULL;
3165                 *count = 0;
3166
3167                 if (size != 0) {
3168                         kfree(addr, size);
3169                 }
3170         } else {
3171                 /* if we allocated too much, must copy */
3172
3173                 if (size_needed < size) {
3174                         void *newaddr;
3175
3176                         newaddr = kalloc(size_needed);
3177                         if (newaddr == 0) {
3178                                 for (i = 0; i < actual; ++i) {
3179                                         thread_deallocate(thread_list[i]);
3180                                 }
3181                                 kfree(addr, size);
3182                                 return KERN_RESOURCE_SHORTAGE;
3183                         }
3184
3185                         bcopy(addr, newaddr, size_needed);
3186                         kfree(addr, size);
3187                         thread_list = (thread_t *)newaddr;
3188                 }
3189
3190                 *threads_out = thread_list;
3191                 *count = actual;
3192
3193                 /* do the conversion that Mig should handle */
3194
3195                 for (i = 0; i < actual; ++i) {
3196                         ((ipc_port_t *) thread_list)[i] = convert_thread_to_port(thread_list[i]);
3197                 }
3198         }
3199
3200         return KERN_SUCCESS;
3201 }
3202
/*
 * Hold modes passed to place_task_hold() and release_task_hold().
 */
/* plain hold; used by task_suspend_internal() / task_resume_internal() */
#define TASK_HOLD_NORMAL        0
/* pidsuspend hold; release_task_hold() requires and clears task->pidsuspended */
#define TASK_HOLD_PIDSUSPEND    1
/* legacy hold; additionally counted in task->legacy_stop_count */
#define TASK_HOLD_LEGACY        2
/* release-only mode: drop every outstanding legacy hold at once */
#define TASK_HOLD_LEGACY_ALL    3
3207
3208 static kern_return_t
3209 place_task_hold(
3210         task_t task,
3211         int mode)
3212 {
3213         if (!task->active && !task_is_a_corpse(task)) {
3214                 return KERN_FAILURE;
3215         }
3216
3217         /* Return success for corpse task */
3218         if (task_is_a_corpse(task)) {
3219                 return KERN_SUCCESS;
3220         }
3221
3222         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
3223             MACHDBG_CODE(DBG_MACH_IPC, MACH_TASK_SUSPEND) | DBG_FUNC_NONE,
3224             task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
3225             task->user_stop_count, task->user_stop_count + 1, 0);
3226
3227 #if MACH_ASSERT
3228         current_task()->suspends_outstanding++;
3229 #endif
3230
3231         if (mode == TASK_HOLD_LEGACY) {
3232                 task->legacy_stop_count++;
3233         }
3234
3235         if (task->user_stop_count++ > 0) {
3236                 /*
3237                  *      If the stop count was positive, the task is
3238                  *      already stopped and we can exit.
3239                  */
3240                 return KERN_SUCCESS;
3241         }
3242
3243         /*
3244          * Put a kernel-level hold on the threads in the task (all
3245          * user-level task suspensions added together represent a
3246          * single kernel-level hold).  We then wait for the threads
3247          * to stop executing user code.
3248          */
3249         task_hold_locked(task);
3250         task_wait_locked(task, FALSE);
3251
3252         return KERN_SUCCESS;
3253 }
3254
3255 static kern_return_t
3256 release_task_hold(
3257         task_t          task,
3258         int                     mode)
3259 {
3260         boolean_t release = FALSE;
3261
3262         if (!task->active && !task_is_a_corpse(task)) {
3263                 return KERN_FAILURE;
3264         }
3265
3266         /* Return success for corpse task */
3267         if (task_is_a_corpse(task)) {
3268                 return KERN_SUCCESS;
3269         }
3270
3271         if (mode == TASK_HOLD_PIDSUSPEND) {
3272                 if (task->pidsuspended == FALSE) {
3273                         return KERN_FAILURE;
3274                 }
3275                 task->pidsuspended = FALSE;
3276         }
3277
3278         if (task->user_stop_count > (task->pidsuspended ? 1 : 0)) {
3279                 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
3280                     MACHDBG_CODE(DBG_MACH_IPC, MACH_TASK_RESUME) | DBG_FUNC_NONE,
3281                     task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
3282                     task->user_stop_count, mode, task->legacy_stop_count);
3283
3284 #if MACH_ASSERT
3285                 /*
3286                  * This is obviously not robust; if we suspend one task and then resume a different one,
3287                  * we'll fly under the radar. This is only meant to catch the common case of a crashed
3288                  * or buggy suspender.
3289                  */
3290                 current_task()->suspends_outstanding--;
3291 #endif
3292
3293                 if (mode == TASK_HOLD_LEGACY_ALL) {
3294                         if (task->legacy_stop_count >= task->user_stop_count) {
3295                                 task->user_stop_count = 0;
3296                                 release = TRUE;
3297                         } else {
3298                                 task->user_stop_count -= task->legacy_stop_count;
3299                         }
3300                         task->legacy_stop_count = 0;
3301                 } else {
3302                         if (mode == TASK_HOLD_LEGACY && task->legacy_stop_count > 0) {
3303                                 task->legacy_stop_count--;
3304                         }
3305                         if (--task->user_stop_count == 0) {
3306                                 release = TRUE;
3307                         }
3308                 }
3309         } else {
3310                 return KERN_FAILURE;
3311         }
3312
3313         /*
3314          *      Release the task if necessary.
3315          */
3316         if (release) {
3317                 task_release_locked(task);
3318         }
3319
3320         return KERN_SUCCESS;
3321 }
3322
3323 boolean_t
3324 get_task_suspended(task_t task)
3325 {
3326         return 0 != task->user_stop_count;
3327 }
3328
3329 /*
3330  *      task_suspend:
3331  *
3332  *      Implement an (old-fashioned) user-level suspension on a task.
3333  *
3334  *      Because the user isn't expecting to have to manage a suspension
3335  *      token, we'll track it for him in the kernel in the form of a naked
3336  *      send right to the task's resume port.  All such send rights
3337  *      account for a single suspension against the task (unlike task_suspend2()
3338  *      where each caller gets a unique suspension count represented by a
3339  *      unique send-once right).
3340  *
3341  * Conditions:
3342  *      The caller holds a reference to the task
3343  */
3344 kern_return_t
3345 task_suspend(
3346         task_t          task)
3347 {
3348         kern_return_t                   kr;
3349         mach_port_t                     port;
3350         mach_port_name_t                name;
3351
3352         if (task == TASK_NULL || task == kernel_task) {
3353                 return KERN_INVALID_ARGUMENT;
3354         }
3355
3356         task_lock(task);
3357
3358         /*
3359          * place a legacy hold on the task.
3360          */
3361         kr = place_task_hold(task, TASK_HOLD_LEGACY);
3362         if (kr != KERN_SUCCESS) {
3363                 task_unlock(task);
3364                 return kr;
3365         }
3366
3367         /*
3368          * Claim a send right on the task resume port, and request a no-senders
3369          * notification on that port (if none outstanding).
3370          */
3371         (void)ipc_kobject_make_send_lazy_alloc_port(&task->itk_resume,
3372             (ipc_kobject_t)task, IKOT_TASK_RESUME);
3373         port = task->itk_resume;
3374
3375         task_unlock(task);
3376
3377         /*
3378          * Copyout the send right into the calling task's IPC space.  It won't know it is there,
3379          * but we'll look it up when calling a traditional resume.  Any IPC operations that
3380          * deallocate the send right will auto-release the suspension.
3381          */
3382         if ((kr = ipc_kmsg_copyout_object(current_task()->itk_space, ip_to_object(port),
3383             MACH_MSG_TYPE_MOVE_SEND, NULL, NULL, &name)) != KERN_SUCCESS) {
3384                 printf("warning: %s(%d) failed to copyout suspension token for pid %d with error: %d\n",
3385                     proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
3386                     task_pid(task), kr);
3387                 return kr;
3388         }
3389
3390         return kr;
3391 }
3392
3393 /*
3394  *      task_resume:
3395  *              Release a user hold on a task.
3396  *
3397  * Conditions:
3398  *              The caller holds a reference to the task
3399  */
3400 kern_return_t
3401 task_resume(
3402         task_t  task)
3403 {
3404         kern_return_t    kr;
3405         mach_port_name_t resume_port_name;
3406         ipc_entry_t              resume_port_entry;
3407         ipc_space_t              space = current_task()->itk_space;
3408
3409         if (task == TASK_NULL || task == kernel_task) {
3410                 return KERN_INVALID_ARGUMENT;
3411         }
3412
3413         /* release a legacy task hold */
3414         task_lock(task);
3415         kr = release_task_hold(task, TASK_HOLD_LEGACY);
3416         task_unlock(task);
3417
3418         is_write_lock(space);
3419         if (is_active(space) && IP_VALID(task->itk_resume) &&
3420             ipc_hash_lookup(space, ip_to_object(task->itk_resume), &resume_port_name, &resume_port_entry) == TRUE) {
3421                 /*
3422                  * We found a suspension token in the caller's IPC space. Release a send right to indicate that
3423                  * we are holding one less legacy hold on the task from this caller.  If the release failed,
3424                  * go ahead and drop all the rights, as someone either already released our holds or the task
3425                  * is gone.
3426                  */
3427                 if (kr == KERN_SUCCESS) {
3428                         ipc_right_dealloc(space, resume_port_name, resume_port_entry);
3429                 } else {
3430                         ipc_right_destroy(space, resume_port_name, resume_port_entry, FALSE, 0);
3431                 }
3432                 /* space unlocked */
3433         } else {
3434                 is_write_unlock(space);
3435                 if (kr == KERN_SUCCESS) {
3436                         printf("warning: %s(%d) performed out-of-band resume on pid %d\n",
3437                             proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
3438                             task_pid(task));
3439                 }
3440         }
3441
3442         return kr;
3443 }
3444
3445 /*
3446  * Suspend the target task.
3447  * Making/holding a token/reference/port is the callers responsibility.
3448  */
3449 kern_return_t
3450 task_suspend_internal(task_t task)
3451 {
3452         kern_return_t    kr;
3453
3454         if (task == TASK_NULL || task == kernel_task) {
3455                 return KERN_INVALID_ARGUMENT;
3456         }
3457
3458         task_lock(task);
3459         kr = place_task_hold(task, TASK_HOLD_NORMAL);
3460         task_unlock(task);
3461         return kr;
3462 }
3463
3464 /*
3465  * Suspend the target task, and return a suspension token. The token
3466  * represents a reference on the suspended task.
3467  */
3468 kern_return_t
3469 task_suspend2(
3470         task_t                  task,
3471         task_suspension_token_t *suspend_token)
3472 {
3473         kern_return_t    kr;
3474
3475         kr = task_suspend_internal(task);
3476         if (kr != KERN_SUCCESS) {
3477                 *suspend_token = TASK_NULL;
3478                 return kr;
3479         }
3480
3481         /*
3482          * Take a reference on the target task and return that to the caller
3483          * as a "suspension token," which can be converted into an SO right to
3484          * the now-suspended task's resume port.
3485          */
3486         task_reference_internal(task);
3487         *suspend_token = task;
3488
3489         return KERN_SUCCESS;
3490 }
3491
3492 /*
3493  * Resume the task
3494  * (reference/token/port management is caller's responsibility).
3495  */
3496 kern_return_t
3497 task_resume_internal(
3498         task_suspension_token_t         task)
3499 {
3500         kern_return_t kr;
3501
3502         if (task == TASK_NULL || task == kernel_task) {
3503                 return KERN_INVALID_ARGUMENT;
3504         }
3505
3506         task_lock(task);
3507         kr = release_task_hold(task, TASK_HOLD_NORMAL);
3508         task_unlock(task);
3509         return kr;
3510 }
3511
3512 /*
3513  * Resume the task using a suspension token. Consumes the token's ref.
3514  */
3515 kern_return_t
3516 task_resume2(
3517         task_suspension_token_t         task)
3518 {
3519         kern_return_t kr;
3520
3521         kr = task_resume_internal(task);
3522         task_suspension_token_deallocate(task);
3523
3524         return kr;
3525 }
3526
/*
 *      task_suspension_notify:
 *              Handle a notification message received on a task's
 *              suspension token / resume port.
 *
 *      The port is taken from the request header's msgh_remote_port and
 *      converted to a task with convert_port_to_task_suspension_token().
 *
 *      MACH_NOTIFY_SEND_ONCE:
 *              release one TASK_HOLD_NORMAL hold.
 *      MACH_NOTIFY_NO_SENDERS:
 *              if the port's make-send count still equals the count in
 *              the notification, release all legacy holds; otherwise,
 *              if no no-senders request is registered on the port,
 *              re-arm one at the current make-send count.
 *      any other msgh_id:
 *              ignored.
 *
 *      The return values of release_task_hold() are not examined.
 *      Always returns TRUE.
 */
boolean_t
task_suspension_notify(mach_msg_header_t *request_header)
{
        ipc_port_t port = request_header->msgh_remote_port;
        task_t task = convert_port_to_task_suspension_token(port);
        mach_msg_type_number_t not_count;

        /*
         * NOTE(review): when the conversion yields kernel_task this path
         * returns without task_suspension_token_deallocate(); confirm
         * whether the conversion took a reference in that case.
         */
        if (task == TASK_NULL || task == kernel_task) {
                return TRUE;  /* nothing to do */
        }
        switch (request_header->msgh_id) {
        case MACH_NOTIFY_SEND_ONCE:
                /* release the hold held by this specific send-once right */
                task_lock(task);
                release_task_hold(task, TASK_HOLD_NORMAL);
                task_unlock(task);
                break;

        case MACH_NOTIFY_NO_SENDERS:
                /* make-send count at the time the notification was generated */
                not_count = ((mach_no_senders_notification_t *)request_header)->not_count;

                /* task lock is taken before the port lock */
                task_lock(task);
                ip_lock(port);
                if (port->ip_mscount == not_count) {
                        /* release all the [remaining] outstanding legacy holds */
                        assert(port->ip_nsrequest == IP_NULL);
                        ip_unlock(port);
                        release_task_hold(task, TASK_HOLD_LEGACY_ALL);
                        task_unlock(task);
                } else if (port->ip_nsrequest == IP_NULL) {
                        ipc_port_t old_notify;

                        task_unlock(task);
                        /* new send rights, re-arm notification at current make-send count */
                        ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
                        assert(old_notify == IP_NULL);
                        /* port unlocked */
                } else {
                        /* a no-senders request is already registered; nothing to do */
                        ip_unlock(port);
                        task_unlock(task);
                }
                break;

        default:
                break;
        }

        task_suspension_token_deallocate(task); /* drop token reference */
        return TRUE;
}
3577
3578 static kern_return_t
3579 task_pidsuspend_locked(task_t task)
3580 {
3581         kern_return_t kr;
3582
3583         if (task->pidsuspended) {
3584                 kr = KERN_FAILURE;
3585                 goto out;
3586         }
3587
3588         task->pidsuspended = TRUE;
3589
3590         kr = place_task_hold(task, TASK_HOLD_PIDSUSPEND);
3591         if (kr != KERN_SUCCESS) {
3592                 task->pidsuspended = FALSE;
3593         }
3594 out:
3595         return kr;
3596 }
3597
3598
3599 /*
3600  *      task_pidsuspend:
3601  *
3602  *      Suspends a task by placing a hold on its threads.
3603  *
3604  * Conditions:
3605  *      The caller holds a reference to the task
3606  */
3607 kern_return_t
3608 task_pidsuspend(
3609         task_t          task)
3610 {
3611         kern_return_t    kr;
3612
3613         if (task == TASK_NULL || task == kernel_task) {
3614                 return KERN_INVALID_ARGUMENT;
3615         }
3616
3617         task_lock(task);
3618
3619         kr = task_pidsuspend_locked(task);
3620
3621         task_unlock(task);
3622
3623         if ((KERN_SUCCESS == kr) && task->message_app_suspended) {
3624                 iokit_task_app_suspended_changed(task);
3625         }
3626
3627         return kr;
3628 }
3629
3630 /*
3631  *      task_pidresume:
3632  *              Resumes a previously suspended task.
3633  *
3634  * Conditions:
3635  *              The caller holds a reference to the task
3636  */
3637 kern_return_t
3638 task_pidresume(
3639         task_t  task)
3640 {
3641         kern_return_t    kr;
3642
3643         if (task == TASK_NULL || task == kernel_task) {
3644                 return KERN_INVALID_ARGUMENT;
3645         }
3646
3647         task_lock(task);
3648
3649 #if CONFIG_FREEZE
3650
3651         while (task->changing_freeze_state) {
3652                 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
3653                 task_unlock(task);
3654                 thread_block(THREAD_CONTINUE_NULL);
3655
3656                 task_lock(task);
3657         }
3658         task->changing_freeze_state = TRUE;
3659 #endif
3660
3661         kr = release_task_hold(task, TASK_HOLD_PIDSUSPEND);
3662
3663         task_unlock(task);
3664
3665         if ((KERN_SUCCESS == kr) && task->message_app_suspended) {
3666                 iokit_task_app_suspended_changed(task);
3667         }
3668
3669 #if CONFIG_FREEZE
3670
3671         task_lock(task);
3672
3673         if (kr == KERN_SUCCESS) {
3674                 task->frozen = FALSE;
3675         }
3676         task->changing_freeze_state = FALSE;
3677         thread_wakeup(&task->changing_freeze_state);
3678
3679         task_unlock(task);
3680 #endif
3681
3682         return kr;
3683 }
3684
/* Reference group named "task_watchports"; passed to os_ref_init() for tw_refcount. */
os_refgrp_decl(static, task_watchports_refgrp, "task_watchports", NULL);
3686
3687 /*
3688  *      task_add_turnstile_watchports:
3689  *              Setup watchports to boost the main thread of the task.
3690  *
3691  *      Arguments:
3692  *              task: task being spawned
3693  *              thread: main thread of task
3694  *              portwatch_ports: array of watchports
3695  *              portwatch_count: number of watchports
3696  *
3697  *      Conditions:
3698  *              Nothing locked.
3699  */
3700 void
3701 task_add_turnstile_watchports(
3702         task_t          task,
3703         thread_t        thread,
3704         ipc_port_t      *portwatch_ports,
3705         uint32_t        portwatch_count)
3706 {
3707         struct task_watchports *watchports = NULL;
3708         struct task_watchport_elem *previous_elem_array[TASK_MAX_WATCHPORT_COUNT] = {};
3709         os_ref_count_t refs;
3710
3711         /* Check if the task has terminated */
3712         if (!task->active) {
3713                 return;
3714         }
3715
3716         assert(portwatch_count <= TASK_MAX_WATCHPORT_COUNT);
3717
3718         watchports = task_watchports_alloc_init(task, thread, portwatch_count);
3719
3720         /* Lock the ipc space */
3721         is_write_lock(task->itk_space);
3722
3723         /* Setup watchports to boost the main thread */
3724         refs = task_add_turnstile_watchports_locked(task,
3725             watchports, previous_elem_array, portwatch_ports,
3726             portwatch_count);
3727
3728         /* Drop the space lock */
3729         is_write_unlock(task->itk_space);
3730
3731         if (refs == 0) {
3732                 task_watchports_deallocate(watchports);
3733         }
3734
3735         /* Drop the ref on previous_elem_array */
3736         for (uint32_t i = 0; i < portwatch_count && previous_elem_array[i] != NULL; i++) {
3737                 task_watchport_elem_deallocate(previous_elem_array[i]);
3738         }
3739 }
3740
3741 /*
3742  *      task_remove_turnstile_watchports:
3743  *              Clear all turnstile boost on the task from watchports.
3744  *
3745  *      Arguments:
3746  *              task: task being terminated
3747  *
3748  *      Conditions:
3749  *              Nothing locked.
3750  */
3751 void
3752 task_remove_turnstile_watchports(
3753         task_t          task)
3754 {
3755         os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
3756         struct task_watchports *watchports = NULL;
3757         ipc_port_t port_freelist[TASK_MAX_WATCHPORT_COUNT] = {};
3758         uint32_t portwatch_count;
3759
3760         /* Lock the ipc space */
3761         is_write_lock(task->itk_space);
3762
3763         /* Check if watchport boost exist */
3764         if (task->watchports == NULL) {
3765                 is_write_unlock(task->itk_space);
3766                 return;
3767         }
3768         watchports = task->watchports;
3769         portwatch_count = watchports->tw_elem_array_count;
3770
3771         refs = task_remove_turnstile_watchports_locked(task, watchports,
3772             port_freelist);
3773
3774         is_write_unlock(task->itk_space);
3775
3776         /* Drop all the port references */
3777         for (uint32_t i = 0; i < portwatch_count && port_freelist[i] != NULL; i++) {
3778                 ip_release(port_freelist[i]);
3779         }
3780
3781         /* Clear the task and thread references for task_watchport */
3782         if (refs == 0) {
3783                 task_watchports_deallocate(watchports);
3784         }
3785 }
3786
/*
 *      task_transfer_turnstile_watchports:
 *              Transfer all watchport turnstile boost from old task to new task.
 *
 *      A new task_watchports structure sized like the old task's is
 *      allocated with no locks held, then both IPC spaces are locked
 *      (old task first, then new task) and each watchport element is
 *      moved over port by port.  Either structure is deallocated at the
 *      end if its reference count dropped to zero.
 *
 *      Arguments:
 *              old_task: task calling exec
 *              new_task: new exec'ed task
 *              thread: main thread of new task
 *
 *      Conditions:
 *              Nothing locked.
 */
void
task_transfer_turnstile_watchports(
        task_t   old_task,
        task_t   new_task,
        thread_t new_thread)
{
        struct task_watchports *old_watchports = NULL;
        struct task_watchports *new_watchports = NULL;
        os_ref_count_t old_refs = TASK_MAX_WATCHPORT_COUNT;
        os_ref_count_t new_refs = TASK_MAX_WATCHPORT_COUNT;
        uint32_t portwatch_count;

        /* Unlocked early-out; both conditions are re-checked under the locks below */
        if (old_task->watchports == NULL || !new_task->active) {
                return;
        }

        /* Get the watch port count from the old task */
        is_write_lock(old_task->itk_space);
        if (old_task->watchports == NULL) {
                is_write_unlock(old_task->itk_space);
                return;
        }

        portwatch_count = old_task->watchports->tw_elem_array_count;
        is_write_unlock(old_task->itk_space);

        /* Allocate with no locks held (task_watchports_alloc_init: "Nothing locked") */
        new_watchports = task_watchports_alloc_init(new_task, new_thread, portwatch_count);

        /* Lock the ipc space for old task */
        is_write_lock(old_task->itk_space);

        /* Lock the ipc space for new task */
        is_write_lock(new_task->itk_space);

        /* Check if watchport boost exist */
        if (old_task->watchports == NULL || !new_task->active) {
                is_write_unlock(new_task->itk_space);
                is_write_unlock(old_task->itk_space);
                /* Drop the allocation reference, then free the unused structure */
                (void)task_watchports_release(new_watchports);
                task_watchports_deallocate(new_watchports);
                return;
        }

        old_watchports = old_task->watchports;
        assert(portwatch_count == old_task->watchports->tw_elem_array_count);

        /* Setup new task watchports */
        new_task->watchports = new_watchports;

        for (uint32_t i = 0; i < portwatch_count; i++) {
                ipc_port_t port = old_watchports->tw_elem[i].twe_port;

                /* Empty slot in the old array: keep the matching new slot empty */
                if (port == NULL) {
                        task_watchport_elem_clear(&new_watchports->tw_elem[i]);
                        continue;
                }

                /* Lock the port and check if it has the entry */
                ip_lock(port);
                imq_lock(&port->ip_messages);

                task_watchport_elem_init(&new_watchports->tw_elem[i], new_task, port);

                if (ipc_port_replace_watchport_elem_conditional_locked(port,
                    &old_watchports->tw_elem[i], &new_watchports->tw_elem[i]) == KERN_SUCCESS) {
                        /* Element moved: the new struct gains a ref, the old one loses one */
                        task_watchport_elem_clear(&old_watchports->tw_elem[i]);

                        task_watchports_retain(new_watchports);
                        old_refs = task_watchports_release(old_watchports);

                        /* Check if all ports are cleaned */
                        if (old_refs == 0) {
                                old_task->watchports = NULL;
                        }
                } else {
                        /* Replacement refused: leave the old element, empty the new slot */
                        task_watchport_elem_clear(&new_watchports->tw_elem[i]);
                }
                /* mqueue and port unlocked by ipc_port_replace_watchport_elem_conditional_locked */
        }

        /* Drop the reference on new task_watchports struct returned by task_watchports_alloc_init */
        new_refs = task_watchports_release(new_watchports);
        if (new_refs == 0) {
                new_task->watchports = NULL;
        }

        /* Unlock in reverse order of acquisition */
        is_write_unlock(new_task->itk_space);
        is_write_unlock(old_task->itk_space);

        /* Clear the task and thread references for old_watchport */
        if (old_refs == 0) {
                task_watchports_deallocate(old_watchports);
        }

        /* Clear the task and thread references for new_watchport */
        if (new_refs == 0) {
                task_watchports_deallocate(new_watchports);
        }
}
3898
/*
 *      task_add_turnstile_watchports_locked:
 *              Setup watchports to boost the main thread of the task.
 *
 *      If the task is no longer active, only the allocation reference
 *      on watchports is dropped.  Otherwise watchports is attached to
 *      the task and one element is set up per entry of portwatch_ports;
 *      each element added successfully takes a port reference and a
 *      reference on watchports.
 *
 *      Arguments:
 *              task: task to boost
 *              watchports: watchport structure to be attached to the task
 *              previous_elem_array: an array of old watchport_elem to be returned to caller
 *              portwatch_ports: array of watchports
 *              portwatch_count: number of watchports
 *
 *      Returns:
 *              the value returned by the final task_watchports_release()
 *              on watchports; when it is 0 on the active path,
 *              task->watchports has been reset to NULL.
 *
 *      Conditions:
 *              ipc space of the task locked.
 *              returns array of old watchport_elem in previous_elem_array
 */
static os_ref_count_t
task_add_turnstile_watchports_locked(
        task_t                      task,
        struct task_watchports      *watchports,
        struct task_watchport_elem  **previous_elem_array,
        ipc_port_t                  *portwatch_ports,
        uint32_t                    portwatch_count)
{
        os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;

        /* Check if the task is still active */
        if (!task->active) {
                refs = task_watchports_release(watchports);
                return refs;
        }

        assert(task->watchports == NULL);
        task->watchports = watchports;

        /* i walks the input ports; j is the next free slot in previous_elem_array */
        for (uint32_t i = 0, j = 0; i < portwatch_count; i++) {
                ipc_port_t port = portwatch_ports[i];

                task_watchport_elem_init(&watchports->tw_elem[i], task, port);
                if (port == NULL) {
                        task_watchport_elem_clear(&watchports->tw_elem[i]);
                        continue;
                }

                ip_lock(port);
                imq_lock(&port->ip_messages);

                /* Check if port is in valid state to be setup as watchport */
                if (ipc_port_add_watchport_elem_locked(port, &watchports->tw_elem[i],
                    &previous_elem_array[j]) != KERN_SUCCESS) {
                        /* NOTE(review): presumably the callee unlocks the port on failure too; confirm */
                        task_watchport_elem_clear(&watchports->tw_elem[i]);
                        continue;
                }
                /* port and mqueue unlocked on return */

                ip_reference(port);
                task_watchports_retain(watchports);
                /* Advance j only when a previous element was returned, keeping the array packed */
                if (previous_elem_array[j] != NULL) {
                        j++;
                }
        }

        /* Drop the reference on task_watchport struct returned by os_ref_init */
        refs = task_watchports_release(watchports);
        if (refs == 0) {
                task->watchports = NULL;
        }

        return refs;
}
3968
/*
 *      task_remove_turnstile_watchports_locked:
 *              Clear all turnstile boost on the task from watchports.
 *
 *      Arguments:
 *              task: task to remove watchports from
 *              watchports: watchports structure for the task
 *              port_freelist: array of ports returned with ref to caller
 *
 *      Returns:
 *              the value returned by the last task_watchports_release()
 *              performed, or TASK_MAX_WATCHPORT_COUNT if no element was
 *              cleared.  When it is 0, task->watchports has been reset
 *              to NULL.
 *
 *      Conditions:
 *              ipc space of the task locked.
 *              array of ports with refs are returned in port_freelist
 */
static os_ref_count_t
task_remove_turnstile_watchports_locked(
        task_t                 task,
        struct task_watchports *watchports,
        ipc_port_t             *port_freelist)
{
        os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;

        /* i walks the element array; j is the next free slot in port_freelist */
        for (uint32_t i = 0, j = 0; i < watchports->tw_elem_array_count; i++) {
                ipc_port_t port = watchports->tw_elem[i].twe_port;
                if (port == NULL) {
                        continue;
                }

                /* Lock the port and check if it has the entry */
                ip_lock(port);
                imq_lock(&port->ip_messages);
                if (ipc_port_clear_watchport_elem_internal_conditional_locked(port,
                    &watchports->tw_elem[i]) == KERN_SUCCESS) {
                        /* Element cleared: hand the port to the caller and drop the element's ref */
                        task_watchport_elem_clear(&watchports->tw_elem[i]);
                        port_freelist[j++] = port;
                        refs = task_watchports_release(watchports);

                        /* Check if all ports are cleaned */
                        if (refs == 0) {
                                task->watchports = NULL;
                                break;
                        }
                }
                /* mqueue and port unlocked by ipc_port_clear_watchport_elem_internal_conditional_locked */
        }
        return refs;
}
4016
4017 /*
4018  *      task_watchports_alloc_init:
4019  *              Allocate and initialize task watchport struct.
4020  *
4021  *      Conditions:
4022  *              Nothing locked.
4023  */
4024 static struct task_watchports *
4025 task_watchports_alloc_init(
4026         task_t        task,
4027         thread_t      thread,
4028         uint32_t      count)
4029 {
4030         struct task_watchports *watchports = kalloc(sizeof(struct task_watchports) +
4031             count * sizeof(struct task_watchport_elem));
4032
4033         task_reference(task);
4034         thread_reference(thread);
4035         watchports->tw_task = task;
4036         watchports->tw_thread = thread;
4037         watchports->tw_elem_array_count = count;
4038         os_ref_init(&watchports->tw_refcount, &task_watchports_refgrp);
4039
4040         return watchports;
4041 }
4042
4043 /*
4044  *      task_watchports_deallocate:
4045  *              Deallocate task watchport struct.
4046  *
4047  *      Conditions:
4048  *              Nothing locked.
4049  */
4050 static void
4051 task_watchports_deallocate(
4052         struct task_watchports *watchports)
4053 {
4054         uint32_t portwatch_count = watchports->tw_elem_array_count;
4055
4056         task_deallocate(watchports->tw_task);
4057         thread_deallocate(watchports->tw_thread);
4058         kfree(watchports, sizeof(struct task_watchports) + portwatch_count * sizeof(struct task_watchport_elem));
4059 }
4060
4061 /*
4062  *      task_watchport_elem_deallocate:
4063  *              Deallocate task watchport element and release its ref on task_watchport.
4064  *
4065  *      Conditions:
4066  *              Nothing locked.
4067  */
4068 void
4069 task_watchport_elem_deallocate(
4070         struct task_watchport_elem *watchport_elem)
4071 {
4072         os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
4073         task_t task = watchport_elem->twe_task;
4074         struct task_watchports *watchports = NULL;
4075         ipc_port_t port = NULL;
4076
4077         assert(task != NULL);
4078
4079         /* Take the space lock to modify the elememt */
4080         is_write_lock(task->itk_space);
4081
4082         watchports = task->watchports;
4083         assert(watchports != NULL);
4084
4085         port = watchport_elem->twe_port;
4086         assert(port != NULL);
4087
4088         task_watchport_elem_clear(watchport_elem);
4089         refs = task_watchports_release(watchports);
4090
4091         if (refs == 0) {
4092                 task->watchports = NULL;
4093         }
4094
4095         is_write_unlock(task->itk_space);
4096
4097         ip_release(port);
4098         if (refs == 0) {
4099                 task_watchports_deallocate(watchports);
4100         }
4101 }
4102
4103 /*
4104  *      task_has_watchports:
4105  *              Return TRUE if task has watchport boosts.
4106  *
4107  *      Conditions:
4108  *              Nothing locked.
4109  */
4110 boolean_t
4111 task_has_watchports(task_t task)
4112 {
4113         return task->watchports != NULL;
4114 }
4115
4116 #if DEVELOPMENT || DEBUG
4117
4118 extern void IOSleep(int);
4119
4120 kern_return_t
4121 task_disconnect_page_mappings(task_t task)
4122 {
4123         int     n;
4124
4125         if (task == TASK_NULL || task == kernel_task) {
4126                 return KERN_INVALID_ARGUMENT;
4127         }
4128
4129         /*
4130          * this function is used to strip all of the mappings from
4131          * the pmap for the specified task to force the task to
4132          * re-fault all of the pages it is actively using... this
4133          * allows us to approximate the true working set of the
4134          * specified task.  We only engage if at least 1 of the
4135          * threads in the task is runnable, but we want to continuously
4136          * sweep (at least for a while - I've arbitrarily set the limit at
4137          * 100 sweeps to be re-looked at as we gain experience) to get a better
4138          * view into what areas within a page are being visited (as opposed to only
4139          * seeing the first fault of a page after the task becomes
4140          * runnable)...  in the future I may
4141          * try to block until awakened by a thread in this task
4142          * being made runnable, but for now we'll periodically poll from the
4143          * user level debug tool driving the sysctl
4144          */
4145         for (n = 0; n < 100; n++) {
4146                 thread_t        thread;
4147                 boolean_t       runnable;
4148                 boolean_t       do_unnest;
4149                 int             page_count;
4150
4151                 runnable = FALSE;
4152                 do_unnest = FALSE;
4153
4154                 task_lock(task);
4155
4156                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4157                         if (thread->state & TH_RUN) {
4158                                 runnable = TRUE;
4159                                 break;
4160                         }
4161                 }
4162                 if (n == 0) {
4163                         task->task_disconnected_count++;
4164                 }
4165
4166                 if (task->task_unnested == FALSE) {
4167                         if (runnable == TRUE) {
4168                                 task->task_unnested = TRUE;
4169                                 do_unnest = TRUE;
4170                         }
4171                 }
4172                 task_unlock(task);
4173
4174                 if (runnable == FALSE) {
4175                         break;
4176                 }
4177
4178                 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_START,
4179                     task, do_unnest, task->task_disconnected_count, 0, 0);
4180
4181                 page_count = vm_map_disconnect_page_mappings(task->map, do_unnest);
4182
4183                 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_END,
4184                     task, page_count, 0, 0, 0);
4185
4186                 if ((n % 5) == 4) {
4187                         IOSleep(1);
4188                 }
4189         }
4190         return KERN_SUCCESS;
4191 }
4192
4193 #endif
4194
4195
4196 #if CONFIG_FREEZE
4197
4198 /*
4199  *      task_freeze:
4200  *
4201  *      Freeze a task.
4202  *
4203  * Conditions:
4204  *      The caller holds a reference to the task
4205  */
4206 extern void             vm_wake_compactor_swapper(void);
4207 extern queue_head_t     c_swapout_list_head;
4208
4209 kern_return_t
4210 task_freeze(
4211         task_t    task,
4212         uint32_t           *purgeable_count,
4213         uint32_t           *wired_count,
4214         uint32_t           *clean_count,
4215         uint32_t           *dirty_count,
4216         uint32_t           dirty_budget,
4217         uint32_t           *shared_count,
4218         int                *freezer_error_code,
4219         boolean_t          eval_only)
4220 {
4221         kern_return_t kr = KERN_SUCCESS;
4222
4223         if (task == TASK_NULL || task == kernel_task) {
4224                 return KERN_INVALID_ARGUMENT;
4225         }
4226
4227         task_lock(task);
4228
4229         while (task->changing_freeze_state) {
4230                 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
4231                 task_unlock(task);
4232                 thread_block(THREAD_CONTINUE_NULL);
4233
4234                 task_lock(task);
4235         }
4236         if (task->frozen) {
4237                 task_unlock(task);
4238                 return KERN_FAILURE;
4239         }
4240         task->changing_freeze_state = TRUE;
4241
4242         task_unlock(task);
4243
4244         kr = vm_map_freeze(task,
4245             purgeable_count,
4246             wired_count,
4247             clean_count,
4248             dirty_count,
4249             dirty_budget,
4250             shared_count,
4251             freezer_error_code,
4252             eval_only);
4253
4254         task_lock(task);
4255
4256         if ((kr == KERN_SUCCESS) && (eval_only == FALSE)) {
4257                 task->frozen = TRUE;
4258         }
4259
4260         task->changing_freeze_state = FALSE;
4261         thread_wakeup(&task->changing_freeze_state);
4262
4263         task_unlock(task);
4264
4265         if (VM_CONFIG_COMPRESSOR_IS_PRESENT &&
4266             (eval_only == FALSE)) {
4267                 vm_wake_compactor_swapper();
4268                 /*
4269                  * We do an explicit wakeup of the swapout thread here
4270                  * because the compact_and_swap routines don't have
4271                  * knowledge about these kind of "per-task packed c_segs"
4272                  * and so will not be evaluating whether we need to do
4273                  * a wakeup there.
4274                  */
4275                 thread_wakeup((event_t)&c_swapout_list_head);
4276         }
4277
4278         return kr;
4279 }
4280
4281 /*
4282  *      task_thaw:
4283  *
4284  *      Thaw a currently frozen task.
4285  *
4286  * Conditions:
4287  *      The caller holds a reference to the task
4288  */
4289 kern_return_t
4290 task_thaw(
4291         task_t          task)
4292 {
4293         if (task == TASK_NULL || task == kernel_task) {
4294                 return KERN_INVALID_ARGUMENT;
4295         }
4296
4297         task_lock(task);
4298
4299         while (task->changing_freeze_state) {
4300                 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
4301                 task_unlock(task);
4302                 thread_block(THREAD_CONTINUE_NULL);
4303
4304                 task_lock(task);
4305         }
4306         if (!task->frozen) {
4307                 task_unlock(task);
4308                 return KERN_FAILURE;
4309         }
4310         task->frozen = FALSE;
4311
4312         task_unlock(task);
4313
4314         return KERN_SUCCESS;
4315 }
4316
4317 #endif /* CONFIG_FREEZE */
4318
4319 kern_return_t
4320 host_security_set_task_token(
4321         host_security_t  host_security,
4322         task_t           task,
4323         security_token_t sec_token,
4324         audit_token_t    audit_token,
4325         host_priv_t      host_priv)
4326 {
4327         ipc_port_t       host_port;
4328         kern_return_t    kr;
4329
4330         if (task == TASK_NULL) {
4331                 return KERN_INVALID_ARGUMENT;
4332         }
4333
4334         if (host_security == HOST_NULL) {
4335                 return KERN_INVALID_SECURITY;
4336         }
4337
4338         task_lock(task);
4339         task->sec_token = sec_token;
4340         task->audit_token = audit_token;
4341
4342         task_unlock(task);
4343
4344         if (host_priv != HOST_PRIV_NULL) {
4345                 kr = host_get_host_priv_port(host_priv, &host_port);
4346         } else {
4347                 kr = host_get_host_port(host_priv_self(), &host_port);
4348         }
4349         assert(kr == KERN_SUCCESS);
4350         kr = task_set_special_port(task, TASK_HOST_PORT, host_port);
4351         return kr;
4352 }
4353
4354 kern_return_t
4355 task_send_trace_memory(
4356         __unused task_t   target_task,
4357         __unused uint32_t pid,
4358         __unused uint64_t uniqueid)
4359 {
4360         return KERN_INVALID_ARGUMENT;
4361 }
4362
4363 /*
4364  * This routine was added, pretty much exclusively, for registering the
4365  * RPC glue vector for in-kernel short circuited tasks.  Rather than
4366  * removing it completely, I have only disabled that feature (which was
4367  * the only feature at the time).  It just appears that we are going to
4368  * want to add some user data to tasks in the future (i.e. bsd info,
4369  * task names, etc...), so I left it in the formal task interface.
4370  */
4371 kern_return_t
4372 task_set_info(
4373         task_t          task,
4374         task_flavor_t   flavor,
4375         __unused task_info_t    task_info_in,           /* pointer to IN array */
4376         __unused mach_msg_type_number_t task_info_count)
4377 {
4378         if (task == TASK_NULL) {
4379                 return KERN_INVALID_ARGUMENT;
4380         }
4381
4382         switch (flavor) {
4383 #if CONFIG_ATM
4384         case TASK_TRACE_MEMORY_INFO:
4385         {
4386                 if (task_info_count != TASK_TRACE_MEMORY_INFO_COUNT) {
4387                         return KERN_INVALID_ARGUMENT;
4388                 }
4389
4390                 assert(task_info_in != NULL);
4391                 task_trace_memory_info_t mem_info;
4392                 mem_info = (task_trace_memory_info_t) task_info_in;
4393                 kern_return_t kr = atm_register_trace_memory(task,
4394                     mem_info->user_memory_address,
4395                     mem_info->buffer_size);
4396                 return kr;
4397         }
4398
4399 #endif
4400         default:
4401                 return KERN_INVALID_ARGUMENT;
4402         }
4403         return KERN_SUCCESS;
4404 }
4405
4406 int radar_20146450 = 1;
4407 kern_return_t
4408 task_info(
4409         task_t                  task,
4410         task_flavor_t           flavor,
4411         task_info_t             task_info_out,
4412         mach_msg_type_number_t  *task_info_count)
4413 {
4414         kern_return_t error = KERN_SUCCESS;
4415         mach_msg_type_number_t  original_task_info_count;
4416
4417         if (task == TASK_NULL) {
4418                 return KERN_INVALID_ARGUMENT;
4419         }
4420
4421         original_task_info_count = *task_info_count;
4422         task_lock(task);
4423
4424         if ((task != current_task()) && (!task->active)) {
4425                 task_unlock(task);
4426                 return KERN_INVALID_ARGUMENT;
4427         }
4428
4429         switch (flavor) {
4430         case TASK_BASIC_INFO_32:
4431         case TASK_BASIC2_INFO_32:
4432 #if defined(__arm__) || defined(__arm64__)
4433         case TASK_BASIC_INFO_64:
4434 #endif
4435                 {
4436                         task_basic_info_32_t    basic_info;
4437                         vm_map_t                                map;
4438                         clock_sec_t                             secs;
4439                         clock_usec_t                    usecs;
4440
4441                         if (*task_info_count < TASK_BASIC_INFO_32_COUNT) {
4442                                 error = KERN_INVALID_ARGUMENT;
4443                                 break;
4444                         }
4445
4446                         basic_info = (task_basic_info_32_t)task_info_out;
4447
4448                         map = (task == kernel_task)? kernel_map: task->map;
4449                         basic_info->virtual_size = (typeof(basic_info->virtual_size))map->size;
4450                         if (flavor == TASK_BASIC2_INFO_32) {
4451                                 /*
4452                                  * The "BASIC2" flavor gets the maximum resident
4453                                  * size instead of the current resident size...
4454                                  */
4455                                 basic_info->resident_size = pmap_resident_max(map->pmap);
4456                         } else {
4457                                 basic_info->resident_size = pmap_resident_count(map->pmap);
4458                         }
4459                         basic_info->resident_size *= PAGE_SIZE;
4460
4461                         basic_info->policy = ((task != kernel_task)?
4462                             POLICY_TIMESHARE: POLICY_RR);
4463                         basic_info->suspend_count = task->user_stop_count;
4464
4465                         absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
4466                         basic_info->user_time.seconds =
4467                             (typeof(basic_info->user_time.seconds))secs;
4468                         basic_info->user_time.microseconds = usecs;
4469
4470                         absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
4471                         basic_info->system_time.seconds =
4472                             (typeof(basic_info->system_time.seconds))secs;
4473                         basic_info->system_time.microseconds = usecs;
4474
4475                         *task_info_count = TASK_BASIC_INFO_32_COUNT;
4476                         break;
4477                 }
4478
4479 #if defined(__arm__) || defined(__arm64__)
4480         case TASK_BASIC_INFO_64_2:
4481         {
4482                 task_basic_info_64_2_t  basic_info;
4483                 vm_map_t                                map;
4484                 clock_sec_t                             secs;
4485                 clock_usec_t                    usecs;
4486
4487                 if (*task_info_count < TASK_BASIC_INFO_64_2_COUNT) {
4488                         error = KERN_INVALID_ARGUMENT;
4489                         break;
4490                 }
4491
4492                 basic_info = (task_basic_info_64_2_t)task_info_out;
4493
4494                 map = (task == kernel_task)? kernel_map: task->map;
4495                 basic_info->virtual_size  = map->size;
4496                 basic_info->resident_size =
4497                     (mach_vm_size_t)(pmap_resident_count(map->pmap))
4498                     * PAGE_SIZE_64;
4499
4500                 basic_info->policy = ((task != kernel_task)?
4501                     POLICY_TIMESHARE: POLICY_RR);
4502                 basic_info->suspend_count = task->user_stop_count;
4503
4504                 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
4505                 basic_info->user_time.seconds =
4506                     (typeof(basic_info->user_time.seconds))secs;
4507                 basic_info->user_time.microseconds = usecs;
4508
4509                 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
4510                 basic_info->system_time.seconds =
4511                     (typeof(basic_info->system_time.seconds))secs;
4512                 basic_info->system_time.microseconds = usecs;
4513
4514                 *task_info_count = TASK_BASIC_INFO_64_2_COUNT;
4515                 break;
4516         }
4517
4518 #else /* defined(__arm__) || defined(__arm64__) */
4519         case TASK_BASIC_INFO_64:
4520         {
4521                 task_basic_info_64_t    basic_info;
4522                 vm_map_t                                map;
4523                 clock_sec_t                             secs;
4524                 clock_usec_t                    usecs;
4525
4526                 if (*task_info_count < TASK_BASIC_INFO_64_COUNT) {
4527                         error = KERN_INVALID_ARGUMENT;
4528                         break;
4529                 }
4530
4531                 basic_info = (task_basic_info_64_t)task_info_out;
4532
4533                 map = (task == kernel_task)? kernel_map: task->map;
4534                 basic_info->virtual_size  = map->size;
4535                 basic_info->resident_size =
4536                     (mach_vm_size_t)(pmap_resident_count(map->pmap))
4537                     * PAGE_SIZE_64;
4538
4539                 basic_info->policy = ((task != kernel_task)?
4540                     POLICY_TIMESHARE: POLICY_RR);
4541                 basic_info->suspend_count = task->user_stop_count;
4542
4543                 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
4544                 basic_info->user_time.seconds =
4545                     (typeof(basic_info->user_time.seconds))secs;
4546                 basic_info->user_time.microseconds = usecs;
4547
4548                 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
4549                 basic_info->system_time.seconds =
4550                     (typeof(basic_info->system_time.seconds))secs;
4551                 basic_info->system_time.microseconds = usecs;
4552
4553                 *task_info_count = TASK_BASIC_INFO_64_COUNT;
4554                 break;
4555         }
4556 #endif /* defined(__arm__) || defined(__arm64__) */
4557
4558         case MACH_TASK_BASIC_INFO:
4559         {
4560                 mach_task_basic_info_t  basic_info;
4561                 vm_map_t                map;
4562                 clock_sec_t             secs;
4563                 clock_usec_t            usecs;
4564
4565                 if (*task_info_count < MACH_TASK_BASIC_INFO_COUNT) {
4566                         error = KERN_INVALID_ARGUMENT;
4567                         break;
4568                 }
4569
4570                 basic_info = (mach_task_basic_info_t)task_info_out;
4571
4572                 map = (task == kernel_task) ? kernel_map : task->map;
4573
4574                 basic_info->virtual_size  = map->size;
4575
4576                 basic_info->resident_size =
4577                     (mach_vm_size_t)(pmap_resident_count(map->pmap));
4578                 basic_info->resident_size *= PAGE_SIZE_64;
4579
4580                 basic_info->resident_size_max =
4581                     (mach_vm_size_t)(pmap_resident_max(map->pmap));
4582                 basic_info->resident_size_max *= PAGE_SIZE_64;
4583
4584                 basic_info->policy = ((task != kernel_task) ?
4585                     POLICY_TIMESHARE : POLICY_RR);
4586
4587                 basic_info->suspend_count = task->user_stop_count;
4588
4589                 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
4590                 basic_info->user_time.seconds =
4591                     (typeof(basic_info->user_time.seconds))secs;
4592                 basic_info->user_time.microseconds = usecs;
4593
4594                 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
4595                 basic_info->system_time.seconds =
4596                     (typeof(basic_info->system_time.seconds))secs;
4597                 basic_info->system_time.microseconds = usecs;
4598
4599                 *task_info_count = MACH_TASK_BASIC_INFO_COUNT;
4600                 break;
4601         }
4602
4603         case TASK_THREAD_TIMES_INFO:
4604         {
4605                 task_thread_times_info_t        times_info;
4606                 thread_t                                        thread;
4607
4608                 if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) {
4609                         error = KERN_INVALID_ARGUMENT;
4610                         break;
4611                 }
4612
4613                 times_info = (task_thread_times_info_t) task_info_out;
4614                 times_info->user_time.seconds = 0;
4615                 times_info->user_time.microseconds = 0;
4616                 times_info->system_time.seconds = 0;
4617                 times_info->system_time.microseconds = 0;
4618
4619
4620                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4621                         time_value_t    user_time, system_time;
4622
4623                         if (thread->options & TH_OPT_IDLE_THREAD) {
4624                                 continue;
4625                         }
4626
4627                         thread_read_times(thread, &user_time, &system_time, NULL);
4628
4629                         time_value_add(&times_info->user_time, &user_time);
4630                         time_value_add(&times_info->system_time, &system_time);
4631                 }
4632
4633                 *task_info_count = TASK_THREAD_TIMES_INFO_COUNT;
4634                 break;
4635         }
4636
4637         case TASK_ABSOLUTETIME_INFO:
4638         {
4639                 task_absolutetime_info_t        info;
4640                 thread_t                        thread;
4641
4642                 if (*task_info_count < TASK_ABSOLUTETIME_INFO_COUNT) {
4643                         error = KERN_INVALID_ARGUMENT;
4644                         break;
4645                 }
4646
4647                 info = (task_absolutetime_info_t)task_info_out;
4648                 info->threads_user = info->threads_system = 0;
4649
4650
4651                 info->total_user = task->total_user_time;
4652                 info->total_system = task->total_system_time;
4653
4654                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4655                         uint64_t        tval;
4656                         spl_t           x;
4657
4658                         if (thread->options & TH_OPT_IDLE_THREAD) {
4659                                 continue;
4660                         }
4661
4662                         x = splsched();
4663                         thread_lock(thread);
4664
4665                         tval = timer_grab(&thread->user_timer);
4666                         info->threads_user += tval;
4667                         info->total_user += tval;
4668
4669                         tval = timer_grab(&thread->system_timer);
4670                         if (thread->precise_user_kernel_time) {
4671                                 info->threads_system += tval;
4672                                 info->total_system += tval;
4673                         } else {
4674                                 /* system_timer may represent either sys or user */
4675                                 info->threads_user += tval;
4676                                 info->total_user += tval;
4677                         }
4678
4679                         thread_unlock(thread);
4680                         splx(x);
4681                 }
4682
4683
4684                 *task_info_count = TASK_ABSOLUTETIME_INFO_COUNT;
4685                 break;
4686         }
4687
4688         case TASK_DYLD_INFO:
4689         {
4690                 task_dyld_info_t info;
4691
4692                 /*
4693                  * We added the format field to TASK_DYLD_INFO output.  For
4694                  * temporary backward compatibility, accept the fact that
4695                  * clients may ask for the old version - distinquished by the
4696                  * size of the expected result structure.
4697                  */
4698 #define TASK_LEGACY_DYLD_INFO_COUNT \
4699                 offsetof(struct task_dyld_info, all_image_info_format)/sizeof(natural_t)
4700
4701                 if (*task_info_count < TASK_LEGACY_DYLD_INFO_COUNT) {
4702                         error = KERN_INVALID_ARGUMENT;
4703                         break;
4704                 }
4705
4706                 info = (task_dyld_info_t)task_info_out;
4707                 info->all_image_info_addr = task->all_image_info_addr;
4708                 info->all_image_info_size = task->all_image_info_size;
4709
4710                 /* only set format on output for those expecting it */
4711                 if (*task_info_count >= TASK_DYLD_INFO_COUNT) {
4712                         info->all_image_info_format = task_has_64Bit_addr(task) ?
4713                             TASK_DYLD_ALL_IMAGE_INFO_64 :
4714                             TASK_DYLD_ALL_IMAGE_INFO_32;
4715                         *task_info_count = TASK_DYLD_INFO_COUNT;
4716                 } else {
4717                         *task_info_count = TASK_LEGACY_DYLD_INFO_COUNT;
4718                 }
4719                 break;
4720         }
4721
4722         case TASK_EXTMOD_INFO:
4723         {
4724                 task_extmod_info_t info;
4725                 void *p;
4726
4727                 if (*task_info_count < TASK_EXTMOD_INFO_COUNT) {
4728                         error = KERN_INVALID_ARGUMENT;
4729                         break;
4730                 }
4731
4732                 info = (task_extmod_info_t)task_info_out;
4733
4734                 p = get_bsdtask_info(task);
4735                 if (p) {
4736                         proc_getexecutableuuid(p, info->task_uuid, sizeof(info->task_uuid));
4737                 } else {
4738                         bzero(info->task_uuid, sizeof(info->task_uuid));
4739                 }
4740                 info->extmod_statistics = task->extmod_statistics;
4741                 *task_info_count = TASK_EXTMOD_INFO_COUNT;
4742
4743                 break;
4744         }
4745
4746         case TASK_KERNELMEMORY_INFO:
4747         {
4748                 task_kernelmemory_info_t        tkm_info;
4749                 ledger_amount_t                 credit, debit;
4750
4751                 if (*task_info_count < TASK_KERNELMEMORY_INFO_COUNT) {
4752                         error = KERN_INVALID_ARGUMENT;
4753                         break;
4754                 }
4755
4756                 tkm_info = (task_kernelmemory_info_t) task_info_out;
4757                 tkm_info->total_palloc = 0;
4758                 tkm_info->total_pfree = 0;
4759                 tkm_info->total_salloc = 0;
4760                 tkm_info->total_sfree = 0;
4761
4762                 if (task == kernel_task) {
4763                         /*
4764                          * All shared allocs/frees from other tasks count against
4765                          * the kernel private memory usage.  If we are looking up
4766                          * info for the kernel task, gather from everywhere.
4767                          */
4768                         task_unlock(task);
4769
4770                         /* start by accounting for all the terminated tasks against the kernel */
4771                         tkm_info->total_palloc = tasks_tkm_private.alloc + tasks_tkm_shared.alloc;
4772                         tkm_info->total_pfree = tasks_tkm_private.free + tasks_tkm_shared.free;
4773
4774                         /* count all other task/thread shared alloc/free against the kernel */
4775                         lck_mtx_lock(&tasks_threads_lock);
4776
4777                         /* XXX this really shouldn't be using the function parameter 'task' as a local var! */
4778                         queue_iterate(&tasks, task, task_t, tasks) {
4779                                 if (task == kernel_task) {
4780                                         if (ledger_get_entries(task->ledger,
4781                                             task_ledgers.tkm_private, &credit,
4782                                             &debit) == KERN_SUCCESS) {
4783                                                 tkm_info->total_palloc += credit;
4784                                                 tkm_info->total_pfree += debit;
4785                                         }
4786                                 }
4787                                 if (!ledger_get_entries(task->ledger,
4788                                     task_ledgers.tkm_shared, &credit, &debit)) {
4789                                         tkm_info->total_palloc += credit;
4790                                         tkm_info->total_pfree += debit;
4791                                 }
4792                         }
4793                         lck_mtx_unlock(&tasks_threads_lock);
4794                 } else {
4795                         if (!ledger_get_entries(task->ledger,
4796                             task_ledgers.tkm_private, &credit, &debit)) {
4797                                 tkm_info->total_palloc = credit;
4798                                 tkm_info->total_pfree = debit;
4799                         }
4800                         if (!ledger_get_entries(task->ledger,
4801                             task_ledgers.tkm_shared, &credit, &debit)) {
4802                                 tkm_info->total_salloc = credit;
4803                                 tkm_info->total_sfree = debit;
4804                         }
4805                         task_unlock(task);
4806                 }
4807
4808                 *task_info_count = TASK_KERNELMEMORY_INFO_COUNT;
4809                 return KERN_SUCCESS;
4810         }
4811
4812         /* OBSOLETE */
4813         case TASK_SCHED_FIFO_INFO:
4814         {
4815                 if (*task_info_count < POLICY_FIFO_BASE_COUNT) {
4816                         error = KERN_INVALID_ARGUMENT;
4817                         break;
4818                 }
4819
4820                 error = KERN_INVALID_POLICY;
4821                 break;
4822         }
4823
4824         /* OBSOLETE */
4825         case TASK_SCHED_RR_INFO:
4826         {
4827                 policy_rr_base_t        rr_base;
4828                 uint32_t quantum_time;
4829                 uint64_t quantum_ns;
4830
4831                 if (*task_info_count < POLICY_RR_BASE_COUNT) {
4832                         error = KERN_INVALID_ARGUMENT;
4833                         break;
4834                 }
4835
4836                 rr_base = (policy_rr_base_t) task_info_out;
4837
4838                 if (task != kernel_task) {
4839                         error = KERN_INVALID_POLICY;
4840                         break;
4841                 }
4842
4843                 rr_base->base_priority = task->priority;
4844
4845                 quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
4846                 absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
4847
4848                 rr_base->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
4849
4850                 *task_info_count = POLICY_RR_BASE_COUNT;
4851                 break;
4852         }
4853
4854         /* OBSOLETE */
4855         case TASK_SCHED_TIMESHARE_INFO:
4856         {
4857                 policy_timeshare_base_t ts_base;
4858
4859                 if (*task_info_count < POLICY_TIMESHARE_BASE_COUNT) {
4860                         error = KERN_INVALID_ARGUMENT;
4861                         break;
4862                 }
4863
4864                 ts_base = (policy_timeshare_base_t) task_info_out;
4865
4866                 if (task == kernel_task) {
4867                         error = KERN_INVALID_POLICY;
4868                         break;
4869                 }
4870
4871                 ts_base->base_priority = task->priority;
4872
4873                 *task_info_count = POLICY_TIMESHARE_BASE_COUNT;
4874                 break;
4875         }
4876
4877         case TASK_SECURITY_TOKEN:
4878         {
4879                 security_token_t        *sec_token_p;
4880
4881                 if (*task_info_count < TASK_SECURITY_TOKEN_COUNT) {
4882                         error = KERN_INVALID_ARGUMENT;
4883                         break;
4884                 }
4885
4886                 sec_token_p = (security_token_t *) task_info_out;
4887
4888                 *sec_token_p = task->sec_token;
4889
4890                 *task_info_count = TASK_SECURITY_TOKEN_COUNT;
4891                 break;
4892         }
4893
4894         case TASK_AUDIT_TOKEN:
4895         {
4896                 audit_token_t   *audit_token_p;
4897
4898                 if (*task_info_count < TASK_AUDIT_TOKEN_COUNT) {
4899                         error = KERN_INVALID_ARGUMENT;
4900                         break;
4901                 }
4902
4903                 audit_token_p = (audit_token_t *) task_info_out;
4904
4905                 *audit_token_p = task->audit_token;
4906
4907                 *task_info_count = TASK_AUDIT_TOKEN_COUNT;
4908                 break;
4909         }
4910
4911         case TASK_SCHED_INFO:
4912                 error = KERN_INVALID_ARGUMENT;
4913                 break;
4914
4915         case TASK_EVENTS_INFO:
4916         {
4917                 task_events_info_t      events_info;
4918                 thread_t                        thread;
4919
4920                 if (*task_info_count < TASK_EVENTS_INFO_COUNT) {
4921                         error = KERN_INVALID_ARGUMENT;
4922                         break;
4923                 }
4924
4925                 events_info = (task_events_info_t) task_info_out;
4926
4927
4928                 events_info->faults = task->faults;
4929                 events_info->pageins = task->pageins;
4930                 events_info->cow_faults = task->cow_faults;
4931                 events_info->messages_sent = task->messages_sent;
4932                 events_info->messages_received = task->messages_received;
4933                 events_info->syscalls_mach = task->syscalls_mach;
4934                 events_info->syscalls_unix = task->syscalls_unix;
4935
4936                 events_info->csw = task->c_switch;
4937
4938                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4939                         events_info->csw           += thread->c_switch;
4940                         events_info->syscalls_mach += thread->syscalls_mach;
4941                         events_info->syscalls_unix += thread->syscalls_unix;
4942                 }
4943
4944
4945                 *task_info_count = TASK_EVENTS_INFO_COUNT;
4946                 break;
4947         }
4948         case TASK_AFFINITY_TAG_INFO:
4949         {
4950                 if (*task_info_count < TASK_AFFINITY_TAG_INFO_COUNT) {
4951                         error = KERN_INVALID_ARGUMENT;
4952                         break;
4953                 }
4954
4955                 error = task_affinity_info(task, task_info_out, task_info_count);
4956                 break;
4957         }
4958         case TASK_POWER_INFO:
4959         {
4960                 if (*task_info_count < TASK_POWER_INFO_COUNT) {
4961                         error = KERN_INVALID_ARGUMENT;
4962                         break;
4963                 }
4964
4965                 task_power_info_locked(task, (task_power_info_t)task_info_out, NULL, NULL, NULL);
4966                 break;
4967         }
4968
4969         case TASK_POWER_INFO_V2:
4970         {
4971                 if (*task_info_count < TASK_POWER_INFO_V2_COUNT_OLD) {
4972                         error = KERN_INVALID_ARGUMENT;
4973                         break;
4974                 }
4975                 task_power_info_v2_t tpiv2 = (task_power_info_v2_t) task_info_out;
4976                 task_power_info_locked(task, &tpiv2->cpu_energy, &tpiv2->gpu_energy, tpiv2, NULL);
4977                 break;
4978         }
4979
4980         case TASK_VM_INFO:
4981         case TASK_VM_INFO_PURGEABLE:
4982         {
4983                 task_vm_info_t          vm_info;
4984                 vm_map_t                map;
4985
4986 #if __arm64__
4987                 struct proc *p;
4988                 uint32_t platform, sdk;
4989                 p = current_proc();
4990                 platform = proc_platform(p);
4991                 sdk = proc_sdk(p);
4992                 if (original_task_info_count > TASK_VM_INFO_REV2_COUNT &&
4993                     platform == PLATFORM_IOS &&
4994                     sdk != 0 &&
4995                     (sdk >> 16) <= 12) {
4996                         /*
4997                          * Some iOS apps pass an incorrect value for
4998                          * task_info_count, expressed in number of bytes
4999                          * instead of number of "natural_t" elements.
5000                          * For the sake of backwards binary compatibility
5001                          * for apps built with an iOS12 or older SDK and using
5002                          * the "rev2" data structure, let's fix task_info_count
5003                          * for them, to avoid stomping past the actual end
5004                          * of their buffer.
5005                          */
5006 #if DEVELOPMENT || DEBUG
5007                         printf("%s:%d %d[%s] rdar://49484582 task_info_count %d -> %d platform %d sdk %d.%d.%d\n", __FUNCTION__, __LINE__, proc_pid(p), proc_name_address(p), original_task_info_count, TASK_VM_INFO_REV2_COUNT, platform, (sdk >> 16), ((sdk >> 8) & 0xff), (sdk & 0xff));
5008 #endif /* DEVELOPMENT || DEBUG */
5009                         DTRACE_VM4(workaround_task_vm_info_count,
5010                             mach_msg_type_number_t, original_task_info_count,
5011                             mach_msg_type_number_t, TASK_VM_INFO_REV2_COUNT,
5012                             uint32_t, platform,
5013                             uint32_t, sdk);
5014                         original_task_info_count = TASK_VM_INFO_REV2_COUNT;
5015                         *task_info_count = original_task_info_count;
5016                 }
5017 #endif /* __arm64__ */
5018
5019                 if (*task_info_count < TASK_VM_INFO_REV0_COUNT) {
5020                         error = KERN_INVALID_ARGUMENT;
5021                         break;
5022                 }
5023
5024                 vm_info = (task_vm_info_t)task_info_out;
5025
5026                 if (task == kernel_task) {
5027                         map = kernel_map;
5028                         /* no lock */
5029                 } else {
5030                         map = task->map;
5031                         vm_map_lock_read(map);
5032                 }
5033
5034                 vm_info->virtual_size = (typeof(vm_info->virtual_size))map->size;
5035                 vm_info->region_count = map->hdr.nentries;
5036                 vm_info->page_size = vm_map_page_size(map);
5037
5038                 vm_info->resident_size = pmap_resident_count(map->pmap);
5039                 vm_info->resident_size *= PAGE_SIZE;
5040                 vm_info->resident_size_peak = pmap_resident_max(map->pmap);
5041                 vm_info->resident_size_peak *= PAGE_SIZE;
5042
5043 #define _VM_INFO(_name) \
5044         vm_info->_name = ((mach_vm_size_t) map->pmap->stats._name) * PAGE_SIZE
5045
5046                 _VM_INFO(device);
5047                 _VM_INFO(device_peak);
5048                 _VM_INFO(external);
5049                 _VM_INFO(external_peak);
5050                 _VM_INFO(internal);
5051                 _VM_INFO(internal_peak);
5052                 _VM_INFO(reusable);
5053                 _VM_INFO(reusable_peak);
5054                 _VM_INFO(compressed);
5055                 _VM_INFO(compressed_peak);
5056                 _VM_INFO(compressed_lifetime);
5057
5058                 vm_info->purgeable_volatile_pmap = 0;
5059                 vm_info->purgeable_volatile_resident = 0;
5060                 vm_info->purgeable_volatile_virtual = 0;
5061                 if (task == kernel_task) {
5062                         /*
5063                          * We do not maintain the detailed stats for the
5064                          * kernel_pmap, so just count everything as
5065                          * "internal"...
5066                          */
5067                         vm_info->internal = vm_info->resident_size;
5068                         /*
5069                          * ... but since the memory held by the VM compressor
5070                          * in the kernel address space ought to be attributed
5071                          * to user-space tasks, we subtract it from "internal"
5072                          * to give memory reporting tools a more accurate idea
5073                          * of what the kernel itself is actually using, instead
5074                          * of making it look like the kernel is leaking memory
5075                          * when the system is under memory pressure.
5076                          */
5077                         vm_info->internal -= (VM_PAGE_COMPRESSOR_COUNT *
5078                             PAGE_SIZE);
5079                 } else {
5080                         mach_vm_size_t  volatile_virtual_size;
5081                         mach_vm_size_t  volatile_resident_size;
5082                         mach_vm_size_t  volatile_compressed_size;
5083                         mach_vm_size_t  volatile_pmap_size;
5084                         mach_vm_size_t  volatile_compressed_pmap_size;
5085                         kern_return_t   kr;
5086
5087                         if (flavor == TASK_VM_INFO_PURGEABLE) {
5088                                 kr = vm_map_query_volatile(
5089                                         map,
5090                                         &volatile_virtual_size,
5091                                         &volatile_resident_size,
5092                                         &volatile_compressed_size,
5093                                         &volatile_pmap_size,
5094                                         &volatile_compressed_pmap_size);
5095                                 if (kr == KERN_SUCCESS) {
5096                                         vm_info->purgeable_volatile_pmap =
5097                                             volatile_pmap_size;
5098                                         if (radar_20146450) {
5099                                                 vm_info->compressed -=
5100                                                     volatile_compressed_pmap_size;
5101                                         }
5102                                         vm_info->purgeable_volatile_resident =
5103                                             volatile_resident_size;
5104                                         vm_info->purgeable_volatile_virtual =
5105                                             volatile_virtual_size;
5106                                 }
5107                         }
5108                 }
5109                 *task_info_count = TASK_VM_INFO_REV0_COUNT;
5110
5111                 if (original_task_info_count >= TASK_VM_INFO_REV1_COUNT) {
5112                         vm_info->phys_footprint =
5113                             (mach_vm_size_t) get_task_phys_footprint(task);
5114                         *task_info_count = TASK_VM_INFO_REV1_COUNT;
5115                 }
5116                 if (original_task_info_count >= TASK_VM_INFO_REV2_COUNT) {
5117                         vm_info->min_address = map->min_offset;
5118                         vm_info->max_address = map->max_offset;
5119                         *task_info_count = TASK_VM_INFO_REV2_COUNT;
5120                 }
5121                 if (original_task_info_count >= TASK_VM_INFO_REV3_COUNT) {
5122                         ledger_get_lifetime_max(task->ledger,
5123                             task_ledgers.phys_footprint,
5124                             &vm_info->ledger_phys_footprint_peak);
5125                         ledger_get_balance(task->ledger,
5126                             task_ledgers.purgeable_nonvolatile,
5127                             &vm_info->ledger_purgeable_nonvolatile);
5128                         ledger_get_balance(task->ledger,
5129                             task_ledgers.purgeable_nonvolatile_compressed,
5130                             &vm_info->ledger_purgeable_novolatile_compressed);
5131                         ledger_get_balance(task->ledger,
5132                             task_ledgers.purgeable_volatile,
5133                             &vm_info->ledger_purgeable_volatile);
5134                         ledger_get_balance(task->ledger,
5135                             task_ledgers.purgeable_volatile_compressed,
5136                             &vm_info->ledger_purgeable_volatile_compressed);
5137                         ledger_get_balance(task->ledger,
5138                             task_ledgers.network_nonvolatile,
5139                             &vm_info->ledger_tag_network_nonvolatile);
5140                         ledger_get_balance(task->ledger,
5141                             task_ledgers.network_nonvolatile_compressed,
5142                             &vm_info->ledger_tag_network_nonvolatile_compressed);
5143                         ledger_get_balance(task->ledger,
5144                             task_ledgers.network_volatile,
5145                             &vm_info->ledger_tag_network_volatile);
5146                         ledger_get_balance(task->ledger,
5147                             task_ledgers.network_volatile_compressed,
5148                             &vm_info->ledger_tag_network_volatile_compressed);
5149                         ledger_get_balance(task->ledger,
5150                             task_ledgers.media_footprint,
5151                             &vm_info->ledger_tag_media_footprint);
5152                         ledger_get_balance(task->ledger,
5153                             task_ledgers.media_footprint_compressed,
5154                             &vm_info->ledger_tag_media_footprint_compressed);
5155                         ledger_get_balance(task->ledger,
5156                             task_ledgers.media_nofootprint,
5157                             &vm_info->ledger_tag_media_nofootprint);
5158                         ledger_get_balance(task->ledger,
5159                             task_ledgers.media_nofootprint_compressed,
5160                             &vm_info->ledger_tag_media_nofootprint_compressed);
5161                         ledger_get_balance(task->ledger,
5162                             task_ledgers.graphics_footprint,
5163                             &vm_info->ledger_tag_graphics_footprint);
5164                         ledger_get_balance(task->ledger,
5165                             task_ledgers.graphics_footprint_compressed,
5166                             &vm_info->ledger_tag_graphics_footprint_compressed);
5167                         ledger_get_balance(task->ledger,
5168                             task_ledgers.graphics_nofootprint,
5169                             &vm_info->ledger_tag_graphics_nofootprint);
5170                         ledger_get_balance(task->ledger,
5171                             task_ledgers.graphics_nofootprint_compressed,
5172                             &vm_info->ledger_tag_graphics_nofootprint_compressed);
5173                         ledger_get_balance(task->ledger,
5174                             task_ledgers.neural_footprint,
5175                             &vm_info->ledger_tag_neural_footprint);
5176                         ledger_get_balance(task->ledger,
5177                             task_ledgers.neural_footprint_compressed,
5178                             &vm_info->ledger_tag_neural_footprint_compressed);
5179                         ledger_get_balance(task->ledger,
5180                             task_ledgers.neural_nofootprint,
5181                             &vm_info->ledger_tag_neural_nofootprint);
5182                         ledger_get_balance(task->ledger,
5183                             task_ledgers.neural_nofootprint_compressed,
5184                             &vm_info->ledger_tag_neural_nofootprint_compressed);
5185                         *task_info_count = TASK_VM_INFO_REV3_COUNT;
5186                 }
5187                 if (original_task_info_count >= TASK_VM_INFO_REV4_COUNT) {
5188                         if (task->bsd_info) {
5189                                 vm_info->limit_bytes_remaining =
5190                                     memorystatus_available_memory_internal(task->bsd_info);
5191                         } else {
5192                                 vm_info->limit_bytes_remaining = 0;
5193                         }
5194                         *task_info_count = TASK_VM_INFO_REV4_COUNT;
5195                 }
5196                 if (original_task_info_count >= TASK_VM_INFO_REV5_COUNT) {
5197                         thread_t thread;
5198                         integer_t total = task->decompressions;
5199                         queue_iterate(&task->threads, thread, thread_t, task_threads) {
5200                                 total += thread->decompressions;
5201                         }
5202                         vm_info->decompressions = total;
5203                         *task_info_count = TASK_VM_INFO_REV5_COUNT;
5204                 }
5205
5206                 if (task != kernel_task) {
5207                         vm_map_unlock_read(map);
5208                 }
5209
5210                 break;
5211         }
5212
5213         case TASK_WAIT_STATE_INFO:
5214         {
5215                 /*
5216                  * Deprecated flavor. Currently allowing some results until all users
5217                  * stop calling it. The results may not be accurate.
5218                  */
5219                 task_wait_state_info_t  wait_state_info;
5220                 uint64_t total_sfi_ledger_val = 0;
5221
5222                 if (*task_info_count < TASK_WAIT_STATE_INFO_COUNT) {
5223                         error = KERN_INVALID_ARGUMENT;
5224                         break;
5225                 }
5226
5227                 wait_state_info = (task_wait_state_info_t) task_info_out;
5228
5229                 wait_state_info->total_wait_state_time = 0;
5230                 bzero(wait_state_info->_reserved, sizeof(wait_state_info->_reserved));
5231
5232 #if CONFIG_SCHED_SFI
5233                 int i, prev_lentry = -1;
5234                 int64_t  val_credit, val_debit;
5235
5236                 for (i = 0; i < MAX_SFI_CLASS_ID; i++) {
5237                         val_credit = 0;
5238                         /*
5239                          * checking with prev_lentry != entry ensures adjacent classes
5240                          * which share the same ledger do not add wait times twice.
5241                          * Note: Use ledger() call to get data for each individual sfi class.
5242                          */
5243                         if (prev_lentry != task_ledgers.sfi_wait_times[i] &&
5244                             KERN_SUCCESS == ledger_get_entries(task->ledger,
5245                             task_ledgers.sfi_wait_times[i], &val_credit, &val_debit)) {
5246                                 total_sfi_ledger_val += val_credit;
5247                         }
5248                         prev_lentry = task_ledgers.sfi_wait_times[i];
5249                 }
5250
5251 #endif /* CONFIG_SCHED_SFI */
5252                 wait_state_info->total_wait_sfi_state_time = total_sfi_ledger_val;
5253                 *task_info_count = TASK_WAIT_STATE_INFO_COUNT;
5254
5255                 break;
5256         }
5257         case TASK_VM_INFO_PURGEABLE_ACCOUNT:
5258         {
5259 #if DEVELOPMENT || DEBUG
5260                 pvm_account_info_t      acnt_info;
5261
5262                 if (*task_info_count < PVM_ACCOUNT_INFO_COUNT) {
5263                         error = KERN_INVALID_ARGUMENT;
5264                         break;
5265                 }
5266
5267                 if (task_info_out == NULL) {
5268                         error = KERN_INVALID_ARGUMENT;
5269                         break;
5270                 }
5271
5272                 acnt_info = (pvm_account_info_t) task_info_out;
5273
5274                 error = vm_purgeable_account(task, acnt_info);
5275
5276                 *task_info_count = PVM_ACCOUNT_INFO_COUNT;
5277
5278                 break;
5279 #else /* DEVELOPMENT || DEBUG */
5280                 error = KERN_NOT_SUPPORTED;
5281                 break;
5282 #endif /* DEVELOPMENT || DEBUG */
5283         }
5284         case TASK_FLAGS_INFO:
5285         {
5286                 task_flags_info_t               flags_info;
5287
5288                 if (*task_info_count < TASK_FLAGS_INFO_COUNT) {
5289                         error = KERN_INVALID_ARGUMENT;
5290                         break;
5291                 }
5292
5293                 flags_info = (task_flags_info_t)task_info_out;
5294
5295                 /* only publish the 64-bit flag of the task */
5296                 flags_info->flags = task->t_flags & (TF_64B_ADDR | TF_64B_DATA);
5297
5298                 *task_info_count = TASK_FLAGS_INFO_COUNT;
5299                 break;
5300         }
5301
5302         case TASK_DEBUG_INFO_INTERNAL:
5303         {
5304 #if DEVELOPMENT || DEBUG
5305                 task_debug_info_internal_t dbg_info;
5306                 ipc_space_t space = task->itk_space;
5307                 if (*task_info_count < TASK_DEBUG_INFO_INTERNAL_COUNT) {
5308                         error = KERN_NOT_SUPPORTED;
5309                         break;
5310                 }
5311
5312                 if (task_info_out == NULL) {
5313                         error = KERN_INVALID_ARGUMENT;
5314                         break;
5315                 }
5316                 dbg_info = (task_debug_info_internal_t) task_info_out;
5317                 dbg_info->ipc_space_size = 0;
5318
5319                 if (space) {
5320                         is_read_lock(space);
5321                         dbg_info->ipc_space_size = space->is_table_size;
5322                         is_read_unlock(space);
5323                 }
5324
5325                 dbg_info->suspend_count = task->suspend_count;
5326
5327                 error = KERN_SUCCESS;
5328                 *task_info_count = TASK_DEBUG_INFO_INTERNAL_COUNT;
5329                 break;
5330 #else /* DEVELOPMENT || DEBUG */
5331                 error = KERN_NOT_SUPPORTED;
5332                 break;
5333 #endif /* DEVELOPMENT || DEBUG */
5334         }
5335         default:
5336                 error = KERN_INVALID_ARGUMENT;
5337         }
5338
5339         task_unlock(task);
5340         return error;
5341 }
5342
5343 /*
5344  * task_info_from_user
5345  *
5346  * When calling task_info from user space,
5347  * this function will be executed as mig server side
5348  * instead of calling directly into task_info.
5349  * This gives the possibility to perform more security
5350  * checks on task_port.
5351  *
5352  * In the case of TASK_DYLD_INFO, we require the more
5353  * privileged task_port not the less-privileged task_name_port.
5354  *
5355  */
5356 kern_return_t
5357 task_info_from_user(
5358         mach_port_t             task_port,
5359         task_flavor_t           flavor,
5360         task_info_t             task_info_out,
5361         mach_msg_type_number_t  *task_info_count)
5362 {
5363         task_t task;
5364         kern_return_t ret;
5365
5366         if (flavor == TASK_DYLD_INFO) {
5367                 task = convert_port_to_task(task_port);
5368         } else {
5369                 task = convert_port_to_task_name(task_port);
5370         }
5371
5372         ret = task_info(task, flavor, task_info_out, task_info_count);
5373
5374         task_deallocate(task);
5375
5376         return ret;
5377 }
5378
5379 /*
5380  *      task_power_info
5381  *
5382  *      Returns power stats for the task.
5383  *      Note: Called with task locked.
5384  */
5385 void
5386 task_power_info_locked(
5387         task_t                  task,
5388         task_power_info_t       info,
5389         gpu_energy_data_t       ginfo,
5390         task_power_info_v2_t    infov2,
5391         uint64_t                *runnable_time)
5392 {
5393         thread_t                thread;
5394         ledger_amount_t         tmp;
5395
5396         uint64_t                runnable_time_sum = 0;
5397
5398         task_lock_assert_owned(task);
5399
5400         ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
5401             (ledger_amount_t *)&info->task_interrupt_wakeups, &tmp);
5402         ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
5403             (ledger_amount_t *)&info->task_platform_idle_wakeups, &tmp);
5404
5405         info->task_timer_wakeups_bin_1 = task->task_timer_wakeups_bin_1;
5406         info->task_timer_wakeups_bin_2 = task->task_timer_wakeups_bin_2;
5407
5408         info->total_user = task->total_user_time;
5409         info->total_system = task->total_system_time;
5410         runnable_time_sum = task->total_runnable_time;
5411
5412 #if CONFIG_EMBEDDED
5413         if (infov2) {
5414                 infov2->task_energy = task->task_energy;
5415         }
5416 #endif
5417
5418         if (ginfo) {
5419                 ginfo->task_gpu_utilisation = task->task_gpu_ns;
5420         }
5421
5422         if (infov2) {
5423                 infov2->task_ptime = task->total_ptime;
5424                 infov2->task_pset_switches = task->ps_switch;
5425         }
5426
5427         queue_iterate(&task->threads, thread, thread_t, task_threads) {
5428                 uint64_t        tval;
5429                 spl_t           x;
5430
5431                 if (thread->options & TH_OPT_IDLE_THREAD) {
5432                         continue;
5433                 }
5434
5435                 x = splsched();
5436                 thread_lock(thread);
5437
5438                 info->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
5439                 info->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
5440
5441 #if CONFIG_EMBEDDED
5442                 if (infov2) {
5443                         infov2->task_energy += ml_energy_stat(thread);
5444                 }
5445 #endif
5446
5447                 tval = timer_grab(&thread->user_timer);
5448                 info->total_user += tval;
5449
5450                 if (infov2) {
5451                         tval = timer_grab(&thread->ptime);
5452                         infov2->task_ptime += tval;
5453                         infov2->task_pset_switches += thread->ps_switch;
5454                 }
5455
5456                 tval = timer_grab(&thread->system_timer);
5457                 if (thread->precise_user_kernel_time) {
5458                         info->total_system += tval;
5459                 } else {
5460                         /* system_timer may represent either sys or user */
5461                         info->total_user += tval;
5462                 }
5463
5464                 tval = timer_grab(&thread->runnable_timer);
5465
5466                 runnable_time_sum += tval;
5467
5468                 if (ginfo) {
5469                         ginfo->task_gpu_utilisation += ml_gpu_stat(thread);
5470                 }
5471                 thread_unlock(thread);
5472                 splx(x);
5473         }
5474
5475         if (runnable_time) {
5476                 *runnable_time = runnable_time_sum;
5477         }
5478 }
5479
5480 /*
5481  *      task_gpu_utilisation
5482  *
5483  *      Returns the total gpu time used by the all the threads of the task
5484  *  (both dead and alive)
5485  */
5486 uint64_t
5487 task_gpu_utilisation(
5488         task_t  task)
5489 {
5490         uint64_t gpu_time = 0;
5491 #if !CONFIG_EMBEDDED
5492         thread_t thread;
5493
5494         task_lock(task);
5495         gpu_time += task->task_gpu_ns;
5496
5497         queue_iterate(&task->threads, thread, thread_t, task_threads) {
5498                 spl_t x;
5499                 x = splsched();
5500                 thread_lock(thread);
5501                 gpu_time += ml_gpu_stat(thread);
5502                 thread_unlock(thread);
5503                 splx(x);
5504         }
5505
5506         task_unlock(task);
5507 #else /* CONFIG_EMBEDDED */
5508         /* silence compiler warning */
5509         (void)task;
5510 #endif /* !CONFIG_EMBEDDED */
5511         return gpu_time;
5512 }
5513
5514 /*
5515  *      task_energy
5516  *
5517  *      Returns the total energy used by the all the threads of the task
5518  *  (both dead and alive)
5519  */
5520 uint64_t
5521 task_energy(
5522         task_t  task)
5523 {
5524         uint64_t energy = 0;
5525         thread_t thread;
5526
5527         task_lock(task);
5528         energy += task->task_energy;
5529
5530         queue_iterate(&task->threads, thread, thread_t, task_threads) {
5531                 spl_t x;
5532                 x = splsched();
5533                 thread_lock(thread);
5534                 energy += ml_energy_stat(thread);
5535                 thread_unlock(thread);
5536                 splx(x);
5537         }
5538
5539         task_unlock(task);
5540         return energy;
5541 }
5542
5543
5544 uint64_t
5545 task_cpu_ptime(
5546         __unused task_t  task)
5547 {
5548         return 0;
5549 }
5550
5551
5552 /* This function updates the cpu time in the arrays for each
5553  * effective and requested QoS class
5554  */
5555 void
5556 task_update_cpu_time_qos_stats(
5557         task_t  task,
5558         uint64_t *eqos_stats,
5559         uint64_t *rqos_stats)
5560 {
5561         if (!eqos_stats && !rqos_stats) {
5562                 return;
5563         }
5564
5565         task_lock(task);
5566         thread_t thread;
5567         queue_iterate(&task->threads, thread, thread_t, task_threads) {
5568                 if (thread->options & TH_OPT_IDLE_THREAD) {
5569                         continue;
5570                 }
5571
5572                 thread_update_qos_cpu_time(thread);
5573         }
5574
5575         if (eqos_stats) {
5576                 eqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_eqos_stats.cpu_time_qos_default;
5577                 eqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_eqos_stats.cpu_time_qos_maintenance;
5578                 eqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_eqos_stats.cpu_time_qos_background;
5579                 eqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_eqos_stats.cpu_time_qos_utility;
5580                 eqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_eqos_stats.cpu_time_qos_legacy;
5581                 eqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_eqos_stats.cpu_time_qos_user_initiated;
5582                 eqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_eqos_stats.cpu_time_qos_user_interactive;
5583         }
5584
5585         if (rqos_stats) {
5586                 rqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_rqos_stats.cpu_time_qos_default;
5587                 rqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_rqos_stats.cpu_time_qos_maintenance;
5588                 rqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_rqos_stats.cpu_time_qos_background;
5589                 rqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_rqos_stats.cpu_time_qos_utility;
5590                 rqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_rqos_stats.cpu_time_qos_legacy;
5591                 rqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_rqos_stats.cpu_time_qos_user_initiated;
5592                 rqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_rqos_stats.cpu_time_qos_user_interactive;
5593         }
5594
5595         task_unlock(task);
5596 }
5597
5598 kern_return_t
5599 task_purgable_info(
5600         task_t                  task,
5601         task_purgable_info_t    *stats)
5602 {
5603         if (task == TASK_NULL || stats == NULL) {
5604                 return KERN_INVALID_ARGUMENT;
5605         }
5606         /* Take task reference */
5607         task_reference(task);
5608         vm_purgeable_stats((vm_purgeable_info_t)stats, task);
5609         /* Drop task reference */
5610         task_deallocate(task);
5611         return KERN_SUCCESS;
5612 }
5613
5614 void
5615 task_vtimer_set(
5616         task_t          task,
5617         integer_t       which)
5618 {
5619         thread_t        thread;
5620         spl_t           x;
5621
5622         task_lock(task);
5623
5624         task->vtimers |= which;
5625
5626         switch (which) {
5627         case TASK_VTIMER_USER:
5628                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5629                         x = splsched();
5630                         thread_lock(thread);
5631                         if (thread->precise_user_kernel_time) {
5632                                 thread->vtimer_user_save = timer_grab(&thread->user_timer);
5633                         } else {
5634                                 thread->vtimer_user_save = timer_grab(&thread->system_timer);
5635                         }
5636                         thread_unlock(thread);
5637                         splx(x);
5638                 }
5639                 break;
5640
5641         case TASK_VTIMER_PROF:
5642                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5643                         x = splsched();
5644                         thread_lock(thread);
5645                         thread->vtimer_prof_save = timer_grab(&thread->user_timer);
5646                         thread->vtimer_prof_save += timer_grab(&thread->system_timer);
5647                         thread_unlock(thread);
5648                         splx(x);
5649                 }
5650                 break;
5651
5652         case TASK_VTIMER_RLIM:
5653                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5654                         x = splsched();
5655                         thread_lock(thread);
5656                         thread->vtimer_rlim_save = timer_grab(&thread->user_timer);
5657                         thread->vtimer_rlim_save += timer_grab(&thread->system_timer);
5658                         thread_unlock(thread);
5659                         splx(x);
5660                 }
5661                 break;
5662         }
5663
5664         task_unlock(task);
5665 }
5666
5667 void
5668 task_vtimer_clear(
5669         task_t          task,
5670         integer_t       which)
5671 {
5672         assert(task == current_task());
5673
5674         task_lock(task);
5675
5676         task->vtimers &= ~which;
5677
5678         task_unlock(task);
5679 }
5680
5681 void
5682 task_vtimer_update(
5683         __unused
5684         task_t          task,
5685         integer_t       which,
5686         uint32_t        *microsecs)
5687 {
5688         thread_t        thread = current_thread();
5689         uint32_t        tdelt = 0;
5690         clock_sec_t     secs = 0;
5691         uint64_t        tsum;
5692
5693         assert(task == current_task());
5694
5695         spl_t s = splsched();
5696         thread_lock(thread);
5697
5698         if ((task->vtimers & which) != (uint32_t)which) {
5699                 thread_unlock(thread);
5700                 splx(s);
5701                 return;
5702         }
5703
5704         switch (which) {
5705         case TASK_VTIMER_USER:
5706                 if (thread->precise_user_kernel_time) {
5707                         tdelt = (uint32_t)timer_delta(&thread->user_timer,
5708                             &thread->vtimer_user_save);
5709                 } else {
5710                         tdelt = (uint32_t)timer_delta(&thread->system_timer,
5711                             &thread->vtimer_user_save);
5712                 }
5713                 absolutetime_to_microtime(tdelt, &secs, microsecs);
5714                 break;
5715
5716         case TASK_VTIMER_PROF:
5717                 tsum = timer_grab(&thread->user_timer);
5718                 tsum += timer_grab(&thread->system_timer);
5719                 tdelt = (uint32_t)(tsum - thread->vtimer_prof_save);
5720                 absolutetime_to_microtime(tdelt, &secs, microsecs);
5721                 /* if the time delta is smaller than a usec, ignore */
5722                 if (*microsecs != 0) {
5723                         thread->vtimer_prof_save = tsum;
5724                 }
5725                 break;
5726
5727         case TASK_VTIMER_RLIM:
5728                 tsum = timer_grab(&thread->user_timer);
5729                 tsum += timer_grab(&thread->system_timer);
5730                 tdelt = (uint32_t)(tsum - thread->vtimer_rlim_save);
5731                 thread->vtimer_rlim_save = tsum;
5732                 absolutetime_to_microtime(tdelt, &secs, microsecs);
5733                 break;
5734         }
5735
5736         thread_unlock(thread);
5737         splx(s);
5738 }
5739
5740 /*
5741  *      task_assign:
5742  *
5743  *      Change the assigned processor set for the task
5744  */
5745 kern_return_t
5746 task_assign(
5747         __unused task_t         task,
5748         __unused processor_set_t        new_pset,
5749         __unused boolean_t      assign_threads)
5750 {
5751         return KERN_FAILURE;
5752 }
5753
5754 /*
5755  *      task_assign_default:
5756  *
5757  *      Version of task_assign to assign to default processor set.
5758  */
5759 kern_return_t
5760 task_assign_default(
5761         task_t          task,
5762         boolean_t       assign_threads)
5763 {
5764         return task_assign(task, &pset0, assign_threads);
5765 }
5766
5767 /*
5768  *      task_get_assignment
5769  *
5770  *      Return name of processor set that task is assigned to.
5771  */
5772 kern_return_t
5773 task_get_assignment(
5774         task_t          task,
5775         processor_set_t *pset)
5776 {
5777         if (!task || !task->active) {
5778                 return KERN_FAILURE;
5779         }
5780
5781         *pset = &pset0;
5782
5783         return KERN_SUCCESS;
5784 }
5785
5786 uint64_t
5787 get_task_dispatchqueue_offset(
5788         task_t          task)
5789 {
5790         return task->dispatchqueue_offset;
5791 }
5792
5793 /*
5794  *      task_policy
5795  *
5796  *      Set scheduling policy and parameters, both base and limit, for
5797  *      the given task. Policy must be a policy which is enabled for the
5798  *      processor set. Change contained threads if requested.
5799  */
5800 kern_return_t
5801 task_policy(
5802         __unused task_t                 task,
5803         __unused policy_t                       policy_id,
5804         __unused policy_base_t          base,
5805         __unused mach_msg_type_number_t count,
5806         __unused boolean_t                      set_limit,
5807         __unused boolean_t                      change)
5808 {
5809         return KERN_FAILURE;
5810 }
5811
5812 /*
5813  *      task_set_policy
5814  *
5815  *      Set scheduling policy and parameters, both base and limit, for
5816  *      the given task. Policy can be any policy implemented by the
5817  *      processor set, whether enabled or not. Change contained threads
5818  *      if requested.
5819  */
5820 kern_return_t
5821 task_set_policy(
5822         __unused task_t                 task,
5823         __unused processor_set_t                pset,
5824         __unused policy_t                       policy_id,
5825         __unused policy_base_t          base,
5826         __unused mach_msg_type_number_t base_count,
5827         __unused policy_limit_t         limit,
5828         __unused mach_msg_type_number_t limit_count,
5829         __unused boolean_t                      change)
5830 {
5831         return KERN_FAILURE;
5832 }
5833
5834 kern_return_t
5835 task_set_ras_pc(
5836         __unused task_t task,
5837         __unused vm_offset_t    pc,
5838         __unused vm_offset_t    endpc)
5839 {
5840         return KERN_FAILURE;
5841 }
5842
5843 void
5844 task_synchronizer_destroy_all(task_t task)
5845 {
5846         /*
5847          *  Destroy owned semaphores
5848          */
5849         semaphore_destroy_all(task);
5850 }
5851
5852 /*
5853  * Install default (machine-dependent) initial thread state
5854  * on the task.  Subsequent thread creation will have this initial
5855  * state set on the thread by machine_thread_inherit_taskwide().
5856  * Flavors and structures are exactly the same as those to thread_set_state()
5857  */
5858 kern_return_t
5859 task_set_state(
5860         task_t task,
5861         int flavor,
5862         thread_state_t state,
5863         mach_msg_type_number_t state_count)
5864 {
5865         kern_return_t ret;
5866
5867         if (task == TASK_NULL) {
5868                 return KERN_INVALID_ARGUMENT;
5869         }
5870
5871         task_lock(task);
5872
5873         if (!task->active) {
5874                 task_unlock(task);
5875                 return KERN_FAILURE;
5876         }
5877
5878         ret = machine_task_set_state(task, flavor, state, state_count);
5879
5880         task_unlock(task);
5881         return ret;
5882 }
5883
5884 /*
5885  * Examine the default (machine-dependent) initial thread state
5886  * on the task, as set by task_set_state().  Flavors and structures
5887  * are exactly the same as those passed to thread_get_state().
5888  */
5889 kern_return_t
5890 task_get_state(
5891         task_t  task,
5892         int     flavor,
5893         thread_state_t state,
5894         mach_msg_type_number_t *state_count)
5895 {
5896         kern_return_t ret;
5897
5898         if (task == TASK_NULL) {
5899                 return KERN_INVALID_ARGUMENT;
5900         }
5901
5902         task_lock(task);
5903
5904         if (!task->active) {
5905                 task_unlock(task);
5906                 return KERN_FAILURE;
5907         }
5908
5909         ret = machine_task_get_state(task, flavor, state, state_count);
5910
5911         task_unlock(task);
5912         return ret;
5913 }
5914
5915
5916 static kern_return_t __attribute__((noinline, not_tail_called))
5917 PROC_VIOLATED_GUARD__SEND_EXC_GUARD_AND_SUSPEND(
5918         mach_exception_code_t code,
5919         mach_exception_subcode_t subcode,
5920         void *reason)
5921 {
5922 #ifdef MACH_BSD
5923         if (1 == proc_selfpid()) {
5924                 return KERN_NOT_SUPPORTED;              // initproc is immune
5925         }
5926 #endif
5927         mach_exception_data_type_t codes[EXCEPTION_CODE_MAX] = {
5928                 [0] = code,
5929                 [1] = subcode,
5930         };
5931         task_t task = current_task();
5932         kern_return_t kr;
5933
5934         /* (See jetsam-related comments below) */
5935
5936         proc_memstat_terminated(task->bsd_info, TRUE);
5937         kr = task_enqueue_exception_with_corpse(task, EXC_GUARD, codes, 2, reason);
5938         proc_memstat_terminated(task->bsd_info, FALSE);
5939         return kr;
5940 }
5941
5942 kern_return_t
5943 task_violated_guard(
5944         mach_exception_code_t code,
5945         mach_exception_subcode_t subcode,
5946         void *reason)
5947 {
5948         return PROC_VIOLATED_GUARD__SEND_EXC_GUARD_AND_SUSPEND(code, subcode, reason);
5949 }
5950
5951
5952 #if CONFIG_MEMORYSTATUS
5953
5954 boolean_t
5955 task_get_memlimit_is_active(task_t task)
5956 {
5957         assert(task != NULL);
5958
5959         if (task->memlimit_is_active == 1) {
5960                 return TRUE;
5961         } else {
5962                 return FALSE;
5963         }
5964 }
5965
5966 void
5967 task_set_memlimit_is_active(task_t task, boolean_t memlimit_is_active)
5968 {
5969         assert(task != NULL);
5970
5971         if (memlimit_is_active) {
5972                 task->memlimit_is_active = 1;
5973         } else {
5974                 task->memlimit_is_active = 0;
5975         }
5976 }
5977
5978 boolean_t
5979 task_get_memlimit_is_fatal(task_t task)
5980 {
5981         assert(task != NULL);
5982
5983         if (task->memlimit_is_fatal == 1) {
5984                 return TRUE;
5985         } else {
5986                 return FALSE;
5987         }
5988 }
5989
5990 void
5991 task_set_memlimit_is_fatal(task_t task, boolean_t memlimit_is_fatal)
5992 {
5993         assert(task != NULL);
5994
5995         if (memlimit_is_fatal) {
5996                 task->memlimit_is_fatal = 1;
5997         } else {
5998                 task->memlimit_is_fatal = 0;
5999         }
6000 }
6001
6002 boolean_t
6003 task_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
6004 {
6005         boolean_t triggered = FALSE;
6006
6007         assert(task == current_task());
6008
6009         /*
6010          * Returns true, if task has already triggered an exc_resource exception.
6011          */
6012
6013         if (memlimit_is_active) {
6014                 triggered = (task->memlimit_active_exc_resource ? TRUE : FALSE);
6015         } else {
6016                 triggered = (task->memlimit_inactive_exc_resource ? TRUE : FALSE);
6017         }
6018
6019         return triggered;
6020 }
6021
6022 void
6023 task_mark_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
6024 {
6025         assert(task == current_task());
6026
6027         /*
6028          * We allow one exc_resource per process per active/inactive limit.
6029          * The limit's fatal attribute does not come into play.
6030          */
6031
6032         if (memlimit_is_active) {
6033                 task->memlimit_active_exc_resource = 1;
6034         } else {
6035                 task->memlimit_inactive_exc_resource = 1;
6036         }
6037 }
6038
6039 #define HWM_USERCORE_MINSPACE 250 // free space (in MB) required *after* core file creation
6040
6041 void __attribute__((noinline))
6042 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, boolean_t is_fatal)
6043 {
6044         task_t                                          task            = current_task();
6045         int                                                     pid         = 0;
6046         const char                                      *procname       = "unknown";
6047         mach_exception_data_type_t      code[EXCEPTION_CODE_MAX];
6048         boolean_t send_sync_exc_resource = FALSE;
6049
6050 #ifdef MACH_BSD
6051         pid = proc_selfpid();
6052
6053         if (pid == 1) {
6054                 /*
6055                  * Cannot have ReportCrash analyzing
6056                  * a suspended initproc.
6057                  */
6058                 return;
6059         }
6060
6061         if (task->bsd_info != NULL) {
6062                 procname = proc_name_address(current_task()->bsd_info);
6063                 send_sync_exc_resource = proc_send_synchronous_EXC_RESOURCE(current_task()->bsd_info);
6064         }
6065 #endif
6066 #if CONFIG_COREDUMP
6067         if (hwm_user_cores) {
6068                 int                             error;
6069                 uint64_t                starttime, end;
6070                 clock_sec_t             secs = 0;
6071                 uint32_t                microsecs = 0;
6072
6073                 starttime = mach_absolute_time();
6074                 /*
6075                  * Trigger a coredump of this process. Don't proceed unless we know we won't
6076                  * be filling up the disk; and ignore the core size resource limit for this
6077                  * core file.
6078                  */
6079                 if ((error = coredump(current_task()->bsd_info, HWM_USERCORE_MINSPACE, COREDUMP_IGNORE_ULIMIT)) != 0) {
6080                         printf("couldn't take coredump of %s[%d]: %d\n", procname, pid, error);
6081                 }
6082                 /*
6083                  * coredump() leaves the task suspended.
6084                  */
6085                 task_resume_internal(current_task());
6086
6087                 end = mach_absolute_time();
6088                 absolutetime_to_microtime(end - starttime, &secs, &microsecs);
6089                 printf("coredump of %s[%d] taken in %d secs %d microsecs\n",
6090                     proc_name_address(current_task()->bsd_info), pid, (int)secs, microsecs);
6091         }
6092 #endif /* CONFIG_COREDUMP */
6093
6094         if (disable_exc_resource) {
6095                 printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
6096                     "supressed by a boot-arg.\n", procname, pid, max_footprint_mb);
6097                 return;
6098         }
6099
6100         /*
6101          * A task that has triggered an EXC_RESOURCE, should not be
6102          * jetsammed when the device is under memory pressure.  Here
6103          * we set the P_MEMSTAT_TERMINATED flag so that the process
6104          * will be skipped if the memorystatus_thread wakes up.
6105          */
6106         proc_memstat_terminated(current_task()->bsd_info, TRUE);
6107
6108         code[0] = code[1] = 0;
6109         EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY);
6110         EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK);
6111         EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb);
6112
6113         /*
6114          * Do not generate a corpse fork if the violation is a fatal one
6115          * or the process wants synchronous EXC_RESOURCE exceptions.
6116          */
6117         if (is_fatal || send_sync_exc_resource || exc_via_corpse_forking == 0) {
6118                 /* Do not send a EXC_RESOURCE if corpse_for_fatal_memkill is set */
6119                 if (send_sync_exc_resource || corpse_for_fatal_memkill == 0) {
6120                         /*
6121                          * Use the _internal_ variant so that no user-space
6122                          * process can resume our task from under us.
6123                          */
6124                         task_suspend_internal(task);
6125                         exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
6126                         task_resume_internal(task);
6127                 }
6128         } else {
6129                 if (audio_active) {
6130                         printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
6131                             "supressed due to audio playback.\n", procname, pid, max_footprint_mb);
6132                 } else {
6133                         task_enqueue_exception_with_corpse(task, EXC_RESOURCE,
6134                             code, EXCEPTION_CODE_MAX, NULL);
6135                 }
6136         }
6137
6138         /*
6139          * After the EXC_RESOURCE has been handled, we must clear the
6140          * P_MEMSTAT_TERMINATED flag so that the process can again be
6141          * considered for jetsam if the memorystatus_thread wakes up.
6142          */
6143         proc_memstat_terminated(current_task()->bsd_info, FALSE);  /* clear the flag */
6144 }
6145
6146 /*
6147  * Callback invoked when a task exceeds its physical footprint limit.
6148  */
6149 void
6150 task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1)
6151 {
6152         ledger_amount_t max_footprint, max_footprint_mb;
6153         task_t task;
6154         boolean_t is_warning;
6155         boolean_t memlimit_is_active;
6156         boolean_t memlimit_is_fatal;
6157
6158         if (warning == LEDGER_WARNING_DIPPED_BELOW) {
6159                 /*
6160                  * Task memory limits only provide a warning on the way up.
6161                  */
6162                 return;
6163         } else if (warning == LEDGER_WARNING_ROSE_ABOVE) {
6164                 /*
6165                  * This task is in danger of violating a memory limit,
6166                  * It has exceeded a percentage level of the limit.
6167                  */
6168                 is_warning = TRUE;
6169         } else {
6170                 /*
6171                  * The task has exceeded the physical footprint limit.
6172                  * This is not a warning but a true limit violation.
6173                  */
6174                 is_warning = FALSE;
6175         }
6176
6177         task = current_task();
6178
6179         ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &max_footprint);
6180         max_footprint_mb = max_footprint >> 20;
6181
6182         memlimit_is_active = task_get_memlimit_is_active(task);
6183         memlimit_is_fatal = task_get_memlimit_is_fatal(task);
6184
6185         /*
6186          * If this is an actual violation (not a warning), then generate EXC_RESOURCE exception.
6187          * We only generate the exception once per process per memlimit (active/inactive limit).
6188          * To enforce this, we monitor state based on the  memlimit's active/inactive attribute
6189          * and we disable it by marking that memlimit as exception triggered.
6190          */
6191         if ((is_warning == FALSE) && (!task_has_triggered_exc_resource(task, memlimit_is_active))) {
6192                 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND((int)max_footprint_mb, memlimit_is_fatal);
6193                 memorystatus_log_exception((int)max_footprint_mb, memlimit_is_active, memlimit_is_fatal);
6194                 task_mark_has_triggered_exc_resource(task, memlimit_is_active);
6195         }
6196
6197         memorystatus_on_ledger_footprint_exceeded(is_warning, memlimit_is_active, memlimit_is_fatal);
6198 }
6199
6200 extern int proc_check_footprint_priv(void);
6201
6202 kern_return_t
6203 task_set_phys_footprint_limit(
6204         task_t task,
6205         int new_limit_mb,
6206         int *old_limit_mb)
6207 {
6208         kern_return_t error;
6209
6210         boolean_t memlimit_is_active;
6211         boolean_t memlimit_is_fatal;
6212
6213         if ((error = proc_check_footprint_priv())) {
6214                 return KERN_NO_ACCESS;
6215         }
6216
6217         /*
6218          * This call should probably be obsoleted.
6219          * But for now, we default to current state.
6220          */
6221         memlimit_is_active = task_get_memlimit_is_active(task);
6222         memlimit_is_fatal = task_get_memlimit_is_fatal(task);
6223
6224         return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, memlimit_is_active, memlimit_is_fatal);
6225 }
6226
6227 kern_return_t
6228 task_convert_phys_footprint_limit(
6229         int limit_mb,
6230         int *converted_limit_mb)
6231 {
6232         if (limit_mb == -1) {
6233                 /*
6234                  * No limit
6235                  */
6236                 if (max_task_footprint != 0) {
6237                         *converted_limit_mb = (int)(max_task_footprint / 1024 / 1024);   /* bytes to MB */
6238                 } else {
6239                         *converted_limit_mb = (int)(LEDGER_LIMIT_INFINITY >> 20);
6240                 }
6241         } else {
6242                 /* nothing to convert */
6243                 *converted_limit_mb = limit_mb;
6244         }
6245         return KERN_SUCCESS;
6246 }
6247
6248
6249 kern_return_t
6250 task_set_phys_footprint_limit_internal(
6251         task_t task,
6252         int new_limit_mb,
6253         int *old_limit_mb,
6254         boolean_t memlimit_is_active,
6255         boolean_t memlimit_is_fatal)
6256 {
6257         ledger_amount_t old;
6258         kern_return_t ret;
6259
6260         ret = ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &old);
6261
6262         if (ret != KERN_SUCCESS) {
6263                 return ret;
6264         }
6265
6266         /*
6267          * Check that limit >> 20 will not give an "unexpected" 32-bit
6268          * result. There are, however, implicit assumptions that -1 mb limit
6269          * equates to LEDGER_LIMIT_INFINITY.
6270          */
6271         assert(((old & 0xFFF0000000000000LL) == 0) || (old == LEDGER_LIMIT_INFINITY));
6272
6273         if (old_limit_mb) {
6274                 *old_limit_mb = (int)(old >> 20);
6275         }
6276
6277         if (new_limit_mb == -1) {
6278                 /*
6279                  * Caller wishes to remove the limit.
6280                  */
6281                 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
6282                     max_task_footprint ? max_task_footprint : LEDGER_LIMIT_INFINITY,
6283                     max_task_footprint ? max_task_footprint_warning_level : 0);
6284
6285                 task_lock(task);
6286                 task_set_memlimit_is_active(task, memlimit_is_active);
6287                 task_set_memlimit_is_fatal(task, memlimit_is_fatal);
6288                 task_unlock(task);
6289
6290                 return KERN_SUCCESS;
6291         }
6292
6293 #ifdef CONFIG_NOMONITORS
6294         return KERN_SUCCESS;
6295 #endif /* CONFIG_NOMONITORS */
6296
6297         task_lock(task);
6298
6299         if ((memlimit_is_active == task_get_memlimit_is_active(task)) &&
6300             (memlimit_is_fatal == task_get_memlimit_is_fatal(task)) &&
6301             (((ledger_amount_t)new_limit_mb << 20) == old)) {
6302                 /*
6303                  * memlimit state is not changing
6304                  */
6305                 task_unlock(task);
6306                 return KERN_SUCCESS;
6307         }
6308
6309         task_set_memlimit_is_active(task, memlimit_is_active);
6310         task_set_memlimit_is_fatal(task, memlimit_is_fatal);
6311
6312         ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
6313             (ledger_amount_t)new_limit_mb << 20, PHYS_FOOTPRINT_WARNING_LEVEL);
6314
6315         if (task == current_task()) {
6316                 ledger_check_new_balance(current_thread(), task->ledger,
6317                     task_ledgers.phys_footprint);
6318         }
6319
6320         task_unlock(task);
6321
6322         return KERN_SUCCESS;
6323 }
6324
6325 kern_return_t
6326 task_get_phys_footprint_limit(
6327         task_t task,
6328         int *limit_mb)
6329 {
6330         ledger_amount_t limit;
6331         kern_return_t ret;
6332
6333         ret = ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &limit);
6334         if (ret != KERN_SUCCESS) {
6335                 return ret;
6336         }
6337
6338         /*
6339          * Check that limit >> 20 will not give an "unexpected" signed, 32-bit
6340          * result. There are, however, implicit assumptions that -1 mb limit
6341          * equates to LEDGER_LIMIT_INFINITY.
6342          */
6343         assert(((limit & 0xFFF0000000000000LL) == 0) || (limit == LEDGER_LIMIT_INFINITY));
6344         *limit_mb = (int)(limit >> 20);
6345
6346         return KERN_SUCCESS;
6347 }
6348 #else /* CONFIG_MEMORYSTATUS */
6349 kern_return_t
6350 task_set_phys_footprint_limit(
6351         __unused task_t task,
6352         __unused int new_limit_mb,
6353         __unused int *old_limit_mb)
6354 {
6355         return KERN_FAILURE;
6356 }
6357
6358 kern_return_t
6359 task_get_phys_footprint_limit(
6360         __unused task_t task,
6361         __unused int *limit_mb)
6362 {
6363         return KERN_FAILURE;
6364 }
6365 #endif /* CONFIG_MEMORYSTATUS */
6366
6367 void
6368 task_set_thread_limit(task_t task, uint16_t thread_limit)
6369 {
6370         assert(task != kernel_task);
6371         if (thread_limit <= TASK_MAX_THREAD_LIMIT) {
6372                 task_lock(task);
6373                 task->task_thread_limit = thread_limit;
6374                 task_unlock(task);
6375         }
6376 }
6377
6378 /*
6379  * We need to export some functions to other components that
6380  * are currently implemented in macros within the osfmk
6381  * component.  Just export them as functions of the same name.
6382  */
6383 boolean_t
6384 is_kerneltask(task_t t)
6385 {
6386         if (t == kernel_task) {
6387                 return TRUE;
6388         }
6389
6390         return FALSE;
6391 }
6392
6393 boolean_t
6394 is_corpsetask(task_t t)
6395 {
6396         return task_is_a_corpse(t);
6397 }
6398
6399 #undef current_task
6400 task_t current_task(void);
6401 task_t
6402 current_task(void)
6403 {
6404         return current_task_fast();
6405 }
6406
6407 #undef task_reference
6408 void task_reference(task_t task);
6409 void
6410 task_reference(
6411         task_t          task)
6412 {
6413         if (task != TASK_NULL) {
6414                 task_reference_internal(task);
6415         }
6416 }
6417
6418 /* defined in bsd/kern/kern_prot.c */
6419 extern int get_audit_token_pid(audit_token_t *audit_token);
6420
6421 int
6422 task_pid(task_t task)
6423 {
6424         if (task) {
6425                 return get_audit_token_pid(&task->audit_token);
6426         }
6427         return -1;
6428 }
6429
6430
6431 /*
6432  * This routine finds a thread in a task by its unique id
6433  * Returns a referenced thread or THREAD_NULL if the thread was not found
6434  *
6435  * TODO: This is super inefficient - it's an O(threads in task) list walk!
6436  *       We should make a tid hash, or transition all tid clients to thread ports
6437  *
6438  * Precondition: No locks held (will take task lock)
6439  */
6440 thread_t
6441 task_findtid(task_t task, uint64_t tid)
6442 {
6443         thread_t self           = current_thread();
6444         thread_t found_thread   = THREAD_NULL;
6445         thread_t iter_thread    = THREAD_NULL;
6446
6447         /* Short-circuit the lookup if we're looking up ourselves */
6448         if (tid == self->thread_id || tid == TID_NULL) {
6449                 assert(self->task == task);
6450
6451                 thread_reference(self);
6452
6453                 return self;
6454         }
6455
6456         task_lock(task);
6457
6458         queue_iterate(&task->threads, iter_thread, thread_t, task_threads) {
6459                 if (iter_thread->thread_id == tid) {
6460                         found_thread = iter_thread;
6461                         thread_reference(found_thread);
6462                         break;
6463                 }
6464         }
6465
6466         task_unlock(task);
6467
6468         return found_thread;
6469 }
6470
6471 int
6472 pid_from_task(task_t task)
6473 {
6474         int pid = -1;
6475
6476         if (task->bsd_info) {
6477                 pid = proc_pid(task->bsd_info);
6478         } else {
6479                 pid = task_pid(task);
6480         }
6481
6482         return pid;
6483 }
6484
6485 /*
6486  * Control the CPU usage monitor for a task.
6487  */
6488 kern_return_t
6489 task_cpu_usage_monitor_ctl(task_t task, uint32_t *flags)
6490 {
6491         int error = KERN_SUCCESS;
6492
6493         if (*flags & CPUMON_MAKE_FATAL) {
6494                 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_CPUMON;
6495         } else {
6496                 error = KERN_INVALID_ARGUMENT;
6497         }
6498
6499         return error;
6500 }
6501
6502 /*
6503  * Control the wakeups monitor for a task.
6504  */
6505 kern_return_t
6506 task_wakeups_monitor_ctl(task_t task, uint32_t *flags, int32_t *rate_hz)
6507 {
6508         ledger_t ledger = task->ledger;
6509
6510         task_lock(task);
6511         if (*flags & WAKEMON_GET_PARAMS) {
6512                 ledger_amount_t limit;
6513                 uint64_t                period;
6514
6515                 ledger_get_limit(ledger, task_ledgers.interrupt_wakeups, &limit);
6516                 ledger_get_period(ledger, task_ledgers.interrupt_wakeups, &period);
6517
6518                 if (limit != LEDGER_LIMIT_INFINITY) {
6519                         /*
6520                          * An active limit means the wakeups monitor is enabled.
6521                          */
6522                         *rate_hz = (int32_t)(limit / (int64_t)(period / NSEC_PER_SEC));
6523                         *flags = WAKEMON_ENABLE;
6524                         if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
6525                                 *flags |= WAKEMON_MAKE_FATAL;
6526                         }
6527                 } else {
6528                         *flags = WAKEMON_DISABLE;
6529                         *rate_hz = -1;
6530                 }
6531
6532                 /*
6533                  * If WAKEMON_GET_PARAMS is present in flags, all other flags are ignored.
6534                  */
6535                 task_unlock(task);
6536                 return KERN_SUCCESS;
6537         }
6538
6539         if (*flags & WAKEMON_ENABLE) {
6540                 if (*flags & WAKEMON_SET_DEFAULTS) {
6541                         *rate_hz = task_wakeups_monitor_rate;
6542                 }
6543
6544 #ifndef CONFIG_NOMONITORS
6545                 if (*flags & WAKEMON_MAKE_FATAL) {
6546                         task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
6547                 }
6548 #endif /* CONFIG_NOMONITORS */
6549
6550                 if (*rate_hz <= 0) {
6551                         task_unlock(task);
6552                         return KERN_INVALID_ARGUMENT;
6553                 }
6554
6555 #ifndef CONFIG_NOMONITORS
6556                 ledger_set_limit(ledger, task_ledgers.interrupt_wakeups, *rate_hz * task_wakeups_monitor_interval,
6557                     task_wakeups_monitor_ustackshots_trigger_pct);
6558                 ledger_set_period(ledger, task_ledgers.interrupt_wakeups, task_wakeups_monitor_interval * NSEC_PER_SEC);
6559                 ledger_enable_callback(ledger, task_ledgers.interrupt_wakeups);
6560 #endif /* CONFIG_NOMONITORS */
6561         } else if (*flags & WAKEMON_DISABLE) {
6562                 /*
6563                  * Caller wishes to disable wakeups monitor on the task.
6564                  *
6565                  * Disable telemetry if it was triggered by the wakeups monitor, and
6566                  * remove the limit & callback on the wakeups ledger entry.
6567                  */
6568 #if CONFIG_TELEMETRY
6569                 telemetry_task_ctl_locked(task, TF_WAKEMON_WARNING, 0);
6570 #endif
6571                 ledger_disable_refill(ledger, task_ledgers.interrupt_wakeups);
6572                 ledger_disable_callback(ledger, task_ledgers.interrupt_wakeups);
6573         }
6574
6575         task_unlock(task);
6576         return KERN_SUCCESS;
6577 }
6578
6579 void
6580 task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1)
6581 {
6582         if (warning == LEDGER_WARNING_ROSE_ABOVE) {
6583 #if CONFIG_TELEMETRY
6584                 /*
6585                  * This task is in danger of violating the wakeups monitor. Enable telemetry on this task
6586                  * so there are micro-stackshots available if and when EXC_RESOURCE is triggered.
6587                  */
6588                 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 1);
6589 #endif
6590                 return;
6591         }
6592
6593 #if CONFIG_TELEMETRY
6594         /*
6595          * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
6596          * exceeded the limit, turn telemetry off for the task.
6597          */
6598         telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 0);
6599 #endif
6600
6601         if (warning == 0) {
6602                 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS();
6603         }
6604 }
6605
6606 void __attribute__((noinline))
6607 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void)
6608 {
6609         task_t                      task        = current_task();
6610         int                         pid         = 0;
6611         const char                  *procname   = "unknown";
6612         boolean_t                   fatal;
6613         kern_return_t               kr;
6614 #ifdef EXC_RESOURCE_MONITORS
6615         mach_exception_data_type_t  code[EXCEPTION_CODE_MAX];
6616 #endif /* EXC_RESOURCE_MONITORS */
6617         struct ledger_entry_info    lei;
6618
6619 #ifdef MACH_BSD
6620         pid = proc_selfpid();
6621         if (task->bsd_info != NULL) {
6622                 procname = proc_name_address(current_task()->bsd_info);
6623         }
6624 #endif
6625
6626         ledger_get_entry_info(task->ledger, task_ledgers.interrupt_wakeups, &lei);
6627
6628         /*
6629          * Disable the exception notification so we don't overwhelm
6630          * the listener with an endless stream of redundant exceptions.
6631          * TODO: detect whether another thread is already reporting the violation.
6632          */
6633         uint32_t flags = WAKEMON_DISABLE;
6634         task_wakeups_monitor_ctl(task, &flags, NULL);
6635
6636         fatal = task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
6637         trace_resource_violation(RMON_CPUWAKES_VIOLATED, &lei);
6638         os_log(OS_LOG_DEFAULT, "process %s[%d] caught waking the CPU %llu times "
6639             "over ~%llu seconds, averaging %llu wakes / second and "
6640             "violating a %slimit of %llu wakes over %llu seconds.\n",
6641             procname, pid,
6642             lei.lei_balance, lei.lei_last_refill / NSEC_PER_SEC,
6643             lei.lei_last_refill == 0 ? 0 :
6644             (NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill),
6645             fatal ? "FATAL " : "",
6646             lei.lei_limit, lei.lei_refill_period / NSEC_PER_SEC);
6647
6648         kr = send_resource_violation(send_cpu_wakes_violation, task, &lei,
6649             fatal ? kRNFatalLimitFlag : 0);
6650         if (kr) {
6651                 printf("send_resource_violation(CPU wakes, ...): error %#x\n", kr);
6652         }
6653
6654 #ifdef EXC_RESOURCE_MONITORS
6655         if (disable_exc_resource) {
6656                 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
6657                     "supressed by a boot-arg\n", procname, pid);
6658                 return;
6659         }
6660         if (audio_active) {
6661                 os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
6662                     "supressed due to audio playback\n", procname, pid);
6663                 return;
6664         }
6665         if (lei.lei_last_refill == 0) {
6666                 os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
6667                     "supressed due to lei.lei_last_refill = 0 \n", procname, pid);
6668         }
6669
6670         code[0] = code[1] = 0;
6671         EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_WAKEUPS);
6672         EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_WAKEUPS_MONITOR);
6673         EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_PERMITTED(code[0],
6674             NSEC_PER_SEC * lei.lei_limit / lei.lei_refill_period);
6675         EXC_RESOURCE_CPUMONITOR_ENCODE_OBSERVATION_INTERVAL(code[0],
6676             lei.lei_last_refill);
6677         EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_OBSERVED(code[1],
6678             NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill);
6679         exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
6680 #endif /* EXC_RESOURCE_MONITORS */
6681
6682         if (fatal) {
6683                 task_terminate_internal(task);
6684         }
6685 }
6686
6687 static boolean_t
6688 global_update_logical_writes(int64_t io_delta, int64_t *global_write_count)
6689 {
6690         int64_t old_count, new_count;
6691         boolean_t needs_telemetry;
6692
6693         do {
6694                 new_count = old_count = *global_write_count;
6695                 new_count += io_delta;
6696                 if (new_count >= io_telemetry_limit) {
6697                         new_count = 0;
6698                         needs_telemetry = TRUE;
6699                 } else {
6700                         needs_telemetry = FALSE;
6701                 }
6702         } while (!OSCompareAndSwap64(old_count, new_count, global_write_count));
6703         return needs_telemetry;
6704 }
6705
6706 void
6707 task_update_logical_writes(task_t task, uint32_t io_size, int flags, void *vp)
6708 {
6709         int64_t io_delta = 0;
6710         int64_t * global_counter_to_update;
6711         boolean_t needs_telemetry = FALSE;
6712         int ledger_to_update = 0;
6713         struct task_writes_counters * writes_counters_to_update;
6714
6715         if ((!task) || (!io_size) || (!vp)) {
6716                 return;
6717         }
6718
6719         KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_DATA_WRITE)) | DBG_FUNC_NONE,
6720             task_pid(task), io_size, flags, (uintptr_t)VM_KERNEL_ADDRPERM(vp), 0);
6721         DTRACE_IO4(logical_writes, struct task *, task, uint32_t, io_size, int, flags, vnode *, vp);
6722
6723         // Is the drive backing this vnode internal or external to the system?
6724         if (vnode_isonexternalstorage(vp) == false) {
6725                 global_counter_to_update = &global_logical_writes_count;
6726                 ledger_to_update = task_ledgers.logical_writes;
6727                 writes_counters_to_update = &task->task_writes_counters_internal;
6728         } else {
6729                 global_counter_to_update = &global_logical_writes_to_external_count;
6730                 ledger_to_update = task_ledgers.logical_writes_to_external;
6731                 writes_counters_to_update = &task->task_writes_counters_external;
6732         }
6733
6734         switch (flags) {
6735         case TASK_WRITE_IMMEDIATE:
6736                 OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_immediate_writes));
6737                 ledger_credit(task->ledger, ledger_to_update, io_size);
6738                 coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
6739                 break;
6740         case TASK_WRITE_DEFERRED:
6741                 OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_deferred_writes));
6742                 ledger_credit(task->ledger, ledger_to_update, io_size);
6743                 coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
6744                 break;
6745         case TASK_WRITE_INVALIDATED:
6746                 OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_invalidated_writes));
6747                 ledger_debit(task->ledger, ledger_to_update, io_size);
6748                 coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, FALSE, io_size);
6749                 break;
6750         case TASK_WRITE_METADATA:
6751                 OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_metadata_writes));
6752                 ledger_credit(task->ledger, ledger_to_update, io_size);
6753                 coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
6754                 break;
6755         }
6756
6757         io_delta = (flags == TASK_WRITE_INVALIDATED) ? ((int64_t)io_size * -1ll) : ((int64_t)io_size);
6758         if (io_telemetry_limit != 0) {
6759                 /* If io_telemetry_limit is 0, disable global updates and I/O telemetry */
6760                 needs_telemetry = global_update_logical_writes(io_delta, global_counter_to_update);
6761                 if (needs_telemetry) {
6762                         act_set_io_telemetry_ast(current_thread());
6763                 }
6764         }
6765 }
6766
6767 /*
6768  * Control the I/O monitor for a task.
6769  */
6770 kern_return_t
6771 task_io_monitor_ctl(task_t task, uint32_t *flags)
6772 {
6773         ledger_t ledger = task->ledger;
6774
6775         task_lock(task);
6776         if (*flags & IOMON_ENABLE) {
6777                 /* Configure the physical I/O ledger */
6778                 ledger_set_limit(ledger, task_ledgers.physical_writes, (task_iomon_limit_mb * 1024 * 1024), 0);
6779                 ledger_set_period(ledger, task_ledgers.physical_writes, (task_iomon_interval_secs * NSEC_PER_SEC));
6780         } else if (*flags & IOMON_DISABLE) {
6781                 /*
6782                  * Caller wishes to disable I/O monitor on the task.
6783                  */
6784                 ledger_disable_refill(ledger, task_ledgers.physical_writes);
6785                 ledger_disable_callback(ledger, task_ledgers.physical_writes);
6786         }
6787
6788         task_unlock(task);
6789         return KERN_SUCCESS;
6790 }
6791
6792 void
6793 task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1)
6794 {
6795         if (warning == 0) {
6796                 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO((int)param0);
6797         }
6798 }
6799
6800 void __attribute__((noinline))
6801 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor)
6802 {
6803         int                             pid = 0;
6804         task_t                          task = current_task();
6805 #ifdef EXC_RESOURCE_MONITORS
6806         mach_exception_data_type_t      code[EXCEPTION_CODE_MAX];
6807 #endif /* EXC_RESOURCE_MONITORS */
6808         struct ledger_entry_info        lei;
6809         kern_return_t                   kr;
6810
6811 #ifdef MACH_BSD
6812         pid = proc_selfpid();
6813 #endif
6814         /*
6815          * Get the ledger entry info. We need to do this before disabling the exception
6816          * to get correct values for all fields.
6817          */
6818         switch (flavor) {
6819         case FLAVOR_IO_PHYSICAL_WRITES:
6820                 ledger_get_entry_info(task->ledger, task_ledgers.physical_writes, &lei);
6821                 break;
6822         }
6823
6824
6825         /*
6826          * Disable the exception notification so we don't overwhelm
6827          * the listener with an endless stream of redundant exceptions.
6828          * TODO: detect whether another thread is already reporting the violation.
6829          */
6830         uint32_t flags = IOMON_DISABLE;
6831         task_io_monitor_ctl(task, &flags);
6832
6833         if (flavor == FLAVOR_IO_LOGICAL_WRITES) {
6834                 trace_resource_violation(RMON_LOGWRITES_VIOLATED, &lei);
6835         }
6836         os_log(OS_LOG_DEFAULT, "process [%d] caught causing excessive I/O (flavor: %d). Task I/O: %lld MB. [Limit : %lld MB per %lld secs]\n",
6837             pid, flavor, (lei.lei_balance / (1024 * 1024)), (lei.lei_limit / (1024 * 1024)), (lei.lei_refill_period / NSEC_PER_SEC));
6838
6839         kr = send_resource_violation(send_disk_writes_violation, task, &lei, kRNFlagsNone);
6840         if (kr) {
6841                 printf("send_resource_violation(disk_writes, ...): error %#x\n", kr);
6842         }
6843
6844 #ifdef EXC_RESOURCE_MONITORS
6845         code[0] = code[1] = 0;
6846         EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_IO);
6847         EXC_RESOURCE_ENCODE_FLAVOR(code[0], flavor);
6848         EXC_RESOURCE_IO_ENCODE_INTERVAL(code[0], (lei.lei_refill_period / NSEC_PER_SEC));
6849         EXC_RESOURCE_IO_ENCODE_LIMIT(code[0], (lei.lei_limit / (1024 * 1024)));
6850         EXC_RESOURCE_IO_ENCODE_OBSERVED(code[1], (lei.lei_balance / (1024 * 1024)));
6851         exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
6852 #endif /* EXC_RESOURCE_MONITORS */
6853 }
6854
6855 /* Placeholders for the task set/get voucher interfaces */
6856 kern_return_t
6857 task_get_mach_voucher(
6858         task_t                  task,
6859         mach_voucher_selector_t __unused which,
6860         ipc_voucher_t           *voucher)
6861 {
6862         if (TASK_NULL == task) {
6863                 return KERN_INVALID_TASK;
6864         }
6865
6866         *voucher = NULL;
6867         return KERN_SUCCESS;
6868 }
6869
6870 kern_return_t
6871 task_set_mach_voucher(
6872         task_t                  task,
6873         ipc_voucher_t           __unused voucher)
6874 {
6875         if (TASK_NULL == task) {
6876                 return KERN_INVALID_TASK;
6877         }
6878
6879         return KERN_SUCCESS;
6880 }
6881
6882 kern_return_t
6883 task_swap_mach_voucher(
6884         __unused task_t         task,
6885         __unused ipc_voucher_t  new_voucher,
6886         ipc_voucher_t          *in_out_old_voucher)
6887 {
6888         /*
6889          * Currently this function is only called from a MIG generated
6890          * routine which doesn't release the reference on the voucher
6891          * addressed by in_out_old_voucher. To avoid leaking this reference,
6892          * a call to release it has been added here.
6893          */
6894         ipc_voucher_release(*in_out_old_voucher);
6895         return KERN_NOT_SUPPORTED;
6896 }
6897
6898 void
6899 task_set_gpu_denied(task_t task, boolean_t denied)
6900 {
6901         task_lock(task);
6902
6903         if (denied) {
6904                 task->t_flags |= TF_GPU_DENIED;
6905         } else {
6906                 task->t_flags &= ~TF_GPU_DENIED;
6907         }
6908
6909         task_unlock(task);
6910 }
6911
6912 boolean_t
6913 task_is_gpu_denied(task_t task)
6914 {
6915         /* We don't need the lock to read this flag */
6916         return (task->t_flags & TF_GPU_DENIED) ? TRUE : FALSE;
6917 }
6918
6919
6920 uint64_t
6921 get_task_memory_region_count(task_t task)
6922 {
6923         vm_map_t map;
6924         map = (task == kernel_task) ? kernel_map: task->map;
6925         return (uint64_t)get_map_nentries(map);
6926 }
6927
6928 static void
6929 kdebug_trace_dyld_internal(uint32_t base_code,
6930     struct dyld_kernel_image_info *info)
6931 {
6932         static_assert(sizeof(info->uuid) >= 16);
6933
6934 #if defined(__LP64__)
6935         uint64_t *uuid = (uint64_t *)&(info->uuid);
6936
6937         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
6938             KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code), uuid[0],
6939             uuid[1], info->load_addr,
6940             (uint64_t)info->fsid.val[0] | ((uint64_t)info->fsid.val[1] << 32),
6941             0);
6942         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
6943             KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 1),
6944             (uint64_t)info->fsobjid.fid_objno |
6945             ((uint64_t)info->fsobjid.fid_generation << 32),
6946             0, 0, 0, 0);
6947 #else /* defined(__LP64__) */
6948         uint32_t *uuid = (uint32_t *)&(info->uuid);
6949
6950         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
6951             KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 2), uuid[0],
6952             uuid[1], uuid[2], uuid[3], 0);
6953         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
6954             KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 3),
6955             (uint32_t)info->load_addr, info->fsid.val[0], info->fsid.val[1],
6956             info->fsobjid.fid_objno, 0);
6957         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
6958             KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 4),
6959             info->fsobjid.fid_generation, 0, 0, 0, 0);
6960 #endif /* !defined(__LP64__) */
6961 }
6962
6963 static kern_return_t
6964 kdebug_trace_dyld(task_t task, uint32_t base_code,
6965     vm_map_copy_t infos_copy, mach_msg_type_number_t infos_len)
6966 {
6967         kern_return_t kr;
6968         dyld_kernel_image_info_array_t infos;
6969         vm_map_offset_t map_data;
6970         vm_offset_t data;
6971
6972         if (!infos_copy) {
6973                 return KERN_INVALID_ADDRESS;
6974         }
6975
6976         if (!kdebug_enable ||
6977             !kdebug_debugid_enabled(KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, 0))) {
6978                 vm_map_copy_discard(infos_copy);
6979                 return KERN_SUCCESS;
6980         }
6981
6982         if (task == NULL || task != current_task()) {
6983                 return KERN_INVALID_TASK;
6984         }
6985
6986         kr = vm_map_copyout(ipc_kernel_map, &map_data, (vm_map_copy_t)infos_copy);
6987         if (kr != KERN_SUCCESS) {
6988                 return kr;
6989         }
6990
6991         infos = CAST_DOWN(dyld_kernel_image_info_array_t, map_data);
6992
6993         for (mach_msg_type_number_t i = 0; i < infos_len; i++) {
6994                 kdebug_trace_dyld_internal(base_code, &(infos[i]));
6995         }
6996
6997         data = CAST_DOWN(vm_offset_t, map_data);
6998         mach_vm_deallocate(ipc_kernel_map, data, infos_len * sizeof(infos[0]));
6999         return KERN_SUCCESS;
7000 }
7001
7002 kern_return_t
7003 task_register_dyld_image_infos(task_t task,
7004     dyld_kernel_image_info_array_t infos_copy,
7005     mach_msg_type_number_t infos_len)
7006 {
7007         return kdebug_trace_dyld(task, DBG_DYLD_UUID_MAP_A,
7008                    (vm_map_copy_t)infos_copy, infos_len);
7009 }
7010
7011 kern_return_t
7012 task_unregister_dyld_image_infos(task_t task,
7013     dyld_kernel_image_info_array_t infos_copy,
7014     mach_msg_type_number_t infos_len)
7015 {
7016         return kdebug_trace_dyld(task, DBG_DYLD_UUID_UNMAP_A,
7017                    (vm_map_copy_t)infos_copy, infos_len);
7018 }
7019
7020 kern_return_t
7021 task_get_dyld_image_infos(__unused task_t task,
7022     __unused dyld_kernel_image_info_array_t * dyld_images,
7023     __unused mach_msg_type_number_t * dyld_imagesCnt)
7024 {
7025         return KERN_NOT_SUPPORTED;
7026 }
7027
7028 kern_return_t
7029 task_register_dyld_shared_cache_image_info(task_t task,
7030     dyld_kernel_image_info_t cache_img,
7031     __unused boolean_t no_cache,
7032     __unused boolean_t private_cache)
7033 {
7034         if (task == NULL || task != current_task()) {
7035                 return KERN_INVALID_TASK;
7036         }
7037
7038         kdebug_trace_dyld_internal(DBG_DYLD_UUID_SHARED_CACHE_A, &cache_img);
7039         return KERN_SUCCESS;
7040 }
7041
7042 kern_return_t
7043 task_register_dyld_set_dyld_state(__unused task_t task,
7044     __unused uint8_t dyld_state)
7045 {
7046         return KERN_NOT_SUPPORTED;
7047 }
7048
7049 kern_return_t
7050 task_register_dyld_get_process_state(__unused task_t task,
7051     __unused dyld_kernel_process_info_t * dyld_process_state)
7052 {
7053         return KERN_NOT_SUPPORTED;
7054 }
7055
7056 kern_return_t
7057 task_inspect(task_inspect_t task_insp, task_inspect_flavor_t flavor,
7058     task_inspect_info_t info_out, mach_msg_type_number_t *size_in_out)
7059 {
7060 #if MONOTONIC
7061         task_t task = (task_t)task_insp;
7062         kern_return_t kr = KERN_SUCCESS;
7063         mach_msg_type_number_t size;
7064
7065         if (task == TASK_NULL) {
7066                 return KERN_INVALID_ARGUMENT;
7067         }
7068
7069         size = *size_in_out;
7070
7071         switch (flavor) {
7072         case TASK_INSPECT_BASIC_COUNTS: {
7073                 struct task_inspect_basic_counts *bc;
7074                 uint64_t task_counts[MT_CORE_NFIXED] = { 0 };
7075
7076                 if (size < TASK_INSPECT_BASIC_COUNTS_COUNT) {
7077                         kr = KERN_INVALID_ARGUMENT;
7078                         break;
7079                 }
7080
7081                 mt_fixed_task_counts(task, task_counts);
7082                 bc = (struct task_inspect_basic_counts *)info_out;
7083 #ifdef MT_CORE_INSTRS
7084                 bc->instructions = task_counts[MT_CORE_INSTRS];
7085 #else /* defined(MT_CORE_INSTRS) */
7086                 bc->instructions = 0;
7087 #endif /* !defined(MT_CORE_INSTRS) */
7088                 bc->cycles = task_counts[MT_CORE_CYCLES];
7089                 size = TASK_INSPECT_BASIC_COUNTS_COUNT;
7090                 break;
7091         }
7092         default:
7093                 kr = KERN_INVALID_ARGUMENT;
7094                 break;
7095         }
7096
7097         if (kr == KERN_SUCCESS) {
7098                 *size_in_out = size;
7099         }
7100         return kr;
7101 #else /* MONOTONIC */
7102 #pragma unused(task_insp, flavor, info_out, size_in_out)
7103         return KERN_NOT_SUPPORTED;
7104 #endif /* !MONOTONIC */
7105 }
7106
7107 #if CONFIG_SECLUDED_MEMORY
7108 int num_tasks_can_use_secluded_mem = 0;
7109
7110 void
7111 task_set_can_use_secluded_mem(
7112         task_t          task,
7113         boolean_t       can_use_secluded_mem)
7114 {
7115         if (!task->task_could_use_secluded_mem) {
7116                 return;
7117         }
7118         task_lock(task);
7119         task_set_can_use_secluded_mem_locked(task, can_use_secluded_mem);
7120         task_unlock(task);
7121 }
7122
7123 void
7124 task_set_can_use_secluded_mem_locked(
7125         task_t          task,
7126         boolean_t       can_use_secluded_mem)
7127 {
7128         assert(task->task_could_use_secluded_mem);
7129         if (can_use_secluded_mem &&
7130             secluded_for_apps && /* global boot-arg */
7131             !task->task_can_use_secluded_mem) {
7132                 assert(num_tasks_can_use_secluded_mem >= 0);
7133                 OSAddAtomic(+1,
7134                     (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
7135                 task->task_can_use_secluded_mem = TRUE;
7136         } else if (!can_use_secluded_mem &&
7137             task->task_can_use_secluded_mem) {
7138                 assert(num_tasks_can_use_secluded_mem > 0);
7139                 OSAddAtomic(-1,
7140                     (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
7141                 task->task_can_use_secluded_mem = FALSE;
7142         }
7143 }
7144
7145 void
7146 task_set_could_use_secluded_mem(
7147         task_t          task,
7148         boolean_t       could_use_secluded_mem)
7149 {
7150         task->task_could_use_secluded_mem = could_use_secluded_mem;
7151 }
7152
7153 void
7154 task_set_could_also_use_secluded_mem(
7155         task_t          task,
7156         boolean_t       could_also_use_secluded_mem)
7157 {
7158         task->task_could_also_use_secluded_mem = could_also_use_secluded_mem;
7159 }
7160
7161 boolean_t
7162 task_can_use_secluded_mem(
7163         task_t          task,
7164         boolean_t       is_alloc)
7165 {
7166         if (task->task_can_use_secluded_mem) {
7167                 assert(task->task_could_use_secluded_mem);
7168                 assert(num_tasks_can_use_secluded_mem > 0);
7169                 return TRUE;
7170         }
7171         if (task->task_could_also_use_secluded_mem &&
7172             num_tasks_can_use_secluded_mem > 0) {
7173                 assert(num_tasks_can_use_secluded_mem > 0);
7174                 return TRUE;
7175         }
7176
7177         /*
7178          * If a single task is using more than some amount of
7179          * memory, allow it to dip into secluded and also begin
7180          * suppression of secluded memory until the tasks exits.
7181          */
7182         if (is_alloc && secluded_shutoff_trigger != 0) {
7183                 uint64_t phys_used = get_task_phys_footprint(task);
7184                 if (phys_used > secluded_shutoff_trigger) {
7185                         start_secluded_suppression(task);
7186                         return TRUE;
7187                 }
7188         }
7189
7190         return FALSE;
7191 }
7192
7193 boolean_t
7194 task_could_use_secluded_mem(
7195         task_t  task)
7196 {
7197         return task->task_could_use_secluded_mem;
7198 }
7199
7200 boolean_t
7201 task_could_also_use_secluded_mem(
7202         task_t  task)
7203 {
7204         return task->task_could_also_use_secluded_mem;
7205 }
7206 #endif /* CONFIG_SECLUDED_MEMORY */
7207
7208 queue_head_t *
7209 task_io_user_clients(task_t task)
7210 {
7211         return &task->io_user_clients;
7212 }
7213
7214 void
7215 task_set_message_app_suspended(task_t task, boolean_t enable)
7216 {
7217         task->message_app_suspended = enable;
7218 }
7219
7220 void
7221 task_copy_fields_for_exec(task_t dst_task, task_t src_task)
7222 {
7223         dst_task->vtimers = src_task->vtimers;
7224 }
7225
7226 #if DEVELOPMENT || DEBUG
7227 int vm_region_footprint = 0;
7228 #endif /* DEVELOPMENT || DEBUG */
7229
7230 boolean_t
7231 task_self_region_footprint(void)
7232 {
7233 #if DEVELOPMENT || DEBUG
7234         if (vm_region_footprint) {
7235                 /* system-wide override */
7236                 return TRUE;
7237         }
7238 #endif /* DEVELOPMENT || DEBUG */
7239         return current_task()->task_region_footprint;
7240 }
7241
7242 void
7243 task_self_region_footprint_set(
7244         boolean_t newval)
7245 {
7246         task_t  curtask;
7247
7248         curtask = current_task();
7249         task_lock(curtask);
7250         if (newval) {
7251                 curtask->task_region_footprint = TRUE;
7252         } else {
7253                 curtask->task_region_footprint = FALSE;
7254         }
7255         task_unlock(curtask);
7256 }
7257
7258 void
7259 task_set_darkwake_mode(task_t task, boolean_t set_mode)
7260 {
7261         assert(task);
7262
7263         task_lock(task);
7264
7265         if (set_mode) {
7266                 task->t_flags |= TF_DARKWAKE_MODE;
7267         } else {
7268                 task->t_flags &= ~(TF_DARKWAKE_MODE);
7269         }
7270
7271         task_unlock(task);
7272 }
7273
7274 boolean_t
7275 task_get_darkwake_mode(task_t task)
7276 {
7277         assert(task);
7278         return (task->t_flags & TF_DARKWAKE_MODE) != 0;
7279 }
7280
7281 kern_return_t
7282 task_get_exc_guard_behavior(
7283         task_t task,
7284         task_exc_guard_behavior_t *behaviorp)
7285 {
7286         if (task == TASK_NULL) {
7287                 return KERN_INVALID_TASK;
7288         }
7289         *behaviorp = task->task_exc_guard;
7290         return KERN_SUCCESS;
7291 }
7292
7293 #ifndef TASK_EXC_GUARD_ALL
7294 /* Temporary define until two branches are merged */
7295 #define TASK_EXC_GUARD_ALL (TASK_EXC_GUARD_VM_ALL | 0xf0)
7296 #endif
7297
7298 kern_return_t
7299 task_set_exc_guard_behavior(
7300         task_t task,
7301         task_exc_guard_behavior_t behavior)
7302 {
7303         if (task == TASK_NULL) {
7304                 return KERN_INVALID_TASK;
7305         }
7306         if (behavior & ~TASK_EXC_GUARD_ALL) {
7307                 return KERN_INVALID_VALUE;
7308         }
7309         task->task_exc_guard = behavior;
7310         return KERN_SUCCESS;
7311 }
7312
7313 #if __arm64__
7314 extern int legacy_footprint_entitlement_mode;
7315 extern void memorystatus_act_on_legacy_footprint_entitlement(proc_t, boolean_t);
7316 extern void memorystatus_act_on_ios13extended_footprint_entitlement(proc_t);
7317
7318 void
7319 task_set_legacy_footprint(
7320         task_t task)
7321 {
7322         task_lock(task);
7323         task->task_legacy_footprint = TRUE;
7324         task_unlock(task);
7325 }
7326
7327 void
7328 task_set_extra_footprint_limit(
7329         task_t task)
7330 {
7331         if (task->task_extra_footprint_limit) {
7332                 return;
7333         }
7334         task_lock(task);
7335         if (task->task_extra_footprint_limit) {
7336                 task_unlock(task);
7337                 return;
7338         }
7339         task->task_extra_footprint_limit = TRUE;
7340         task_unlock(task);
7341         memorystatus_act_on_legacy_footprint_entitlement(task->bsd_info, TRUE);
7342 }
7343
7344 void
7345 task_set_ios13extended_footprint_limit(
7346         task_t task)
7347 {
7348         if (task->task_ios13extended_footprint_limit) {
7349                 return;
7350         }
7351         task_lock(task);
7352         if (task->task_ios13extended_footprint_limit) {
7353                 task_unlock(task);
7354                 return;
7355         }
7356         task->task_ios13extended_footprint_limit = TRUE;
7357         task_unlock(task);
7358         memorystatus_act_on_ios13extended_footprint_entitlement(task->bsd_info);
7359 }
7360 #endif /* __arm64__ */
7361
7362 static inline ledger_amount_t
7363 task_ledger_get_balance(
7364         ledger_t        ledger,
7365         int             ledger_idx)
7366 {
7367         ledger_amount_t amount;
7368         amount = 0;
7369         ledger_get_balance(ledger, ledger_idx, &amount);
7370         return amount;
7371 }
7372
7373 /*
7374  * Gather the amount of memory counted in a task's footprint due to
7375  * being in a specific set of ledgers.
7376  */
7377 void
7378 task_ledgers_footprint(
7379         ledger_t        ledger,
7380         ledger_amount_t *ledger_resident,
7381         ledger_amount_t *ledger_compressed)
7382 {
7383         *ledger_resident = 0;
7384         *ledger_compressed = 0;
7385
7386         /* purgeable non-volatile memory */
7387         *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.purgeable_nonvolatile);
7388         *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.purgeable_nonvolatile_compressed);
7389
7390         /* "default" tagged memory */
7391         *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.tagged_footprint);
7392         *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.tagged_footprint_compressed);
7393
7394         /* "network" currently never counts in the footprint... */
7395
7396         /* "media" tagged memory */
7397         *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.media_footprint);
7398         *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.media_footprint_compressed);
7399
7400         /* "graphics" tagged memory */
7401         *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.graphics_footprint);
7402         *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.graphics_footprint_compressed);
7403
7404         /* "neural" tagged memory */
7405         *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.neural_footprint);
7406         *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.neural_footprint_compressed);
7407 }
7408
7409 void
7410 task_set_memory_ownership_transfer(
7411         task_t    task,
7412         boolean_t value)
7413 {
7414         task_lock(task);
7415         task->task_can_transfer_memory_ownership = value;
7416         task_unlock(task);
7417 }
7418
7419 void
7420 task_copy_vmobjects(task_t task, vm_object_query_t query, int len, int64_t* num)
7421 {
7422         vm_object_t find_vmo;
7423         int64_t size = 0;
7424
7425         task_objq_lock(task);
7426         if (query != NULL) {
7427                 queue_iterate(&task->task_objq, find_vmo, vm_object_t, task_objq)
7428                 {
7429                         int byte_size;
7430                         vm_object_query_t p = &query[size++];
7431
7432                         p->object_id = (vm_object_id_t) VM_KERNEL_ADDRPERM(find_vmo);
7433                         p->virtual_size = find_vmo->internal ? find_vmo->vo_size : 0;
7434                         p->resident_size = find_vmo->resident_page_count * PAGE_SIZE;
7435                         p->wired_size = find_vmo->wired_page_count * PAGE_SIZE;
7436                         p->reusable_size = find_vmo->reusable_page_count * PAGE_SIZE;
7437                         p->vo_no_footprint = find_vmo->vo_no_footprint;
7438                         p->vo_ledger_tag = find_vmo->vo_ledger_tag;
7439                         p->purgable = find_vmo->purgable;
7440
7441                         if (find_vmo->internal && find_vmo->pager_created && find_vmo->pager != NULL) {
7442                                 p->compressed_size = vm_compressor_pager_get_count(find_vmo->pager) * PAGE_SIZE;
7443                         } else {
7444                                 p->compressed_size = 0;
7445                         }
7446
7447                         /* make sure to not overrun */
7448                         byte_size = (int) size * sizeof(vm_object_query_data_t);
7449                         if ((int)(byte_size + sizeof(vm_object_query_data_t)) > len) {
7450                                 break;
7451                         }
7452                 }
7453         } else {
7454                 size = task->task_owned_objects;
7455         }
7456         task_objq_unlock(task);
7457
7458         *num = size;
7459 }