osfmk/kern/task.c

   1 /*
   2  * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * @OSF_FREE_COPYRIGHT@
  30  */
  31 /*
  32  * Mach Operating System
  33  * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
  34  * All Rights Reserved.
  35  *
  36  * Permission to use, copy, modify and distribute this software and its
  37  * documentation is hereby granted, provided that both the copyright
  38  * notice and this permission notice appear in all copies of the
  39  * software, derivative works or modified versions, and any portions
  40  * thereof, and that both notices appear in supporting documentation.
  41  *
  42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
  44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  45  *
  46  * Carnegie Mellon requests users of this software to return to
  47  *
  48  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  49  *  School of Computer Science
  50  *  Carnegie Mellon University
  51  *  Pittsburgh PA 15213-3890
  52  *
  53  * any improvements or extensions that they make and grant Carnegie Mellon
  54  * the rights to redistribute these changes.
  55  */
  56 /*
  57  *      File:   kern/task.c
  58  *      Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub,
  59  *              David Black
  60  *
  61  *      Task management primitives implementation.
  62  */
  63 /*
  64  * Copyright (c) 1993 The University of Utah and
  65  * the Computer Systems Laboratory (CSL).  All rights reserved.
  66  *
  67  * Permission to use, copy, modify and distribute this software and its
  68  * documentation is hereby granted, provided that both the copyright
  69  * notice and this permission notice appear in all copies of the
  70  * software, derivative works or modified versions, and any portions
  71  * thereof, and that both notices appear in supporting documentation.
  72  *
  73  * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
  74  * IS" CONDITION.  THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
  75  * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  76  *
  77  * CSL requests users of this software to return to csl-dist@cs.utah.edu any
  78  * improvements that they make and grant CSL redistribution rights.
  79  *
  80  */
  81 /*
  82  * NOTICE: This file was modified by McAfee Research in 2004 to introduce
  83  * support for mandatory and extensible security protections.  This notice
  84  * is included in support of clause 2.2 (b) of the Apple Public License,
  85  * Version 2.0.
  86  * Copyright (c) 2005 SPARTA, Inc.
  87  */
  88
  89 #include <mach/mach_types.h>
  90 #include <mach/boolean.h>
  91 #include <mach/host_priv.h>
  92 #include <mach/machine/vm_types.h>
  93 #include <mach/vm_param.h>
  94 #include <mach/mach_vm.h>
  95 #include <mach/semaphore.h>
  96 #include <mach/task_info.h>
  97 #include <mach/task_inspect.h>
  98 #include <mach/task_special_ports.h>
  99 #include <mach/sdt.h>
 100
 101 #include <ipc/ipc_importance.h>
 102 #include <ipc/ipc_types.h>
 103 #include <ipc/ipc_space.h>
 104 #include <ipc/ipc_entry.h>
 105 #include <ipc/ipc_hash.h>
 106
 107 #include <kern/kern_types.h>
 108 #include <kern/mach_param.h>
 109 #include <kern/misc_protos.h>
 110 #include <kern/task.h>
 111 #include <kern/thread.h>
 112 #include <kern/coalition.h>
 113 #include <kern/zalloc.h>
 114 #include <kern/kalloc.h>
 115 #include <kern/kern_cdata.h>
 116 #include <kern/processor.h>
 117 #include <kern/sched_prim.h>    /* for thread_wakeup */
 118 #include <kern/ipc_tt.h>
 119 #include <kern/host.h>
 120 #include <kern/clock.h>
 121 #include <kern/timer.h>
 122 #include <kern/assert.h>
 123 #include <kern/sync_lock.h>
 124 #include <kern/affinity.h>
 125 #include <kern/exc_resource.h>
 126 #include <kern/machine.h>
 127 #include <kern/policy_internal.h>
 128 #include <kern/restartable.h>
 129
 130 #include <corpses/task_corpse.h>
 131 #if CONFIG_TELEMETRY
 132 #include <kern/telemetry.h>
 133 #endif
 134
 135 #if MONOTONIC
 136 #include <kern/monotonic.h>
 137 #include <machine/monotonic.h>
 138 #endif /* MONOTONIC */
 139
 140 #include <os/log.h>
 141
 142 #include <vm/pmap.h>
 143 #include <vm/vm_map.h>
 144 #include <vm/vm_kern.h>         /* for kernel_map, ipc_kernel_map */
 145 #include <vm/vm_pageout.h>
 146 #include <vm/vm_protos.h>
 147 #include <vm/vm_purgeable_internal.h>
 148 #include <vm/vm_compressor_pager.h>
 149
 150 #include <sys/resource.h>
 151 #include <sys/signalvar.h> /* for coredump */
 152 #include <sys/bsdtask_info.h>
 153 /*
 154  * Exported interfaces
 155  */
 156
 157 #include <mach/task_server.h>
 158 #include <mach/mach_host_server.h>
 159 #include <mach/host_security_server.h>
 160 #include <mach/mach_port_server.h>
 161
 162 #include <vm/vm_shared_region.h>
 163
 164 #include <libkern/OSDebug.h>
 165 #include <libkern/OSAtomic.h>
 166 #include <libkern/section_keywords.h>
 167
 168 #include <mach-o/loader.h>
 169
 170 #if CONFIG_ATM
 171 #include <atm/atm_internal.h>
 172 #endif
 173
 174 #include <kern/sfi.h>           /* picks up ledger.h */
 175
 176 #if CONFIG_MACF
 177 #include <security/mac_mach_internal.h>
 178 #endif
 179
 180 #if KPERF
 181 extern int kpc_force_all_ctrs(task_t, int);
 182 #endif
 183
 184 task_t                  kernel_task;
 185 zone_t                  task_zone;
 186 lck_attr_t      task_lck_attr;
 187 lck_grp_t       task_lck_grp;
 188 lck_grp_attr_t  task_lck_grp_attr;
 189
 190 extern int exc_via_corpse_forking;
 191 extern int corpse_for_fatal_memkill;
 192 extern boolean_t proc_send_synchronous_EXC_RESOURCE(void *p);
 193
 194 /* Flag set by core audio when audio is playing. Used to stifle EXC_RESOURCE generation when active. */
 195 int audio_active = 0;
 196
 197 zinfo_usage_store_t tasks_tkm_private;
 198 zinfo_usage_store_t tasks_tkm_shared;
 199
 200 /* A container to accumulate statistics for expired tasks */
 201 expired_task_statistics_t               dead_task_statistics;
 202 lck_spin_t              dead_task_statistics_lock;
 203
 204 ledger_template_t task_ledger_template = NULL;
 205
 206 SECURITY_READ_ONLY_LATE(struct _task_ledger_indices) task_ledgers __attribute__((used)) =
 207 {.cpu_time = -1,
 208  .tkm_private = -1,
 209  .tkm_shared = -1,
 210  .phys_mem = -1,
 211  .wired_mem = -1,
 212  .internal = -1,
 213  .iokit_mapped = -1,
 214  .alternate_accounting = -1,
 215  .alternate_accounting_compressed = -1,
 216  .page_table = -1,
 217  .phys_footprint = -1,
 218  .internal_compressed = -1,
 219  .purgeable_volatile = -1,
 220  .purgeable_nonvolatile = -1,
 221  .purgeable_volatile_compressed = -1,
 222  .purgeable_nonvolatile_compressed = -1,
 223  .tagged_nofootprint = -1,
 224  .tagged_footprint = -1,
 225  .tagged_nofootprint_compressed = -1,
 226  .tagged_footprint_compressed = -1,
 227  .network_volatile = -1,
 228  .network_nonvolatile = -1,
 229  .network_volatile_compressed = -1,
 230  .network_nonvolatile_compressed = -1,
 231  .media_nofootprint = -1,
 232  .media_footprint = -1,
 233  .media_nofootprint_compressed = -1,
 234  .media_footprint_compressed = -1,
 235  .graphics_nofootprint = -1,
 236  .graphics_footprint = -1,
 237  .graphics_nofootprint_compressed = -1,
 238  .graphics_footprint_compressed = -1,
 239  .neural_nofootprint = -1,
 240  .neural_footprint = -1,
 241  .neural_nofootprint_compressed = -1,
 242  .neural_footprint_compressed = -1,
 243  .platform_idle_wakeups = -1,
 244  .interrupt_wakeups = -1,
 245 #if !CONFIG_EMBEDDED
 246  .sfi_wait_times = { 0 /* initialized at runtime */},
 247 #endif /* !CONFIG_EMBEDDED */
 248  .cpu_time_billed_to_me = -1,
 249  .cpu_time_billed_to_others = -1,
 250  .physical_writes = -1,
 251  .logical_writes = -1,
 252  .logical_writes_to_external = -1,
 253 #if DEBUG || DEVELOPMENT
 254  .pages_grabbed = -1,
 255  .pages_grabbed_kern = -1,
 256  .pages_grabbed_iopl = -1,
 257  .pages_grabbed_upl = -1,
 258 #endif
 259  .energy_billed_to_me = -1,
 260  .energy_billed_to_others = -1};
 261
 262 /* System sleep state */
 263 boolean_t tasks_suspend_state;
 264
 265
 266 void init_task_ledgers(void);
 267 void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1);
 268 void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1);
 269 void task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1);
 270 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void);
 271 void __attribute__((noinline)) PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, boolean_t is_fatal);
 272 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor);
 273
 274 kern_return_t task_suspend_internal(task_t);
 275 kern_return_t task_resume_internal(task_t);
 276 static kern_return_t task_start_halt_locked(task_t task, boolean_t should_mark_corpse);
 277
 278 extern kern_return_t iokit_task_terminate(task_t task);
 279 extern void          iokit_task_app_suspended_changed(task_t task);
 280
 281 extern kern_return_t exception_deliver(thread_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t, struct exception_action *, lck_mtx_t *);
 282 extern void bsd_copythreadname(void *dst_uth, void *src_uth);
 283 extern kern_return_t thread_resume(thread_t thread);
 284
 285 // Warn tasks when they hit 80% of their memory limit.
 286 #define PHYS_FOOTPRINT_WARNING_LEVEL 80
 287
 288 #define TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT              150 /* wakeups per second */
 289 #define TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL   300 /* in seconds. */
 290
 291 /*
 292  * Level (in terms of percentage of the limit) at which the wakeups monitor triggers telemetry.
 293  *
 294  * (ie when the task's wakeups rate exceeds 70% of the limit, start taking user
 295  *  stacktraces, aka micro-stackshots)
 296  */
 297 #define TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER        70
 298
 299 int task_wakeups_monitor_interval; /* In seconds. Time period over which wakeups rate is observed */
 300 int task_wakeups_monitor_rate;     /* In hz. Maximum allowable wakeups per task before EXC_RESOURCE is sent */
 301
 302 int task_wakeups_monitor_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
 303
 304 int disable_exc_resource; /* Global override to supress EXC_RESOURCE for resource monitor violations. */
 305
 306 ledger_amount_t max_task_footprint = 0;  /* Per-task limit on physical memory consumption in bytes     */
 307 int max_task_footprint_warning_level = 0;  /* Per-task limit warning percentage */
 308 int max_task_footprint_mb = 0;  /* Per-task limit on physical memory consumption in megabytes */
 309
 310 /* I/O Monitor Limits */
 311 #define IOMON_DEFAULT_LIMIT                     (20480ull)      /* MB of logical/physical I/O */
 312 #define IOMON_DEFAULT_INTERVAL                  (86400ull)      /* in seconds */
 313
 314 uint64_t task_iomon_limit_mb;           /* Per-task I/O monitor limit in MBs */
 315 uint64_t task_iomon_interval_secs;      /* Per-task I/O monitor interval in secs */
 316
 317 #define IO_TELEMETRY_DEFAULT_LIMIT              (10ll * 1024ll * 1024ll)
 318 int64_t io_telemetry_limit;                     /* Threshold to take a microstackshot (0 indicated I/O telemetry is turned off) */
 319 int64_t global_logical_writes_count = 0;        /* Global count for logical writes */
 320 int64_t global_logical_writes_to_external_count = 0;        /* Global count for logical writes to external storage*/
 321 static boolean_t global_update_logical_writes(int64_t, int64_t*);
 322
 323 #define TASK_MAX_THREAD_LIMIT 256
 324
 325 #if MACH_ASSERT
 326 int pmap_ledgers_panic = 1;
 327 int pmap_ledgers_panic_leeway = 3;
 328 #endif /* MACH_ASSERT */
 329
 330 int task_max = CONFIG_TASK_MAX; /* Max number of tasks */
 331
 332 #if CONFIG_COREDUMP
 333 int hwm_user_cores = 0; /* high watermark violations generate user core files */
 334 #endif
 335
 336 #ifdef MACH_BSD
 337 extern uint32_t proc_platform(struct proc *);
 338 extern uint32_t proc_sdk(struct proc *);
 339 extern void     proc_getexecutableuuid(void *, unsigned char *, unsigned long);
 340 extern int      proc_pid(struct proc *p);
 341 extern int      proc_selfpid(void);
 342 extern struct proc *current_proc(void);
 343 extern char     *proc_name_address(struct proc *p);
 344 extern uint64_t get_dispatchqueue_offset_from_proc(void *);
 345 extern int kevent_proc_copy_uptrs(void *proc, uint64_t *buf, int bufsize);
 346 extern void workq_proc_suspended(struct proc *p);
 347 extern void workq_proc_resumed(struct proc *p);
 348
 349 #if CONFIG_MEMORYSTATUS
 350 extern void     proc_memstat_terminated(struct proc* p, boolean_t set);
 351 extern void     memorystatus_on_ledger_footprint_exceeded(int warning, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal);
 352 extern void     memorystatus_log_exception(const int max_footprint_mb, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal);
 353 extern boolean_t memorystatus_allowed_vm_map_fork(task_t task);
 354 extern uint64_t  memorystatus_available_memory_internal(proc_t p);
 355
 356 #if DEVELOPMENT || DEBUG
 357 extern void memorystatus_abort_vm_map_fork(task_t);
 358 #endif
 359
 360 #endif /* CONFIG_MEMORYSTATUS */
 361
 362 #endif /* MACH_BSD */
 363
 364 #if DEVELOPMENT || DEBUG
 365 int exc_resource_threads_enabled;
 366 #endif /* DEVELOPMENT || DEBUG */
 367
 368 #if (DEVELOPMENT || DEBUG)
 369 uint32_t task_exc_guard_default = TASK_EXC_GUARD_MP_DELIVER | TASK_EXC_GUARD_MP_ONCE | TASK_EXC_GUARD_MP_CORPSE |
 370     TASK_EXC_GUARD_VM_DELIVER | TASK_EXC_GUARD_VM_ONCE | TASK_EXC_GUARD_VM_CORPSE;
 371 #else
 372 uint32_t task_exc_guard_default = 0;
 373 #endif
 374
 375 /* Forwards */
 376
 377 static void task_hold_locked(task_t task);
 378 static void task_wait_locked(task_t task, boolean_t until_not_runnable);
 379 static void task_release_locked(task_t task);
 380
 381 static void task_synchronizer_destroy_all(task_t task);
 382 static os_ref_count_t
 383 task_add_turnstile_watchports_locked(
 384         task_t                      task,
 385         struct task_watchports      *watchports,
 386         struct task_watchport_elem  **previous_elem_array,
 387         ipc_port_t                  *portwatch_ports,
 388         uint32_t                    portwatch_count);
 389
 390 static os_ref_count_t
 391 task_remove_turnstile_watchports_locked(
 392         task_t                 task,
 393         struct task_watchports *watchports,
 394         ipc_port_t             *port_freelist);
 395
 396 static struct task_watchports *
 397 task_watchports_alloc_init(
 398         task_t        task,
 399         thread_t      thread,
 400         uint32_t      count);
 401
 402 static void
 403 task_watchports_deallocate(
 404         struct task_watchports *watchports);
 405
 406 void
 407 task_set_64bit(
 408         task_t task,
 409         boolean_t is_64bit,
 410         boolean_t is_64bit_data)
 411 {
 412 #if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
 413         thread_t thread;
 414 #endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */
 415
 416         task_lock(task);
 417
 418         /*
 419          * Switching to/from 64-bit address spaces
 420          */
 421         if (is_64bit) {
 422                 if (!task_has_64Bit_addr(task)) {
 423                         task_set_64Bit_addr(task);
 424                 }
 425         } else {
 426                 if (task_has_64Bit_addr(task)) {
 427                         task_clear_64Bit_addr(task);
 428                 }
 429         }
 430
 431         /*
 432          * Switching to/from 64-bit register state.
 433          */
 434         if (is_64bit_data) {
 435                 if (task_has_64Bit_data(task)) {
 436                         goto out;
 437                 }
 438
 439                 task_set_64Bit_data(task);
 440         } else {
 441                 if (!task_has_64Bit_data(task)) {
 442                         goto out;
 443                 }
 444
 445                 task_clear_64Bit_data(task);
 446         }
 447
 448         /* FIXME: On x86, the thread save state flavor can diverge from the
 449          * task's 64-bit feature flag due to the 32-bit/64-bit register save
 450          * state dichotomy. Since we can be pre-empted in this interval,
 451          * certain routines may observe the thread as being in an inconsistent
 452          * state with respect to its task's 64-bitness.
 453          */
 454
 455 #if defined(__x86_64__) || defined(__arm64__)
 456         queue_iterate(&task->threads, thread, thread_t, task_threads) {
 457                 thread_mtx_lock(thread);
 458                 machine_thread_switch_addrmode(thread);
 459                 thread_mtx_unlock(thread);
 460
 461 #if defined(__arm64__)
 462                 /* specifically, if running on H9 */
 463                 if (thread == current_thread()) {
 464                         uint64_t arg1, arg2;
 465                         int urgency;
 466                         spl_t spl = splsched();
 467                         /*
 468                          * This call tell that the current thread changed it's 32bitness.
 469                          * Other thread were no more on core when 32bitness was changed,
 470                          * but current_thread() is on core and the previous call to
 471                          * machine_thread_going_on_core() gave 32bitness which is now wrong.
 472                          *
 473                          * This is needed for bring-up, a different callback should be used
 474                          * in the future.
 475                          *
 476                          * TODO: Remove this callout when we no longer support 32-bit code on H9
 477                          */
 478                         thread_lock(thread);
 479                         urgency = thread_get_urgency(thread, &arg1, &arg2);
 480                         machine_thread_going_on_core(thread, urgency, 0, 0, mach_approximate_time());
 481                         thread_unlock(thread);
 482                         splx(spl);
 483                 }
 484 #endif /* defined(__arm64__) */
 485         }
 486 #endif /* defined(__x86_64__) || defined(__arm64__) */
 487
 488 out:
 489         task_unlock(task);
 490 }
 491
 492 boolean_t
 493 task_get_64bit_data(task_t task)
 494 {
 495         return task_has_64Bit_data(task);
 496 }
 497
 498 void
 499 task_set_platform_binary(
 500         task_t task,
 501         boolean_t is_platform)
 502 {
 503         task_lock(task);
 504         if (is_platform) {
 505                 task->t_flags |= TF_PLATFORM;
 506                 /* set exc guard default behavior for first-party code */
 507                 task->task_exc_guard = (task_exc_guard_default & TASK_EXC_GUARD_ALL);
 508         } else {
 509                 task->t_flags &= ~(TF_PLATFORM);
 510                 /* set exc guard default behavior for third-party code */
 511                 task->task_exc_guard = ((task_exc_guard_default >> TASK_EXC_GUARD_THIRD_PARTY_DEFAULT_SHIFT) & TASK_EXC_GUARD_ALL);
 512         }
 513         task_unlock(task);
 514 }
 515
 516 /*
 517  * Set or clear per-task TF_CA_CLIENT_WI flag according to specified argument.
 518  * Returns "false" if flag is already set, and "true" in other cases.
 519  */
 520 bool
 521 task_set_ca_client_wi(
 522         task_t task,
 523         boolean_t set_or_clear)
 524 {
 525         bool ret = true;
 526         task_lock(task);
 527         if (set_or_clear) {
 528                 /* Tasks can have only one CA_CLIENT work interval */
 529                 if (task->t_flags & TF_CA_CLIENT_WI) {
 530                         ret = false;
 531                 } else {
 532                         task->t_flags |= TF_CA_CLIENT_WI;
 533                 }
 534         } else {
 535                 task->t_flags &= ~TF_CA_CLIENT_WI;
 536         }
 537         task_unlock(task);
 538         return ret;
 539 }
 540
 541 void
 542 task_set_dyld_info(
 543         task_t task,
 544         mach_vm_address_t addr,
 545         mach_vm_size_t size)
 546 {
 547         task_lock(task);
 548         task->all_image_info_addr = addr;
 549         task->all_image_info_size = size;
 550         task_unlock(task);
 551 }
 552
 553 void
 554 task_set_mach_header_address(
 555         task_t task,
 556         mach_vm_address_t addr)
 557 {
 558         task_lock(task);
 559         task->mach_header_vm_address = addr;
 560         task_unlock(task);
 561 }
 562
 563 void
 564 task_atm_reset(__unused task_t task)
 565 {
 566 #if CONFIG_ATM
 567         if (task->atm_context != NULL) {
 568                 atm_task_descriptor_destroy(task->atm_context);
 569                 task->atm_context = NULL;
 570         }
 571 #endif
 572 }
 573
 574 void
 575 task_bank_reset(__unused task_t task)
 576 {
 577         if (task->bank_context != NULL) {
 578                 bank_task_destroy(task);
 579         }
 580 }
 581
 582 /*
 583  * NOTE: This should only be called when the P_LINTRANSIT
 584  *       flag is set (the proc_trans lock is held) on the
 585  *       proc associated with the task.
 586  */
 587 void
 588 task_bank_init(__unused task_t task)
 589 {
 590         if (task->bank_context != NULL) {
 591                 panic("Task bank init called with non null bank context for task: %p and bank_context: %p", task, task->bank_context);
 592         }
 593         bank_task_initialize(task);
 594 }
 595
 596 void
 597 task_set_did_exec_flag(task_t task)
 598 {
 599         task->t_procflags |= TPF_DID_EXEC;
 600 }
 601
 602 void
 603 task_clear_exec_copy_flag(task_t task)
 604 {
 605         task->t_procflags &= ~TPF_EXEC_COPY;
 606 }
 607
 608 event_t
 609 task_get_return_wait_event(task_t task)
 610 {
 611         return (event_t)&task->returnwait_inheritor;
 612 }
 613
 614 void
 615 task_clear_return_wait(task_t task, uint32_t flags)
 616 {
 617         if (flags & TCRW_CLEAR_INITIAL_WAIT) {
 618                 thread_wakeup(task_get_return_wait_event(task));
 619         }
 620
 621         if (flags & TCRW_CLEAR_FINAL_WAIT) {
 622                 is_write_lock(task->itk_space);
 623
 624                 task->t_returnwaitflags &= ~TRW_LRETURNWAIT;
 625                 task->returnwait_inheritor = NULL;
 626
 627                 if (task->t_returnwaitflags & TRW_LRETURNWAITER) {
 628                         struct turnstile *turnstile = turnstile_prepare((uintptr_t) task_get_return_wait_event(task),
 629                             NULL, TURNSTILE_NULL, TURNSTILE_ULOCK);
 630
 631                         waitq_wakeup64_all(&turnstile->ts_waitq,
 632                             CAST_EVENT64_T(task_get_return_wait_event(task)),
 633                             THREAD_AWAKENED, 0);
 634
 635                         turnstile_update_inheritor(turnstile, NULL,
 636                             TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD);
 637                         turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_HELD);
 638
 639                         turnstile_complete((uintptr_t) task_get_return_wait_event(task), NULL, NULL, TURNSTILE_ULOCK);
 640                         turnstile_cleanup();
 641                         task->t_returnwaitflags &= ~TRW_LRETURNWAITER;
 642                 }
 643                 is_write_unlock(task->itk_space);
 644         }
 645 }
 646
 647 void __attribute__((noreturn))
 648 task_wait_to_return(void)
 649 {
 650         task_t task = current_task();
 651
 652         is_write_lock(task->itk_space);
 653
 654         if (task->t_returnwaitflags & TRW_LRETURNWAIT) {
 655                 struct turnstile *turnstile = turnstile_prepare((uintptr_t) task_get_return_wait_event(task),
 656                     NULL, TURNSTILE_NULL, TURNSTILE_ULOCK);
 657
 658                 do {
 659                         task->t_returnwaitflags |= TRW_LRETURNWAITER;
 660                         turnstile_update_inheritor(turnstile, task->returnwait_inheritor,
 661                             (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));
 662
 663                         waitq_assert_wait64(&turnstile->ts_waitq,
 664                             CAST_EVENT64_T(task_get_return_wait_event(task)),
 665                             THREAD_UNINT, TIMEOUT_WAIT_FOREVER);
 666
 667                         is_write_unlock(task->itk_space);
 668
 669                         turnstile_update_inheritor_complete(turnstile, TURNSTILE_INTERLOCK_NOT_HELD);
 670
 671                         thread_block(THREAD_CONTINUE_NULL);
 672
 673                         is_write_lock(task->itk_space);
 674                 } while (task->t_returnwaitflags & TRW_LRETURNWAIT);
 675
 676                 turnstile_complete((uintptr_t) task_get_return_wait_event(task), NULL, NULL, TURNSTILE_ULOCK);
 677         }
 678
 679         is_write_unlock(task->itk_space);
 680         turnstile_cleanup();
 681
 682
 683 #if CONFIG_MACF
 684         /*
 685          * Before jumping to userspace and allowing this process to execute any code,
 686          * notify any interested parties.
 687          */
 688         mac_proc_notify_exec_complete(current_proc());
 689 #endif
 690
 691         thread_bootstrap_return();
 692 }
 693
 694 #ifdef CONFIG_32BIT_TELEMETRY
 695 boolean_t
 696 task_consume_32bit_log_flag(task_t task)
 697 {
 698         if ((task->t_procflags & TPF_LOG_32BIT_TELEMETRY) != 0) {
 699                 task->t_procflags &= ~TPF_LOG_32BIT_TELEMETRY;
 700                 return TRUE;
 701         } else {
 702                 return FALSE;
 703         }
 704 }
 705
 706 void
 707 task_set_32bit_log_flag(task_t task)
 708 {
 709         task->t_procflags |= TPF_LOG_32BIT_TELEMETRY;
 710 }
 711 #endif /* CONFIG_32BIT_TELEMETRY */
 712
 713 boolean_t
 714 task_is_exec_copy(task_t task)
 715 {
 716         return task_is_exec_copy_internal(task);
 717 }
 718
 719 boolean_t
 720 task_did_exec(task_t task)
 721 {
 722         return task_did_exec_internal(task);
 723 }
 724
 725 boolean_t
 726 task_is_active(task_t task)
 727 {
 728         return task->active;
 729 }
 730
 731 boolean_t
 732 task_is_halting(task_t task)
 733 {
 734         return task->halting;
 735 }
 736
 737 #if TASK_REFERENCE_LEAK_DEBUG
 738 #include <kern/btlog.h>
 739
 740 static btlog_t *task_ref_btlog;
 741 #define TASK_REF_OP_INCR        0x1
 742 #define TASK_REF_OP_DECR        0x2
 743
 744 #define TASK_REF_NUM_RECORDS    100000
 745 #define TASK_REF_BTDEPTH        7
 746
 747 void
 748 task_reference_internal(task_t task)
 749 {
 750         void *       bt[TASK_REF_BTDEPTH];
 751         int             numsaved = 0;
 752
 753         os_ref_retain(&task->ref_count);
 754
 755         numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
 756         btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_INCR,
 757             bt, numsaved);
 758 }
 759
 760 os_ref_count_t
 761 task_deallocate_internal(task_t task)
 762 {
 763         void *       bt[TASK_REF_BTDEPTH];
 764         int             numsaved = 0;
 765
 766         numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
 767         btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_DECR,
 768             bt, numsaved);
 769
 770         return os_ref_release(&task->ref_count);
 771 }
 772
 773 #endif /* TASK_REFERENCE_LEAK_DEBUG */
 774
 775 void
 776 task_init(void)
 777 {
 778         lck_grp_attr_setdefault(&task_lck_grp_attr);
 779         lck_grp_init(&task_lck_grp, "task", &task_lck_grp_attr);
 780         lck_attr_setdefault(&task_lck_attr);
 781         lck_mtx_init(&tasks_threads_lock, &task_lck_grp, &task_lck_attr);
 782         lck_mtx_init(&tasks_corpse_lock, &task_lck_grp, &task_lck_attr);
 783
 784         task_zone = zinit(
 785                 sizeof(struct task),
 786                 task_max * sizeof(struct task),
 787                 TASK_CHUNK * sizeof(struct task),
 788                 "tasks");
 789
 790         zone_change(task_zone, Z_NOENCRYPT, TRUE);
 791
 792 #if CONFIG_EMBEDDED
 793         task_watch_init();
 794 #endif /* CONFIG_EMBEDDED */
 795
 796         /*
 797          * Configure per-task memory limit.
 798          * The boot-arg is interpreted as Megabytes,
 799          * and takes precedence over the device tree.
 800          * Setting the boot-arg to 0 disables task limits.
 801          */
 802         if (!PE_parse_boot_argn("max_task_pmem", &max_task_footprint_mb,
 803             sizeof(max_task_footprint_mb))) {
 804                 /*
 805                  * No limit was found in boot-args, so go look in the device tree.
 806                  */
 807                 if (!PE_get_default("kern.max_task_pmem", &max_task_footprint_mb,
 808                     sizeof(max_task_footprint_mb))) {
 809                         /*
 810                          * No limit was found in device tree.
 811                          */
 812                         max_task_footprint_mb = 0;
 813                 }
 814         }
 815
 816         if (max_task_footprint_mb != 0) {
 817 #if CONFIG_MEMORYSTATUS
 818                 if (max_task_footprint_mb < 50) {
 819                         printf("Warning: max_task_pmem %d below minimum.\n",
 820                             max_task_footprint_mb);
 821                         max_task_footprint_mb = 50;
 822                 }
 823                 printf("Limiting task physical memory footprint to %d MB\n",
 824                     max_task_footprint_mb);
 825
 826                 max_task_footprint = (ledger_amount_t)max_task_footprint_mb * 1024 * 1024; // Convert MB to bytes
 827
 828                 /*
 829                  * Configure the per-task memory limit warning level.
 830                  * This is computed as a percentage.
 831                  */
 832                 max_task_footprint_warning_level = 0;
 833
 834                 if (max_mem < 0x40000000) {
 835                         /*
 836                          * On devices with < 1GB of memory:
 837                          *    -- set warnings to 50MB below the per-task limit.
 838                          */
 839                         if (max_task_footprint_mb > 50) {
 840                                 max_task_footprint_warning_level = ((max_task_footprint_mb - 50) * 100) / max_task_footprint_mb;
 841                         }
 842                 } else {
 843                         /*
 844                          * On devices with >= 1GB of memory:
 845                          *    -- set warnings to 100MB below the per-task limit.
 846                          */
 847                         if (max_task_footprint_mb > 100) {
 848                                 max_task_footprint_warning_level = ((max_task_footprint_mb - 100) * 100) / max_task_footprint_mb;
 849                         }
 850                 }
 851
 852                 /*
 853                  * Never allow warning level to land below the default.
 854                  */
 855                 if (max_task_footprint_warning_level < PHYS_FOOTPRINT_WARNING_LEVEL) {
 856                         max_task_footprint_warning_level = PHYS_FOOTPRINT_WARNING_LEVEL;
 857                 }
 858
 859                 printf("Limiting task physical memory warning to %d%%\n", max_task_footprint_warning_level);
 860
 861 #else
 862                 printf("Warning: max_task_pmem specified, but jetsam not configured; ignoring.\n");
 863 #endif /* CONFIG_MEMORYSTATUS */
 864         }
 865
 866 #if DEVELOPMENT || DEBUG
 867         if (!PE_parse_boot_argn("exc_resource_threads",
 868             &exc_resource_threads_enabled,
 869             sizeof(exc_resource_threads_enabled))) {
 870                 exc_resource_threads_enabled = 1;
 871         }
 872         PE_parse_boot_argn("task_exc_guard_default",
 873             &task_exc_guard_default,
 874             sizeof(task_exc_guard_default));
 875 #endif /* DEVELOPMENT || DEBUG */
 876
 877 #if CONFIG_COREDUMP
 878         if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores,
 879             sizeof(hwm_user_cores))) {
 880                 hwm_user_cores = 0;
 881         }
 882 #endif
 883
 884         proc_init_cpumon_params();
 885
 886         if (!PE_parse_boot_argn("task_wakeups_monitor_rate", &task_wakeups_monitor_rate, sizeof(task_wakeups_monitor_rate))) {
 887                 task_wakeups_monitor_rate = TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT;
 888         }
 889
 890         if (!PE_parse_boot_argn("task_wakeups_monitor_interval", &task_wakeups_monitor_interval, sizeof(task_wakeups_monitor_interval))) {
 891                 task_wakeups_monitor_interval = TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL;
 892         }
 893
 894         if (!PE_parse_boot_argn("task_wakeups_monitor_ustackshots_trigger_pct", &task_wakeups_monitor_ustackshots_trigger_pct,
 895             sizeof(task_wakeups_monitor_ustackshots_trigger_pct))) {
 896                 task_wakeups_monitor_ustackshots_trigger_pct = TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER;
 897         }
 898
 899         if (!PE_parse_boot_argn("disable_exc_resource", &disable_exc_resource,
 900             sizeof(disable_exc_resource))) {
 901                 disable_exc_resource = 0;
 902         }
 903
 904         if (!PE_parse_boot_argn("task_iomon_limit_mb", &task_iomon_limit_mb, sizeof(task_iomon_limit_mb))) {
 905                 task_iomon_limit_mb = IOMON_DEFAULT_LIMIT;
 906         }
 907
 908         if (!PE_parse_boot_argn("task_iomon_interval_secs", &task_iomon_interval_secs, sizeof(task_iomon_interval_secs))) {
 909                 task_iomon_interval_secs = IOMON_DEFAULT_INTERVAL;
 910         }
 911
 912         if (!PE_parse_boot_argn("io_telemetry_limit", &io_telemetry_limit, sizeof(io_telemetry_limit))) {
 913                 io_telemetry_limit = IO_TELEMETRY_DEFAULT_LIMIT;
 914         }
 915
 916 /*
 917  * If we have coalitions, coalition_init() will call init_task_ledgers() as it
 918  * sets up the ledgers for the default coalition. If we don't have coalitions,
 919  * then we have to call it now.
 920  */
 921 #if CONFIG_COALITIONS
 922         assert(task_ledger_template);
 923 #else /* CONFIG_COALITIONS */
 924         init_task_ledgers();
 925 #endif /* CONFIG_COALITIONS */
 926
 927 #if TASK_REFERENCE_LEAK_DEBUG
 928         task_ref_btlog = btlog_create(TASK_REF_NUM_RECORDS, TASK_REF_BTDEPTH, TRUE /* caller_will_remove_entries_for_element? */);
 929         assert(task_ref_btlog);
 930 #endif
 931
 932         /*
 933          * Create the kernel task as the first task.
 934          */
 935 #ifdef __LP64__
 936         if (task_create_internal(TASK_NULL, NULL, FALSE, TRUE, TRUE, TF_NONE, TPF_NONE, TWF_NONE, &kernel_task) != KERN_SUCCESS)
 937 #else
 938         if (task_create_internal(TASK_NULL, NULL, FALSE, FALSE, FALSE, TF_NONE, TPF_NONE, TWF_NONE, &kernel_task) != KERN_SUCCESS)
 939 #endif
 940         { panic("task_init\n");}
 941
 942 #if defined(HAS_APPLE_PAC)
 943         kernel_task->rop_pid = KERNEL_ROP_ID;
 944         // kernel_task never runs at EL0, but machine_thread_state_convert_from/to_user() relies on
 945         // disable_user_jop to be false for kernel threads (e.g. in exception delivery on thread_exception_daemon)
 946         ml_task_set_disable_user_jop(kernel_task, FALSE);
 947 #endif
 948
 949         vm_map_deallocate(kernel_task->map);
 950         kernel_task->map = kernel_map;
 951         lck_spin_init(&dead_task_statistics_lock, &task_lck_grp, &task_lck_attr);
 952 }
 953
 954 /*
 955  * Create a task running in the kernel address space.  It may
 956  * have its own map of size mem_size and may have ipc privileges.
 957  */
 958 kern_return_t
 959 kernel_task_create(
 960         __unused task_t         parent_task,
 961         __unused vm_offset_t            map_base,
 962         __unused vm_size_t              map_size,
 963         __unused task_t         *child_task)
 964 {
 965         return KERN_INVALID_ARGUMENT;
 966 }
 967
 968 kern_return_t
 969 task_create(
 970         task_t                          parent_task,
 971         __unused ledger_port_array_t    ledger_ports,
 972         __unused mach_msg_type_number_t num_ledger_ports,
 973         __unused boolean_t              inherit_memory,
 974         __unused task_t                 *child_task)    /* OUT */
 975 {
 976         if (parent_task == TASK_NULL) {
 977                 return KERN_INVALID_ARGUMENT;
 978         }
 979
 980         /*
 981          * No longer supported: too many calls assume that a task has a valid
 982          * process attached.
 983          */
 984         return KERN_FAILURE;
 985 }
 986
 987 kern_return_t
 988 host_security_create_task_token(
 989         host_security_t                 host_security,
 990         task_t                          parent_task,
 991         __unused security_token_t       sec_token,
 992         __unused audit_token_t          audit_token,
 993         __unused host_priv_t            host_priv,
 994         __unused ledger_port_array_t    ledger_ports,
 995         __unused mach_msg_type_number_t num_ledger_ports,
 996         __unused boolean_t              inherit_memory,
 997         __unused task_t                 *child_task)    /* OUT */
 998 {
 999         if (parent_task == TASK_NULL) {
1000                 return KERN_INVALID_ARGUMENT;
1001         }
1002
1003         if (host_security == HOST_NULL) {
1004                 return KERN_INVALID_SECURITY;
1005         }
1006
1007         /*
1008          * No longer supported.
1009          */
1010         return KERN_FAILURE;
1011 }
1012
1013 /*
1014  * Task ledgers
1015  * ------------
1016  *
1017  * phys_footprint
1018  *   Physical footprint: This is the sum of:
1019  *     + (internal - alternate_accounting)
1020  *     + (internal_compressed - alternate_accounting_compressed)
1021  *     + iokit_mapped
1022  *     + purgeable_nonvolatile
1023  *     + purgeable_nonvolatile_compressed
1024  *     + page_table
1025  *
1026  * internal
1027  *   The task's anonymous memory, which on iOS is always resident.
1028  *
1029  * internal_compressed
1030  *   Amount of this task's internal memory which is held by the compressor.
1031  *   Such memory is no longer actually resident for the task [i.e., resident in its pmap],
1032  *   and could be either decompressed back into memory, or paged out to storage, depending
1033  *   on our implementation.
1034  *
1035  * iokit_mapped
1036  *   IOKit mappings: The total size of all IOKit mappings in this task, regardless of
1037  *    clean/dirty or internal/external state].
1038  *
1039  * alternate_accounting
1040  *   The number of internal dirty pages which are part of IOKit mappings. By definition, these pages
1041  *   are counted in both internal *and* iokit_mapped, so we must subtract them from the total to avoid
1042  *   double counting.
1043  *
1044  * pages_grabbed
1045  *   pages_grabbed counts all page grabs in a task.  It is also broken out into three subtypes
1046  *   which track UPL, IOPL and Kernel page grabs.
1047  */
1048 void
1049 init_task_ledgers(void)
1050 {
1051         ledger_template_t t;
1052
1053         assert(task_ledger_template == NULL);
1054         assert(kernel_task == TASK_NULL);
1055
1056 #if MACH_ASSERT
1057         PE_parse_boot_argn("pmap_ledgers_panic",
1058             &pmap_ledgers_panic,
1059             sizeof(pmap_ledgers_panic));
1060         PE_parse_boot_argn("pmap_ledgers_panic_leeway",
1061             &pmap_ledgers_panic_leeway,
1062             sizeof(pmap_ledgers_panic_leeway));
1063 #endif /* MACH_ASSERT */
1064
1065         if ((t = ledger_template_create("Per-task ledger")) == NULL) {
1066                 panic("couldn't create task ledger template");
1067         }
1068
1069         task_ledgers.cpu_time = ledger_entry_add(t, "cpu_time", "sched", "ns");
1070         task_ledgers.tkm_private = ledger_entry_add(t, "tkm_private",
1071             "physmem", "bytes");
1072         task_ledgers.tkm_shared = ledger_entry_add(t, "tkm_shared", "physmem",
1073             "bytes");
1074         task_ledgers.phys_mem = ledger_entry_add(t, "phys_mem", "physmem",
1075             "bytes");
1076         task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem",
1077             "bytes");
1078         task_ledgers.internal = ledger_entry_add(t, "internal", "physmem",
1079             "bytes");
1080         task_ledgers.iokit_mapped = ledger_entry_add(t, "iokit_mapped", "mappings",
1081             "bytes");
1082         task_ledgers.alternate_accounting = ledger_entry_add(t, "alternate_accounting", "physmem",
1083             "bytes");
1084         task_ledgers.alternate_accounting_compressed = ledger_entry_add(t, "alternate_accounting_compressed", "physmem",
1085             "bytes");
1086         task_ledgers.page_table = ledger_entry_add(t, "page_table", "physmem",
1087             "bytes");
1088         task_ledgers.phys_footprint = ledger_entry_add(t, "phys_footprint", "physmem",
1089             "bytes");
1090         task_ledgers.internal_compressed = ledger_entry_add(t, "internal_compressed", "physmem",
1091             "bytes");
1092         task_ledgers.purgeable_volatile = ledger_entry_add(t, "purgeable_volatile", "physmem", "bytes");
1093         task_ledgers.purgeable_nonvolatile = ledger_entry_add(t, "purgeable_nonvolatile", "physmem", "bytes");
1094         task_ledgers.purgeable_volatile_compressed = ledger_entry_add(t, "purgeable_volatile_compress", "physmem", "bytes");
1095         task_ledgers.purgeable_nonvolatile_compressed = ledger_entry_add(t, "purgeable_nonvolatile_compress", "physmem", "bytes");
1096 #if DEBUG || DEVELOPMENT
1097         task_ledgers.pages_grabbed = ledger_entry_add(t, "pages_grabbed", "physmem", "count");
1098         task_ledgers.pages_grabbed_kern = ledger_entry_add(t, "pages_grabbed_kern", "physmem", "count");
1099         task_ledgers.pages_grabbed_iopl = ledger_entry_add(t, "pages_grabbed_iopl", "physmem", "count");
1100         task_ledgers.pages_grabbed_upl = ledger_entry_add(t, "pages_grabbed_upl", "physmem", "count");
1101 #endif
1102         task_ledgers.tagged_nofootprint = ledger_entry_add(t, "tagged_nofootprint", "physmem", "bytes");
1103         task_ledgers.tagged_footprint = ledger_entry_add(t, "tagged_footprint", "physmem", "bytes");
1104         task_ledgers.tagged_nofootprint_compressed = ledger_entry_add(t, "tagged_nofootprint_compressed", "physmem", "bytes");
1105         task_ledgers.tagged_footprint_compressed = ledger_entry_add(t, "tagged_footprint_compressed", "physmem", "bytes");
1106         task_ledgers.network_volatile = ledger_entry_add(t, "network_volatile", "physmem", "bytes");
1107         task_ledgers.network_nonvolatile = ledger_entry_add(t, "network_nonvolatile", "physmem", "bytes");
1108         task_ledgers.network_volatile_compressed = ledger_entry_add(t, "network_volatile_compressed", "physmem", "bytes");
1109         task_ledgers.network_nonvolatile_compressed = ledger_entry_add(t, "network_nonvolatile_compressed", "physmem", "bytes");
1110         task_ledgers.media_nofootprint = ledger_entry_add(t, "media_nofootprint", "physmem", "bytes");
1111         task_ledgers.media_footprint = ledger_entry_add(t, "media_footprint", "physmem", "bytes");
1112         task_ledgers.media_nofootprint_compressed = ledger_entry_add(t, "media_nofootprint_compressed", "physmem", "bytes");
1113         task_ledgers.media_footprint_compressed = ledger_entry_add(t, "media_footprint_compressed", "physmem", "bytes");
1114         task_ledgers.graphics_nofootprint = ledger_entry_add(t, "graphics_nofootprint", "physmem", "bytes");
1115         task_ledgers.graphics_footprint = ledger_entry_add(t, "graphics_footprint", "physmem", "bytes");
1116         task_ledgers.graphics_nofootprint_compressed = ledger_entry_add(t, "graphics_nofootprint_compressed", "physmem", "bytes");
1117         task_ledgers.graphics_footprint_compressed = ledger_entry_add(t, "graphics_footprint_compressed", "physmem", "bytes");
1118         task_ledgers.neural_nofootprint = ledger_entry_add(t, "neural_nofootprint", "physmem", "bytes");
1119         task_ledgers.neural_footprint = ledger_entry_add(t, "neural_footprint", "physmem", "bytes");
1120         task_ledgers.neural_nofootprint_compressed = ledger_entry_add(t, "neural_nofootprint_compressed", "physmem", "bytes");
1121         task_ledgers.neural_footprint_compressed = ledger_entry_add(t, "neural_footprint_compressed", "physmem", "bytes");
1122
1123
1124         task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power",
1125             "count");
1126         task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power",
1127             "count");
1128
1129 #if CONFIG_SCHED_SFI
1130         sfi_class_id_t class_id, ledger_alias;
1131         for (class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
1132                 task_ledgers.sfi_wait_times[class_id] = -1;
1133         }
1134
1135         /* don't account for UNSPECIFIED */
1136         for (class_id = SFI_CLASS_UNSPECIFIED + 1; class_id < MAX_SFI_CLASS_ID; class_id++) {
1137                 ledger_alias = sfi_get_ledger_alias_for_class(class_id);
1138                 if (ledger_alias != SFI_CLASS_UNSPECIFIED) {
1139                         /* Check to see if alias has been registered yet */
1140                         if (task_ledgers.sfi_wait_times[ledger_alias] != -1) {
1141                                 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias];
1142                         } else {
1143                                 /* Otherwise, initialize it first */
1144                                 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias] = sfi_ledger_entry_add(t, ledger_alias);
1145                         }
1146                 } else {
1147                         task_ledgers.sfi_wait_times[class_id] = sfi_ledger_entry_add(t, class_id);
1148                 }
1149
1150                 if (task_ledgers.sfi_wait_times[class_id] < 0) {
1151                         panic("couldn't create entries for task ledger template for SFI class 0x%x", class_id);
1152                 }
1153         }
1154
1155         assert(task_ledgers.sfi_wait_times[MAX_SFI_CLASS_ID - 1] != -1);
1156 #endif /* CONFIG_SCHED_SFI */
1157
1158         task_ledgers.cpu_time_billed_to_me = ledger_entry_add(t, "cpu_time_billed_to_me", "sched", "ns");
1159         task_ledgers.cpu_time_billed_to_others = ledger_entry_add(t, "cpu_time_billed_to_others", "sched", "ns");
1160         task_ledgers.physical_writes = ledger_entry_add(t, "physical_writes", "res", "bytes");
1161         task_ledgers.logical_writes = ledger_entry_add(t, "logical_writes", "res", "bytes");
1162         task_ledgers.logical_writes_to_external = ledger_entry_add(t, "logical_writes_to_external", "res", "bytes");
1163         task_ledgers.energy_billed_to_me = ledger_entry_add(t, "energy_billed_to_me", "power", "nj");
1164         task_ledgers.energy_billed_to_others = ledger_entry_add(t, "energy_billed_to_others", "power", "nj");
1165
1166         if ((task_ledgers.cpu_time < 0) ||
1167             (task_ledgers.tkm_private < 0) ||
1168             (task_ledgers.tkm_shared < 0) ||
1169             (task_ledgers.phys_mem < 0) ||
1170             (task_ledgers.wired_mem < 0) ||
1171             (task_ledgers.internal < 0) ||
1172             (task_ledgers.iokit_mapped < 0) ||
1173             (task_ledgers.alternate_accounting < 0) ||
1174             (task_ledgers.alternate_accounting_compressed < 0) ||
1175             (task_ledgers.page_table < 0) ||
1176             (task_ledgers.phys_footprint < 0) ||
1177             (task_ledgers.internal_compressed < 0) ||
1178             (task_ledgers.purgeable_volatile < 0) ||
1179             (task_ledgers.purgeable_nonvolatile < 0) ||
1180             (task_ledgers.purgeable_volatile_compressed < 0) ||
1181             (task_ledgers.purgeable_nonvolatile_compressed < 0) ||
1182             (task_ledgers.tagged_nofootprint < 0) ||
1183             (task_ledgers.tagged_footprint < 0) ||
1184             (task_ledgers.tagged_nofootprint_compressed < 0) ||
1185             (task_ledgers.tagged_footprint_compressed < 0) ||
1186             (task_ledgers.network_volatile < 0) ||
1187             (task_ledgers.network_nonvolatile < 0) ||
1188             (task_ledgers.network_volatile_compressed < 0) ||
1189             (task_ledgers.network_nonvolatile_compressed < 0) ||
1190             (task_ledgers.media_nofootprint < 0) ||
1191             (task_ledgers.media_footprint < 0) ||
1192             (task_ledgers.media_nofootprint_compressed < 0) ||
1193             (task_ledgers.media_footprint_compressed < 0) ||
1194             (task_ledgers.graphics_nofootprint < 0) ||
1195             (task_ledgers.graphics_footprint < 0) ||
1196             (task_ledgers.graphics_nofootprint_compressed < 0) ||
1197             (task_ledgers.graphics_footprint_compressed < 0) ||
1198             (task_ledgers.neural_nofootprint < 0) ||
1199             (task_ledgers.neural_footprint < 0) ||
1200             (task_ledgers.neural_nofootprint_compressed < 0) ||
1201             (task_ledgers.neural_footprint_compressed < 0) ||
1202             (task_ledgers.platform_idle_wakeups < 0) ||
1203             (task_ledgers.interrupt_wakeups < 0) ||
1204             (task_ledgers.cpu_time_billed_to_me < 0) || (task_ledgers.cpu_time_billed_to_others < 0) ||
1205             (task_ledgers.physical_writes < 0) ||
1206             (task_ledgers.logical_writes < 0) ||
1207             (task_ledgers.logical_writes_to_external < 0) ||
1208             (task_ledgers.energy_billed_to_me < 0) ||
1209             (task_ledgers.energy_billed_to_others < 0)
1210             ) {
1211                 panic("couldn't create entries for task ledger template");
1212         }
1213
1214         ledger_track_credit_only(t, task_ledgers.phys_footprint);
1215         ledger_track_credit_only(t, task_ledgers.page_table);
1216         ledger_track_credit_only(t, task_ledgers.internal);
1217         ledger_track_credit_only(t, task_ledgers.internal_compressed);
1218         ledger_track_credit_only(t, task_ledgers.iokit_mapped);
1219         ledger_track_credit_only(t, task_ledgers.alternate_accounting);
1220         ledger_track_credit_only(t, task_ledgers.alternate_accounting_compressed);
1221         ledger_track_credit_only(t, task_ledgers.purgeable_volatile);
1222         ledger_track_credit_only(t, task_ledgers.purgeable_nonvolatile);
1223         ledger_track_credit_only(t, task_ledgers.purgeable_volatile_compressed);
1224         ledger_track_credit_only(t, task_ledgers.purgeable_nonvolatile_compressed);
1225 #if DEBUG || DEVELOPMENT
1226         ledger_track_credit_only(t, task_ledgers.pages_grabbed);
1227         ledger_track_credit_only(t, task_ledgers.pages_grabbed_kern);
1228         ledger_track_credit_only(t, task_ledgers.pages_grabbed_iopl);
1229         ledger_track_credit_only(t, task_ledgers.pages_grabbed_upl);
1230 #endif
1231         ledger_track_credit_only(t, task_ledgers.tagged_nofootprint);
1232         ledger_track_credit_only(t, task_ledgers.tagged_footprint);
1233         ledger_track_credit_only(t, task_ledgers.tagged_nofootprint_compressed);
1234         ledger_track_credit_only(t, task_ledgers.tagged_footprint_compressed);
1235         ledger_track_credit_only(t, task_ledgers.network_volatile);
1236         ledger_track_credit_only(t, task_ledgers.network_nonvolatile);
1237         ledger_track_credit_only(t, task_ledgers.network_volatile_compressed);
1238         ledger_track_credit_only(t, task_ledgers.network_nonvolatile_compressed);
1239         ledger_track_credit_only(t, task_ledgers.media_nofootprint);
1240         ledger_track_credit_only(t, task_ledgers.media_footprint);
1241         ledger_track_credit_only(t, task_ledgers.media_nofootprint_compressed);
1242         ledger_track_credit_only(t, task_ledgers.media_footprint_compressed);
1243         ledger_track_credit_only(t, task_ledgers.graphics_nofootprint);
1244         ledger_track_credit_only(t, task_ledgers.graphics_footprint);
1245         ledger_track_credit_only(t, task_ledgers.graphics_nofootprint_compressed);
1246         ledger_track_credit_only(t, task_ledgers.graphics_footprint_compressed);
1247         ledger_track_credit_only(t, task_ledgers.neural_nofootprint);
1248         ledger_track_credit_only(t, task_ledgers.neural_footprint);
1249         ledger_track_credit_only(t, task_ledgers.neural_nofootprint_compressed);
1250         ledger_track_credit_only(t, task_ledgers.neural_footprint_compressed);
1251
1252         ledger_track_maximum(t, task_ledgers.phys_footprint, 60);
1253 #if MACH_ASSERT
1254         if (pmap_ledgers_panic) {
1255                 ledger_panic_on_negative(t, task_ledgers.phys_footprint);
1256                 ledger_panic_on_negative(t, task_ledgers.page_table);
1257                 ledger_panic_on_negative(t, task_ledgers.internal);
1258                 ledger_panic_on_negative(t, task_ledgers.internal_compressed);
1259                 ledger_panic_on_negative(t, task_ledgers.iokit_mapped);
1260                 ledger_panic_on_negative(t, task_ledgers.alternate_accounting);
1261                 ledger_panic_on_negative(t, task_ledgers.alternate_accounting_compressed);
1262                 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile);
1263                 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile);
1264                 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile_compressed);
1265                 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile_compressed);
1266
1267                 ledger_panic_on_negative(t, task_ledgers.tagged_nofootprint);
1268                 ledger_panic_on_negative(t, task_ledgers.tagged_footprint);
1269                 ledger_panic_on_negative(t, task_ledgers.tagged_nofootprint_compressed);
1270                 ledger_panic_on_negative(t, task_ledgers.tagged_footprint_compressed);
1271                 ledger_panic_on_negative(t, task_ledgers.network_volatile);
1272                 ledger_panic_on_negative(t, task_ledgers.network_nonvolatile);
1273                 ledger_panic_on_negative(t, task_ledgers.network_volatile_compressed);
1274                 ledger_panic_on_negative(t, task_ledgers.network_nonvolatile_compressed);
1275                 ledger_panic_on_negative(t, task_ledgers.media_nofootprint);
1276                 ledger_panic_on_negative(t, task_ledgers.media_footprint);
1277                 ledger_panic_on_negative(t, task_ledgers.media_nofootprint_compressed);
1278                 ledger_panic_on_negative(t, task_ledgers.media_footprint_compressed);
1279                 ledger_panic_on_negative(t, task_ledgers.graphics_nofootprint);
1280                 ledger_panic_on_negative(t, task_ledgers.graphics_footprint);
1281                 ledger_panic_on_negative(t, task_ledgers.graphics_nofootprint_compressed);
1282                 ledger_panic_on_negative(t, task_ledgers.graphics_footprint_compressed);
1283                 ledger_panic_on_negative(t, task_ledgers.neural_nofootprint);
1284                 ledger_panic_on_negative(t, task_ledgers.neural_footprint);
1285                 ledger_panic_on_negative(t, task_ledgers.neural_nofootprint_compressed);
1286                 ledger_panic_on_negative(t, task_ledgers.neural_footprint_compressed);
1287         }
1288 #endif /* MACH_ASSERT */
1289
1290 #if CONFIG_MEMORYSTATUS
1291         ledger_set_callback(t, task_ledgers.phys_footprint, task_footprint_exceeded, NULL, NULL);
1292 #endif /* CONFIG_MEMORYSTATUS */
1293
1294         ledger_set_callback(t, task_ledgers.interrupt_wakeups,
1295             task_wakeups_rate_exceeded, NULL, NULL);
1296         ledger_set_callback(t, task_ledgers.physical_writes, task_io_rate_exceeded, (void *)FLAVOR_IO_PHYSICAL_WRITES, NULL);
1297
1298 #if XNU_MONITOR
1299         ledger_template_complete_secure_alloc(t);
1300 #else /* XNU_MONITOR */
1301         ledger_template_complete(t);
1302 #endif /* XNU_MONITOR */
1303         task_ledger_template = t;
1304 }
1305
 1306 os_refgrp_decl(static, task_refgrp, "task", NULL); /* file-local refgrp named "task"; task_create_internal() passes it to os_ref_init_count() for each new task's ref_count */
1307
1308 kern_return_t
1309 task_create_internal(
1310         task_t          parent_task,
1311         coalition_t     *parent_coalitions __unused,
1312         boolean_t       inherit_memory,
1313         __unused boolean_t      is_64bit,
1314         boolean_t is_64bit_data,
1315         uint32_t        t_flags,
1316         uint32_t        t_procflags,
1317         uint8_t         t_returnwaitflags,
1318         task_t          *child_task)            /* OUT */
1319 {
1320         task_t                  new_task;
1321         vm_shared_region_t      shared_region;
1322         ledger_t                ledger = NULL;
1323
1324         new_task = (task_t) zalloc(task_zone);
1325
1326         if (new_task == TASK_NULL) {
1327                 return KERN_RESOURCE_SHORTAGE;
1328         }
1329
1330         /* one ref for just being alive; one for our caller */
1331         os_ref_init_count(&new_task->ref_count, &task_refgrp, 2);
1332
1333         /* allocate with active entries */
1334         assert(task_ledger_template != NULL);
1335         if ((ledger = ledger_instantiate(task_ledger_template,
1336             LEDGER_CREATE_ACTIVE_ENTRIES)) == NULL) {
1337                 zfree(task_zone, new_task);
1338                 return KERN_RESOURCE_SHORTAGE;
1339         }
1340
1341 #if defined(HAS_APPLE_PAC)
1342         ml_task_set_rop_pid(new_task, parent_task, inherit_memory);
1343         ml_task_set_disable_user_jop(new_task, inherit_memory ? parent_task->disable_user_jop : FALSE);
1344 #endif
1345
1346         new_task->ledger = ledger;
1347
1348 #if defined(CONFIG_SCHED_MULTIQ)
1349         new_task->sched_group = sched_group_create();
1350 #endif
1351
1352         /* if inherit_memory is true, parent_task MUST not be NULL */
1353         if (!(t_flags & TF_CORPSE_FORK) && inherit_memory) {
1354                 new_task->map = vm_map_fork(ledger, parent_task->map, 0);
1355         } else {
1356                 unsigned int pmap_flags = is_64bit ? PMAP_CREATE_64BIT : 0;
1357                 new_task->map = vm_map_create(pmap_create_options(ledger, 0, pmap_flags),
1358                     (vm_map_offset_t)(VM_MIN_ADDRESS),
1359                     (vm_map_offset_t)(VM_MAX_ADDRESS), TRUE);
1360         }
1361
1362         /* Inherit memlock limit from parent */
1363         if (parent_task) {
1364                 vm_map_set_user_wire_limit(new_task->map, (vm_size_t)parent_task->map->user_wire_limit);
1365         }
1366
1367         lck_mtx_init(&new_task->lock, &task_lck_grp, &task_lck_attr);
1368         queue_init(&new_task->threads);
1369         new_task->suspend_count = 0;
1370         new_task->thread_count = 0;
1371         new_task->active_thread_count = 0;
1372         new_task->user_stop_count = 0;
1373         new_task->legacy_stop_count = 0;
1374         new_task->active = TRUE;
1375         new_task->halting = FALSE;
1376         new_task->priv_flags = 0;
1377         new_task->t_flags = t_flags;
1378         new_task->t_procflags = t_procflags;
1379         new_task->t_returnwaitflags = t_returnwaitflags;
1380         new_task->returnwait_inheritor = current_thread();
1381         new_task->importance = 0;
1382         new_task->crashed_thread_id = 0;
1383         new_task->exec_token = 0;
1384         new_task->watchports = NULL;
1385         new_task->restartable_ranges = NULL;
1386         new_task->task_exc_guard = 0;
1387
1388 #if CONFIG_ATM
1389         new_task->atm_context = NULL;
1390 #endif
1391         new_task->bank_context = NULL;
1392
1393 #ifdef MACH_BSD
1394         new_task->bsd_info = NULL;
1395         new_task->corpse_info = NULL;
1396 #endif /* MACH_BSD */
1397
1398 #if CONFIG_MACF
1399         new_task->crash_label = NULL;
1400 #endif
1401
1402 #if CONFIG_MEMORYSTATUS
1403         if (max_task_footprint != 0) {
1404                 ledger_set_limit(ledger, task_ledgers.phys_footprint, max_task_footprint, PHYS_FOOTPRINT_WARNING_LEVEL);
1405         }
1406 #endif /* CONFIG_MEMORYSTATUS */
1407
1408         if (task_wakeups_monitor_rate != 0) {
1409                 uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
1410                 int32_t  rate; // Ignored because of WAKEMON_SET_DEFAULTS
1411                 task_wakeups_monitor_ctl(new_task, &flags, &rate);
1412         }
1413
1414 #if CONFIG_IO_ACCOUNTING
1415         uint32_t flags = IOMON_ENABLE;
1416         task_io_monitor_ctl(new_task, &flags);
1417 #endif /* CONFIG_IO_ACCOUNTING */
1418
1419         machine_task_init(new_task, parent_task, inherit_memory);
1420
1421         new_task->task_debug = NULL;
1422
1423 #if DEVELOPMENT || DEBUG
1424         new_task->task_unnested = FALSE;
1425         new_task->task_disconnected_count = 0;
1426 #endif
1427         queue_init(&new_task->semaphore_list);
1428         new_task->semaphores_owned = 0;
1429
1430         ipc_task_init(new_task, parent_task);
1431
1432         new_task->vtimers = 0;
1433
1434         new_task->shared_region = NULL;
1435
1436         new_task->affinity_space = NULL;
1437
1438         new_task->t_kpc = 0;
1439
1440         new_task->pidsuspended = FALSE;
1441         new_task->frozen = FALSE;
1442         new_task->changing_freeze_state = FALSE;
1443         new_task->rusage_cpu_flags = 0;
1444         new_task->rusage_cpu_percentage = 0;
1445         new_task->rusage_cpu_interval = 0;
1446         new_task->rusage_cpu_deadline = 0;
1447         new_task->rusage_cpu_callt = NULL;
1448 #if MACH_ASSERT
1449         new_task->suspends_outstanding = 0;
1450 #endif
1451
1452 #if HYPERVISOR
1453         new_task->hv_task_target = NULL;
1454 #endif /* HYPERVISOR */
1455
1456 #if CONFIG_EMBEDDED
1457         queue_init(&new_task->task_watchers);
1458         new_task->num_taskwatchers  = 0;
1459         new_task->watchapplying  = 0;
1460 #endif /* CONFIG_EMBEDDED */
1461
1462         new_task->mem_notify_reserved = 0;
1463         new_task->memlimit_attrs_reserved = 0;
1464
1465         new_task->requested_policy = default_task_requested_policy;
1466         new_task->effective_policy = default_task_effective_policy;
1467
1468         task_importance_init_from_parent(new_task, parent_task);
1469
1470         if (parent_task != TASK_NULL) {
1471                 new_task->sec_token = parent_task->sec_token;
1472                 new_task->audit_token = parent_task->audit_token;
1473
1474                 /* inherit the parent's shared region */
1475                 shared_region = vm_shared_region_get(parent_task);
1476                 vm_shared_region_set(new_task, shared_region);
1477
1478                 if (task_has_64Bit_addr(parent_task)) {
1479                         task_set_64Bit_addr(new_task);
1480                 }
1481
1482                 if (task_has_64Bit_data(parent_task)) {
1483                         task_set_64Bit_data(new_task);
1484                 }
1485
1486                 new_task->all_image_info_addr = parent_task->all_image_info_addr;
1487                 new_task->all_image_info_size = parent_task->all_image_info_size;
1488                 new_task->mach_header_vm_address = 0;
1489
1490                 if (inherit_memory && parent_task->affinity_space) {
1491                         task_affinity_create(parent_task, new_task);
1492                 }
1493
1494                 new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task);
1495
1496 #if DEBUG || DEVELOPMENT
1497                 if (parent_task->t_flags & TF_NO_SMT) {
1498                         new_task->t_flags |= TF_NO_SMT;
1499                 }
1500 #endif
1501
1502                 new_task->priority = BASEPRI_DEFAULT;
1503                 new_task->max_priority = MAXPRI_USER;
1504
1505                 task_policy_create(new_task, parent_task);
1506         } else {
1507                 new_task->sec_token = KERNEL_SECURITY_TOKEN;
1508                 new_task->audit_token = KERNEL_AUDIT_TOKEN;
1509 #ifdef __LP64__
1510                 if (is_64bit) {
1511                         task_set_64Bit_addr(new_task);
1512                 }
1513 #endif
1514
1515                 if (is_64bit_data) {
1516                         task_set_64Bit_data(new_task);
1517                 }
1518
1519                 new_task->all_image_info_addr = (mach_vm_address_t)0;
1520                 new_task->all_image_info_size = (mach_vm_size_t)0;
1521
1522                 new_task->pset_hint = PROCESSOR_SET_NULL;
1523
1524                 if (kernel_task == TASK_NULL) {
1525                         new_task->priority = BASEPRI_KERNEL;
1526                         new_task->max_priority = MAXPRI_KERNEL;
1527                 } else {
1528                         new_task->priority = BASEPRI_DEFAULT;
1529                         new_task->max_priority = MAXPRI_USER;
1530                 }
1531         }
1532
1533         bzero(new_task->coalition, sizeof(new_task->coalition));
1534         for (int i = 0; i < COALITION_NUM_TYPES; i++) {
1535                 queue_chain_init(new_task->task_coalition[i]);
1536         }
1537
1538         /* Allocate I/O Statistics */
1539         new_task->task_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info));
1540         assert(new_task->task_io_stats != NULL);
1541         bzero(new_task->task_io_stats, sizeof(struct io_stat_info));
1542
1543         bzero(&(new_task->cpu_time_eqos_stats), sizeof(new_task->cpu_time_eqos_stats));
1544         bzero(&(new_task->cpu_time_rqos_stats), sizeof(new_task->cpu_time_rqos_stats));
1545
1546         bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics));
1547
1548         /* Copy resource accounting info from the parent for a corpse-forked task. */
1549         if (parent_task != NULL && (t_flags & TF_CORPSE_FORK)) {
1550                 task_rollup_accounting_info(new_task, parent_task);
1551         } else {
1552                 /* Initialize to zero for standard fork/spawn case */
1553                 new_task->total_user_time = 0;
1554                 new_task->total_system_time = 0;
1555                 new_task->total_ptime = 0;
1556                 new_task->total_runnable_time = 0;
1557                 new_task->faults = 0;
1558                 new_task->pageins = 0;
1559                 new_task->cow_faults = 0;
1560                 new_task->messages_sent = 0;
1561                 new_task->messages_received = 0;
1562                 new_task->syscalls_mach = 0;
1563                 new_task->syscalls_unix = 0;
1564                 new_task->c_switch = 0;
1565                 new_task->p_switch = 0;
1566                 new_task->ps_switch = 0;
1567                 new_task->decompressions = 0;
1568                 new_task->low_mem_notified_warn = 0;
1569                 new_task->low_mem_notified_critical = 0;
1570                 new_task->purged_memory_warn = 0;
1571                 new_task->purged_memory_critical = 0;
1572                 new_task->low_mem_privileged_listener = 0;
1573                 new_task->memlimit_is_active = 0;
1574                 new_task->memlimit_is_fatal = 0;
1575                 new_task->memlimit_active_exc_resource = 0;
1576                 new_task->memlimit_inactive_exc_resource = 0;
1577                 new_task->task_timer_wakeups_bin_1 = 0;
1578                 new_task->task_timer_wakeups_bin_2 = 0;
1579                 new_task->task_gpu_ns = 0;
1580                 new_task->task_writes_counters_internal.task_immediate_writes = 0;
1581                 new_task->task_writes_counters_internal.task_deferred_writes = 0;
1582                 new_task->task_writes_counters_internal.task_invalidated_writes = 0;
1583                 new_task->task_writes_counters_internal.task_metadata_writes = 0;
1584                 new_task->task_writes_counters_external.task_immediate_writes = 0;
1585                 new_task->task_writes_counters_external.task_deferred_writes = 0;
1586                 new_task->task_writes_counters_external.task_invalidated_writes = 0;
1587                 new_task->task_writes_counters_external.task_metadata_writes = 0;
1588
1589                 new_task->task_energy = 0;
1590 #if MONOTONIC
1591                 memset(&new_task->task_monotonic, 0, sizeof(new_task->task_monotonic));
1592 #endif /* MONOTONIC */
1593         }
1594
1595
1596 #if CONFIG_COALITIONS
1597         if (!(t_flags & TF_CORPSE_FORK)) {
1598                 /* TODO: there is no graceful failure path here... */
1599                 if (parent_coalitions && parent_coalitions[COALITION_TYPE_RESOURCE]) {
1600                         coalitions_adopt_task(parent_coalitions, new_task);
1601                 } else if (parent_task && parent_task->coalition[COALITION_TYPE_RESOURCE]) {
1602                         /*
1603                          * all tasks at least have a resource coalition, so
1604                          * if the parent has one then inherit all coalitions
1605                          * the parent is a part of
1606                          */
1607                         coalitions_adopt_task(parent_task->coalition, new_task);
1608                 } else {
1609                         /* TODO: assert that new_task will be PID 1 (launchd) */
1610                         coalitions_adopt_init_task(new_task);
1611                 }
1612                 /*
1613                  * on exec, we need to transfer the coalition roles from the
1614                  * parent task to the exec copy task.
1615                  */
1616                 if (parent_task && (t_procflags & TPF_EXEC_COPY)) {
1617                         int coal_roles[COALITION_NUM_TYPES];
1618                         task_coalition_roles(parent_task, coal_roles);
1619                         (void)coalitions_set_roles(new_task->coalition, new_task, coal_roles);
1620                 }
1621         } else {
1622                 coalitions_adopt_corpse_task(new_task);
1623         }
1624
1625         if (new_task->coalition[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
1626                 panic("created task is not a member of a resource coalition");
1627         }
1628 #endif /* CONFIG_COALITIONS */
1629
1630         new_task->dispatchqueue_offset = 0;
1631         if (parent_task != NULL) {
1632                 new_task->dispatchqueue_offset = parent_task->dispatchqueue_offset;
1633         }
1634
1635         new_task->task_can_transfer_memory_ownership = FALSE;
1636         new_task->task_volatile_objects = 0;
1637         new_task->task_nonvolatile_objects = 0;
1638         new_task->task_objects_disowning = FALSE;
1639         new_task->task_objects_disowned = FALSE;
1640         new_task->task_owned_objects = 0;
1641         queue_init(&new_task->task_objq);
1642         task_objq_lock_init(new_task);
1643
1644 #if __arm64__
1645         new_task->task_legacy_footprint = FALSE;
1646         new_task->task_extra_footprint_limit = FALSE;
1647         new_task->task_ios13extended_footprint_limit = FALSE;
1648 #endif /* __arm64__ */
1649         new_task->task_region_footprint = FALSE;
1650         new_task->task_has_crossed_thread_limit = FALSE;
1651         new_task->task_thread_limit = 0;
1652 #if CONFIG_SECLUDED_MEMORY
1653         new_task->task_can_use_secluded_mem = FALSE;
1654         new_task->task_could_use_secluded_mem = FALSE;
1655         new_task->task_could_also_use_secluded_mem = FALSE;
1656         new_task->task_suppressed_secluded = FALSE;
1657 #endif /* CONFIG_SECLUDED_MEMORY */
1658
1659         /*
1660          * t_flags is set up above. But since we don't
1661          * support darkwake mode being set that way
1662          * currently, we clear it out here explicitly.
1663          */
1664         new_task->t_flags &= ~(TF_DARKWAKE_MODE);
1665
1666         queue_init(&new_task->io_user_clients);
1667         new_task->loadTag = 0;
1668
1669         ipc_task_enable(new_task);
1670
1671         lck_mtx_lock(&tasks_threads_lock);
1672         queue_enter(&tasks, new_task, task_t, tasks);
1673         tasks_count++;
1674         if (tasks_suspend_state) {
1675                 task_suspend_internal(new_task);
1676         }
1677         lck_mtx_unlock(&tasks_threads_lock);
1678
1679         *child_task = new_task;
1680         return KERN_SUCCESS;
1681 }
1682
1683 /*
1684  *      task_rollup_accounting_info
1685  *
1686  *      Roll up accounting stats. Used to rollup stats
1687  *      for exec copy task and corpse fork.
1688  */
1689 void
1690 task_rollup_accounting_info(task_t to_task, task_t from_task)
1691 {
1692         assert(from_task != to_task);
1693
1694         to_task->total_user_time = from_task->total_user_time;
1695         to_task->total_system_time = from_task->total_system_time;
1696         to_task->total_ptime = from_task->total_ptime;
1697         to_task->total_runnable_time = from_task->total_runnable_time;
1698         to_task->faults = from_task->faults;
1699         to_task->pageins = from_task->pageins;
1700         to_task->cow_faults = from_task->cow_faults;
1701         to_task->decompressions = from_task->decompressions;
1702         to_task->messages_sent = from_task->messages_sent;
1703         to_task->messages_received = from_task->messages_received;
1704         to_task->syscalls_mach = from_task->syscalls_mach;
1705         to_task->syscalls_unix = from_task->syscalls_unix;
1706         to_task->c_switch = from_task->c_switch;
1707         to_task->p_switch = from_task->p_switch;
1708         to_task->ps_switch = from_task->ps_switch;
1709         to_task->extmod_statistics = from_task->extmod_statistics;
1710         to_task->low_mem_notified_warn = from_task->low_mem_notified_warn;
1711         to_task->low_mem_notified_critical = from_task->low_mem_notified_critical;
1712         to_task->purged_memory_warn = from_task->purged_memory_warn;
1713         to_task->purged_memory_critical = from_task->purged_memory_critical;
1714         to_task->low_mem_privileged_listener = from_task->low_mem_privileged_listener;
1715         *to_task->task_io_stats = *from_task->task_io_stats;
1716         to_task->cpu_time_eqos_stats = from_task->cpu_time_eqos_stats;
1717         to_task->cpu_time_rqos_stats = from_task->cpu_time_rqos_stats;
1718         to_task->task_timer_wakeups_bin_1 = from_task->task_timer_wakeups_bin_1;
1719         to_task->task_timer_wakeups_bin_2 = from_task->task_timer_wakeups_bin_2;
1720         to_task->task_gpu_ns = from_task->task_gpu_ns;
1721         to_task->task_writes_counters_internal.task_immediate_writes = from_task->task_writes_counters_internal.task_immediate_writes;
1722         to_task->task_writes_counters_internal.task_deferred_writes = from_task->task_writes_counters_internal.task_deferred_writes;
1723         to_task->task_writes_counters_internal.task_invalidated_writes = from_task->task_writes_counters_internal.task_invalidated_writes;
1724         to_task->task_writes_counters_internal.task_metadata_writes = from_task->task_writes_counters_internal.task_metadata_writes;
1725         to_task->task_writes_counters_external.task_immediate_writes = from_task->task_writes_counters_external.task_immediate_writes;
1726         to_task->task_writes_counters_external.task_deferred_writes = from_task->task_writes_counters_external.task_deferred_writes;
1727         to_task->task_writes_counters_external.task_invalidated_writes = from_task->task_writes_counters_external.task_invalidated_writes;
1728         to_task->task_writes_counters_external.task_metadata_writes = from_task->task_writes_counters_external.task_metadata_writes;
1729         to_task->task_energy = from_task->task_energy;
1730
1731         /* Skip ledger roll up for memory accounting entries */
1732         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time);
1733         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.platform_idle_wakeups);
1734         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.interrupt_wakeups);
1735 #if CONFIG_SCHED_SFI
1736         for (sfi_class_id_t class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
1737                 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.sfi_wait_times[class_id]);
1738         }
1739 #endif
1740         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_me);
1741         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_others);
1742         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.physical_writes);
1743         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.logical_writes);
1744         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_me);
1745         ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_others);
1746 }
1747
1748 int task_dropped_imp_count = 0;
1749
1750 /*
1751  *      task_deallocate:
1752  *
1753  *      Drop a reference on a task.
1754  */
1755 void
1756 task_deallocate(
1757         task_t          task)
1758 {
1759         ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups;
1760         os_ref_count_t refs;
1761
1762         if (task == TASK_NULL) {
1763                 return;
1764         }
1765
1766         refs = task_deallocate_internal(task);
1767
1768 #if IMPORTANCE_INHERITANCE
1769         if (refs == 1) {
1770                 /*
1771                  * If last ref potentially comes from the task's importance,
1772                  * disconnect it.  But more task refs may be added before
1773                  * that completes, so wait for the reference to go to zero
1774                  * naturally (it may happen on a recursive task_deallocate()
1775                  * from the ipc_importance_disconnect_task() call).
1776                  */
1777                 if (IIT_NULL != task->task_imp_base) {
1778                         ipc_importance_disconnect_task(task);
1779                 }
1780                 return;
1781         }
1782 #endif /* IMPORTANCE_INHERITANCE */
1783
1784         if (refs > 0) {
1785                 return;
1786         }
1787
1788         /*
1789          * The task should be dead at this point. Ensure other resources
1790          * like threads, are gone before we trash the world.
1791          */
1792         assert(queue_empty(&task->threads));
1793         assert(task->bsd_info == NULL);
1794         assert(!is_active(task->itk_space));
1795         assert(!task->active);
1796         assert(task->active_thread_count == 0);
1797
1798         lck_mtx_lock(&tasks_threads_lock);
1799         assert(terminated_tasks_count > 0);
1800         queue_remove(&terminated_tasks, task, task_t, tasks);
1801         terminated_tasks_count--;
1802         lck_mtx_unlock(&tasks_threads_lock);
1803
1804         /*
1805          * remove the reference on atm descriptor
1806          */
1807         task_atm_reset(task);
1808
1809         /*
1810          * remove the reference on bank context
1811          */
1812         task_bank_reset(task);
1813
1814         if (task->task_io_stats) {
1815                 kfree(task->task_io_stats, sizeof(struct io_stat_info));
1816         }
1817
1818         /*
1819          *      Give the machine dependent code a chance
1820          *      to perform cleanup before ripping apart
1821          *      the task.
1822          */
1823         machine_task_terminate(task);
1824
1825         ipc_task_terminate(task);
1826
1827         /* let iokit know */
1828         iokit_task_terminate(task);
1829
1830         if (task->affinity_space) {
1831                 task_affinity_deallocate(task);
1832         }
1833
1834 #if MACH_ASSERT
1835         if (task->ledger != NULL &&
1836             task->map != NULL &&
1837             task->map->pmap != NULL &&
1838             task->map->pmap->ledger != NULL) {
1839                 assert(task->ledger == task->map->pmap->ledger);
1840         }
1841 #endif /* MACH_ASSERT */
1842
1843         vm_owned_objects_disown(task);
1844         assert(task->task_objects_disowned);
1845         if (task->task_volatile_objects != 0 ||
1846             task->task_nonvolatile_objects != 0 ||
1847             task->task_owned_objects != 0) {
1848                 panic("task_deallocate(%p): "
1849                     "volatile_objects=%d nonvolatile_objects=%d owned=%d\n",
1850                     task,
1851                     task->task_volatile_objects,
1852                     task->task_nonvolatile_objects,
1853                     task->task_owned_objects);
1854         }
1855
1856         vm_map_deallocate(task->map);
1857         is_release(task->itk_space);
1858         if (task->restartable_ranges) {
1859                 restartable_ranges_release(task->restartable_ranges);
1860         }
1861
1862         ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
1863             &interrupt_wakeups, &debit);
1864         ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
1865             &platform_idle_wakeups, &debit);
1866
1867 #if defined(CONFIG_SCHED_MULTIQ)
1868         sched_group_destroy(task->sched_group);
1869 #endif
1870
1871         /* Accumulate statistics for dead tasks */
1872         lck_spin_lock(&dead_task_statistics_lock);
1873         dead_task_statistics.total_user_time += task->total_user_time;
1874         dead_task_statistics.total_system_time += task->total_system_time;
1875
1876         dead_task_statistics.task_interrupt_wakeups += interrupt_wakeups;
1877         dead_task_statistics.task_platform_idle_wakeups += platform_idle_wakeups;
1878
1879         dead_task_statistics.task_timer_wakeups_bin_1 += task->task_timer_wakeups_bin_1;
1880         dead_task_statistics.task_timer_wakeups_bin_2 += task->task_timer_wakeups_bin_2;
1881         dead_task_statistics.total_ptime += task->total_ptime;
1882         dead_task_statistics.total_pset_switches += task->ps_switch;
1883         dead_task_statistics.task_gpu_ns += task->task_gpu_ns;
1884         dead_task_statistics.task_energy += task->task_energy;
1885
1886         lck_spin_unlock(&dead_task_statistics_lock);
1887         lck_mtx_destroy(&task->lock, &task_lck_grp);
1888
1889         if (!ledger_get_entries(task->ledger, task_ledgers.tkm_private, &credit,
1890             &debit)) {
1891                 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc);
1892                 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_private.free);
1893         }
1894         if (!ledger_get_entries(task->ledger, task_ledgers.tkm_shared, &credit,
1895             &debit)) {
1896                 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_shared.alloc);
1897                 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_shared.free);
1898         }
1899         ledger_dereference(task->ledger);
1900
1901 #if TASK_REFERENCE_LEAK_DEBUG
1902         btlog_remove_entries_for_element(task_ref_btlog, task);
1903 #endif
1904
1905 #if CONFIG_COALITIONS
1906         task_release_coalitions(task);
1907 #endif /* CONFIG_COALITIONS */
1908
1909         bzero(task->coalition, sizeof(task->coalition));
1910
1911 #if MACH_BSD
1912         /* clean up collected information since last reference to task is gone */
1913         if (task->corpse_info) {
1914                 void *corpse_info_kernel = kcdata_memory_get_begin_addr(task->corpse_info);
1915                 task_crashinfo_destroy(task->corpse_info);
1916                 task->corpse_info = NULL;
1917                 if (corpse_info_kernel) {
1918                         kfree(corpse_info_kernel, CORPSEINFO_ALLOCATION_SIZE);
1919                 }
1920         }
1921 #endif
1922
1923 #if CONFIG_MACF
1924         if (task->crash_label) {
1925                 mac_exc_free_label(task->crash_label);
1926                 task->crash_label = NULL;
1927         }
1928 #endif
1929
1930         assert(queue_empty(&task->task_objq));
1931
1932         zfree(task_zone, task);
1933 }
1934
1935 /*
1936  *      task_name_deallocate:
1937  *
1938  *      Drop a reference on a task name.
1939  */
1940 void
1941 task_name_deallocate(
1942         task_name_t             task_name)
1943 {
1944         return task_deallocate((task_t)task_name);
1945 }
1946
1947 /*
1948  *      task_inspect_deallocate:
1949  *
1950  *      Drop a task inspection reference.
1951  */
1952 void
1953 task_inspect_deallocate(
1954         task_inspect_t          task_inspect)
1955 {
1956         return task_deallocate((task_t)task_inspect);
1957 }
1958
1959 /*
1960  *      task_suspension_token_deallocate:
1961  *
1962  *      Drop a reference on a task suspension token.
1963  */
1964 void
1965 task_suspension_token_deallocate(
1966         task_suspension_token_t         token)
1967 {
1968         return task_deallocate((task_t)token);
1969 }
1970
1971
1972 /*
1973  * task_collect_crash_info:
1974  *
1975  * collect crash info from bsd and mach based data
1976  */
1977 kern_return_t
1978 task_collect_crash_info(
1979         task_t task,
1980 #ifdef CONFIG_MACF
1981         struct label *crash_label,
1982 #endif
1983         int is_corpse_fork)
1984 {
1985         kern_return_t kr = KERN_SUCCESS;
1986
1987         kcdata_descriptor_t crash_data = NULL;
1988         kcdata_descriptor_t crash_data_release = NULL;
1989         mach_msg_type_number_t size = CORPSEINFO_ALLOCATION_SIZE;
1990         mach_vm_offset_t crash_data_ptr = 0;
1991         void *crash_data_kernel = NULL;
1992         void *crash_data_kernel_release = NULL;
1993 #if CONFIG_MACF
1994         struct label *label, *free_label;
1995 #endif
1996
1997         if (!corpses_enabled()) {
1998                 return KERN_NOT_SUPPORTED;
1999         }
2000
2001 #if CONFIG_MACF
2002         free_label = label = mac_exc_create_label();
2003 #endif
2004
2005         task_lock(task);
2006
2007         assert(is_corpse_fork || task->bsd_info != NULL);
2008         if (task->corpse_info == NULL && (is_corpse_fork || task->bsd_info != NULL)) {
2009 #if CONFIG_MACF
2010                 /* Set the crash label, used by the exception delivery mac hook */
2011                 free_label = task->crash_label; // Most likely NULL.
2012                 task->crash_label = label;
2013                 mac_exc_update_task_crash_label(task, crash_label);
2014 #endif
2015                 task_unlock(task);
2016
2017                 crash_data_kernel = (void *) kalloc(CORPSEINFO_ALLOCATION_SIZE);
2018                 if (crash_data_kernel == NULL) {
2019                         kr = KERN_RESOURCE_SHORTAGE;
2020                         goto out_no_lock;
2021                 }
2022                 bzero(crash_data_kernel, CORPSEINFO_ALLOCATION_SIZE);
2023                 crash_data_ptr = (mach_vm_offset_t) crash_data_kernel;
2024
2025                 /* Do not get a corpse ref for corpse fork */
2026                 crash_data = task_crashinfo_alloc_init((mach_vm_address_t)crash_data_ptr, size,
2027                     is_corpse_fork ? 0 : CORPSE_CRASHINFO_HAS_REF,
2028                     KCFLAG_USE_MEMCOPY);
2029                 if (crash_data) {
2030                         task_lock(task);
2031                         crash_data_release = task->corpse_info;
2032                         crash_data_kernel_release = kcdata_memory_get_begin_addr(crash_data_release);
2033                         task->corpse_info = crash_data;
2034
2035                         task_unlock(task);
2036                         kr = KERN_SUCCESS;
2037                 } else {
2038                         kfree(crash_data_kernel, CORPSEINFO_ALLOCATION_SIZE);
2039                         kr = KERN_FAILURE;
2040                 }
2041
2042                 if (crash_data_release != NULL) {
2043                         task_crashinfo_destroy(crash_data_release);
2044                 }
2045                 if (crash_data_kernel_release != NULL) {
2046                         kfree(crash_data_kernel_release, CORPSEINFO_ALLOCATION_SIZE);
2047                 }
2048         } else {
2049                 task_unlock(task);
2050         }
2051
2052 out_no_lock:
2053 #if CONFIG_MACF
2054         if (free_label != NULL) {
2055                 mac_exc_free_label(free_label);
2056         }
2057 #endif
2058         return kr;
2059 }
2060
2061 /*
2062  * task_deliver_crash_notification:
2063  *
2064  * Makes outcall to registered host port for a corpse.
2065  */
2066 kern_return_t
2067 task_deliver_crash_notification(
2068         task_t task,
2069         thread_t thread,
2070         exception_type_t etype,
2071         mach_exception_subcode_t subcode)
2072 {
2073         kcdata_descriptor_t crash_info = task->corpse_info;
2074         thread_t th_iter = NULL;
2075         kern_return_t kr = KERN_SUCCESS;
2076         wait_interrupt_t wsave;
2077         mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
2078         ipc_port_t task_port, old_notify;
2079
2080         if (crash_info == NULL) {
2081                 return KERN_FAILURE;
2082         }
2083
2084         task_lock(task);
2085         if (task_is_a_corpse_fork(task)) {
2086                 /* Populate code with EXC_{RESOURCE,GUARD} for corpse fork */
2087                 code[0] = etype;
2088                 code[1] = subcode;
2089         } else {
2090                 /* Populate code with EXC_CRASH for corpses */
2091                 code[0] = EXC_CRASH;
2092                 code[1] = 0;
2093                 /* Update the code[1] if the boot-arg corpse_for_fatal_memkill is set */
2094                 if (corpse_for_fatal_memkill) {
2095                         code[1] = subcode;
2096                 }
2097         }
2098
2099         queue_iterate(&task->threads, th_iter, thread_t, task_threads)
2100         {
2101                 if (th_iter->corpse_dup == FALSE) {
2102                         ipc_thread_reset(th_iter);
2103                 }
2104         }
2105         task_unlock(task);
2106
2107         /* Arm the no-sender notification for taskport */
2108         task_reference(task);
2109         task_port = convert_task_to_port(task);
2110         ip_lock(task_port);
2111         require_ip_active(task_port);
2112         ipc_port_nsrequest(task_port, task_port->ip_mscount, ipc_port_make_sonce_locked(task_port), &old_notify);
2113         /* port unlocked */
2114         assert(IP_NULL == old_notify);
2115
2116         wsave = thread_interrupt_level(THREAD_UNINT);
2117         kr = exception_triage_thread(EXC_CORPSE_NOTIFY, code, EXCEPTION_CODE_MAX, thread);
2118         if (kr != KERN_SUCCESS) {
2119                 printf("Failed to send exception EXC_CORPSE_NOTIFY. error code: %d for pid %d\n", kr, task_pid(task));
2120         }
2121
2122         (void)thread_interrupt_level(wsave);
2123
2124         /*
2125          * Drop the send right on task port, will fire the
2126          * no-sender notification if exception deliver failed.
2127          */
2128         ipc_port_release_send(task_port);
2129         return kr;
2130 }
2131
2132 /*
2133  *      task_terminate:
2134  *
2135  *      Terminate the specified task.  See comments on thread_terminate
2136  *      (kern/thread.c) about problems with terminating the "current task."
2137  */
2138
2139 kern_return_t
2140 task_terminate(
2141         task_t          task)
2142 {
2143         if (task == TASK_NULL) {
2144                 return KERN_INVALID_ARGUMENT;
2145         }
2146
2147         if (task->bsd_info) {
2148                 return KERN_FAILURE;
2149         }
2150
2151         return task_terminate_internal(task);
2152 }
2153
2154 #if MACH_ASSERT
2155 extern int proc_pid(struct proc *);
2156 extern void proc_name_kdp(task_t t, char *buf, int size);
2157 #endif /* MACH_ASSERT */
2158
2159 #define VM_MAP_PARTIAL_REAP 0x54  /* 0x150 */
2160 static void
2161 __unused task_partial_reap(task_t task, __unused int pid)
2162 {
2163         unsigned int    reclaimed_resident = 0;
2164         unsigned int    reclaimed_compressed = 0;
2165         uint64_t        task_page_count;
2166
2167         task_page_count = (get_task_phys_footprint(task) / PAGE_SIZE_64);
2168
2169         KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_START),
2170             pid, task_page_count, 0, 0, 0);
2171
2172         vm_map_partial_reap(task->map, &reclaimed_resident, &reclaimed_compressed);
2173
2174         KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_END),
2175             pid, reclaimed_resident, reclaimed_compressed, 0, 0);
2176 }
2177
2178 kern_return_t
2179 task_mark_corpse(task_t task)
2180 {
2181         kern_return_t kr = KERN_SUCCESS;
2182         thread_t self_thread;
2183         (void) self_thread;
2184         wait_interrupt_t wsave;
2185 #if CONFIG_MACF
2186         struct label *crash_label = NULL;
2187 #endif
2188
2189         assert(task != kernel_task);
2190         assert(task == current_task());
2191         assert(!task_is_a_corpse(task));
2192
2193 #if CONFIG_MACF
2194         crash_label = mac_exc_create_label_for_proc((struct proc*)task->bsd_info);
2195 #endif
2196
2197         kr = task_collect_crash_info(task,
2198 #if CONFIG_MACF
2199             crash_label,
2200 #endif
2201             FALSE);
2202         if (kr != KERN_SUCCESS) {
2203                 goto out;
2204         }
2205
2206         self_thread = current_thread();
2207
2208         wsave = thread_interrupt_level(THREAD_UNINT);
2209         task_lock(task);
2210
2211         task_set_corpse_pending_report(task);
2212         task_set_corpse(task);
2213         task->crashed_thread_id = thread_tid(self_thread);
2214
2215         kr = task_start_halt_locked(task, TRUE);
2216         assert(kr == KERN_SUCCESS);
2217
2218         ipc_task_reset(task);
2219         /* Remove the naked send right for task port, needed to arm no sender notification */
2220         task_set_special_port_internal(task, TASK_KERNEL_PORT, IPC_PORT_NULL);
2221         ipc_task_enable(task);
2222
2223         task_unlock(task);
2224         /* terminate the ipc space */
2225         ipc_space_terminate(task->itk_space);
2226
2227         /* Add it to global corpse task list */
2228         task_add_to_corpse_task_list(task);
2229
2230         task_start_halt(task);
2231         thread_terminate_internal(self_thread);
2232
2233         (void) thread_interrupt_level(wsave);
2234         assert(task->halting == TRUE);
2235
2236 out:
2237 #if CONFIG_MACF
2238         mac_exc_free_label(crash_label);
2239 #endif
2240         return kr;
2241 }
2242
2243 /*
2244  *      task_clear_corpse
2245  *
2246  *      Clears the corpse pending bit on task.
2247  *      Removes inspection bit on the threads.
2248  */
2249 void
2250 task_clear_corpse(task_t task)
2251 {
2252         thread_t th_iter = NULL;
2253
2254         task_lock(task);
2255         queue_iterate(&task->threads, th_iter, thread_t, task_threads)
2256         {
2257                 thread_mtx_lock(th_iter);
2258                 th_iter->inspection = FALSE;
2259                 thread_mtx_unlock(th_iter);
2260         }
2261
2262         thread_terminate_crashed_threads();
2263         /* remove the pending corpse report flag */
2264         task_clear_corpse_pending_report(task);
2265
2266         task_unlock(task);
2267 }
2268
2269 /*
2270  *      task_port_notify
2271  *
2272  *      Called whenever the Mach port system detects no-senders on
2273  *      the task port of a corpse.
2274  *      Each notification that comes in should terminate the task (corpse).
2275  */
2276 void
2277 task_port_notify(mach_msg_header_t *msg)
2278 {
2279         mach_no_senders_notification_t *notification = (void *)msg;
2280         ipc_port_t port = notification->not_header.msgh_remote_port;
2281         task_t task;
2282
2283         require_ip_active(port);
2284         assert(IKOT_TASK == ip_kotype(port));
2285         task = (task_t) ip_get_kobject(port);
2286
2287         assert(task_is_a_corpse(task));
2288
2289         /* Remove the task from global corpse task list */
2290         task_remove_from_corpse_task_list(task);
2291
2292         task_clear_corpse(task);
2293         task_terminate_internal(task);
2294 }
2295
2296 /*
2297  *      task_wait_till_threads_terminate_locked
2298  *
2299  *      Wait till all the threads in the task are terminated.
2300  *      Might release the task lock and re-acquire it.
2301  */
2302 void
2303 task_wait_till_threads_terminate_locked(task_t task)
2304 {
2305         /* wait for all the threads in the task to terminate */
2306         while (task->active_thread_count != 0) {
2307                 assert_wait((event_t)&task->active_thread_count, THREAD_UNINT);
2308                 task_unlock(task);
2309                 thread_block(THREAD_CONTINUE_NULL);
2310
2311                 task_lock(task);
2312         }
2313 }
2314
2315 /*
2316  *      task_duplicate_map_and_threads
2317  *
2318  *      Copy vmmap of source task.
2319  *      Copy active threads from source task to destination task.
2320  *      Source task would be suspended during the copy.
2321  */
2322 kern_return_t
2323 task_duplicate_map_and_threads(
2324         task_t task,
2325         void *p,
2326         task_t new_task,
2327         thread_t *thread_ret,
2328         uint64_t **udata_buffer,
2329         int *size,
2330         int *num_udata)
2331 {
2332         kern_return_t kr = KERN_SUCCESS;
2333         int active;
2334         thread_t thread, self, thread_return = THREAD_NULL;
2335         thread_t new_thread = THREAD_NULL, first_thread = THREAD_NULL;
2336         thread_t *thread_array;
2337         uint32_t active_thread_count = 0, array_count = 0, i;
2338         vm_map_t oldmap;
2339         uint64_t *buffer = NULL;
2340         int buf_size = 0;
2341         int est_knotes = 0, num_knotes = 0;
2342
2343         self = current_thread();
2344
2345         /*
2346          * Suspend the task to copy thread state, use the internal
2347          * variant so that no user-space process can resume
2348          * the task from under us
2349          */
2350         kr = task_suspend_internal(task);
2351         if (kr != KERN_SUCCESS) {
2352                 return kr;
2353         }
2354
2355         if (task->map->disable_vmentry_reuse == TRUE) {
2356                 /*
2357                  * Quite likely GuardMalloc (or some debugging tool)
2358                  * is being used on this task. And it has gone through
2359                  * its limit. Making a corpse will likely encounter
2360                  * a lot of VM entries that will need COW.
2361                  *
2362                  * Skip it.
2363                  */
2364 #if DEVELOPMENT || DEBUG
2365                 memorystatus_abort_vm_map_fork(task);
2366 #endif
2367                 task_resume_internal(task);
2368                 return KERN_FAILURE;
2369         }
2370
2371         /* Check with VM if vm_map_fork is allowed for this task */
2372         if (memorystatus_allowed_vm_map_fork(task)) {
2373                 /* Setup new task's vmmap, switch from parent task's map to it COW map */
2374                 oldmap = new_task->map;
2375                 new_task->map = vm_map_fork(new_task->ledger,
2376                     task->map,
2377                     (VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
2378                     VM_MAP_FORK_PRESERVE_PURGEABLE |
2379                     VM_MAP_FORK_CORPSE_FOOTPRINT));
2380                 vm_map_deallocate(oldmap);
2381
2382                 /* copy ledgers that impact the memory footprint */
2383                 vm_map_copy_footprint_ledgers(task, new_task);
2384
2385                 /* Get all the udata pointers from kqueue */
2386                 est_knotes = kevent_proc_copy_uptrs(p, NULL, 0);
2387                 if (est_knotes > 0) {
2388                         buf_size = (est_knotes + 32) * sizeof(uint64_t);
2389                         buffer = (uint64_t *) kalloc(buf_size);
2390                         num_knotes = kevent_proc_copy_uptrs(p, buffer, buf_size);
2391                         if (num_knotes > est_knotes + 32) {
2392                                 num_knotes = est_knotes + 32;
2393                         }
2394                 }
2395         }
2396
2397         active_thread_count = task->active_thread_count;
2398         if (active_thread_count == 0) {
2399                 if (buffer != NULL) {
2400                         kfree(buffer, buf_size);
2401                 }
2402                 task_resume_internal(task);
2403                 return KERN_FAILURE;
2404         }
2405
2406         thread_array = (thread_t *) kalloc(sizeof(thread_t) * active_thread_count);
2407
2408         /* Iterate all the threads and drop the task lock before calling thread_create_with_continuation */
2409         task_lock(task);
2410         queue_iterate(&task->threads, thread, thread_t, task_threads) {
2411                 /* Skip inactive threads */
2412                 active = thread->active;
2413                 if (!active) {
2414                         continue;
2415                 }
2416
2417                 if (array_count >= active_thread_count) {
2418                         break;
2419                 }
2420
2421                 thread_array[array_count++] = thread;
2422                 thread_reference(thread);
2423         }
2424         task_unlock(task);
2425
2426         for (i = 0; i < array_count; i++) {
2427                 kr = thread_create_with_continuation(new_task, &new_thread, (thread_continue_t)thread_corpse_continue);
2428                 if (kr != KERN_SUCCESS) {
2429                         break;
2430                 }
2431
2432                 /* Equivalent of current thread in corpse */
2433                 if (thread_array[i] == self) {
2434                         thread_return = new_thread;
2435                         new_task->crashed_thread_id = thread_tid(new_thread);
2436                 } else if (first_thread == NULL) {
2437                         first_thread = new_thread;
2438                 } else {
2439                         /* drop the extra ref returned by thread_create_with_continuation */
2440                         thread_deallocate(new_thread);
2441                 }
2442
2443                 kr = thread_dup2(thread_array[i], new_thread);
2444                 if (kr != KERN_SUCCESS) {
2445                         thread_mtx_lock(new_thread);
2446                         new_thread->corpse_dup = TRUE;
2447                         thread_mtx_unlock(new_thread);
2448                         continue;
2449                 }
2450
2451                 /* Copy thread name */
2452                 bsd_copythreadname(new_thread->uthread, thread_array[i]->uthread);
2453                 new_thread->thread_tag = thread_array[i]->thread_tag;
2454                 thread_copy_resource_info(new_thread, thread_array[i]);
2455         }
2456
2457         /* return the first thread if we couldn't find the equivalent of current */
2458         if (thread_return == THREAD_NULL) {
2459                 thread_return = first_thread;
2460         } else if (first_thread != THREAD_NULL) {
2461                 /* drop the extra ref returned by thread_create_with_continuation */
2462                 thread_deallocate(first_thread);
2463         }
2464
2465         task_resume_internal(task);
2466
2467         for (i = 0; i < array_count; i++) {
2468                 thread_deallocate(thread_array[i]);
2469         }
2470         kfree(thread_array, sizeof(thread_t) * active_thread_count);
2471
2472         if (kr == KERN_SUCCESS) {
2473                 *thread_ret = thread_return;
2474                 *udata_buffer = buffer;
2475                 *size = buf_size;
2476                 *num_udata = num_knotes;
2477         } else {
2478                 if (thread_return != THREAD_NULL) {
2479                         thread_deallocate(thread_return);
2480                 }
2481                 if (buffer != NULL) {
2482                         kfree(buffer, buf_size);
2483                 }
2484         }
2485
2486         return kr;
2487 }
2488
2489 #if CONFIG_SECLUDED_MEMORY
2490 extern void task_set_can_use_secluded_mem_locked(
2491         task_t          task,
2492         boolean_t       can_use_secluded_mem);
2493 #endif /* CONFIG_SECLUDED_MEMORY */
2494
2495 kern_return_t
2496 task_terminate_internal(
2497         task_t                  task)
2498 {
2499         thread_t                        thread, self;
2500         task_t                          self_task;
2501         boolean_t                       interrupt_save;
2502         int                             pid = 0;
2503
2504         assert(task != kernel_task);
2505
2506         self = current_thread();
2507         self_task = self->task;
2508
2509         /*
2510          *      Get the task locked and make sure that we are not racing
2511          *      with someone else trying to terminate us.
2512          */
2513         if (task == self_task) {
2514                 task_lock(task);
2515         } else if (task < self_task) {
2516                 task_lock(task);
2517                 task_lock(self_task);
2518         } else {
2519                 task_lock(self_task);
2520                 task_lock(task);
2521         }
2522
2523 #if CONFIG_SECLUDED_MEMORY
2524         if (task->task_can_use_secluded_mem) {
2525                 task_set_can_use_secluded_mem_locked(task, FALSE);
2526         }
2527         task->task_could_use_secluded_mem = FALSE;
2528         task->task_could_also_use_secluded_mem = FALSE;
2529
2530         if (task->task_suppressed_secluded) {
2531                 stop_secluded_suppression(task);
2532         }
2533 #endif /* CONFIG_SECLUDED_MEMORY */
2534
2535         if (!task->active) {
2536                 /*
2537                  *      Task is already being terminated.
2538                  *      Just return an error. If we are dying, this will
2539                  *      just get us to our AST special handler and that
2540                  *      will get us to finalize the termination of ourselves.
2541                  */
2542                 task_unlock(task);
2543                 if (self_task != task) {
2544                         task_unlock(self_task);
2545                 }
2546
2547                 return KERN_FAILURE;
2548         }
2549
2550         if (task_corpse_pending_report(task)) {
2551                 /*
2552                  *      Task is marked for reporting as corpse.
2553                  *      Just return an error. This will
2554                  *      just get us to our AST special handler and that
2555                  *      will get us to finish the path to death
2556                  */
2557                 task_unlock(task);
2558                 if (self_task != task) {
2559                         task_unlock(self_task);
2560                 }
2561
2562                 return KERN_FAILURE;
2563         }
2564
2565         if (self_task != task) {
2566                 task_unlock(self_task);
2567         }
2568
2569         /*
2570          * Make sure the current thread does not get aborted out of
2571          * the waits inside these operations.
2572          */
2573         interrupt_save = thread_interrupt_level(THREAD_UNINT);
2574
2575         /*
2576          *      Indicate that we want all the threads to stop executing
2577          *      at user space by holding the task (we would have held
2578          *      each thread independently in thread_terminate_internal -
2579          *      but this way we may be more likely to already find it
2580          *      held there).  Mark the task inactive, and prevent
2581          *      further task operations via the task port.
2582          */
2583         task_hold_locked(task);
2584         task->active = FALSE;
2585         ipc_task_disable(task);
2586
2587 #if CONFIG_TELEMETRY
2588         /*
2589          * Notify telemetry that this task is going away.
2590          */
2591         telemetry_task_ctl_locked(task, TF_TELEMETRY, 0);
2592 #endif
2593
2594         /*
2595          *      Terminate each thread in the task.
2596          */
2597         queue_iterate(&task->threads, thread, thread_t, task_threads) {
2598                 thread_terminate_internal(thread);
2599         }
2600
2601 #ifdef MACH_BSD
2602         if (task->bsd_info != NULL && !task_is_exec_copy(task)) {
2603                 pid = proc_pid(task->bsd_info);
2604         }
2605 #endif /* MACH_BSD */
2606
2607         task_unlock(task);
2608
2609         proc_set_task_policy(task, TASK_POLICY_ATTRIBUTE,
2610             TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);
2611
2612         /* Early object reap phase */
2613
2614 // PR-17045188: Revisit implementation
2615 //        task_partial_reap(task, pid);
2616
2617 #if CONFIG_EMBEDDED
2618         /*
2619          * remove all task watchers
2620          */
2621         task_removewatchers(task);
2622
2623 #endif /* CONFIG_EMBEDDED */
2624
2625         /*
2626          *      Destroy all synchronizers owned by the task.
2627          */
2628         task_synchronizer_destroy_all(task);
2629
2630         /*
2631          *      Clear the watchport boost on the task.
2632          */
2633         task_remove_turnstile_watchports(task);
2634
2635         /*
2636          *      Destroy the IPC space, leaving just a reference for it.
2637          */
2638         ipc_space_terminate(task->itk_space);
2639
2640 #if 00
2641         /* if some ledgers go negative on tear-down again... */
2642         ledger_disable_panic_on_negative(task->map->pmap->ledger,
2643             task_ledgers.phys_footprint);
2644         ledger_disable_panic_on_negative(task->map->pmap->ledger,
2645             task_ledgers.internal);
2646         ledger_disable_panic_on_negative(task->map->pmap->ledger,
2647             task_ledgers.internal_compressed);
2648         ledger_disable_panic_on_negative(task->map->pmap->ledger,
2649             task_ledgers.iokit_mapped);
2650         ledger_disable_panic_on_negative(task->map->pmap->ledger,
2651             task_ledgers.alternate_accounting);
2652         ledger_disable_panic_on_negative(task->map->pmap->ledger,
2653             task_ledgers.alternate_accounting_compressed);
2654 #endif
2655
2656         /*
2657          * If the current thread is a member of the task
2658          * being terminated, then the last reference to
2659          * the task will not be dropped until the thread
2660          * is finally reaped.  To avoid incurring the
2661          * expense of removing the address space regions
2662          * at reap time, we do it explictly here.
2663          */
2664
2665         vm_map_lock(task->map);
2666         vm_map_disable_hole_optimization(task->map);
2667         vm_map_unlock(task->map);
2668
2669 #if MACH_ASSERT
2670         /*
2671          * Identify the pmap's process, in case the pmap ledgers drift
2672          * and we have to report it.
2673          */
2674         char procname[17];
2675         if (task->bsd_info && !task_is_exec_copy(task)) {
2676                 pid = proc_pid(task->bsd_info);
2677                 proc_name_kdp(task, procname, sizeof(procname));
2678         } else {
2679                 pid = 0;
2680                 strlcpy(procname, "<unknown>", sizeof(procname));
2681         }
2682         pmap_set_process(task->map->pmap, pid, procname);
2683 #endif /* MACH_ASSERT */
2684
2685         vm_map_terminate(task->map);
2686
2687         /* release our shared region */
2688         vm_shared_region_set(task, NULL);
2689
2690
2691         lck_mtx_lock(&tasks_threads_lock);
2692         queue_remove(&tasks, task, task_t, tasks);
2693         queue_enter(&terminated_tasks, task, task_t, tasks);
2694         tasks_count--;
2695         terminated_tasks_count++;
2696         lck_mtx_unlock(&tasks_threads_lock);
2697
2698         /*
2699          * We no longer need to guard against being aborted, so restore
2700          * the previous interruptible state.
2701          */
2702         thread_interrupt_level(interrupt_save);
2703
2704 #if KPC
2705         /* force the task to release all ctrs */
2706         if (task->t_kpc & TASK_KPC_FORCED_ALL_CTRS) {
2707                 kpc_force_all_ctrs(task, 0);
2708         }
2709 #endif /* KPC */
2710
2711 #if CONFIG_COALITIONS
2712         /*
2713          * Leave our coalitions. (drop activation but not reference)
2714          */
2715         coalitions_remove_task(task);
2716 #endif
2717
2718         /*
2719          * Get rid of the task active reference on itself.
2720          */
2721         task_deallocate(task);
2722
2723         return KERN_SUCCESS;
2724 }
2725
2726 void
2727 tasks_system_suspend(boolean_t suspend)
2728 {
2729         task_t task;
2730
2731         lck_mtx_lock(&tasks_threads_lock);
2732         assert(tasks_suspend_state != suspend);
2733         tasks_suspend_state = suspend;
2734         queue_iterate(&tasks, task, task_t, tasks) {
2735                 if (task == kernel_task) {
2736                         continue;
2737                 }
2738                 suspend ? task_suspend_internal(task) : task_resume_internal(task);
2739         }
2740         lck_mtx_unlock(&tasks_threads_lock);
2741 }
2742
2743 /*
2744  * task_start_halt:
2745  *
2746  *      Shut the current task down (except for the current thread) in
2747  *      preparation for dramatic changes to the task (probably exec).
2748  *      We hold the task and mark all other threads in the task for
2749  *      termination.
2750  */
2751 kern_return_t
2752 task_start_halt(task_t task)
2753 {
2754         kern_return_t kr = KERN_SUCCESS;
2755         task_lock(task);
2756         kr = task_start_halt_locked(task, FALSE);
2757         task_unlock(task);
2758         return kr;
2759 }
2760
2761 static kern_return_t
2762 task_start_halt_locked(task_t task, boolean_t should_mark_corpse)
2763 {
2764         thread_t thread, self;
2765         uint64_t dispatchqueue_offset;
2766
2767         assert(task != kernel_task);
2768
2769         self = current_thread();
2770
2771         if (task != self->task && !task_is_a_corpse_fork(task)) {
2772                 return KERN_INVALID_ARGUMENT;
2773         }
2774
2775         if (task->halting || !task->active || !self->active) {
2776                 /*
2777                  * Task or current thread is already being terminated.
2778                  * Hurry up and return out of the current kernel context
2779                  * so that we run our AST special handler to terminate
2780                  * ourselves.
2781                  */
2782                 return KERN_FAILURE;
2783         }
2784
2785         task->halting = TRUE;
2786
2787         /*
2788          * Mark all the threads to keep them from starting any more
2789          * user-level execution.  The thread_terminate_internal code
2790          * would do this on a thread by thread basis anyway, but this
2791          * gives us a better chance of not having to wait there.
2792          */
2793         task_hold_locked(task);
2794         dispatchqueue_offset = get_dispatchqueue_offset_from_proc(task->bsd_info);
2795
2796         /*
2797          * Terminate all the other threads in the task.
2798          */
2799         queue_iterate(&task->threads, thread, thread_t, task_threads)
2800         {
2801                 if (should_mark_corpse) {
2802                         thread_mtx_lock(thread);
2803                         thread->inspection = TRUE;
2804                         thread_mtx_unlock(thread);
2805                 }
2806                 if (thread != self) {
2807                         thread_terminate_internal(thread);
2808                 }
2809         }
2810         task->dispatchqueue_offset = dispatchqueue_offset;
2811
2812         task_release_locked(task);
2813
2814         return KERN_SUCCESS;
2815 }
2816
2817
2818 /*
2819  * task_complete_halt:
2820  *
2821  *      Complete task halt by waiting for threads to terminate, then clean
2822  *      up task resources (VM, port namespace, etc...) and then let the
2823  *      current thread go in the (practically empty) task context.
2824  *
2825  *      Note: task->halting flag is not cleared in order to avoid creation
2826  *      of new thread in old exec'ed task.
2827  */
2828 void
2829 task_complete_halt(task_t task)
2830 {
2831         task_lock(task);
2832         assert(task->halting);
2833         assert(task == current_task());
2834
2835         /*
2836          *      Wait for the other threads to get shut down.
2837          *      When the last other thread is reaped, we'll be
2838          *      woken up.
2839          */
2840         if (task->thread_count > 1) {
2841                 assert_wait((event_t)&task->halting, THREAD_UNINT);
2842                 task_unlock(task);
2843                 thread_block(THREAD_CONTINUE_NULL);
2844         } else {
2845                 task_unlock(task);
2846         }
2847
2848         /*
2849          *      Give the machine dependent code a chance
2850          *      to perform cleanup of task-level resources
2851          *      associated with the current thread before
2852          *      ripping apart the task.
2853          */
2854         machine_task_terminate(task);
2855
2856         /*
2857          *      Destroy all synchronizers owned by the task.
2858          */
2859         task_synchronizer_destroy_all(task);
2860
2861         /*
2862          *      Destroy the contents of the IPC space, leaving just
2863          *      a reference for it.
2864          */
2865         ipc_space_clean(task->itk_space);
2866
2867         /*
2868          * Clean out the address space, as we are going to be
2869          * getting a new one.
2870          */
2871         vm_map_remove(task->map, task->map->min_offset,
2872             task->map->max_offset,
2873             /*
2874              * Final cleanup:
2875              * + no unnesting
2876              * + remove immutable mappings
2877              * + allow gaps in the range
2878              */
2879             (VM_MAP_REMOVE_NO_UNNESTING |
2880             VM_MAP_REMOVE_IMMUTABLE |
2881             VM_MAP_REMOVE_GAPS_OK));
2882
2883         /*
2884          * Kick out any IOKitUser handles to the task. At best they're stale,
2885          * at worst someone is racing a SUID exec.
2886          */
2887         iokit_task_terminate(task);
2888 }
2889
2890 /*
2891  *      task_hold_locked:
2892  *
2893  *      Suspend execution of the specified task.
2894  *      This is a recursive-style suspension of the task, a count of
2895  *      suspends is maintained.
2896  *
2897  *      CONDITIONS: the task is locked and active.
2898  */
2899 void
2900 task_hold_locked(
2901         task_t          task)
2902 {
2903         thread_t        thread;
2904
2905         assert(task->active);
2906
2907         if (task->suspend_count++ > 0) {
2908                 return;
2909         }
2910
2911         if (task->bsd_info) {
2912                 workq_proc_suspended(task->bsd_info);
2913         }
2914
2915         /*
2916          *      Iterate through all the threads and hold them.
2917          */
2918         queue_iterate(&task->threads, thread, thread_t, task_threads) {
2919                 thread_mtx_lock(thread);
2920                 thread_hold(thread);
2921                 thread_mtx_unlock(thread);
2922         }
2923 }
2924
2925 /*
2926  *      task_hold:
2927  *
2928  *      Same as the internal routine above, except that is must lock
2929  *      and verify that the task is active.  This differs from task_suspend
2930  *      in that it places a kernel hold on the task rather than just a
2931  *      user-level hold.  This keeps users from over resuming and setting
2932  *      it running out from under the kernel.
2933  *
2934  *      CONDITIONS: the caller holds a reference on the task
2935  */
2936 kern_return_t
2937 task_hold(
2938         task_t          task)
2939 {
2940         if (task == TASK_NULL) {
2941                 return KERN_INVALID_ARGUMENT;
2942         }
2943
2944         task_lock(task);
2945
2946         if (!task->active) {
2947                 task_unlock(task);
2948
2949                 return KERN_FAILURE;
2950         }
2951
2952         task_hold_locked(task);
2953         task_unlock(task);
2954
2955         return KERN_SUCCESS;
2956 }
2957
2958 kern_return_t
2959 task_wait(
2960         task_t          task,
2961         boolean_t       until_not_runnable)
2962 {
2963         if (task == TASK_NULL) {
2964                 return KERN_INVALID_ARGUMENT;
2965         }
2966
2967         task_lock(task);
2968
2969         if (!task->active) {
2970                 task_unlock(task);
2971
2972                 return KERN_FAILURE;
2973         }
2974
2975         task_wait_locked(task, until_not_runnable);
2976         task_unlock(task);
2977
2978         return KERN_SUCCESS;
2979 }
2980
2981 /*
2982  *      task_wait_locked:
2983  *
2984  *      Wait for all threads in task to stop.
2985  *
2986  * Conditions:
2987  *      Called with task locked, active, and held.
2988  */
2989 void
2990 task_wait_locked(
2991         task_t          task,
2992         boolean_t               until_not_runnable)
2993 {
2994         thread_t        thread, self;
2995
2996         assert(task->active);
2997         assert(task->suspend_count > 0);
2998
2999         self = current_thread();
3000
3001         /*
3002          *      Iterate through all the threads and wait for them to
3003          *      stop.  Do not wait for the current thread if it is within
3004          *      the task.
3005          */
3006         queue_iterate(&task->threads, thread, thread_t, task_threads) {
3007                 if (thread != self) {
3008                         thread_wait(thread, until_not_runnable);
3009                 }
3010         }
3011 }
3012
3013 boolean_t
3014 task_is_app_suspended(task_t task)
3015 {
3016         return task->pidsuspended;
3017 }
3018
3019 /*
3020  *      task_release_locked:
3021  *
3022  *      Release a kernel hold on a task.
3023  *
3024  *      CONDITIONS: the task is locked and active
3025  */
3026 void
3027 task_release_locked(
3028         task_t          task)
3029 {
3030         thread_t        thread;
3031
3032         assert(task->active);
3033         assert(task->suspend_count > 0);
3034
3035         if (--task->suspend_count > 0) {
3036                 return;
3037         }
3038
3039         if (task->bsd_info) {
3040                 workq_proc_resumed(task->bsd_info);
3041         }
3042
3043         queue_iterate(&task->threads, thread, thread_t, task_threads) {
3044                 thread_mtx_lock(thread);
3045                 thread_release(thread);
3046                 thread_mtx_unlock(thread);
3047         }
3048 }
3049
3050 /*
3051  *      task_release:
3052  *
3053  *      Same as the internal routine above, except that it must lock
3054  *      and verify that the task is active.
3055  *
3056  *      CONDITIONS: The caller holds a reference to the task
3057  */
3058 kern_return_t
3059 task_release(
3060         task_t          task)
3061 {
3062         if (task == TASK_NULL) {
3063                 return KERN_INVALID_ARGUMENT;
3064         }
3065
3066         task_lock(task);
3067
3068         if (!task->active) {
3069                 task_unlock(task);
3070
3071                 return KERN_FAILURE;
3072         }
3073
3074         task_release_locked(task);
3075         task_unlock(task);
3076
3077         return KERN_SUCCESS;
3078 }
3079
3080 kern_return_t
3081 task_threads(
3082         task_t                                  task,
3083         thread_act_array_t              *threads_out,
3084         mach_msg_type_number_t  *count)
3085 {
3086         mach_msg_type_number_t  actual;
3087         thread_t                                *thread_list;
3088         thread_t                                thread;
3089         vm_size_t                               size, size_needed;
3090         void                                    *addr;
3091         unsigned int                    i, j;
3092
3093         if (task == TASK_NULL) {
3094                 return KERN_INVALID_ARGUMENT;
3095         }
3096
3097         size = 0; addr = NULL;
3098
3099         for (;;) {
3100                 task_lock(task);
3101                 if (!task->active) {
3102                         task_unlock(task);
3103
3104                         if (size != 0) {
3105                                 kfree(addr, size);
3106                         }
3107
3108                         return KERN_FAILURE;
3109                 }
3110
3111                 actual = task->thread_count;
3112
3113                 /* do we have the memory we need? */
3114                 size_needed = actual * sizeof(mach_port_t);
3115                 if (size_needed <= size) {
3116                         break;
3117                 }
3118
3119                 /* unlock the task and allocate more memory */
3120                 task_unlock(task);
3121
3122                 if (size != 0) {
3123                         kfree(addr, size);
3124                 }
3125
3126                 assert(size_needed > 0);
3127                 size = size_needed;
3128
3129                 addr = kalloc(size);
3130                 if (addr == 0) {
3131                         return KERN_RESOURCE_SHORTAGE;
3132                 }
3133         }
3134
3135         /* OK, have memory and the task is locked & active */
3136         thread_list = (thread_t *)addr;
3137
3138         i = j = 0;
3139
3140         for (thread = (thread_t)queue_first(&task->threads); i < actual;
3141             ++i, thread = (thread_t)queue_next(&thread->task_threads)) {
3142                 thread_reference_internal(thread);
3143                 thread_list[j++] = thread;
3144         }
3145
3146         assert(queue_end(&task->threads, (queue_entry_t)thread));
3147
3148         actual = j;
3149         size_needed = actual * sizeof(mach_port_t);
3150
3151         /* can unlock task now that we've got the thread refs */
3152         task_unlock(task);
3153
3154         if (actual == 0) {
3155                 /* no threads, so return null pointer and deallocate memory */
3156
3157                 *threads_out = NULL;
3158                 *count = 0;
3159
3160                 if (size != 0) {
3161                         kfree(addr, size);
3162                 }
3163         } else {
3164                 /* if we allocated too much, must copy */
3165
3166                 if (size_needed < size) {
3167                         void *newaddr;
3168
3169                         newaddr = kalloc(size_needed);
3170                         if (newaddr == 0) {
3171                                 for (i = 0; i < actual; ++i) {
3172                                         thread_deallocate(thread_list[i]);
3173                                 }
3174                                 kfree(addr, size);
3175                                 return KERN_RESOURCE_SHORTAGE;
3176                         }
3177
3178                         bcopy(addr, newaddr, size_needed);
3179                         kfree(addr, size);
3180                         thread_list = (thread_t *)newaddr;
3181                 }
3182
3183                 *threads_out = thread_list;
3184                 *count = actual;
3185
3186                 /* do the conversion that Mig should handle */
3187
3188                 for (i = 0; i < actual; ++i) {
3189                         ((ipc_port_t *) thread_list)[i] = convert_thread_to_port(thread_list[i]);
3190                 }
3191         }
3192
3193         return KERN_SUCCESS;
3194 }
3195
/*
 * Hold modes passed to place_task_hold() and release_task_hold().
 */
#define TASK_HOLD_NORMAL        0       /* only task->user_stop_count is adjusted */
#define TASK_HOLD_PIDSUSPEND    1       /* release checks and clears task->pidsuspended first */
#define TASK_HOLD_LEGACY        2       /* hold is also counted in task->legacy_stop_count */
#define TASK_HOLD_LEGACY_ALL    3       /* release drops every outstanding legacy hold at once */
3200
3201 static kern_return_t
3202 place_task_hold(
3203         task_t task,
3204         int mode)
3205 {
3206         if (!task->active && !task_is_a_corpse(task)) {
3207                 return KERN_FAILURE;
3208         }
3209
3210         /* Return success for corpse task */
3211         if (task_is_a_corpse(task)) {
3212                 return KERN_SUCCESS;
3213         }
3214
3215         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
3216             MACHDBG_CODE(DBG_MACH_IPC, MACH_TASK_SUSPEND) | DBG_FUNC_NONE,
3217             task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
3218             task->user_stop_count, task->user_stop_count + 1, 0);
3219
3220 #if MACH_ASSERT
3221         current_task()->suspends_outstanding++;
3222 #endif
3223
3224         if (mode == TASK_HOLD_LEGACY) {
3225                 task->legacy_stop_count++;
3226         }
3227
3228         if (task->user_stop_count++ > 0) {
3229                 /*
3230                  *      If the stop count was positive, the task is
3231                  *      already stopped and we can exit.
3232                  */
3233                 return KERN_SUCCESS;
3234         }
3235
3236         /*
3237          * Put a kernel-level hold on the threads in the task (all
3238          * user-level task suspensions added together represent a
3239          * single kernel-level hold).  We then wait for the threads
3240          * to stop executing user code.
3241          */
3242         task_hold_locked(task);
3243         task_wait_locked(task, FALSE);
3244
3245         return KERN_SUCCESS;
3246 }
3247
3248 static kern_return_t
3249 release_task_hold(
3250         task_t          task,
3251         int                     mode)
3252 {
3253         boolean_t release = FALSE;
3254
3255         if (!task->active && !task_is_a_corpse(task)) {
3256                 return KERN_FAILURE;
3257         }
3258
3259         /* Return success for corpse task */
3260         if (task_is_a_corpse(task)) {
3261                 return KERN_SUCCESS;
3262         }
3263
3264         if (mode == TASK_HOLD_PIDSUSPEND) {
3265                 if (task->pidsuspended == FALSE) {
3266                         return KERN_FAILURE;
3267                 }
3268                 task->pidsuspended = FALSE;
3269         }
3270
3271         if (task->user_stop_count > (task->pidsuspended ? 1 : 0)) {
3272                 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
3273                     MACHDBG_CODE(DBG_MACH_IPC, MACH_TASK_RESUME) | DBG_FUNC_NONE,
3274                     task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
3275                     task->user_stop_count, mode, task->legacy_stop_count);
3276
3277 #if MACH_ASSERT
3278                 /*
3279                  * This is obviously not robust; if we suspend one task and then resume a different one,
3280                  * we'll fly under the radar. This is only meant to catch the common case of a crashed
3281                  * or buggy suspender.
3282                  */
3283                 current_task()->suspends_outstanding--;
3284 #endif
3285
3286                 if (mode == TASK_HOLD_LEGACY_ALL) {
3287                         if (task->legacy_stop_count >= task->user_stop_count) {
3288                                 task->user_stop_count = 0;
3289                                 release = TRUE;
3290                         } else {
3291                                 task->user_stop_count -= task->legacy_stop_count;
3292                         }
3293                         task->legacy_stop_count = 0;
3294                 } else {
3295                         if (mode == TASK_HOLD_LEGACY && task->legacy_stop_count > 0) {
3296                                 task->legacy_stop_count--;
3297                         }
3298                         if (--task->user_stop_count == 0) {
3299                                 release = TRUE;
3300                         }
3301                 }
3302         } else {
3303                 return KERN_FAILURE;
3304         }
3305
3306         /*
3307          *      Release the task if necessary.
3308          */
3309         if (release) {
3310                 task_release_locked(task);
3311         }
3312
3313         return KERN_SUCCESS;
3314 }
3315
3316 boolean_t
3317 get_task_suspended(task_t task)
3318 {
3319         return 0 != task->user_stop_count;
3320 }
3321
3322 /*
3323  *      task_suspend:
3324  *
3325  *      Implement an (old-fashioned) user-level suspension on a task.
3326  *
3327  *      Because the user isn't expecting to have to manage a suspension
3328  *      token, we'll track it for him in the kernel in the form of a naked
3329  *      send right to the task's resume port.  All such send rights
3330  *      account for a single suspension against the task (unlike task_suspend2()
3331  *      where each caller gets a unique suspension count represented by a
3332  *      unique send-once right).
3333  *
3334  * Conditions:
3335  *      The caller holds a reference to the task
3336  */
3337 kern_return_t
3338 task_suspend(
3339         task_t          task)
3340 {
3341         kern_return_t                   kr;
3342         mach_port_t                     port;
3343         mach_port_name_t                name;
3344
3345         if (task == TASK_NULL || task == kernel_task) {
3346                 return KERN_INVALID_ARGUMENT;
3347         }
3348
3349         task_lock(task);
3350
3351         /*
3352          * place a legacy hold on the task.
3353          */
3354         kr = place_task_hold(task, TASK_HOLD_LEGACY);
3355         if (kr != KERN_SUCCESS) {
3356                 task_unlock(task);
3357                 return kr;
3358         }
3359
3360         /*
3361          * Claim a send right on the task resume port, and request a no-senders
3362          * notification on that port (if none outstanding).
3363          */
3364         (void)ipc_kobject_make_send_lazy_alloc_port(&task->itk_resume,
3365             (ipc_kobject_t)task, IKOT_TASK_RESUME);
3366         port = task->itk_resume;
3367
3368         task_unlock(task);
3369
3370         /*
3371          * Copyout the send right into the calling task's IPC space.  It won't know it is there,
3372          * but we'll look it up when calling a traditional resume.  Any IPC operations that
3373          * deallocate the send right will auto-release the suspension.
3374          */
3375         if ((kr = ipc_kmsg_copyout_object(current_task()->itk_space, ip_to_object(port),
3376             MACH_MSG_TYPE_MOVE_SEND, NULL, NULL, &name)) != KERN_SUCCESS) {
3377                 printf("warning: %s(%d) failed to copyout suspension token for pid %d with error: %d\n",
3378                     proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
3379                     task_pid(task), kr);
3380                 return kr;
3381         }
3382
3383         return kr;
3384 }
3385
3386 /*
3387  *      task_resume:
3388  *              Release a user hold on a task.
3389  *
3390  * Conditions:
3391  *              The caller holds a reference to the task
3392  */
3393 kern_return_t
3394 task_resume(
3395         task_t  task)
3396 {
3397         kern_return_t    kr;
3398         mach_port_name_t resume_port_name;
3399         ipc_entry_t              resume_port_entry;
3400         ipc_space_t              space = current_task()->itk_space;
3401
3402         if (task == TASK_NULL || task == kernel_task) {
3403                 return KERN_INVALID_ARGUMENT;
3404         }
3405
3406         /* release a legacy task hold */
3407         task_lock(task);
3408         kr = release_task_hold(task, TASK_HOLD_LEGACY);
3409         task_unlock(task);
3410
3411         is_write_lock(space);
3412         if (is_active(space) && IP_VALID(task->itk_resume) &&
3413             ipc_hash_lookup(space, ip_to_object(task->itk_resume), &resume_port_name, &resume_port_entry) == TRUE) {
3414                 /*
3415                  * We found a suspension token in the caller's IPC space. Release a send right to indicate that
3416                  * we are holding one less legacy hold on the task from this caller.  If the release failed,
3417                  * go ahead and drop all the rights, as someone either already released our holds or the task
3418                  * is gone.
3419                  */
3420                 if (kr == KERN_SUCCESS) {
3421                         ipc_right_dealloc(space, resume_port_name, resume_port_entry);
3422                 } else {
3423                         ipc_right_destroy(space, resume_port_name, resume_port_entry, FALSE, 0);
3424                 }
3425                 /* space unlocked */
3426         } else {
3427                 is_write_unlock(space);
3428                 if (kr == KERN_SUCCESS) {
3429                         printf("warning: %s(%d) performed out-of-band resume on pid %d\n",
3430                             proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
3431                             task_pid(task));
3432                 }
3433         }
3434
3435         return kr;
3436 }
3437
3438 /*
3439  * Suspend the target task.
3440  * Making/holding a token/reference/port is the callers responsibility.
3441  */
3442 kern_return_t
3443 task_suspend_internal(task_t task)
3444 {
3445         kern_return_t    kr;
3446
3447         if (task == TASK_NULL || task == kernel_task) {
3448                 return KERN_INVALID_ARGUMENT;
3449         }
3450
3451         task_lock(task);
3452         kr = place_task_hold(task, TASK_HOLD_NORMAL);
3453         task_unlock(task);
3454         return kr;
3455 }
3456
3457 /*
3458  * Suspend the target task, and return a suspension token. The token
3459  * represents a reference on the suspended task.
3460  */
3461 kern_return_t
3462 task_suspend2(
3463         task_t                  task,
3464         task_suspension_token_t *suspend_token)
3465 {
3466         kern_return_t    kr;
3467
3468         kr = task_suspend_internal(task);
3469         if (kr != KERN_SUCCESS) {
3470                 *suspend_token = TASK_NULL;
3471                 return kr;
3472         }
3473
3474         /*
3475          * Take a reference on the target task and return that to the caller
3476          * as a "suspension token," which can be converted into an SO right to
3477          * the now-suspended task's resume port.
3478          */
3479         task_reference_internal(task);
3480         *suspend_token = task;
3481
3482         return KERN_SUCCESS;
3483 }
3484
3485 /*
3486  * Resume the task
3487  * (reference/token/port management is caller's responsibility).
3488  */
3489 kern_return_t
3490 task_resume_internal(
3491         task_suspension_token_t         task)
3492 {
3493         kern_return_t kr;
3494
3495         if (task == TASK_NULL || task == kernel_task) {
3496                 return KERN_INVALID_ARGUMENT;
3497         }
3498
3499         task_lock(task);
3500         kr = release_task_hold(task, TASK_HOLD_NORMAL);
3501         task_unlock(task);
3502         return kr;
3503 }
3504
3505 /*
3506  * Resume the task using a suspension token. Consumes the token's ref.
3507  */
3508 kern_return_t
3509 task_resume2(
3510         task_suspension_token_t         task)
3511 {
3512         kern_return_t kr;
3513
3514         kr = task_resume_internal(task);
3515         task_suspension_token_deallocate(task);
3516
3517         return kr;
3518 }
3519
3520 boolean_t
3521 task_suspension_notify(mach_msg_header_t *request_header)
3522 {
3523         ipc_port_t port = request_header->msgh_remote_port;
3524         task_t task = convert_port_to_task_suspension_token(port);
3525         mach_msg_type_number_t not_count;
3526
3527         if (task == TASK_NULL || task == kernel_task) {
3528                 return TRUE;  /* nothing to do */
3529         }
3530         switch (request_header->msgh_id) {
3531         case MACH_NOTIFY_SEND_ONCE:
3532                 /* release the hold held by this specific send-once right */
3533                 task_lock(task);
3534                 release_task_hold(task, TASK_HOLD_NORMAL);
3535                 task_unlock(task);
3536                 break;
3537
3538         case MACH_NOTIFY_NO_SENDERS:
3539                 not_count = ((mach_no_senders_notification_t *)request_header)->not_count;
3540
3541                 task_lock(task);
3542                 ip_lock(port);
3543                 if (port->ip_mscount == not_count) {
3544                         /* release all the [remaining] outstanding legacy holds */
3545                         assert(port->ip_nsrequest == IP_NULL);
3546                         ip_unlock(port);
3547                         release_task_hold(task, TASK_HOLD_LEGACY_ALL);
3548                         task_unlock(task);
3549                 } else if (port->ip_nsrequest == IP_NULL) {
3550                         ipc_port_t old_notify;
3551
3552                         task_unlock(task);
3553                         /* new send rights, re-arm notification at current make-send count */
3554                         ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
3555                         assert(old_notify == IP_NULL);
3556                         /* port unlocked */
3557                 } else {
3558                         ip_unlock(port);
3559                         task_unlock(task);
3560                 }
3561                 break;
3562
3563         default:
3564                 break;
3565         }
3566
3567         task_suspension_token_deallocate(task); /* drop token reference */
3568         return TRUE;
3569 }
3570
3571 static kern_return_t
3572 task_pidsuspend_locked(task_t task)
3573 {
3574         kern_return_t kr;
3575
3576         if (task->pidsuspended) {
3577                 kr = KERN_FAILURE;
3578                 goto out;
3579         }
3580
3581         task->pidsuspended = TRUE;
3582
3583         kr = place_task_hold(task, TASK_HOLD_PIDSUSPEND);
3584         if (kr != KERN_SUCCESS) {
3585                 task->pidsuspended = FALSE;
3586         }
3587 out:
3588         return kr;
3589 }
3590
3591
3592 /*
3593  *      task_pidsuspend:
3594  *
3595  *      Suspends a task by placing a hold on its threads.
3596  *
3597  * Conditions:
3598  *      The caller holds a reference to the task
3599  */
3600 kern_return_t
3601 task_pidsuspend(
3602         task_t          task)
3603 {
3604         kern_return_t    kr;
3605
3606         if (task == TASK_NULL || task == kernel_task) {
3607                 return KERN_INVALID_ARGUMENT;
3608         }
3609
3610         task_lock(task);
3611
3612         kr = task_pidsuspend_locked(task);
3613
3614         task_unlock(task);
3615
3616         if ((KERN_SUCCESS == kr) && task->message_app_suspended) {
3617                 iokit_task_app_suspended_changed(task);
3618         }
3619
3620         return kr;
3621 }
3622
3623 /*
3624  *      task_pidresume:
3625  *              Resumes a previously suspended task.
3626  *
3627  * Conditions:
3628  *              The caller holds a reference to the task
3629  */
3630 kern_return_t
3631 task_pidresume(
3632         task_t  task)
3633 {
3634         kern_return_t    kr;
3635
3636         if (task == TASK_NULL || task == kernel_task) {
3637                 return KERN_INVALID_ARGUMENT;
3638         }
3639
3640         task_lock(task);
3641
3642 #if CONFIG_FREEZE
3643
3644         while (task->changing_freeze_state) {
3645                 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
3646                 task_unlock(task);
3647                 thread_block(THREAD_CONTINUE_NULL);
3648
3649                 task_lock(task);
3650         }
3651         task->changing_freeze_state = TRUE;
3652 #endif
3653
3654         kr = release_task_hold(task, TASK_HOLD_PIDSUSPEND);
3655
3656         task_unlock(task);
3657
3658         if ((KERN_SUCCESS == kr) && task->message_app_suspended) {
3659                 iokit_task_app_suspended_changed(task);
3660         }
3661
3662 #if CONFIG_FREEZE
3663
3664         task_lock(task);
3665
3666         if (kr == KERN_SUCCESS) {
3667                 task->frozen = FALSE;
3668         }
3669         task->changing_freeze_state = FALSE;
3670         thread_wakeup(&task->changing_freeze_state);
3671
3672         task_unlock(task);
3673 #endif
3674
3675         return kr;
3676 }
3677
/*
 * File-local reference group named "task_watchports"; passed to
 * os_ref_init() for tw_refcount in task_watchports_alloc_init().
 */
os_refgrp_decl(static, task_watchports_refgrp, "task_watchports", NULL);
3679
3680 /*
3681  *      task_add_turnstile_watchports:
3682  *              Setup watchports to boost the main thread of the task.
3683  *
3684  *      Arguments:
3685  *              task: task being spawned
3686  *              thread: main thread of task
3687  *              portwatch_ports: array of watchports
3688  *              portwatch_count: number of watchports
3689  *
3690  *      Conditions:
3691  *              Nothing locked.
3692  */
3693 void
3694 task_add_turnstile_watchports(
3695         task_t          task,
3696         thread_t        thread,
3697         ipc_port_t      *portwatch_ports,
3698         uint32_t        portwatch_count)
3699 {
3700         struct task_watchports *watchports = NULL;
3701         struct task_watchport_elem *previous_elem_array[TASK_MAX_WATCHPORT_COUNT] = {};
3702         os_ref_count_t refs;
3703
3704         /* Check if the task has terminated */
3705         if (!task->active) {
3706                 return;
3707         }
3708
3709         assert(portwatch_count <= TASK_MAX_WATCHPORT_COUNT);
3710
3711         watchports = task_watchports_alloc_init(task, thread, portwatch_count);
3712
3713         /* Lock the ipc space */
3714         is_write_lock(task->itk_space);
3715
3716         /* Setup watchports to boost the main thread */
3717         refs = task_add_turnstile_watchports_locked(task,
3718             watchports, previous_elem_array, portwatch_ports,
3719             portwatch_count);
3720
3721         /* Drop the space lock */
3722         is_write_unlock(task->itk_space);
3723
3724         if (refs == 0) {
3725                 task_watchports_deallocate(watchports);
3726         }
3727
3728         /* Drop the ref on previous_elem_array */
3729         for (uint32_t i = 0; i < portwatch_count && previous_elem_array[i] != NULL; i++) {
3730                 task_watchport_elem_deallocate(previous_elem_array[i]);
3731         }
3732 }
3733
3734 /*
3735  *      task_remove_turnstile_watchports:
3736  *              Clear all turnstile boost on the task from watchports.
3737  *
3738  *      Arguments:
3739  *              task: task being terminated
3740  *
3741  *      Conditions:
3742  *              Nothing locked.
3743  */
3744 void
3745 task_remove_turnstile_watchports(
3746         task_t          task)
3747 {
3748         os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
3749         struct task_watchports *watchports = NULL;
3750         ipc_port_t port_freelist[TASK_MAX_WATCHPORT_COUNT] = {};
3751         uint32_t portwatch_count;
3752
3753         /* Lock the ipc space */
3754         is_write_lock(task->itk_space);
3755
3756         /* Check if watchport boost exist */
3757         if (task->watchports == NULL) {
3758                 is_write_unlock(task->itk_space);
3759                 return;
3760         }
3761         watchports = task->watchports;
3762         portwatch_count = watchports->tw_elem_array_count;
3763
3764         refs = task_remove_turnstile_watchports_locked(task, watchports,
3765             port_freelist);
3766
3767         is_write_unlock(task->itk_space);
3768
3769         /* Drop all the port references */
3770         for (uint32_t i = 0; i < portwatch_count && port_freelist[i] != NULL; i++) {
3771                 ip_release(port_freelist[i]);
3772         }
3773
3774         /* Clear the task and thread references for task_watchport */
3775         if (refs == 0) {
3776                 task_watchports_deallocate(watchports);
3777         }
3778 }
3779
3780 /*
3781  *      task_transfer_turnstile_watchports:
3782  *              Transfer all watchport turnstile boost from old task to new task.
3783  *
3784  *      Arguments:
3785  *              old_task: task calling exec
3786  *              new_task: new exec'ed task
3787  *              thread: main thread of new task
3788  *
3789  *      Conditions:
3790  *              Nothing locked.
3791  */
3792 void
3793 task_transfer_turnstile_watchports(
3794         task_t   old_task,
3795         task_t   new_task,
3796         thread_t new_thread)
3797 {
3798         struct task_watchports *old_watchports = NULL;
3799         struct task_watchports *new_watchports = NULL;
3800         os_ref_count_t old_refs = TASK_MAX_WATCHPORT_COUNT;
3801         os_ref_count_t new_refs = TASK_MAX_WATCHPORT_COUNT;
3802         uint32_t portwatch_count;
3803
3804         if (old_task->watchports == NULL || !new_task->active) {
3805                 return;
3806         }
3807
3808         /* Get the watch port count from the old task */
3809         is_write_lock(old_task->itk_space);
3810         if (old_task->watchports == NULL) {
3811                 is_write_unlock(old_task->itk_space);
3812                 return;
3813         }
3814
3815         portwatch_count = old_task->watchports->tw_elem_array_count;
3816         is_write_unlock(old_task->itk_space);
3817
3818         new_watchports = task_watchports_alloc_init(new_task, new_thread, portwatch_count);
3819
3820         /* Lock the ipc space for old task */
3821         is_write_lock(old_task->itk_space);
3822
3823         /* Lock the ipc space for new task */
3824         is_write_lock(new_task->itk_space);
3825
3826         /* Check if watchport boost exist */
3827         if (old_task->watchports == NULL || !new_task->active) {
3828                 is_write_unlock(new_task->itk_space);
3829                 is_write_unlock(old_task->itk_space);
3830                 (void)task_watchports_release(new_watchports);
3831                 task_watchports_deallocate(new_watchports);
3832                 return;
3833         }
3834
3835         old_watchports = old_task->watchports;
3836         assert(portwatch_count == old_task->watchports->tw_elem_array_count);
3837
3838         /* Setup new task watchports */
3839         new_task->watchports = new_watchports;
3840
3841         for (uint32_t i = 0; i < portwatch_count; i++) {
3842                 ipc_port_t port = old_watchports->tw_elem[i].twe_port;
3843
3844                 if (port == NULL) {
3845                         task_watchport_elem_clear(&new_watchports->tw_elem[i]);
3846                         continue;
3847                 }
3848
3849                 /* Lock the port and check if it has the entry */
3850                 ip_lock(port);
3851                 imq_lock(&port->ip_messages);
3852
3853                 task_watchport_elem_init(&new_watchports->tw_elem[i], new_task, port);
3854
3855                 if (ipc_port_replace_watchport_elem_conditional_locked(port,
3856                     &old_watchports->tw_elem[i], &new_watchports->tw_elem[i]) == KERN_SUCCESS) {
3857                         task_watchport_elem_clear(&old_watchports->tw_elem[i]);
3858
3859                         task_watchports_retain(new_watchports);
3860                         old_refs = task_watchports_release(old_watchports);
3861
3862                         /* Check if all ports are cleaned */
3863                         if (old_refs == 0) {
3864                                 old_task->watchports = NULL;
3865                         }
3866                 } else {
3867                         task_watchport_elem_clear(&new_watchports->tw_elem[i]);
3868                 }
3869                 /* mqueue and port unlocked by ipc_port_replace_watchport_elem_conditional_locked */
3870         }
3871
3872         /* Drop the reference on new task_watchports struct returned by task_watchports_alloc_init */
3873         new_refs = task_watchports_release(new_watchports);
3874         if (new_refs == 0) {
3875                 new_task->watchports = NULL;
3876         }
3877
3878         is_write_unlock(new_task->itk_space);
3879         is_write_unlock(old_task->itk_space);
3880
3881         /* Clear the task and thread references for old_watchport */
3882         if (old_refs == 0) {
3883                 task_watchports_deallocate(old_watchports);
3884         }
3885
3886         /* Clear the task and thread references for new_watchport */
3887         if (new_refs == 0) {
3888                 task_watchports_deallocate(new_watchports);
3889         }
3890 }
3891
3892 /*
3893  *      task_add_turnstile_watchports_locked:
3894  *              Setup watchports to boost the main thread of the task.
3895  *
3896  *      Arguments:
3897  *              task: task to boost
3898  *              watchports: watchport structure to be attached to the task
3899  *              previous_elem_array: an array of old watchport_elem to be returned to caller
3900  *              portwatch_ports: array of watchports
3901  *              portwatch_count: number of watchports
3902  *
3903  *      Conditions:
3904  *              ipc space of the task locked.
3905  *              returns array of old watchport_elem in previous_elem_array
3906  */
3907 static os_ref_count_t
3908 task_add_turnstile_watchports_locked(
3909         task_t                      task,
3910         struct task_watchports      *watchports,
3911         struct task_watchport_elem  **previous_elem_array,
3912         ipc_port_t                  *portwatch_ports,
3913         uint32_t                    portwatch_count)
3914 {
3915         os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
3916
3917         /* Check if the task is still active */
3918         if (!task->active) {
3919                 refs = task_watchports_release(watchports);
3920                 return refs;
3921         }
3922
3923         assert(task->watchports == NULL);
3924         task->watchports = watchports;
3925
3926         for (uint32_t i = 0, j = 0; i < portwatch_count; i++) {
3927                 ipc_port_t port = portwatch_ports[i];
3928
3929                 task_watchport_elem_init(&watchports->tw_elem[i], task, port);
3930                 if (port == NULL) {
3931                         task_watchport_elem_clear(&watchports->tw_elem[i]);
3932                         continue;
3933                 }
3934
3935                 ip_lock(port);
3936                 imq_lock(&port->ip_messages);
3937
3938                 /* Check if port is in valid state to be setup as watchport */
3939                 if (ipc_port_add_watchport_elem_locked(port, &watchports->tw_elem[i],
3940                     &previous_elem_array[j]) != KERN_SUCCESS) {
3941                         task_watchport_elem_clear(&watchports->tw_elem[i]);
3942                         continue;
3943                 }
3944                 /* port and mqueue unlocked on return */
3945
3946                 ip_reference(port);
3947                 task_watchports_retain(watchports);
3948                 if (previous_elem_array[j] != NULL) {
3949                         j++;
3950                 }
3951         }
3952
3953         /* Drop the reference on task_watchport struct returned by os_ref_init */
3954         refs = task_watchports_release(watchports);
3955         if (refs == 0) {
3956                 task->watchports = NULL;
3957         }
3958
3959         return refs;
3960 }
3961
3962 /*
3963  *      task_remove_turnstile_watchports_locked:
3964  *              Clear all turnstile boost on the task from watchports.
3965  *
3966  *      Arguments:
3967  *              task: task to remove watchports from
3968  *              watchports: watchports structure for the task
3969  *              port_freelist: array of ports returned with ref to caller
3970  *
3971  *
3972  *      Conditions:
3973  *              ipc space of the task locked.
3974  *              array of ports with refs are returned in port_freelist
3975  */
3976 static os_ref_count_t
3977 task_remove_turnstile_watchports_locked(
3978         task_t                 task,
3979         struct task_watchports *watchports,
3980         ipc_port_t             *port_freelist)
3981 {
3982         os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
3983
3984         for (uint32_t i = 0, j = 0; i < watchports->tw_elem_array_count; i++) {
3985                 ipc_port_t port = watchports->tw_elem[i].twe_port;
3986                 if (port == NULL) {
3987                         continue;
3988                 }
3989
3990                 /* Lock the port and check if it has the entry */
3991                 ip_lock(port);
3992                 imq_lock(&port->ip_messages);
3993                 if (ipc_port_clear_watchport_elem_internal_conditional_locked(port,
3994                     &watchports->tw_elem[i]) == KERN_SUCCESS) {
3995                         task_watchport_elem_clear(&watchports->tw_elem[i]);
3996                         port_freelist[j++] = port;
3997                         refs = task_watchports_release(watchports);
3998
3999                         /* Check if all ports are cleaned */
4000                         if (refs == 0) {
4001                                 task->watchports = NULL;
4002                                 break;
4003                         }
4004                 }
4005                 /* mqueue and port unlocked by ipc_port_clear_watchport_elem_internal_conditional_locked */
4006         }
4007         return refs;
4008 }
4009
4010 /*
4011  *      task_watchports_alloc_init:
4012  *              Allocate and initialize task watchport struct.
4013  *
4014  *      Conditions:
4015  *              Nothing locked.
4016  */
4017 static struct task_watchports *
4018 task_watchports_alloc_init(
4019         task_t        task,
4020         thread_t      thread,
4021         uint32_t      count)
4022 {
4023         struct task_watchports *watchports = kalloc(sizeof(struct task_watchports) +
4024             count * sizeof(struct task_watchport_elem));
4025
4026         task_reference(task);
4027         thread_reference(thread);
4028         watchports->tw_task = task;
4029         watchports->tw_thread = thread;
4030         watchports->tw_elem_array_count = count;
4031         os_ref_init(&watchports->tw_refcount, &task_watchports_refgrp);
4032
4033         return watchports;
4034 }
4035
4036 /*
4037  *      task_watchports_deallocate:
4038  *              Deallocate task watchport struct.
4039  *
4040  *      Conditions:
4041  *              Nothing locked.
4042  */
4043 static void
4044 task_watchports_deallocate(
4045         struct task_watchports *watchports)
4046 {
4047         uint32_t portwatch_count = watchports->tw_elem_array_count;
4048
4049         task_deallocate(watchports->tw_task);
4050         thread_deallocate(watchports->tw_thread);
4051         kfree(watchports, sizeof(struct task_watchports) + portwatch_count * sizeof(struct task_watchport_elem));
4052 }
4053
4054 /*
4055  *      task_watchport_elem_deallocate:
4056  *              Deallocate task watchport element and release its ref on task_watchport.
4057  *
4058  *      Conditions:
4059  *              Nothing locked.
4060  */
4061 void
4062 task_watchport_elem_deallocate(
4063         struct task_watchport_elem *watchport_elem)
4064 {
4065         os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
4066         task_t task = watchport_elem->twe_task;
4067         struct task_watchports *watchports = NULL;
4068         ipc_port_t port = NULL;
4069
4070         assert(task != NULL);
4071
4072         /* Take the space lock to modify the elememt */
4073         is_write_lock(task->itk_space);
4074
4075         watchports = task->watchports;
4076         assert(watchports != NULL);
4077
4078         port = watchport_elem->twe_port;
4079         assert(port != NULL);
4080
4081         task_watchport_elem_clear(watchport_elem);
4082         refs = task_watchports_release(watchports);
4083
4084         if (refs == 0) {
4085                 task->watchports = NULL;
4086         }
4087
4088         is_write_unlock(task->itk_space);
4089
4090         ip_release(port);
4091         if (refs == 0) {
4092                 task_watchports_deallocate(watchports);
4093         }
4094 }
4095
4096 /*
4097  *      task_has_watchports:
4098  *              Return TRUE if task has watchport boosts.
4099  *
4100  *      Conditions:
4101  *              Nothing locked.
4102  */
4103 boolean_t
4104 task_has_watchports(task_t task)
4105 {
4106         return task->watchports != NULL;
4107 }
4108
4109 #if DEVELOPMENT || DEBUG
4110
4111 extern void IOSleep(int);
4112
4113 kern_return_t
4114 task_disconnect_page_mappings(task_t task)
4115 {
4116         int     n;
4117
4118         if (task == TASK_NULL || task == kernel_task) {
4119                 return KERN_INVALID_ARGUMENT;
4120         }
4121
4122         /*
4123          * this function is used to strip all of the mappings from
4124          * the pmap for the specified task to force the task to
4125          * re-fault all of the pages it is actively using... this
4126          * allows us to approximate the true working set of the
4127          * specified task.  We only engage if at least 1 of the
4128          * threads in the task is runnable, but we want to continuously
4129          * sweep (at least for a while - I've arbitrarily set the limit at
4130          * 100 sweeps to be re-looked at as we gain experience) to get a better
4131          * view into what areas within a page are being visited (as opposed to only
4132          * seeing the first fault of a page after the task becomes
4133          * runnable)...  in the future I may
4134          * try to block until awakened by a thread in this task
4135          * being made runnable, but for now we'll periodically poll from the
4136          * user level debug tool driving the sysctl
4137          */
4138         for (n = 0; n < 100; n++) {
4139                 thread_t        thread;
4140                 boolean_t       runnable;
4141                 boolean_t       do_unnest;
4142                 int             page_count;
4143
4144                 runnable = FALSE;
4145                 do_unnest = FALSE;
4146
4147                 task_lock(task);
4148
4149                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4150                         if (thread->state & TH_RUN) {
4151                                 runnable = TRUE;
4152                                 break;
4153                         }
4154                 }
4155                 if (n == 0) {
4156                         task->task_disconnected_count++;
4157                 }
4158
4159                 if (task->task_unnested == FALSE) {
4160                         if (runnable == TRUE) {
4161                                 task->task_unnested = TRUE;
4162                                 do_unnest = TRUE;
4163                         }
4164                 }
4165                 task_unlock(task);
4166
4167                 if (runnable == FALSE) {
4168                         break;
4169                 }
4170
4171                 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_START,
4172                     task, do_unnest, task->task_disconnected_count, 0, 0);
4173
4174                 page_count = vm_map_disconnect_page_mappings(task->map, do_unnest);
4175
4176                 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_END,
4177                     task, page_count, 0, 0, 0);
4178
4179                 if ((n % 5) == 4) {
4180                         IOSleep(1);
4181                 }
4182         }
4183         return KERN_SUCCESS;
4184 }
4185
4186 #endif
4187
4188
4189 #if CONFIG_FREEZE
4190
4191 /*
4192  *      task_freeze:
4193  *
4194  *      Freeze a task.
4195  *
4196  * Conditions:
4197  *      The caller holds a reference to the task
4198  */
4199 extern void             vm_wake_compactor_swapper(void);
4200 extern queue_head_t     c_swapout_list_head;
4201
4202 kern_return_t
4203 task_freeze(
4204         task_t    task,
4205         uint32_t           *purgeable_count,
4206         uint32_t           *wired_count,
4207         uint32_t           *clean_count,
4208         uint32_t           *dirty_count,
4209         uint32_t           dirty_budget,
4210         uint32_t           *shared_count,
4211         int                *freezer_error_code,
4212         boolean_t          eval_only)
4213 {
4214         kern_return_t kr = KERN_SUCCESS;
4215
4216         if (task == TASK_NULL || task == kernel_task) {
4217                 return KERN_INVALID_ARGUMENT;
4218         }
4219
4220         task_lock(task);
4221
4222         while (task->changing_freeze_state) {
4223                 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
4224                 task_unlock(task);
4225                 thread_block(THREAD_CONTINUE_NULL);
4226
4227                 task_lock(task);
4228         }
4229         if (task->frozen) {
4230                 task_unlock(task);
4231                 return KERN_FAILURE;
4232         }
4233         task->changing_freeze_state = TRUE;
4234
4235         task_unlock(task);
4236
4237         kr = vm_map_freeze(task,
4238             purgeable_count,
4239             wired_count,
4240             clean_count,
4241             dirty_count,
4242             dirty_budget,
4243             shared_count,
4244             freezer_error_code,
4245             eval_only);
4246
4247         task_lock(task);
4248
4249         if ((kr == KERN_SUCCESS) && (eval_only == FALSE)) {
4250                 task->frozen = TRUE;
4251         }
4252
4253         task->changing_freeze_state = FALSE;
4254         thread_wakeup(&task->changing_freeze_state);
4255
4256         task_unlock(task);
4257
4258         if (VM_CONFIG_COMPRESSOR_IS_PRESENT &&
4259             (kr == KERN_SUCCESS) &&
4260             (eval_only == FALSE)) {
4261                 vm_wake_compactor_swapper();
4262                 /*
4263                  * We do an explicit wakeup of the swapout thread here
4264                  * because the compact_and_swap routines don't have
4265                  * knowledge about these kind of "per-task packed c_segs"
4266                  * and so will not be evaluating whether we need to do
4267                  * a wakeup there.
4268                  */
4269                 thread_wakeup((event_t)&c_swapout_list_head);
4270         }
4271
4272         return kr;
4273 }
4274
4275 /*
4276  *      task_thaw:
4277  *
4278  *      Thaw a currently frozen task.
4279  *
4280  * Conditions:
4281  *      The caller holds a reference to the task
4282  */
4283 kern_return_t
4284 task_thaw(
4285         task_t          task)
4286 {
4287         if (task == TASK_NULL || task == kernel_task) {
4288                 return KERN_INVALID_ARGUMENT;
4289         }
4290
4291         task_lock(task);
4292
4293         while (task->changing_freeze_state) {
4294                 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
4295                 task_unlock(task);
4296                 thread_block(THREAD_CONTINUE_NULL);
4297
4298                 task_lock(task);
4299         }
4300         if (!task->frozen) {
4301                 task_unlock(task);
4302                 return KERN_FAILURE;
4303         }
4304         task->frozen = FALSE;
4305
4306         task_unlock(task);
4307
4308         return KERN_SUCCESS;
4309 }
4310
4311 #endif /* CONFIG_FREEZE */
4312
4313 kern_return_t
4314 host_security_set_task_token(
4315         host_security_t  host_security,
4316         task_t           task,
4317         security_token_t sec_token,
4318         audit_token_t    audit_token,
4319         host_priv_t      host_priv)
4320 {
4321         ipc_port_t       host_port;
4322         kern_return_t    kr;
4323
4324         if (task == TASK_NULL) {
4325                 return KERN_INVALID_ARGUMENT;
4326         }
4327
4328         if (host_security == HOST_NULL) {
4329                 return KERN_INVALID_SECURITY;
4330         }
4331
4332         task_lock(task);
4333         task->sec_token = sec_token;
4334         task->audit_token = audit_token;
4335         task_unlock(task);
4336
4337         if (host_priv != HOST_PRIV_NULL) {
4338                 kr = host_get_host_priv_port(host_priv, &host_port);
4339         } else {
4340                 kr = host_get_host_port(host_priv_self(), &host_port);
4341         }
4342         assert(kr == KERN_SUCCESS);
4343
4344         kr = task_set_special_port_internal(task, TASK_HOST_PORT, host_port);
4345         return kr;
4346 }
4347
4348 kern_return_t
4349 task_send_trace_memory(
4350         __unused task_t   target_task,
4351         __unused uint32_t pid,
4352         __unused uint64_t uniqueid)
4353 {
4354         return KERN_INVALID_ARGUMENT;
4355 }
4356
4357 /*
4358  * This routine was added, pretty much exclusively, for registering the
4359  * RPC glue vector for in-kernel short circuited tasks.  Rather than
4360  * removing it completely, I have only disabled that feature (which was
4361  * the only feature at the time).  It just appears that we are going to
4362  * want to add some user data to tasks in the future (i.e. bsd info,
4363  * task names, etc...), so I left it in the formal task interface.
4364  */
4365 kern_return_t
4366 task_set_info(
4367         task_t          task,
4368         task_flavor_t   flavor,
4369         __unused task_info_t    task_info_in,           /* pointer to IN array */
4370         __unused mach_msg_type_number_t task_info_count)
4371 {
4372         if (task == TASK_NULL) {
4373                 return KERN_INVALID_ARGUMENT;
4374         }
4375
4376         switch (flavor) {
4377 #if CONFIG_ATM
4378         case TASK_TRACE_MEMORY_INFO:
4379         {
4380                 if (task_info_count != TASK_TRACE_MEMORY_INFO_COUNT) {
4381                         return KERN_INVALID_ARGUMENT;
4382                 }
4383
4384                 assert(task_info_in != NULL);
4385                 task_trace_memory_info_t mem_info;
4386                 mem_info = (task_trace_memory_info_t) task_info_in;
4387                 kern_return_t kr = atm_register_trace_memory(task,
4388                     mem_info->user_memory_address,
4389                     mem_info->buffer_size);
4390                 return kr;
4391         }
4392
4393 #endif
4394         default:
4395                 return KERN_INVALID_ARGUMENT;
4396         }
4397         return KERN_SUCCESS;
4398 }
4399
/*
 * Global integer flag, initialized to 1.
 * NOTE(review): its readers are not visible in this part of the file; the
 * name suggests a toggle tied to a tracked bug report -- confirm its
 * purpose against the code that tests it before changing the value.
 */
int radar_20146450 = 1;
4401 kern_return_t
4402 task_info(
4403         task_t                  task,
4404         task_flavor_t           flavor,
4405         task_info_t             task_info_out,
4406         mach_msg_type_number_t  *task_info_count)
4407 {
4408         kern_return_t error = KERN_SUCCESS;
4409         mach_msg_type_number_t  original_task_info_count;
4410
4411         if (task == TASK_NULL) {
4412                 return KERN_INVALID_ARGUMENT;
4413         }
4414
4415         original_task_info_count = *task_info_count;
4416         task_lock(task);
4417
4418         if ((task != current_task()) && (!task->active)) {
4419                 task_unlock(task);
4420                 return KERN_INVALID_ARGUMENT;
4421         }
4422
4423         switch (flavor) {
4424         case TASK_BASIC_INFO_32:
4425         case TASK_BASIC2_INFO_32:
4426 #if defined(__arm__) || defined(__arm64__)
4427         case TASK_BASIC_INFO_64:
4428 #endif
4429                 {
4430                         task_basic_info_32_t    basic_info;
4431                         vm_map_t                                map;
4432                         clock_sec_t                             secs;
4433                         clock_usec_t                    usecs;
4434
4435                         if (*task_info_count < TASK_BASIC_INFO_32_COUNT) {
4436                                 error = KERN_INVALID_ARGUMENT;
4437                                 break;
4438                         }
4439
4440                         basic_info = (task_basic_info_32_t)task_info_out;
4441
4442                         map = (task == kernel_task)? kernel_map: task->map;
4443                         basic_info->virtual_size = (typeof(basic_info->virtual_size))map->size;
4444                         if (flavor == TASK_BASIC2_INFO_32) {
4445                                 /*
4446                                  * The "BASIC2" flavor gets the maximum resident
4447                                  * size instead of the current resident size...
4448                                  */
4449                                 basic_info->resident_size = pmap_resident_max(map->pmap);
4450                         } else {
4451                                 basic_info->resident_size = pmap_resident_count(map->pmap);
4452                         }
4453                         basic_info->resident_size *= PAGE_SIZE;
4454
4455                         basic_info->policy = ((task != kernel_task)?
4456                             POLICY_TIMESHARE: POLICY_RR);
4457                         basic_info->suspend_count = task->user_stop_count;
4458
4459                         absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
4460                         basic_info->user_time.seconds =
4461                             (typeof(basic_info->user_time.seconds))secs;
4462                         basic_info->user_time.microseconds = usecs;
4463
4464                         absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
4465                         basic_info->system_time.seconds =
4466                             (typeof(basic_info->system_time.seconds))secs;
4467                         basic_info->system_time.microseconds = usecs;
4468
4469                         *task_info_count = TASK_BASIC_INFO_32_COUNT;
4470                         break;
4471                 }
4472
4473 #if defined(__arm__) || defined(__arm64__)
4474         case TASK_BASIC_INFO_64_2:
4475         {
4476                 task_basic_info_64_2_t  basic_info;
4477                 vm_map_t                                map;
4478                 clock_sec_t                             secs;
4479                 clock_usec_t                    usecs;
4480
4481                 if (*task_info_count < TASK_BASIC_INFO_64_2_COUNT) {
4482                         error = KERN_INVALID_ARGUMENT;
4483                         break;
4484                 }
4485
4486                 basic_info = (task_basic_info_64_2_t)task_info_out;
4487
4488                 map = (task == kernel_task)? kernel_map: task->map;
4489                 basic_info->virtual_size  = map->size;
4490                 basic_info->resident_size =
4491                     (mach_vm_size_t)(pmap_resident_count(map->pmap))
4492                     * PAGE_SIZE_64;
4493
4494                 basic_info->policy = ((task != kernel_task)?
4495                     POLICY_TIMESHARE: POLICY_RR);
4496                 basic_info->suspend_count = task->user_stop_count;
4497
4498                 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
4499                 basic_info->user_time.seconds =
4500                     (typeof(basic_info->user_time.seconds))secs;
4501                 basic_info->user_time.microseconds = usecs;
4502
4503                 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
4504                 basic_info->system_time.seconds =
4505                     (typeof(basic_info->system_time.seconds))secs;
4506                 basic_info->system_time.microseconds = usecs;
4507
4508                 *task_info_count = TASK_BASIC_INFO_64_2_COUNT;
4509                 break;
4510         }
4511
4512 #else /* defined(__arm__) || defined(__arm64__) */
4513         case TASK_BASIC_INFO_64:
4514         {
4515                 task_basic_info_64_t    basic_info;
4516                 vm_map_t                                map;
4517                 clock_sec_t                             secs;
4518                 clock_usec_t                    usecs;
4519
4520                 if (*task_info_count < TASK_BASIC_INFO_64_COUNT) {
4521                         error = KERN_INVALID_ARGUMENT;
4522                         break;
4523                 }
4524
4525                 basic_info = (task_basic_info_64_t)task_info_out;
4526
4527                 map = (task == kernel_task)? kernel_map: task->map;
4528                 basic_info->virtual_size  = map->size;
4529                 basic_info->resident_size =
4530                     (mach_vm_size_t)(pmap_resident_count(map->pmap))
4531                     * PAGE_SIZE_64;
4532
4533                 basic_info->policy = ((task != kernel_task)?
4534                     POLICY_TIMESHARE: POLICY_RR);
4535                 basic_info->suspend_count = task->user_stop_count;
4536
4537                 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
4538                 basic_info->user_time.seconds =
4539                     (typeof(basic_info->user_time.seconds))secs;
4540                 basic_info->user_time.microseconds = usecs;
4541
4542                 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
4543                 basic_info->system_time.seconds =
4544                     (typeof(basic_info->system_time.seconds))secs;
4545                 basic_info->system_time.microseconds = usecs;
4546
4547                 *task_info_count = TASK_BASIC_INFO_64_COUNT;
4548                 break;
4549         }
4550 #endif /* defined(__arm__) || defined(__arm64__) */
4551
4552         case MACH_TASK_BASIC_INFO:
4553         {
4554                 mach_task_basic_info_t  basic_info;
4555                 vm_map_t                map;
4556                 clock_sec_t             secs;
4557                 clock_usec_t            usecs;
4558
4559                 if (*task_info_count < MACH_TASK_BASIC_INFO_COUNT) {
4560                         error = KERN_INVALID_ARGUMENT;
4561                         break;
4562                 }
4563
4564                 basic_info = (mach_task_basic_info_t)task_info_out;
4565
4566                 map = (task == kernel_task) ? kernel_map : task->map;
4567
4568                 basic_info->virtual_size  = map->size;
4569
4570                 basic_info->resident_size =
4571                     (mach_vm_size_t)(pmap_resident_count(map->pmap));
4572                 basic_info->resident_size *= PAGE_SIZE_64;
4573
4574                 basic_info->resident_size_max =
4575                     (mach_vm_size_t)(pmap_resident_max(map->pmap));
4576                 basic_info->resident_size_max *= PAGE_SIZE_64;
4577
4578                 basic_info->policy = ((task != kernel_task) ?
4579                     POLICY_TIMESHARE : POLICY_RR);
4580
4581                 basic_info->suspend_count = task->user_stop_count;
4582
4583                 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
4584                 basic_info->user_time.seconds =
4585                     (typeof(basic_info->user_time.seconds))secs;
4586                 basic_info->user_time.microseconds = usecs;
4587
4588                 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
4589                 basic_info->system_time.seconds =
4590                     (typeof(basic_info->system_time.seconds))secs;
4591                 basic_info->system_time.microseconds = usecs;
4592
4593                 *task_info_count = MACH_TASK_BASIC_INFO_COUNT;
4594                 break;
4595         }
4596
4597         case TASK_THREAD_TIMES_INFO:
4598         {
4599                 task_thread_times_info_t        times_info;
4600                 thread_t                                        thread;
4601
4602                 if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) {
4603                         error = KERN_INVALID_ARGUMENT;
4604                         break;
4605                 }
4606
4607                 times_info = (task_thread_times_info_t) task_info_out;
4608                 times_info->user_time.seconds = 0;
4609                 times_info->user_time.microseconds = 0;
4610                 times_info->system_time.seconds = 0;
4611                 times_info->system_time.microseconds = 0;
4612
4613
4614                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4615                         time_value_t    user_time, system_time;
4616
4617                         if (thread->options & TH_OPT_IDLE_THREAD) {
4618                                 continue;
4619                         }
4620
4621                         thread_read_times(thread, &user_time, &system_time, NULL);
4622
4623                         time_value_add(&times_info->user_time, &user_time);
4624                         time_value_add(&times_info->system_time, &system_time);
4625                 }
4626
4627                 *task_info_count = TASK_THREAD_TIMES_INFO_COUNT;
4628                 break;
4629         }
4630
4631         case TASK_ABSOLUTETIME_INFO:
4632         {
4633                 task_absolutetime_info_t        info;
4634                 thread_t                        thread;
4635
4636                 if (*task_info_count < TASK_ABSOLUTETIME_INFO_COUNT) {
4637                         error = KERN_INVALID_ARGUMENT;
4638                         break;
4639                 }
4640
4641                 info = (task_absolutetime_info_t)task_info_out;
4642                 info->threads_user = info->threads_system = 0;
4643
4644
4645                 info->total_user = task->total_user_time;
4646                 info->total_system = task->total_system_time;
4647
4648                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4649                         uint64_t        tval;
4650                         spl_t           x;
4651
4652                         if (thread->options & TH_OPT_IDLE_THREAD) {
4653                                 continue;
4654                         }
4655
4656                         x = splsched();
4657                         thread_lock(thread);
4658
4659                         tval = timer_grab(&thread->user_timer);
4660                         info->threads_user += tval;
4661                         info->total_user += tval;
4662
4663                         tval = timer_grab(&thread->system_timer);
4664                         if (thread->precise_user_kernel_time) {
4665                                 info->threads_system += tval;
4666                                 info->total_system += tval;
4667                         } else {
4668                                 /* system_timer may represent either sys or user */
4669                                 info->threads_user += tval;
4670                                 info->total_user += tval;
4671                         }
4672
4673                         thread_unlock(thread);
4674                         splx(x);
4675                 }
4676
4677
4678                 *task_info_count = TASK_ABSOLUTETIME_INFO_COUNT;
4679                 break;
4680         }
4681
4682         case TASK_DYLD_INFO:
4683         {
4684                 task_dyld_info_t info;
4685
4686                 /*
4687                  * We added the format field to TASK_DYLD_INFO output.  For
4688                  * temporary backward compatibility, accept the fact that
4689                  * clients may ask for the old version - distinquished by the
4690                  * size of the expected result structure.
4691                  */
4692 #define TASK_LEGACY_DYLD_INFO_COUNT \
4693                 offsetof(struct task_dyld_info, all_image_info_format)/sizeof(natural_t)
4694
4695                 if (*task_info_count < TASK_LEGACY_DYLD_INFO_COUNT) {
4696                         error = KERN_INVALID_ARGUMENT;
4697                         break;
4698                 }
4699
4700                 info = (task_dyld_info_t)task_info_out;
4701                 info->all_image_info_addr = task->all_image_info_addr;
4702                 info->all_image_info_size = task->all_image_info_size;
4703
4704                 /* only set format on output for those expecting it */
4705                 if (*task_info_count >= TASK_DYLD_INFO_COUNT) {
4706                         info->all_image_info_format = task_has_64Bit_addr(task) ?
4707                             TASK_DYLD_ALL_IMAGE_INFO_64 :
4708                             TASK_DYLD_ALL_IMAGE_INFO_32;
4709                         *task_info_count = TASK_DYLD_INFO_COUNT;
4710                 } else {
4711                         *task_info_count = TASK_LEGACY_DYLD_INFO_COUNT;
4712                 }
4713                 break;
4714         }
4715
4716         case TASK_EXTMOD_INFO:
4717         {
4718                 task_extmod_info_t info;
4719                 void *p;
4720
4721                 if (*task_info_count < TASK_EXTMOD_INFO_COUNT) {
4722                         error = KERN_INVALID_ARGUMENT;
4723                         break;
4724                 }
4725
4726                 info = (task_extmod_info_t)task_info_out;
4727
4728                 p = get_bsdtask_info(task);
4729                 if (p) {
4730                         proc_getexecutableuuid(p, info->task_uuid, sizeof(info->task_uuid));
4731                 } else {
4732                         bzero(info->task_uuid, sizeof(info->task_uuid));
4733                 }
4734                 info->extmod_statistics = task->extmod_statistics;
4735                 *task_info_count = TASK_EXTMOD_INFO_COUNT;
4736
4737                 break;
4738         }
4739
4740         case TASK_KERNELMEMORY_INFO:
4741         {
4742                 task_kernelmemory_info_t        tkm_info;
4743                 ledger_amount_t                 credit, debit;
4744
4745                 if (*task_info_count < TASK_KERNELMEMORY_INFO_COUNT) {
4746                         error = KERN_INVALID_ARGUMENT;
4747                         break;
4748                 }
4749
4750                 tkm_info = (task_kernelmemory_info_t) task_info_out;
4751                 tkm_info->total_palloc = 0;
4752                 tkm_info->total_pfree = 0;
4753                 tkm_info->total_salloc = 0;
4754                 tkm_info->total_sfree = 0;
4755
4756                 if (task == kernel_task) {
4757                         /*
4758                          * All shared allocs/frees from other tasks count against
4759                          * the kernel private memory usage.  If we are looking up
4760                          * info for the kernel task, gather from everywhere.
4761                          */
4762                         task_unlock(task);
4763
4764                         /* start by accounting for all the terminated tasks against the kernel */
4765                         tkm_info->total_palloc = tasks_tkm_private.alloc + tasks_tkm_shared.alloc;
4766                         tkm_info->total_pfree = tasks_tkm_private.free + tasks_tkm_shared.free;
4767
4768                         /* count all other task/thread shared alloc/free against the kernel */
4769                         lck_mtx_lock(&tasks_threads_lock);
4770
4771                         /* XXX this really shouldn't be using the function parameter 'task' as a local var! */
4772                         queue_iterate(&tasks, task, task_t, tasks) {
4773                                 if (task == kernel_task) {
4774                                         if (ledger_get_entries(task->ledger,
4775                                             task_ledgers.tkm_private, &credit,
4776                                             &debit) == KERN_SUCCESS) {
4777                                                 tkm_info->total_palloc += credit;
4778                                                 tkm_info->total_pfree += debit;
4779                                         }
4780                                 }
4781                                 if (!ledger_get_entries(task->ledger,
4782                                     task_ledgers.tkm_shared, &credit, &debit)) {
4783                                         tkm_info->total_palloc += credit;
4784                                         tkm_info->total_pfree += debit;
4785                                 }
4786                         }
4787                         lck_mtx_unlock(&tasks_threads_lock);
4788                 } else {
4789                         if (!ledger_get_entries(task->ledger,
4790                             task_ledgers.tkm_private, &credit, &debit)) {
4791                                 tkm_info->total_palloc = credit;
4792                                 tkm_info->total_pfree = debit;
4793                         }
4794                         if (!ledger_get_entries(task->ledger,
4795                             task_ledgers.tkm_shared, &credit, &debit)) {
4796                                 tkm_info->total_salloc = credit;
4797                                 tkm_info->total_sfree = debit;
4798                         }
4799                         task_unlock(task);
4800                 }
4801
4802                 *task_info_count = TASK_KERNELMEMORY_INFO_COUNT;
4803                 return KERN_SUCCESS;
4804         }
4805
4806         /* OBSOLETE */
4807         case TASK_SCHED_FIFO_INFO:
4808         {
4809                 if (*task_info_count < POLICY_FIFO_BASE_COUNT) {
4810                         error = KERN_INVALID_ARGUMENT;
4811                         break;
4812                 }
4813
4814                 error = KERN_INVALID_POLICY;
4815                 break;
4816         }
4817
4818         /* OBSOLETE */
4819         case TASK_SCHED_RR_INFO:
4820         {
4821                 policy_rr_base_t        rr_base;
4822                 uint32_t quantum_time;
4823                 uint64_t quantum_ns;
4824
4825                 if (*task_info_count < POLICY_RR_BASE_COUNT) {
4826                         error = KERN_INVALID_ARGUMENT;
4827                         break;
4828                 }
4829
4830                 rr_base = (policy_rr_base_t) task_info_out;
4831
4832                 if (task != kernel_task) {
4833                         error = KERN_INVALID_POLICY;
4834                         break;
4835                 }
4836
4837                 rr_base->base_priority = task->priority;
4838
4839                 quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
4840                 absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
4841
4842                 rr_base->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
4843
4844                 *task_info_count = POLICY_RR_BASE_COUNT;
4845                 break;
4846         }
4847
4848         /* OBSOLETE */
4849         case TASK_SCHED_TIMESHARE_INFO:
4850         {
4851                 policy_timeshare_base_t ts_base;
4852
4853                 if (*task_info_count < POLICY_TIMESHARE_BASE_COUNT) {
4854                         error = KERN_INVALID_ARGUMENT;
4855                         break;
4856                 }
4857
4858                 ts_base = (policy_timeshare_base_t) task_info_out;
4859
4860                 if (task == kernel_task) {
4861                         error = KERN_INVALID_POLICY;
4862                         break;
4863                 }
4864
4865                 ts_base->base_priority = task->priority;
4866
4867                 *task_info_count = POLICY_TIMESHARE_BASE_COUNT;
4868                 break;
4869         }
4870
4871         case TASK_SECURITY_TOKEN:
4872         {
4873                 security_token_t        *sec_token_p;
4874
4875                 if (*task_info_count < TASK_SECURITY_TOKEN_COUNT) {
4876                         error = KERN_INVALID_ARGUMENT;
4877                         break;
4878                 }
4879
4880                 sec_token_p = (security_token_t *) task_info_out;
4881
4882                 *sec_token_p = task->sec_token;
4883
4884                 *task_info_count = TASK_SECURITY_TOKEN_COUNT;
4885                 break;
4886         }
4887
4888         case TASK_AUDIT_TOKEN:
4889         {
4890                 audit_token_t   *audit_token_p;
4891
4892                 if (*task_info_count < TASK_AUDIT_TOKEN_COUNT) {
4893                         error = KERN_INVALID_ARGUMENT;
4894                         break;
4895                 }
4896
4897                 audit_token_p = (audit_token_t *) task_info_out;
4898
4899                 *audit_token_p = task->audit_token;
4900
4901                 *task_info_count = TASK_AUDIT_TOKEN_COUNT;
4902                 break;
4903         }
4904
4905         case TASK_SCHED_INFO:
4906                 error = KERN_INVALID_ARGUMENT;
4907                 break;
4908
4909         case TASK_EVENTS_INFO:
4910         {
4911                 task_events_info_t      events_info;
4912                 thread_t                        thread;
4913
4914                 if (*task_info_count < TASK_EVENTS_INFO_COUNT) {
4915                         error = KERN_INVALID_ARGUMENT;
4916                         break;
4917                 }
4918
4919                 events_info = (task_events_info_t) task_info_out;
4920
4921
4922                 events_info->faults = task->faults;
4923                 events_info->pageins = task->pageins;
4924                 events_info->cow_faults = task->cow_faults;
4925                 events_info->messages_sent = task->messages_sent;
4926                 events_info->messages_received = task->messages_received;
4927                 events_info->syscalls_mach = task->syscalls_mach;
4928                 events_info->syscalls_unix = task->syscalls_unix;
4929
4930                 events_info->csw = task->c_switch;
4931
4932                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4933                         events_info->csw           += thread->c_switch;
4934                         events_info->syscalls_mach += thread->syscalls_mach;
4935                         events_info->syscalls_unix += thread->syscalls_unix;
4936                 }
4937
4938
4939                 *task_info_count = TASK_EVENTS_INFO_COUNT;
4940                 break;
4941         }
4942         case TASK_AFFINITY_TAG_INFO:
4943         {
4944                 if (*task_info_count < TASK_AFFINITY_TAG_INFO_COUNT) {
4945                         error = KERN_INVALID_ARGUMENT;
4946                         break;
4947                 }
4948
4949                 error = task_affinity_info(task, task_info_out, task_info_count);
4950                 break;
4951         }
4952         case TASK_POWER_INFO:
4953         {
4954                 if (*task_info_count < TASK_POWER_INFO_COUNT) {
4955                         error = KERN_INVALID_ARGUMENT;
4956                         break;
4957                 }
4958
4959                 task_power_info_locked(task, (task_power_info_t)task_info_out, NULL, NULL, NULL);
4960                 break;
4961         }
4962
4963         case TASK_POWER_INFO_V2:
4964         {
4965                 if (*task_info_count < TASK_POWER_INFO_V2_COUNT_OLD) {
4966                         error = KERN_INVALID_ARGUMENT;
4967                         break;
4968                 }
4969                 task_power_info_v2_t tpiv2 = (task_power_info_v2_t) task_info_out;
4970                 task_power_info_locked(task, &tpiv2->cpu_energy, &tpiv2->gpu_energy, tpiv2, NULL);
4971                 break;
4972         }
4973
4974         case TASK_VM_INFO:
4975         case TASK_VM_INFO_PURGEABLE:
4976         {
4977                 task_vm_info_t          vm_info;
4978                 vm_map_t                map;
4979
4980 #if __arm64__
4981                 struct proc *p;
4982                 uint32_t platform, sdk;
4983                 p = current_proc();
4984                 platform = proc_platform(p);
4985                 sdk = proc_sdk(p);
4986                 if (original_task_info_count > TASK_VM_INFO_REV2_COUNT &&
4987                     platform == PLATFORM_IOS &&
4988                     sdk != 0 &&
4989                     (sdk >> 16) <= 12) {
4990                         /*
4991                          * Some iOS apps pass an incorrect value for
4992                          * task_info_count, expressed in number of bytes
4993                          * instead of number of "natural_t" elements.
4994                          * For the sake of backwards binary compatibility
4995                          * for apps built with an iOS12 or older SDK and using
4996                          * the "rev2" data structure, let's fix task_info_count
4997                          * for them, to avoid stomping past the actual end
4998                          * of their buffer.
4999                          */
5000 #if DEVELOPMENT || DEBUG
5001                         printf("%s:%d %d[%s] rdar://49484582 task_info_count %d -> %d platform %d sdk %d.%d.%d\n", __FUNCTION__, __LINE__, proc_pid(p), proc_name_address(p), original_task_info_count, TASK_VM_INFO_REV2_COUNT, platform, (sdk >> 16), ((sdk >> 8) & 0xff), (sdk & 0xff));
5002 #endif /* DEVELOPMENT || DEBUG */
5003                         DTRACE_VM4(workaround_task_vm_info_count,
5004                             mach_msg_type_number_t, original_task_info_count,
5005                             mach_msg_type_number_t, TASK_VM_INFO_REV2_COUNT,
5006                             uint32_t, platform,
5007                             uint32_t, sdk);
5008                         original_task_info_count = TASK_VM_INFO_REV2_COUNT;
5009                         *task_info_count = original_task_info_count;
5010                 }
5011 #endif /* __arm64__ */
5012
5013                 if (*task_info_count < TASK_VM_INFO_REV0_COUNT) {
5014                         error = KERN_INVALID_ARGUMENT;
5015                         break;
5016                 }
5017
5018                 vm_info = (task_vm_info_t)task_info_out;
5019
5020                 if (task == kernel_task) {
5021                         map = kernel_map;
5022                         /* no lock */
5023                 } else {
5024                         map = task->map;
5025                         vm_map_lock_read(map);
5026                 }
5027
5028                 vm_info->virtual_size = (typeof(vm_info->virtual_size))map->size;
5029                 vm_info->region_count = map->hdr.nentries;
5030                 vm_info->page_size = vm_map_page_size(map);
5031
5032                 vm_info->resident_size = pmap_resident_count(map->pmap);
5033                 vm_info->resident_size *= PAGE_SIZE;
5034                 vm_info->resident_size_peak = pmap_resident_max(map->pmap);
5035                 vm_info->resident_size_peak *= PAGE_SIZE;
5036
5037 #define _VM_INFO(_name) \
5038         vm_info->_name = ((mach_vm_size_t) map->pmap->stats._name) * PAGE_SIZE
5039
5040                 _VM_INFO(device);
5041                 _VM_INFO(device_peak);
5042                 _VM_INFO(external);
5043                 _VM_INFO(external_peak);
5044                 _VM_INFO(internal);
5045                 _VM_INFO(internal_peak);
5046                 _VM_INFO(reusable);
5047                 _VM_INFO(reusable_peak);
5048                 _VM_INFO(compressed);
5049                 _VM_INFO(compressed_peak);
5050                 _VM_INFO(compressed_lifetime);
5051
5052                 vm_info->purgeable_volatile_pmap = 0;
5053                 vm_info->purgeable_volatile_resident = 0;
5054                 vm_info->purgeable_volatile_virtual = 0;
5055                 if (task == kernel_task) {
5056                         /*
5057                          * We do not maintain the detailed stats for the
5058                          * kernel_pmap, so just count everything as
5059                          * "internal"...
5060                          */
5061                         vm_info->internal = vm_info->resident_size;
5062                         /*
5063                          * ... but since the memory held by the VM compressor
5064                          * in the kernel address space ought to be attributed
5065                          * to user-space tasks, we subtract it from "internal"
5066                          * to give memory reporting tools a more accurate idea
5067                          * of what the kernel itself is actually using, instead
5068                          * of making it look like the kernel is leaking memory
5069                          * when the system is under memory pressure.
5070                          */
5071                         vm_info->internal -= (VM_PAGE_COMPRESSOR_COUNT *
5072                             PAGE_SIZE);
5073                 } else {
5074                         mach_vm_size_t  volatile_virtual_size;
5075                         mach_vm_size_t  volatile_resident_size;
5076                         mach_vm_size_t  volatile_compressed_size;
5077                         mach_vm_size_t  volatile_pmap_size;
5078                         mach_vm_size_t  volatile_compressed_pmap_size;
5079                         kern_return_t   kr;
5080
5081                         if (flavor == TASK_VM_INFO_PURGEABLE) {
5082                                 kr = vm_map_query_volatile(
5083                                         map,
5084                                         &volatile_virtual_size,
5085                                         &volatile_resident_size,
5086                                         &volatile_compressed_size,
5087                                         &volatile_pmap_size,
5088                                         &volatile_compressed_pmap_size);
5089                                 if (kr == KERN_SUCCESS) {
5090                                         vm_info->purgeable_volatile_pmap =
5091                                             volatile_pmap_size;
5092                                         if (radar_20146450) {
5093                                                 vm_info->compressed -=
5094                                                     volatile_compressed_pmap_size;
5095                                         }
5096                                         vm_info->purgeable_volatile_resident =
5097                                             volatile_resident_size;
5098                                         vm_info->purgeable_volatile_virtual =
5099                                             volatile_virtual_size;
5100                                 }
5101                         }
5102                 }
5103                 *task_info_count = TASK_VM_INFO_REV0_COUNT;
5104
5105                 if (original_task_info_count >= TASK_VM_INFO_REV1_COUNT) {
5106                         vm_info->phys_footprint =
5107                             (mach_vm_size_t) get_task_phys_footprint(task);
5108                         *task_info_count = TASK_VM_INFO_REV1_COUNT;
5109                 }
5110                 if (original_task_info_count >= TASK_VM_INFO_REV2_COUNT) {
5111                         vm_info->min_address = map->min_offset;
5112                         vm_info->max_address = map->max_offset;
5113                         *task_info_count = TASK_VM_INFO_REV2_COUNT;
5114                 }
5115                 if (original_task_info_count >= TASK_VM_INFO_REV3_COUNT) {
5116                         ledger_get_lifetime_max(task->ledger,
5117                             task_ledgers.phys_footprint,
5118                             &vm_info->ledger_phys_footprint_peak);
5119                         ledger_get_balance(task->ledger,
5120                             task_ledgers.purgeable_nonvolatile,
5121                             &vm_info->ledger_purgeable_nonvolatile);
5122                         ledger_get_balance(task->ledger,
5123                             task_ledgers.purgeable_nonvolatile_compressed,
5124                             &vm_info->ledger_purgeable_novolatile_compressed);
5125                         ledger_get_balance(task->ledger,
5126                             task_ledgers.purgeable_volatile,
5127                             &vm_info->ledger_purgeable_volatile);
5128                         ledger_get_balance(task->ledger,
5129                             task_ledgers.purgeable_volatile_compressed,
5130                             &vm_info->ledger_purgeable_volatile_compressed);
5131                         ledger_get_balance(task->ledger,
5132                             task_ledgers.network_nonvolatile,
5133                             &vm_info->ledger_tag_network_nonvolatile);
5134                         ledger_get_balance(task->ledger,
5135                             task_ledgers.network_nonvolatile_compressed,
5136                             &vm_info->ledger_tag_network_nonvolatile_compressed);
5137                         ledger_get_balance(task->ledger,
5138                             task_ledgers.network_volatile,
5139                             &vm_info->ledger_tag_network_volatile);
5140                         ledger_get_balance(task->ledger,
5141                             task_ledgers.network_volatile_compressed,
5142                             &vm_info->ledger_tag_network_volatile_compressed);
5143                         ledger_get_balance(task->ledger,
5144                             task_ledgers.media_footprint,
5145                             &vm_info->ledger_tag_media_footprint);
5146                         ledger_get_balance(task->ledger,
5147                             task_ledgers.media_footprint_compressed,
5148                             &vm_info->ledger_tag_media_footprint_compressed);
5149                         ledger_get_balance(task->ledger,
5150                             task_ledgers.media_nofootprint,
5151                             &vm_info->ledger_tag_media_nofootprint);
5152                         ledger_get_balance(task->ledger,
5153                             task_ledgers.media_nofootprint_compressed,
5154                             &vm_info->ledger_tag_media_nofootprint_compressed);
5155                         ledger_get_balance(task->ledger,
5156                             task_ledgers.graphics_footprint,
5157                             &vm_info->ledger_tag_graphics_footprint);
5158                         ledger_get_balance(task->ledger,
5159                             task_ledgers.graphics_footprint_compressed,
5160                             &vm_info->ledger_tag_graphics_footprint_compressed);
5161                         ledger_get_balance(task->ledger,
5162                             task_ledgers.graphics_nofootprint,
5163                             &vm_info->ledger_tag_graphics_nofootprint);
5164                         ledger_get_balance(task->ledger,
5165                             task_ledgers.graphics_nofootprint_compressed,
5166                             &vm_info->ledger_tag_graphics_nofootprint_compressed);
5167                         ledger_get_balance(task->ledger,
5168                             task_ledgers.neural_footprint,
5169                             &vm_info->ledger_tag_neural_footprint);
5170                         ledger_get_balance(task->ledger,
5171                             task_ledgers.neural_footprint_compressed,
5172                             &vm_info->ledger_tag_neural_footprint_compressed);
5173                         ledger_get_balance(task->ledger,
5174                             task_ledgers.neural_nofootprint,
5175                             &vm_info->ledger_tag_neural_nofootprint);
5176                         ledger_get_balance(task->ledger,
5177                             task_ledgers.neural_nofootprint_compressed,
5178                             &vm_info->ledger_tag_neural_nofootprint_compressed);
5179                         *task_info_count = TASK_VM_INFO_REV3_COUNT;
5180                 }
5181                 if (original_task_info_count >= TASK_VM_INFO_REV4_COUNT) {
5182                         if (task->bsd_info) {
5183                                 vm_info->limit_bytes_remaining =
5184                                     memorystatus_available_memory_internal(task->bsd_info);
5185                         } else {
5186                                 vm_info->limit_bytes_remaining = 0;
5187                         }
5188                         *task_info_count = TASK_VM_INFO_REV4_COUNT;
5189                 }
5190                 if (original_task_info_count >= TASK_VM_INFO_REV5_COUNT) {
5191                         thread_t thread;
5192                         integer_t total = task->decompressions;
5193                         queue_iterate(&task->threads, thread, thread_t, task_threads) {
5194                                 total += thread->decompressions;
5195                         }
5196                         vm_info->decompressions = total;
5197                         *task_info_count = TASK_VM_INFO_REV5_COUNT;
5198                 }
5199
5200                 if (task != kernel_task) {
5201                         vm_map_unlock_read(map);
5202                 }
5203
5204                 break;
5205         }
5206
5207         case TASK_WAIT_STATE_INFO:
5208         {
5209                 /*
5210                  * Deprecated flavor. Currently allowing some results until all users
5211                  * stop calling it. The results may not be accurate.
5212                  */
5213                 task_wait_state_info_t  wait_state_info;
5214                 uint64_t total_sfi_ledger_val = 0;
5215
5216                 if (*task_info_count < TASK_WAIT_STATE_INFO_COUNT) {
5217                         error = KERN_INVALID_ARGUMENT;
5218                         break;
5219                 }
5220
5221                 wait_state_info = (task_wait_state_info_t) task_info_out;
5222
5223                 wait_state_info->total_wait_state_time = 0;
5224                 bzero(wait_state_info->_reserved, sizeof(wait_state_info->_reserved));
5225
5226 #if CONFIG_SCHED_SFI
5227                 int i, prev_lentry = -1;
5228                 int64_t  val_credit, val_debit;
5229
5230                 for (i = 0; i < MAX_SFI_CLASS_ID; i++) {
5231                         val_credit = 0;
5232                         /*
5233                          * checking with prev_lentry != entry ensures adjacent classes
5234                          * which share the same ledger do not add wait times twice.
5235                          * Note: Use ledger() call to get data for each individual sfi class.
5236                          */
5237                         if (prev_lentry != task_ledgers.sfi_wait_times[i] &&
5238                             KERN_SUCCESS == ledger_get_entries(task->ledger,
5239                             task_ledgers.sfi_wait_times[i], &val_credit, &val_debit)) {
5240                                 total_sfi_ledger_val += val_credit;
5241                         }
5242                         prev_lentry = task_ledgers.sfi_wait_times[i];
5243                 }
5244
5245 #endif /* CONFIG_SCHED_SFI */
5246                 wait_state_info->total_wait_sfi_state_time = total_sfi_ledger_val;
5247                 *task_info_count = TASK_WAIT_STATE_INFO_COUNT;
5248
5249                 break;
5250         }
5251         case TASK_VM_INFO_PURGEABLE_ACCOUNT:
5252         {
5253 #if DEVELOPMENT || DEBUG
5254                 pvm_account_info_t      acnt_info;
5255
5256                 if (*task_info_count < PVM_ACCOUNT_INFO_COUNT) {
5257                         error = KERN_INVALID_ARGUMENT;
5258                         break;
5259                 }
5260
5261                 if (task_info_out == NULL) {
5262                         error = KERN_INVALID_ARGUMENT;
5263                         break;
5264                 }
5265
5266                 acnt_info = (pvm_account_info_t) task_info_out;
5267
5268                 error = vm_purgeable_account(task, acnt_info);
5269
5270                 *task_info_count = PVM_ACCOUNT_INFO_COUNT;
5271
5272                 break;
5273 #else /* DEVELOPMENT || DEBUG */
5274                 error = KERN_NOT_SUPPORTED;
5275                 break;
5276 #endif /* DEVELOPMENT || DEBUG */
5277         }
5278         case TASK_FLAGS_INFO:
5279         {
5280                 task_flags_info_t               flags_info;
5281
5282                 if (*task_info_count < TASK_FLAGS_INFO_COUNT) {
5283                         error = KERN_INVALID_ARGUMENT;
5284                         break;
5285                 }
5286
5287                 flags_info = (task_flags_info_t)task_info_out;
5288
5289                 /* only publish the 64-bit flag of the task */
5290                 flags_info->flags = task->t_flags & (TF_64B_ADDR | TF_64B_DATA);
5291
5292                 *task_info_count = TASK_FLAGS_INFO_COUNT;
5293                 break;
5294         }
5295
5296         case TASK_DEBUG_INFO_INTERNAL:
5297         {
5298 #if DEVELOPMENT || DEBUG
5299                 task_debug_info_internal_t dbg_info;
5300                 ipc_space_t space = task->itk_space;
5301                 if (*task_info_count < TASK_DEBUG_INFO_INTERNAL_COUNT) {
5302                         error = KERN_NOT_SUPPORTED;
5303                         break;
5304                 }
5305
5306                 if (task_info_out == NULL) {
5307                         error = KERN_INVALID_ARGUMENT;
5308                         break;
5309                 }
5310                 dbg_info = (task_debug_info_internal_t) task_info_out;
5311                 dbg_info->ipc_space_size = 0;
5312
5313                 if (space) {
5314                         is_read_lock(space);
5315                         dbg_info->ipc_space_size = space->is_table_size;
5316                         is_read_unlock(space);
5317                 }
5318
5319                 dbg_info->suspend_count = task->suspend_count;
5320
5321                 error = KERN_SUCCESS;
5322                 *task_info_count = TASK_DEBUG_INFO_INTERNAL_COUNT;
5323                 break;
5324 #else /* DEVELOPMENT || DEBUG */
5325                 error = KERN_NOT_SUPPORTED;
5326                 break;
5327 #endif /* DEVELOPMENT || DEBUG */
5328         }
5329         default:
5330                 error = KERN_INVALID_ARGUMENT;
5331         }
5332
5333         task_unlock(task);
5334         return error;
5335 }
5336
5337 /*
5338  * task_info_from_user
5339  *
5340  * When calling task_info from user space,
5341  * this function will be executed as mig server side
5342  * instead of calling directly into task_info.
5343  * This gives the possibility to perform more security
5344  * checks on task_port.
5345  *
5346  * In the case of TASK_DYLD_INFO, we require the more
5347  * privileged task_port not the less-privileged task_name_port.
5348  *
5349  */
5350 kern_return_t
5351 task_info_from_user(
5352         mach_port_t             task_port,
5353         task_flavor_t           flavor,
5354         task_info_t             task_info_out,
5355         mach_msg_type_number_t  *task_info_count)
5356 {
5357         task_t task;
5358         kern_return_t ret;
5359
5360         if (flavor == TASK_DYLD_INFO) {
5361                 task = convert_port_to_task(task_port);
5362         } else {
5363                 task = convert_port_to_task_name(task_port);
5364         }
5365
5366         ret = task_info(task, flavor, task_info_out, task_info_count);
5367
5368         task_deallocate(task);
5369
5370         return ret;
5371 }
5372
5373 /*
5374  *      task_power_info
5375  *
5376  *      Returns power stats for the task.
5377  *      Note: Called with task locked.
5378  */
5379 void
5380 task_power_info_locked(
5381         task_t                  task,
5382         task_power_info_t       info,
5383         gpu_energy_data_t       ginfo,
5384         task_power_info_v2_t    infov2,
5385         uint64_t                *runnable_time)
5386 {
5387         thread_t                thread;
5388         ledger_amount_t         tmp;
5389
5390         uint64_t                runnable_time_sum = 0;
5391
5392         task_lock_assert_owned(task);
5393
5394         ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
5395             (ledger_amount_t *)&info->task_interrupt_wakeups, &tmp);
5396         ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
5397             (ledger_amount_t *)&info->task_platform_idle_wakeups, &tmp);
5398
5399         info->task_timer_wakeups_bin_1 = task->task_timer_wakeups_bin_1;
5400         info->task_timer_wakeups_bin_2 = task->task_timer_wakeups_bin_2;
5401
5402         info->total_user = task->total_user_time;
5403         info->total_system = task->total_system_time;
5404         runnable_time_sum = task->total_runnable_time;
5405
5406 #if CONFIG_EMBEDDED
5407         if (infov2) {
5408                 infov2->task_energy = task->task_energy;
5409         }
5410 #endif
5411
5412         if (ginfo) {
5413                 ginfo->task_gpu_utilisation = task->task_gpu_ns;
5414         }
5415
5416         if (infov2) {
5417                 infov2->task_ptime = task->total_ptime;
5418                 infov2->task_pset_switches = task->ps_switch;
5419         }
5420
5421         queue_iterate(&task->threads, thread, thread_t, task_threads) {
5422                 uint64_t        tval;
5423                 spl_t           x;
5424
5425                 if (thread->options & TH_OPT_IDLE_THREAD) {
5426                         continue;
5427                 }
5428
5429                 x = splsched();
5430                 thread_lock(thread);
5431
5432                 info->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
5433                 info->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
5434
5435 #if CONFIG_EMBEDDED
5436                 if (infov2) {
5437                         infov2->task_energy += ml_energy_stat(thread);
5438                 }
5439 #endif
5440
5441                 tval = timer_grab(&thread->user_timer);
5442                 info->total_user += tval;
5443
5444                 if (infov2) {
5445                         tval = timer_grab(&thread->ptime);
5446                         infov2->task_ptime += tval;
5447                         infov2->task_pset_switches += thread->ps_switch;
5448                 }
5449
5450                 tval = timer_grab(&thread->system_timer);
5451                 if (thread->precise_user_kernel_time) {
5452                         info->total_system += tval;
5453                 } else {
5454                         /* system_timer may represent either sys or user */
5455                         info->total_user += tval;
5456                 }
5457
5458                 tval = timer_grab(&thread->runnable_timer);
5459
5460                 runnable_time_sum += tval;
5461
5462                 if (ginfo) {
5463                         ginfo->task_gpu_utilisation += ml_gpu_stat(thread);
5464                 }
5465                 thread_unlock(thread);
5466                 splx(x);
5467         }
5468
5469         if (runnable_time) {
5470                 *runnable_time = runnable_time_sum;
5471         }
5472 }
5473
5474 /*
5475  *      task_gpu_utilisation
5476  *
5477  *      Returns the total gpu time used by the all the threads of the task
5478  *  (both dead and alive)
5479  */
5480 uint64_t
5481 task_gpu_utilisation(
5482         task_t  task)
5483 {
5484         uint64_t gpu_time = 0;
5485 #if !CONFIG_EMBEDDED
5486         thread_t thread;
5487
5488         task_lock(task);
5489         gpu_time += task->task_gpu_ns;
5490
5491         queue_iterate(&task->threads, thread, thread_t, task_threads) {
5492                 spl_t x;
5493                 x = splsched();
5494                 thread_lock(thread);
5495                 gpu_time += ml_gpu_stat(thread);
5496                 thread_unlock(thread);
5497                 splx(x);
5498         }
5499
5500         task_unlock(task);
5501 #else /* CONFIG_EMBEDDED */
5502         /* silence compiler warning */
5503         (void)task;
5504 #endif /* !CONFIG_EMBEDDED */
5505         return gpu_time;
5506 }
5507
5508 /*
5509  *      task_energy
5510  *
5511  *      Returns the total energy used by the all the threads of the task
5512  *  (both dead and alive)
5513  */
5514 uint64_t
5515 task_energy(
5516         task_t  task)
5517 {
5518         uint64_t energy = 0;
5519         thread_t thread;
5520
5521         task_lock(task);
5522         energy += task->task_energy;
5523
5524         queue_iterate(&task->threads, thread, thread_t, task_threads) {
5525                 spl_t x;
5526                 x = splsched();
5527                 thread_lock(thread);
5528                 energy += ml_energy_stat(thread);
5529                 thread_unlock(thread);
5530                 splx(x);
5531         }
5532
5533         task_unlock(task);
5534         return energy;
5535 }
5536
5537 #if __AMP__
5538
5539 uint64_t
5540 task_cpu_ptime(
5541         task_t  task)
5542 {
5543         uint64_t cpu_ptime = 0;
5544         thread_t thread;
5545
5546         task_lock(task);
5547         cpu_ptime += task->total_ptime;
5548
5549         queue_iterate(&task->threads, thread, thread_t, task_threads) {
5550                 cpu_ptime += timer_grab(&thread->ptime);
5551         }
5552
5553         task_unlock(task);
5554         return cpu_ptime;
5555 }
5556
5557 #else /* __AMP__ */
5558
5559 uint64_t
5560 task_cpu_ptime(
5561         __unused task_t  task)
5562 {
5563         return 0;
5564 }
5565
5566 #endif /* __AMP__ */
5567
5568 /* This function updates the cpu time in the arrays for each
5569  * effective and requested QoS class
5570  */
5571 void
5572 task_update_cpu_time_qos_stats(
5573         task_t  task,
5574         uint64_t *eqos_stats,
5575         uint64_t *rqos_stats)
5576 {
5577         if (!eqos_stats && !rqos_stats) {
5578                 return;
5579         }
5580
5581         task_lock(task);
5582         thread_t thread;
5583         queue_iterate(&task->threads, thread, thread_t, task_threads) {
5584                 if (thread->options & TH_OPT_IDLE_THREAD) {
5585                         continue;
5586                 }
5587
5588                 thread_update_qos_cpu_time(thread);
5589         }
5590
5591         if (eqos_stats) {
5592                 eqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_eqos_stats.cpu_time_qos_default;
5593                 eqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_eqos_stats.cpu_time_qos_maintenance;
5594                 eqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_eqos_stats.cpu_time_qos_background;
5595                 eqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_eqos_stats.cpu_time_qos_utility;
5596                 eqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_eqos_stats.cpu_time_qos_legacy;
5597                 eqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_eqos_stats.cpu_time_qos_user_initiated;
5598                 eqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_eqos_stats.cpu_time_qos_user_interactive;
5599         }
5600
5601         if (rqos_stats) {
5602                 rqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_rqos_stats.cpu_time_qos_default;
5603                 rqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_rqos_stats.cpu_time_qos_maintenance;
5604                 rqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_rqos_stats.cpu_time_qos_background;
5605                 rqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_rqos_stats.cpu_time_qos_utility;
5606                 rqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_rqos_stats.cpu_time_qos_legacy;
5607                 rqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_rqos_stats.cpu_time_qos_user_initiated;
5608                 rqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_rqos_stats.cpu_time_qos_user_interactive;
5609         }
5610
5611         task_unlock(task);
5612 }
5613
5614 kern_return_t
5615 task_purgable_info(
5616         task_t                  task,
5617         task_purgable_info_t    *stats)
5618 {
5619         if (task == TASK_NULL || stats == NULL) {
5620                 return KERN_INVALID_ARGUMENT;
5621         }
5622         /* Take task reference */
5623         task_reference(task);
5624         vm_purgeable_stats((vm_purgeable_info_t)stats, task);
5625         /* Drop task reference */
5626         task_deallocate(task);
5627         return KERN_SUCCESS;
5628 }
5629
5630 void
5631 task_vtimer_set(
5632         task_t          task,
5633         integer_t       which)
5634 {
5635         thread_t        thread;
5636         spl_t           x;
5637
5638         task_lock(task);
5639
5640         task->vtimers |= which;
5641
5642         switch (which) {
5643         case TASK_VTIMER_USER:
5644                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5645                         x = splsched();
5646                         thread_lock(thread);
5647                         if (thread->precise_user_kernel_time) {
5648                                 thread->vtimer_user_save = timer_grab(&thread->user_timer);
5649                         } else {
5650                                 thread->vtimer_user_save = timer_grab(&thread->system_timer);
5651                         }
5652                         thread_unlock(thread);
5653                         splx(x);
5654                 }
5655                 break;
5656
5657         case TASK_VTIMER_PROF:
5658                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5659                         x = splsched();
5660                         thread_lock(thread);
5661                         thread->vtimer_prof_save = timer_grab(&thread->user_timer);
5662                         thread->vtimer_prof_save += timer_grab(&thread->system_timer);
5663                         thread_unlock(thread);
5664                         splx(x);
5665                 }
5666                 break;
5667
5668         case TASK_VTIMER_RLIM:
5669                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5670                         x = splsched();
5671                         thread_lock(thread);
5672                         thread->vtimer_rlim_save = timer_grab(&thread->user_timer);
5673                         thread->vtimer_rlim_save += timer_grab(&thread->system_timer);
5674                         thread_unlock(thread);
5675                         splx(x);
5676                 }
5677                 break;
5678         }
5679
5680         task_unlock(task);
5681 }
5682
5683 void
5684 task_vtimer_clear(
5685         task_t          task,
5686         integer_t       which)
5687 {
5688         assert(task == current_task());
5689
5690         task_lock(task);
5691
5692         task->vtimers &= ~which;
5693
5694         task_unlock(task);
5695 }
5696
5697 void
5698 task_vtimer_update(
5699         __unused
5700         task_t          task,
5701         integer_t       which,
5702         uint32_t        *microsecs)
5703 {
5704         thread_t        thread = current_thread();
5705         uint32_t        tdelt = 0;
5706         clock_sec_t     secs = 0;
5707         uint64_t        tsum;
5708
5709         assert(task == current_task());
5710
5711         spl_t s = splsched();
5712         thread_lock(thread);
5713
5714         if ((task->vtimers & which) != (uint32_t)which) {
5715                 thread_unlock(thread);
5716                 splx(s);
5717                 return;
5718         }
5719
5720         switch (which) {
5721         case TASK_VTIMER_USER:
5722                 if (thread->precise_user_kernel_time) {
5723                         tdelt = (uint32_t)timer_delta(&thread->user_timer,
5724                             &thread->vtimer_user_save);
5725                 } else {
5726                         tdelt = (uint32_t)timer_delta(&thread->system_timer,
5727                             &thread->vtimer_user_save);
5728                 }
5729                 absolutetime_to_microtime(tdelt, &secs, microsecs);
5730                 break;
5731
5732         case TASK_VTIMER_PROF:
5733                 tsum = timer_grab(&thread->user_timer);
5734                 tsum += timer_grab(&thread->system_timer);
5735                 tdelt = (uint32_t)(tsum - thread->vtimer_prof_save);
5736                 absolutetime_to_microtime(tdelt, &secs, microsecs);
5737                 /* if the time delta is smaller than a usec, ignore */
5738                 if (*microsecs != 0) {
5739                         thread->vtimer_prof_save = tsum;
5740                 }
5741                 break;
5742
5743         case TASK_VTIMER_RLIM:
5744                 tsum = timer_grab(&thread->user_timer);
5745                 tsum += timer_grab(&thread->system_timer);
5746                 tdelt = (uint32_t)(tsum - thread->vtimer_rlim_save);
5747                 thread->vtimer_rlim_save = tsum;
5748                 absolutetime_to_microtime(tdelt, &secs, microsecs);
5749                 break;
5750         }
5751
5752         thread_unlock(thread);
5753         splx(s);
5754 }
5755
5756 /*
5757  *      task_assign:
5758  *
5759  *      Change the assigned processor set for the task
5760  */
5761 kern_return_t
5762 task_assign(
5763         __unused task_t         task,
5764         __unused processor_set_t        new_pset,
5765         __unused boolean_t      assign_threads)
5766 {
5767         return KERN_FAILURE;
5768 }
5769
5770 /*
5771  *      task_assign_default:
5772  *
5773  *      Version of task_assign to assign to default processor set.
5774  */
5775 kern_return_t
5776 task_assign_default(
5777         task_t          task,
5778         boolean_t       assign_threads)
5779 {
5780         return task_assign(task, &pset0, assign_threads);
5781 }
5782
5783 /*
5784  *      task_get_assignment
5785  *
5786  *      Return name of processor set that task is assigned to.
5787  */
5788 kern_return_t
5789 task_get_assignment(
5790         task_t          task,
5791         processor_set_t *pset)
5792 {
5793         if (!task || !task->active) {
5794                 return KERN_FAILURE;
5795         }
5796
5797         *pset = &pset0;
5798
5799         return KERN_SUCCESS;
5800 }
5801
5802 uint64_t
5803 get_task_dispatchqueue_offset(
5804         task_t          task)
5805 {
5806         return task->dispatchqueue_offset;
5807 }
5808
5809 /*
5810  *      task_policy
5811  *
5812  *      Set scheduling policy and parameters, both base and limit, for
5813  *      the given task. Policy must be a policy which is enabled for the
5814  *      processor set. Change contained threads if requested.
5815  */
5816 kern_return_t
5817 task_policy(
5818         __unused task_t                 task,
5819         __unused policy_t                       policy_id,
5820         __unused policy_base_t          base,
5821         __unused mach_msg_type_number_t count,
5822         __unused boolean_t                      set_limit,
5823         __unused boolean_t                      change)
5824 {
5825         return KERN_FAILURE;
5826 }
5827
5828 /*
5829  *      task_set_policy
5830  *
5831  *      Set scheduling policy and parameters, both base and limit, for
5832  *      the given task. Policy can be any policy implemented by the
5833  *      processor set, whether enabled or not. Change contained threads
5834  *      if requested.
5835  */
5836 kern_return_t
5837 task_set_policy(
5838         __unused task_t                 task,
5839         __unused processor_set_t                pset,
5840         __unused policy_t                       policy_id,
5841         __unused policy_base_t          base,
5842         __unused mach_msg_type_number_t base_count,
5843         __unused policy_limit_t         limit,
5844         __unused mach_msg_type_number_t limit_count,
5845         __unused boolean_t                      change)
5846 {
5847         return KERN_FAILURE;
5848 }
5849
5850 kern_return_t
5851 task_set_ras_pc(
5852         __unused task_t task,
5853         __unused vm_offset_t    pc,
5854         __unused vm_offset_t    endpc)
5855 {
5856         return KERN_FAILURE;
5857 }
5858
5859 void
5860 task_synchronizer_destroy_all(task_t task)
5861 {
5862         /*
5863          *  Destroy owned semaphores
5864          */
5865         semaphore_destroy_all(task);
5866 }
5867
5868 /*
5869  * Install default (machine-dependent) initial thread state
5870  * on the task.  Subsequent thread creation will have this initial
5871  * state set on the thread by machine_thread_inherit_taskwide().
5872  * Flavors and structures are exactly the same as those to thread_set_state()
5873  */
5874 kern_return_t
5875 task_set_state(
5876         task_t task,
5877         int flavor,
5878         thread_state_t state,
5879         mach_msg_type_number_t state_count)
5880 {
5881         kern_return_t ret;
5882
5883         if (task == TASK_NULL) {
5884                 return KERN_INVALID_ARGUMENT;
5885         }
5886
5887         task_lock(task);
5888
5889         if (!task->active) {
5890                 task_unlock(task);
5891                 return KERN_FAILURE;
5892         }
5893
5894         ret = machine_task_set_state(task, flavor, state, state_count);
5895
5896         task_unlock(task);
5897         return ret;
5898 }
5899
5900 /*
5901  * Examine the default (machine-dependent) initial thread state
5902  * on the task, as set by task_set_state().  Flavors and structures
5903  * are exactly the same as those passed to thread_get_state().
5904  */
5905 kern_return_t
5906 task_get_state(
5907         task_t  task,
5908         int     flavor,
5909         thread_state_t state,
5910         mach_msg_type_number_t *state_count)
5911 {
5912         kern_return_t ret;
5913
5914         if (task == TASK_NULL) {
5915                 return KERN_INVALID_ARGUMENT;
5916         }
5917
5918         task_lock(task);
5919
5920         if (!task->active) {
5921                 task_unlock(task);
5922                 return KERN_FAILURE;
5923         }
5924
5925         ret = machine_task_get_state(task, flavor, state, state_count);
5926
5927         task_unlock(task);
5928         return ret;
5929 }
5930
5931
5932 static kern_return_t __attribute__((noinline, not_tail_called))
5933 PROC_VIOLATED_GUARD__SEND_EXC_GUARD_AND_SUSPEND(
5934         mach_exception_code_t code,
5935         mach_exception_subcode_t subcode,
5936         void *reason)
5937 {
5938 #ifdef MACH_BSD
5939         if (1 == proc_selfpid()) {
5940                 return KERN_NOT_SUPPORTED;              // initproc is immune
5941         }
5942 #endif
5943         mach_exception_data_type_t codes[EXCEPTION_CODE_MAX] = {
5944                 [0] = code,
5945                 [1] = subcode,
5946         };
5947         task_t task = current_task();
5948         kern_return_t kr;
5949
5950         /* (See jetsam-related comments below) */
5951
5952         proc_memstat_terminated(task->bsd_info, TRUE);
5953         kr = task_enqueue_exception_with_corpse(task, EXC_GUARD, codes, 2, reason);
5954         proc_memstat_terminated(task->bsd_info, FALSE);
5955         return kr;
5956 }
5957
5958 kern_return_t
5959 task_violated_guard(
5960         mach_exception_code_t code,
5961         mach_exception_subcode_t subcode,
5962         void *reason)
5963 {
5964         return PROC_VIOLATED_GUARD__SEND_EXC_GUARD_AND_SUSPEND(code, subcode, reason);
5965 }
5966
5967
5968 #if CONFIG_MEMORYSTATUS
5969
5970 boolean_t
5971 task_get_memlimit_is_active(task_t task)
5972 {
5973         assert(task != NULL);
5974
5975         if (task->memlimit_is_active == 1) {
5976                 return TRUE;
5977         } else {
5978                 return FALSE;
5979         }
5980 }
5981
5982 void
5983 task_set_memlimit_is_active(task_t task, boolean_t memlimit_is_active)
5984 {
5985         assert(task != NULL);
5986
5987         if (memlimit_is_active) {
5988                 task->memlimit_is_active = 1;
5989         } else {
5990                 task->memlimit_is_active = 0;
5991         }
5992 }
5993
5994 boolean_t
5995 task_get_memlimit_is_fatal(task_t task)
5996 {
5997         assert(task != NULL);
5998
5999         if (task->memlimit_is_fatal == 1) {
6000                 return TRUE;
6001         } else {
6002                 return FALSE;
6003         }
6004 }
6005
6006 void
6007 task_set_memlimit_is_fatal(task_t task, boolean_t memlimit_is_fatal)
6008 {
6009         assert(task != NULL);
6010
6011         if (memlimit_is_fatal) {
6012                 task->memlimit_is_fatal = 1;
6013         } else {
6014                 task->memlimit_is_fatal = 0;
6015         }
6016 }
6017
6018 boolean_t
6019 task_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
6020 {
6021         boolean_t triggered = FALSE;
6022
6023         assert(task == current_task());
6024
6025         /*
6026          * Returns true, if task has already triggered an exc_resource exception.
6027          */
6028
6029         if (memlimit_is_active) {
6030                 triggered = (task->memlimit_active_exc_resource ? TRUE : FALSE);
6031         } else {
6032                 triggered = (task->memlimit_inactive_exc_resource ? TRUE : FALSE);
6033         }
6034
6035         return triggered;
6036 }
6037
6038 void
6039 task_mark_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
6040 {
6041         assert(task == current_task());
6042
6043         /*
6044          * We allow one exc_resource per process per active/inactive limit.
6045          * The limit's fatal attribute does not come into play.
6046          */
6047
6048         if (memlimit_is_active) {
6049                 task->memlimit_active_exc_resource = 1;
6050         } else {
6051                 task->memlimit_inactive_exc_resource = 1;
6052         }
6053 }
6054
6055 #define HWM_USERCORE_MINSPACE 250 // free space (in MB) required *after* core file creation
6056
6057 void __attribute__((noinline))
6058 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, boolean_t is_fatal)
6059 {
6060         task_t                                          task            = current_task();
6061         int                                                     pid         = 0;
6062         const char                                      *procname       = "unknown";
6063         mach_exception_data_type_t      code[EXCEPTION_CODE_MAX];
6064         boolean_t send_sync_exc_resource = FALSE;
6065
6066 #ifdef MACH_BSD
6067         pid = proc_selfpid();
6068
6069         if (pid == 1) {
6070                 /*
6071                  * Cannot have ReportCrash analyzing
6072                  * a suspended initproc.
6073                  */
6074                 return;
6075         }
6076
6077         if (task->bsd_info != NULL) {
6078                 procname = proc_name_address(current_task()->bsd_info);
6079                 send_sync_exc_resource = proc_send_synchronous_EXC_RESOURCE(current_task()->bsd_info);
6080         }
6081 #endif
6082 #if CONFIG_COREDUMP
6083         if (hwm_user_cores) {
6084                 int                             error;
6085                 uint64_t                starttime, end;
6086                 clock_sec_t             secs = 0;
6087                 uint32_t                microsecs = 0;
6088
6089                 starttime = mach_absolute_time();
6090                 /*
6091                  * Trigger a coredump of this process. Don't proceed unless we know we won't
6092                  * be filling up the disk; and ignore the core size resource limit for this
6093                  * core file.
6094                  */
6095                 if ((error = coredump(current_task()->bsd_info, HWM_USERCORE_MINSPACE, COREDUMP_IGNORE_ULIMIT)) != 0) {
6096                         printf("couldn't take coredump of %s[%d]: %d\n", procname, pid, error);
6097                 }
6098                 /*
6099                  * coredump() leaves the task suspended.
6100                  */
6101                 task_resume_internal(current_task());
6102
6103                 end = mach_absolute_time();
6104                 absolutetime_to_microtime(end - starttime, &secs, &microsecs);
6105                 printf("coredump of %s[%d] taken in %d secs %d microsecs\n",
6106                     proc_name_address(current_task()->bsd_info), pid, (int)secs, microsecs);
6107         }
6108 #endif /* CONFIG_COREDUMP */
6109
6110         if (disable_exc_resource) {
6111                 printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
6112                     "supressed by a boot-arg.\n", procname, pid, max_footprint_mb);
6113                 return;
6114         }
6115
6116         /*
6117          * A task that has triggered an EXC_RESOURCE, should not be
6118          * jetsammed when the device is under memory pressure.  Here
6119          * we set the P_MEMSTAT_TERMINATED flag so that the process
6120          * will be skipped if the memorystatus_thread wakes up.
6121          */
6122         proc_memstat_terminated(current_task()->bsd_info, TRUE);
6123
6124         code[0] = code[1] = 0;
6125         EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY);
6126         EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK);
6127         EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb);
6128
6129         /*
6130          * Do not generate a corpse fork if the violation is a fatal one
6131          * or the process wants synchronous EXC_RESOURCE exceptions.
6132          */
6133         if (is_fatal || send_sync_exc_resource || exc_via_corpse_forking == 0) {
6134                 /* Do not send a EXC_RESOURCE if corpse_for_fatal_memkill is set */
6135                 if (send_sync_exc_resource || corpse_for_fatal_memkill == 0) {
6136                         /*
6137                          * Use the _internal_ variant so that no user-space
6138                          * process can resume our task from under us.
6139                          */
6140                         task_suspend_internal(task);
6141                         exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
6142                         task_resume_internal(task);
6143                 }
6144         } else {
6145                 if (audio_active) {
6146                         printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
6147                             "supressed due to audio playback.\n", procname, pid, max_footprint_mb);
6148                 } else {
6149                         task_enqueue_exception_with_corpse(task, EXC_RESOURCE,
6150                             code, EXCEPTION_CODE_MAX, NULL);
6151                 }
6152         }
6153
6154         /*
6155          * After the EXC_RESOURCE has been handled, we must clear the
6156          * P_MEMSTAT_TERMINATED flag so that the process can again be
6157          * considered for jetsam if the memorystatus_thread wakes up.
6158          */
6159         proc_memstat_terminated(current_task()->bsd_info, FALSE);  /* clear the flag */
6160 }
6161
6162 /*
6163  * Callback invoked when a task exceeds its physical footprint limit.
6164  */
6165 void
6166 task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1)
6167 {
6168         ledger_amount_t max_footprint, max_footprint_mb;
6169         task_t task;
6170         boolean_t is_warning;
6171         boolean_t memlimit_is_active;
6172         boolean_t memlimit_is_fatal;
6173
6174         if (warning == LEDGER_WARNING_DIPPED_BELOW) {
6175                 /*
6176                  * Task memory limits only provide a warning on the way up.
6177                  */
6178                 return;
6179         } else if (warning == LEDGER_WARNING_ROSE_ABOVE) {
6180                 /*
6181                  * This task is in danger of violating a memory limit,
6182                  * It has exceeded a percentage level of the limit.
6183                  */
6184                 is_warning = TRUE;
6185         } else {
6186                 /*
6187                  * The task has exceeded the physical footprint limit.
6188                  * This is not a warning but a true limit violation.
6189                  */
6190                 is_warning = FALSE;
6191         }
6192
6193         task = current_task();
6194
6195         ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &max_footprint);
6196         max_footprint_mb = max_footprint >> 20;
6197
6198         memlimit_is_active = task_get_memlimit_is_active(task);
6199         memlimit_is_fatal = task_get_memlimit_is_fatal(task);
6200
6201         /*
6202          * If this is an actual violation (not a warning), then generate EXC_RESOURCE exception.
6203          * We only generate the exception once per process per memlimit (active/inactive limit).
6204          * To enforce this, we monitor state based on the  memlimit's active/inactive attribute
6205          * and we disable it by marking that memlimit as exception triggered.
6206          */
6207         if ((is_warning == FALSE) && (!task_has_triggered_exc_resource(task, memlimit_is_active))) {
6208                 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND((int)max_footprint_mb, memlimit_is_fatal);
6209                 memorystatus_log_exception((int)max_footprint_mb, memlimit_is_active, memlimit_is_fatal);
6210                 task_mark_has_triggered_exc_resource(task, memlimit_is_active);
6211         }
6212
6213         memorystatus_on_ledger_footprint_exceeded(is_warning, memlimit_is_active, memlimit_is_fatal);
6214 }
6215
6216 extern int proc_check_footprint_priv(void);
6217
6218 kern_return_t
6219 task_set_phys_footprint_limit(
6220         task_t task,
6221         int new_limit_mb,
6222         int *old_limit_mb)
6223 {
6224         kern_return_t error;
6225
6226         boolean_t memlimit_is_active;
6227         boolean_t memlimit_is_fatal;
6228
6229         if ((error = proc_check_footprint_priv())) {
6230                 return KERN_NO_ACCESS;
6231         }
6232
6233         /*
6234          * This call should probably be obsoleted.
6235          * But for now, we default to current state.
6236          */
6237         memlimit_is_active = task_get_memlimit_is_active(task);
6238         memlimit_is_fatal = task_get_memlimit_is_fatal(task);
6239
6240         return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, memlimit_is_active, memlimit_is_fatal);
6241 }
6242
6243 kern_return_t
6244 task_convert_phys_footprint_limit(
6245         int limit_mb,
6246         int *converted_limit_mb)
6247 {
6248         if (limit_mb == -1) {
6249                 /*
6250                  * No limit
6251                  */
6252                 if (max_task_footprint != 0) {
6253                         *converted_limit_mb = (int)(max_task_footprint / 1024 / 1024);   /* bytes to MB */
6254                 } else {
6255                         *converted_limit_mb = (int)(LEDGER_LIMIT_INFINITY >> 20);
6256                 }
6257         } else {
6258                 /* nothing to convert */
6259                 *converted_limit_mb = limit_mb;
6260         }
6261         return KERN_SUCCESS;
6262 }
6263
6264
6265 kern_return_t
6266 task_set_phys_footprint_limit_internal(
6267         task_t task,
6268         int new_limit_mb,
6269         int *old_limit_mb,
6270         boolean_t memlimit_is_active,
6271         boolean_t memlimit_is_fatal)
6272 {
6273         ledger_amount_t old;
6274         kern_return_t ret;
6275
6276         ret = ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &old);
6277
6278         if (ret != KERN_SUCCESS) {
6279                 return ret;
6280         }
6281
6282         /*
6283          * Check that limit >> 20 will not give an "unexpected" 32-bit
6284          * result. There are, however, implicit assumptions that -1 mb limit
6285          * equates to LEDGER_LIMIT_INFINITY.
6286          */
6287         assert(((old & 0xFFF0000000000000LL) == 0) || (old == LEDGER_LIMIT_INFINITY));
6288
6289         if (old_limit_mb) {
6290                 *old_limit_mb = (int)(old >> 20);
6291         }
6292
6293         if (new_limit_mb == -1) {
6294                 /*
6295                  * Caller wishes to remove the limit.
6296                  */
6297                 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
6298                     max_task_footprint ? max_task_footprint : LEDGER_LIMIT_INFINITY,
6299                     max_task_footprint ? max_task_footprint_warning_level : 0);
6300
6301                 task_lock(task);
6302                 task_set_memlimit_is_active(task, memlimit_is_active);
6303                 task_set_memlimit_is_fatal(task, memlimit_is_fatal);
6304                 task_unlock(task);
6305
6306                 return KERN_SUCCESS;
6307         }
6308
6309 #ifdef CONFIG_NOMONITORS
6310         return KERN_SUCCESS;
6311 #endif /* CONFIG_NOMONITORS */
6312
6313         task_lock(task);
6314
6315         if ((memlimit_is_active == task_get_memlimit_is_active(task)) &&
6316             (memlimit_is_fatal == task_get_memlimit_is_fatal(task)) &&
6317             (((ledger_amount_t)new_limit_mb << 20) == old)) {
6318                 /*
6319                  * memlimit state is not changing
6320                  */
6321                 task_unlock(task);
6322                 return KERN_SUCCESS;
6323         }
6324
6325         task_set_memlimit_is_active(task, memlimit_is_active);
6326         task_set_memlimit_is_fatal(task, memlimit_is_fatal);
6327
6328         ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
6329             (ledger_amount_t)new_limit_mb << 20, PHYS_FOOTPRINT_WARNING_LEVEL);
6330
6331         if (task == current_task()) {
6332                 ledger_check_new_balance(current_thread(), task->ledger,
6333                     task_ledgers.phys_footprint);
6334         }
6335
6336         task_unlock(task);
6337
6338         return KERN_SUCCESS;
6339 }
6340
6341 kern_return_t
6342 task_get_phys_footprint_limit(
6343         task_t task,
6344         int *limit_mb)
6345 {
6346         ledger_amount_t limit;
6347         kern_return_t ret;
6348
6349         ret = ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &limit);
6350         if (ret != KERN_SUCCESS) {
6351                 return ret;
6352         }
6353
6354         /*
6355          * Check that limit >> 20 will not give an "unexpected" signed, 32-bit
6356          * result. There are, however, implicit assumptions that -1 mb limit
6357          * equates to LEDGER_LIMIT_INFINITY.
6358          */
6359         assert(((limit & 0xFFF0000000000000LL) == 0) || (limit == LEDGER_LIMIT_INFINITY));
6360         *limit_mb = (int)(limit >> 20);
6361
6362         return KERN_SUCCESS;
6363 }
6364 #else /* CONFIG_MEMORYSTATUS */
6365 kern_return_t
6366 task_set_phys_footprint_limit(
6367         __unused task_t task,
6368         __unused int new_limit_mb,
6369         __unused int *old_limit_mb)
6370 {
6371         return KERN_FAILURE;
6372 }
6373
6374 kern_return_t
6375 task_get_phys_footprint_limit(
6376         __unused task_t task,
6377         __unused int *limit_mb)
6378 {
6379         return KERN_FAILURE;
6380 }
6381 #endif /* CONFIG_MEMORYSTATUS */
6382
6383 void
6384 task_set_thread_limit(task_t task, uint16_t thread_limit)
6385 {
6386         assert(task != kernel_task);
6387         if (thread_limit <= TASK_MAX_THREAD_LIMIT) {
6388                 task_lock(task);
6389                 task->task_thread_limit = thread_limit;
6390                 task_unlock(task);
6391         }
6392 }
6393
6394 /*
6395  * We need to export some functions to other components that
6396  * are currently implemented in macros within the osfmk
6397  * component.  Just export them as functions of the same name.
6398  */
6399 boolean_t
6400 is_kerneltask(task_t t)
6401 {
6402         if (t == kernel_task) {
6403                 return TRUE;
6404         }
6405
6406         return FALSE;
6407 }
6408
6409 boolean_t
6410 is_corpsetask(task_t t)
6411 {
6412         return task_is_a_corpse(t);
6413 }
6414
6415 #undef current_task
6416 task_t current_task(void);
6417 task_t
6418 current_task(void)
6419 {
6420         return current_task_fast();
6421 }
6422
6423 #undef task_reference
6424 void task_reference(task_t task);
6425 void
6426 task_reference(
6427         task_t          task)
6428 {
6429         if (task != TASK_NULL) {
6430                 task_reference_internal(task);
6431         }
6432 }
6433
6434 /* defined in bsd/kern/kern_prot.c */
6435 extern int get_audit_token_pid(audit_token_t *audit_token);
6436
6437 int
6438 task_pid(task_t task)
6439 {
6440         if (task) {
6441                 return get_audit_token_pid(&task->audit_token);
6442         }
6443         return -1;
6444 }
6445
6446
6447 /*
6448  * This routine finds a thread in a task by its unique id
6449  * Returns a referenced thread or THREAD_NULL if the thread was not found
6450  *
6451  * TODO: This is super inefficient - it's an O(threads in task) list walk!
6452  *       We should make a tid hash, or transition all tid clients to thread ports
6453  *
6454  * Precondition: No locks held (will take task lock)
6455  */
6456 thread_t
6457 task_findtid(task_t task, uint64_t tid)
6458 {
6459         thread_t self           = current_thread();
6460         thread_t found_thread   = THREAD_NULL;
6461         thread_t iter_thread    = THREAD_NULL;
6462
6463         /* Short-circuit the lookup if we're looking up ourselves */
6464         if (tid == self->thread_id || tid == TID_NULL) {
6465                 assert(self->task == task);
6466
6467                 thread_reference(self);
6468
6469                 return self;
6470         }
6471
6472         task_lock(task);
6473
6474         queue_iterate(&task->threads, iter_thread, thread_t, task_threads) {
6475                 if (iter_thread->thread_id == tid) {
6476                         found_thread = iter_thread;
6477                         thread_reference(found_thread);
6478                         break;
6479                 }
6480         }
6481
6482         task_unlock(task);
6483
6484         return found_thread;
6485 }
6486
6487 int
6488 pid_from_task(task_t task)
6489 {
6490         int pid = -1;
6491
6492         if (task->bsd_info) {
6493                 pid = proc_pid(task->bsd_info);
6494         } else {
6495                 pid = task_pid(task);
6496         }
6497
6498         return pid;
6499 }
6500
6501 /*
6502  * Control the CPU usage monitor for a task.
6503  */
6504 kern_return_t
6505 task_cpu_usage_monitor_ctl(task_t task, uint32_t *flags)
6506 {
6507         int error = KERN_SUCCESS;
6508
6509         if (*flags & CPUMON_MAKE_FATAL) {
6510                 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_CPUMON;
6511         } else {
6512                 error = KERN_INVALID_ARGUMENT;
6513         }
6514
6515         return error;
6516 }
6517
/*
 * Control the wakeups monitor for a task.
 *
 * The request is selected by the bits in *flags, checked in this order:
 *
 *   WAKEMON_GET_PARAMS  Report the current state.  *flags is overwritten
 *                       with WAKEMON_ENABLE (plus WAKEMON_MAKE_FATAL when
 *                       the task's fatal flag is set) or WAKEMON_DISABLE,
 *                       and *rate_hz with the configured rate or -1.
 *                       All other request bits are ignored.
 *   WAKEMON_ENABLE      Program the interrupt_wakeups ledger entry with a
 *                       limit and period derived from *rate_hz (or from
 *                       task_wakeups_monitor_rate when
 *                       WAKEMON_SET_DEFAULTS is also set).
 *   WAKEMON_DISABLE     Turn off telemetry triggered by the monitor and
 *                       remove refill and callback from the ledger entry.
 *
 * `rate_hz` is dereferenced on the GET_PARAMS and ENABLE paths only; the
 * DISABLE path never touches it.
 *
 * Returns KERN_INVALID_ARGUMENT when enabling with *rate_hz <= 0, and
 * KERN_SUCCESS otherwise (including when none of the bits above is set).
 * The task lock is held for the duration and released on every path.
 */
kern_return_t
task_wakeups_monitor_ctl(task_t task, uint32_t *flags, int32_t *rate_hz)
{
        ledger_t ledger = task->ledger;

        task_lock(task);
        if (*flags & WAKEMON_GET_PARAMS) {
                ledger_amount_t limit;
                uint64_t                period;

                ledger_get_limit(ledger, task_ledgers.interrupt_wakeups, &limit);
                ledger_get_period(ledger, task_ledgers.interrupt_wakeups, &period);

                if (limit != LEDGER_LIMIT_INFINITY) {
                        /*
                         * An active limit means the wakeups monitor is enabled.
                         *
                         * NOTE(review): the divisor is period / NSEC_PER_SEC, which
                         * is zero for any period shorter than one second - confirm
                         * that an active limit always comes with a period >= 1s.
                         */
                        *rate_hz = (int32_t)(limit / (int64_t)(period / NSEC_PER_SEC));
                        *flags = WAKEMON_ENABLE;
                        if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
                                *flags |= WAKEMON_MAKE_FATAL;
                        }
                } else {
                        *flags = WAKEMON_DISABLE;
                        *rate_hz = -1;
                }

                /*
                 * If WAKEMON_GET_PARAMS is present in flags, all other flags are ignored.
                 */
                task_unlock(task);
                return KERN_SUCCESS;
        }

        if (*flags & WAKEMON_ENABLE) {
                if (*flags & WAKEMON_SET_DEFAULTS) {
                        *rate_hz = task_wakeups_monitor_rate;
                }

#ifndef CONFIG_NOMONITORS
                /*
                 * NOTE(review): the fatal flag is recorded before the rate is
                 * validated below, so it stays set even when this call then
                 * fails with KERN_INVALID_ARGUMENT - confirm this is intended.
                 */
                if (*flags & WAKEMON_MAKE_FATAL) {
                        task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
                }
#endif /* CONFIG_NOMONITORS */

                if (*rate_hz <= 0) {
                        task_unlock(task);
                        return KERN_INVALID_ARGUMENT;
                }

#ifndef CONFIG_NOMONITORS
                /*
                 * The limit is expressed as wakeups per monitoring interval
                 * (rate * interval) and the period as that interval in
                 * nanoseconds.
                 */
                ledger_set_limit(ledger, task_ledgers.interrupt_wakeups, *rate_hz * task_wakeups_monitor_interval,
                    task_wakeups_monitor_ustackshots_trigger_pct);
                ledger_set_period(ledger, task_ledgers.interrupt_wakeups, task_wakeups_monitor_interval * NSEC_PER_SEC);
                ledger_enable_callback(ledger, task_ledgers.interrupt_wakeups);
#endif /* CONFIG_NOMONITORS */
        } else if (*flags & WAKEMON_DISABLE) {
                /*
                 * Caller wishes to disable wakeups monitor on the task.
                 *
                 * Disable telemetry if it was triggered by the wakeups monitor, and
                 * remove the limit & callback on the wakeups ledger entry.
                 */
#if CONFIG_TELEMETRY
                telemetry_task_ctl_locked(task, TF_WAKEMON_WARNING, 0);
#endif
                ledger_disable_refill(ledger, task_ledgers.interrupt_wakeups);
                ledger_disable_callback(ledger, task_ledgers.interrupt_wakeups);
        }

        task_unlock(task);
        return KERN_SUCCESS;
}
6594
/*
 * Handler for events on the wakeups ledger entry of the current task.
 *
 * `warning` selects the action:
 *   LEDGER_WARNING_ROSE_ABOVE  turn telemetry on for the task and return;
 *   anything else              turn telemetry off for the task;
 *   0                          additionally send the "too many wakeups"
 *                              notification.
 * The telemetry calls are compiled in only with CONFIG_TELEMETRY.
 * `param0` and `param1` are unused.
 */
void
task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1)
{
        if (warning == LEDGER_WARNING_ROSE_ABOVE) {
#if CONFIG_TELEMETRY
                /*
                 * This task is in danger of violating the wakeups monitor. Enable telemetry on this task
                 * so there are micro-stackshots available if and when EXC_RESOURCE is triggered.
                 */
                telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 1);
#endif
                return;
        }

#if CONFIG_TELEMETRY
        /*
         * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
         * exceeded the limit, turn telemetry off for the task.
         */
        telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 0);
#endif

        /* A warning value of 0 is the actual limit violation. */
        if (warning == 0) {
                SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS();
        }
}
6621
6622 void __attribute__((noinline))
6623 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void)
6624 {
6625         task_t                      task        = current_task();
6626         int                         pid         = 0;
6627         const char                  *procname   = "unknown";
6628         boolean_t                   fatal;
6629         kern_return_t               kr;
6630 #ifdef EXC_RESOURCE_MONITORS
6631         mach_exception_data_type_t  code[EXCEPTION_CODE_MAX];
6632 #endif /* EXC_RESOURCE_MONITORS */
6633         struct ledger_entry_info    lei;
6634
6635 #ifdef MACH_BSD
6636         pid = proc_selfpid();
6637         if (task->bsd_info != NULL) {
6638                 procname = proc_name_address(current_task()->bsd_info);
6639         }
6640 #endif
6641
6642         ledger_get_entry_info(task->ledger, task_ledgers.interrupt_wakeups, &lei);
6643
6644         /*
6645          * Disable the exception notification so we don't overwhelm
6646          * the listener with an endless stream of redundant exceptions.
6647          * TODO: detect whether another thread is already reporting the violation.
6648          */
6649         uint32_t flags = WAKEMON_DISABLE;
6650         task_wakeups_monitor_ctl(task, &flags, NULL);
6651
6652         fatal = task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
6653         trace_resource_violation(RMON_CPUWAKES_VIOLATED, &lei);
6654         os_log(OS_LOG_DEFAULT, "process %s[%d] caught waking the CPU %llu times "
6655             "over ~%llu seconds, averaging %llu wakes / second and "
6656             "violating a %slimit of %llu wakes over %llu seconds.\n",
6657             procname, pid,
6658             lei.lei_balance, lei.lei_last_refill / NSEC_PER_SEC,
6659             lei.lei_last_refill == 0 ? 0 :
6660             (NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill),
6661             fatal ? "FATAL " : "",
6662             lei.lei_limit, lei.lei_refill_period / NSEC_PER_SEC);
6663
6664         kr = send_resource_violation(send_cpu_wakes_violation, task, &lei,
6665             fatal ? kRNFatalLimitFlag : 0);
6666         if (kr) {
6667                 printf("send_resource_violation(CPU wakes, ...): error %#x\n", kr);
6668         }
6669
6670 #ifdef EXC_RESOURCE_MONITORS
6671         if (disable_exc_resource) {
6672                 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
6673                     "supressed by a boot-arg\n", procname, pid);
6674                 return;
6675         }
6676         if (audio_active) {
6677                 os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
6678                     "supressed due to audio playback\n", procname, pid);
6679                 return;
6680         }
6681         if (lei.lei_last_refill == 0) {
6682                 os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
6683                     "supressed due to lei.lei_last_refill = 0 \n", procname, pid);
6684         }
6685
6686         code[0] = code[1] = 0;
6687         EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_WAKEUPS);
6688         EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_WAKEUPS_MONITOR);
6689         EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_PERMITTED(code[0],
6690             NSEC_PER_SEC * lei.lei_limit / lei.lei_refill_period);
6691         EXC_RESOURCE_CPUMONITOR_ENCODE_OBSERVATION_INTERVAL(code[0],
6692             lei.lei_last_refill);
6693         EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_OBSERVED(code[1],
6694             NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill);
6695         exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
6696 #endif /* EXC_RESOURCE_MONITORS */
6697
6698         if (fatal) {
6699                 task_terminate_internal(task);
6700         }
6701 }
6702
6703 static boolean_t
6704 global_update_logical_writes(int64_t io_delta, int64_t *global_write_count)
6705 {
6706         int64_t old_count, new_count;
6707         boolean_t needs_telemetry;
6708
6709         do {
6710                 new_count = old_count = *global_write_count;
6711                 new_count += io_delta;
6712                 if (new_count >= io_telemetry_limit) {
6713                         new_count = 0;
6714                         needs_telemetry = TRUE;
6715                 } else {
6716                         needs_telemetry = FALSE;
6717                 }
6718         } while (!OSCompareAndSwap64(old_count, new_count, global_write_count));
6719         return needs_telemetry;
6720 }
6721
6722 void
6723 task_update_logical_writes(task_t task, uint32_t io_size, int flags, void *vp)
6724 {
6725         int64_t io_delta = 0;
6726         int64_t * global_counter_to_update;
6727         boolean_t needs_telemetry = FALSE;
6728         boolean_t is_external_device = FALSE;
6729         int ledger_to_update = 0;
6730         struct task_writes_counters * writes_counters_to_update;
6731
6732         if ((!task) || (!io_size) || (!vp)) {
6733                 return;
6734         }
6735
6736         KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_DATA_WRITE)) | DBG_FUNC_NONE,
6737             task_pid(task), io_size, flags, (uintptr_t)VM_KERNEL_ADDRPERM(vp), 0);
6738         DTRACE_IO4(logical_writes, struct task *, task, uint32_t, io_size, int, flags, vnode *, vp);
6739
6740         // Is the drive backing this vnode internal or external to the system?
6741         if (vnode_isonexternalstorage(vp) == false) {
6742                 global_counter_to_update = &global_logical_writes_count;
6743                 ledger_to_update = task_ledgers.logical_writes;
6744                 writes_counters_to_update = &task->task_writes_counters_internal;
6745                 is_external_device = FALSE;
6746         } else {
6747                 global_counter_to_update = &global_logical_writes_to_external_count;
6748                 ledger_to_update = task_ledgers.logical_writes_to_external;
6749                 writes_counters_to_update = &task->task_writes_counters_external;
6750                 is_external_device = TRUE;
6751         }
6752
6753         switch (flags) {
6754         case TASK_WRITE_IMMEDIATE:
6755                 OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_immediate_writes));
6756                 ledger_credit(task->ledger, ledger_to_update, io_size);
6757                 if (!is_external_device) {
6758                         coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
6759                 }
6760                 break;
6761         case TASK_WRITE_DEFERRED:
6762                 OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_deferred_writes));
6763                 ledger_credit(task->ledger, ledger_to_update, io_size);
6764                 if (!is_external_device) {
6765                         coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
6766                 }
6767                 break;
6768         case TASK_WRITE_INVALIDATED:
6769                 OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_invalidated_writes));
6770                 ledger_debit(task->ledger, ledger_to_update, io_size);
6771                 if (!is_external_device) {
6772                         coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, FALSE, io_size);
6773                 }
6774                 break;
6775         case TASK_WRITE_METADATA:
6776                 OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_metadata_writes));
6777                 ledger_credit(task->ledger, ledger_to_update, io_size);
6778                 if (!is_external_device) {
6779                         coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
6780                 }
6781                 break;
6782         }
6783
6784         io_delta = (flags == TASK_WRITE_INVALIDATED) ? ((int64_t)io_size * -1ll) : ((int64_t)io_size);
6785         if (io_telemetry_limit != 0) {
6786                 /* If io_telemetry_limit is 0, disable global updates and I/O telemetry */
6787                 needs_telemetry = global_update_logical_writes(io_delta, global_counter_to_update);
6788                 if (needs_telemetry && !is_external_device) {
6789                         act_set_io_telemetry_ast(current_thread());
6790                 }
6791         }
6792 }
6793
6794 /*
6795  * Control the I/O monitor for a task.
6796  */
6797 kern_return_t
6798 task_io_monitor_ctl(task_t task, uint32_t *flags)
6799 {
6800         ledger_t ledger = task->ledger;
6801
6802         task_lock(task);
6803         if (*flags & IOMON_ENABLE) {
6804                 /* Configure the physical I/O ledger */
6805                 ledger_set_limit(ledger, task_ledgers.physical_writes, (task_iomon_limit_mb * 1024 * 1024), 0);
6806                 ledger_set_period(ledger, task_ledgers.physical_writes, (task_iomon_interval_secs * NSEC_PER_SEC));
6807         } else if (*flags & IOMON_DISABLE) {
6808                 /*
6809                  * Caller wishes to disable I/O monitor on the task.
6810                  */
6811                 ledger_disable_refill(ledger, task_ledgers.physical_writes);
6812                 ledger_disable_callback(ledger, task_ledgers.physical_writes);
6813         }
6814
6815         task_unlock(task);
6816         return KERN_SUCCESS;
6817 }
6818
6819 void
6820 task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1)
6821 {
6822         if (warning == 0) {
6823                 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO((int)param0);
6824         }
6825 }
6826
6827 void __attribute__((noinline))
6828 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor)
6829 {
6830         int                             pid = 0;
6831         task_t                          task = current_task();
6832 #ifdef EXC_RESOURCE_MONITORS
6833         mach_exception_data_type_t      code[EXCEPTION_CODE_MAX];
6834 #endif /* EXC_RESOURCE_MONITORS */
6835         struct ledger_entry_info        lei;
6836         kern_return_t                   kr;
6837
6838 #ifdef MACH_BSD
6839         pid = proc_selfpid();
6840 #endif
6841         /*
6842          * Get the ledger entry info. We need to do this before disabling the exception
6843          * to get correct values for all fields.
6844          */
6845         switch (flavor) {
6846         case FLAVOR_IO_PHYSICAL_WRITES:
6847                 ledger_get_entry_info(task->ledger, task_ledgers.physical_writes, &lei);
6848                 break;
6849         }
6850
6851
6852         /*
6853          * Disable the exception notification so we don't overwhelm
6854          * the listener with an endless stream of redundant exceptions.
6855          * TODO: detect whether another thread is already reporting the violation.
6856          */
6857         uint32_t flags = IOMON_DISABLE;
6858         task_io_monitor_ctl(task, &flags);
6859
6860         if (flavor == FLAVOR_IO_LOGICAL_WRITES) {
6861                 trace_resource_violation(RMON_LOGWRITES_VIOLATED, &lei);
6862         }
6863         os_log(OS_LOG_DEFAULT, "process [%d] caught causing excessive I/O (flavor: %d). Task I/O: %lld MB. [Limit : %lld MB per %lld secs]\n",
6864             pid, flavor, (lei.lei_balance / (1024 * 1024)), (lei.lei_limit / (1024 * 1024)), (lei.lei_refill_period / NSEC_PER_SEC));
6865
6866         kr = send_resource_violation(send_disk_writes_violation, task, &lei, kRNFlagsNone);
6867         if (kr) {
6868                 printf("send_resource_violation(disk_writes, ...): error %#x\n", kr);
6869         }
6870
6871 #ifdef EXC_RESOURCE_MONITORS
6872         code[0] = code[1] = 0;
6873         EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_IO);
6874         EXC_RESOURCE_ENCODE_FLAVOR(code[0], flavor);
6875         EXC_RESOURCE_IO_ENCODE_INTERVAL(code[0], (lei.lei_refill_period / NSEC_PER_SEC));
6876         EXC_RESOURCE_IO_ENCODE_LIMIT(code[0], (lei.lei_limit / (1024 * 1024)));
6877         EXC_RESOURCE_IO_ENCODE_OBSERVED(code[1], (lei.lei_balance / (1024 * 1024)));
6878         exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
6879 #endif /* EXC_RESOURCE_MONITORS */
6880 }
6881
6882 /* Placeholders for the task set/get voucher interfaces */
/*
 * Placeholder getter: there is no per-task voucher to hand out, so
 * *voucher is always set to NULL.  `which` is unused.
 *
 * Returns KERN_INVALID_TASK for TASK_NULL, KERN_SUCCESS otherwise.
 */
kern_return_t
task_get_mach_voucher(
        task_t                  task,
        mach_voucher_selector_t __unused which,
        ipc_voucher_t           *voucher)
{
        if (TASK_NULL == task) {
                return KERN_INVALID_TASK;
        }

        *voucher = NULL;
        return KERN_SUCCESS;
}
6896
6897 kern_return_t
6898 task_set_mach_voucher(
6899         task_t                  task,
6900         ipc_voucher_t           __unused voucher)
6901 {
6902         if (TASK_NULL == task) {
6903                 return KERN_INVALID_TASK;
6904         }
6905
6906         return KERN_SUCCESS;
6907 }
6908
/*
 * Unsupported operation: always returns KERN_NOT_SUPPORTED.  The only
 * side effect is releasing the voucher reference passed in through
 * `in_out_old_voucher` (see below).  `task` and `new_voucher` are unused.
 */
kern_return_t
task_swap_mach_voucher(
        __unused task_t         task,
        __unused ipc_voucher_t  new_voucher,
        ipc_voucher_t          *in_out_old_voucher)
{
        /*
         * Currently this function is only called from a MIG generated
         * routine which doesn't release the reference on the voucher
         * addressed by in_out_old_voucher. To avoid leaking this reference,
         * a call to release it has been added here.
         */
        ipc_voucher_release(*in_out_old_voucher);
        return KERN_NOT_SUPPORTED;
}
6924
6925 void
6926 task_set_gpu_denied(task_t task, boolean_t denied)
6927 {
6928         task_lock(task);
6929
6930         if (denied) {
6931                 task->t_flags |= TF_GPU_DENIED;
6932         } else {
6933                 task->t_flags &= ~TF_GPU_DENIED;
6934         }
6935
6936         task_unlock(task);
6937 }
6938
6939 boolean_t
6940 task_is_gpu_denied(task_t task)
6941 {
6942         /* We don't need the lock to read this flag */
6943         return (task->t_flags & TF_GPU_DENIED) ? TRUE : FALSE;
6944 }
6945
6946
6947 uint64_t
6948 get_task_memory_region_count(task_t task)
6949 {
6950         vm_map_t map;
6951         map = (task == kernel_task) ? kernel_map: task->map;
6952         return (uint64_t)get_map_nentries(map);
6953 }
6954
/*
 * Emit the kdebug events that describe one dyld image (`info`).
 *
 * The payload is the image UUID, load address, fsid and fsobjid, spread
 * over as many events as the argument width requires:
 *
 *   LP64:     2 events, ids base_code and base_code + 1
 *             (UUID as two 64-bit words; fsid and fsobjid each packed
 *             into one 64-bit argument, low half first).
 *   non-LP64: 3 events, ids base_code + 2 .. base_code + 4
 *             (UUID as four 32-bit words).
 *
 * The UUID field must be at least 16 bytes, which is checked at compile
 * time.
 */
static void
kdebug_trace_dyld_internal(uint32_t base_code,
    struct dyld_kernel_image_info *info)
{
        static_assert(sizeof(info->uuid) >= 16);

#if defined(__LP64__)
        uint64_t *uuid = (uint64_t *)&(info->uuid);

        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
            KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code), uuid[0],
            uuid[1], info->load_addr,
            (uint64_t)info->fsid.val[0] | ((uint64_t)info->fsid.val[1] << 32),
            0);
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
            KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 1),
            (uint64_t)info->fsobjid.fid_objno |
            ((uint64_t)info->fsobjid.fid_generation << 32),
            0, 0, 0, 0);
#else /* defined(__LP64__) */
        uint32_t *uuid = (uint32_t *)&(info->uuid);

        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
            KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 2), uuid[0],
            uuid[1], uuid[2], uuid[3], 0);
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
            KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 3),
            (uint32_t)info->load_addr, info->fsid.val[0], info->fsid.val[1],
            info->fsobjid.fid_objno, 0);
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
            KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 4),
            info->fsobjid.fid_generation, 0, 0, 0, 0);
#endif /* !defined(__LP64__) */
}
6989
/*
 * Trace an array of dyld image infos delivered as a VM map copy.
 *
 * `infos_copy` holds `infos_len` dyld_kernel_image_info entries.  Each
 * entry is traced with kdebug_trace_dyld_internal() using `base_code`.
 *
 * Returns:
 *   KERN_INVALID_ADDRESS  no copy object was supplied;
 *   KERN_SUCCESS          tracing for these events is off (the copy is
 *                         discarded untouched), or all entries were traced;
 *   KERN_INVALID_TASK     `task` is NULL or not the current task;
 *   other                 failure code from vm_map_copyout().
 *
 * NOTE(review): on the error returns the copy object is neither mapped
 * nor discarded here - presumably the caller disposes of it on failure;
 * confirm against the calling convention.
 */
static kern_return_t
kdebug_trace_dyld(task_t task, uint32_t base_code,
    vm_map_copy_t infos_copy, mach_msg_type_number_t infos_len)
{
        kern_return_t kr;
        dyld_kernel_image_info_array_t infos;
        vm_map_offset_t map_data;
        vm_offset_t data;

        if (!infos_copy) {
                return KERN_INVALID_ADDRESS;
        }

        /* Nothing to do when nobody is listening; just consume the copy. */
        if (!kdebug_enable ||
            !kdebug_debugid_enabled(KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, 0))) {
                vm_map_copy_discard(infos_copy);
                return KERN_SUCCESS;
        }

        if (task == NULL || task != current_task()) {
                return KERN_INVALID_TASK;
        }

        /* Map the copied-in array into ipc_kernel_map so it can be read. */
        kr = vm_map_copyout(ipc_kernel_map, &map_data, (vm_map_copy_t)infos_copy);
        if (kr != KERN_SUCCESS) {
                return kr;
        }

        infos = CAST_DOWN(dyld_kernel_image_info_array_t, map_data);

        for (mach_msg_type_number_t i = 0; i < infos_len; i++) {
                kdebug_trace_dyld_internal(base_code, &(infos[i]));
        }

        /* Release the temporary mapping created by the copyout above. */
        data = CAST_DOWN(vm_offset_t, map_data);
        mach_vm_deallocate(ipc_kernel_map, data, infos_len * sizeof(infos[0]));
        return KERN_SUCCESS;
}
7028
/*
 * Trace newly registered dyld images: forwards to kdebug_trace_dyld()
 * with the DBG_DYLD_UUID_MAP_A base code and returns its result.
 */
kern_return_t
task_register_dyld_image_infos(task_t task,
    dyld_kernel_image_info_array_t infos_copy,
    mach_msg_type_number_t infos_len)
{
        return kdebug_trace_dyld(task, DBG_DYLD_UUID_MAP_A,
                   (vm_map_copy_t)infos_copy, infos_len);
}
7037
/*
 * Trace unregistered dyld images: forwards to kdebug_trace_dyld() with
 * the DBG_DYLD_UUID_UNMAP_A base code and returns its result.
 */
kern_return_t
task_unregister_dyld_image_infos(task_t task,
    dyld_kernel_image_info_array_t infos_copy,
    mach_msg_type_number_t infos_len)
{
        return kdebug_trace_dyld(task, DBG_DYLD_UUID_UNMAP_A,
                   (vm_map_copy_t)infos_copy, infos_len);
}
7046
/*
 * Unsupported: always returns KERN_NOT_SUPPORTED and leaves the output
 * parameters untouched.
 */
kern_return_t
task_get_dyld_image_infos(__unused task_t task,
    __unused dyld_kernel_image_info_array_t * dyld_images,
    __unused mach_msg_type_number_t * dyld_imagesCnt)
{
        return KERN_NOT_SUPPORTED;
}
7054
/*
 * Trace the dyld shared cache image (`cache_img`) with the
 * DBG_DYLD_UUID_SHARED_CACHE_A base code.  `no_cache` and
 * `private_cache` are unused.
 *
 * Returns KERN_INVALID_TASK unless `task` is the current task,
 * KERN_SUCCESS otherwise.
 */
kern_return_t
task_register_dyld_shared_cache_image_info(task_t task,
    dyld_kernel_image_info_t cache_img,
    __unused boolean_t no_cache,
    __unused boolean_t private_cache)
{
        if (task == NULL || task != current_task()) {
                return KERN_INVALID_TASK;
        }

        kdebug_trace_dyld_internal(DBG_DYLD_UUID_SHARED_CACHE_A, &cache_img);
        return KERN_SUCCESS;
}
7068
/*
 * Unsupported: always returns KERN_NOT_SUPPORTED; both arguments are
 * ignored.
 */
kern_return_t
task_register_dyld_set_dyld_state(__unused task_t task,
    __unused uint8_t dyld_state)
{
        return KERN_NOT_SUPPORTED;
}
7075
/*
 * Unsupported: always returns KERN_NOT_SUPPORTED and leaves
 * `dyld_process_state` untouched.
 */
kern_return_t
task_register_dyld_get_process_state(__unused task_t task,
    __unused dyld_kernel_process_info_t * dyld_process_state)
{
        return KERN_NOT_SUPPORTED;
}
7082
/*
 * Return inspection data about a task.
 *
 * `flavor` selects what is written to `info_out`; `*size_in_out` carries
 * the capacity of `info_out` on entry and, on success, the amount that
 * was filled in.  The only flavor handled is TASK_INSPECT_BASIC_COUNTS,
 * which reports the task's fixed counters (cycles, plus instructions
 * when MT_CORE_INSTRS is defined, otherwise 0).
 *
 * Returns:
 *   KERN_INVALID_ARGUMENT  NULL task, unknown flavor, or a capacity
 *                          below what the flavor needs;
 *   KERN_NOT_SUPPORTED     built without MONOTONIC;
 *   KERN_SUCCESS           otherwise.
 */
kern_return_t
task_inspect(task_inspect_t task_insp, task_inspect_flavor_t flavor,
    task_inspect_info_t info_out, mach_msg_type_number_t *size_in_out)
{
#if MONOTONIC
        task_t task = (task_t)task_insp;
        kern_return_t kr = KERN_SUCCESS;
        mach_msg_type_number_t size;

        if (task == TASK_NULL) {
                return KERN_INVALID_ARGUMENT;
        }

        size = *size_in_out;

        switch (flavor) {
        case TASK_INSPECT_BASIC_COUNTS: {
                struct task_inspect_basic_counts *bc;
                uint64_t task_counts[MT_CORE_NFIXED] = { 0 };

                /* The caller's buffer must be able to hold the whole structure. */
                if (size < TASK_INSPECT_BASIC_COUNTS_COUNT) {
                        kr = KERN_INVALID_ARGUMENT;
                        break;
                }

                mt_fixed_task_counts(task, task_counts);
                bc = (struct task_inspect_basic_counts *)info_out;
#ifdef MT_CORE_INSTRS
                bc->instructions = task_counts[MT_CORE_INSTRS];
#else /* defined(MT_CORE_INSTRS) */
                bc->instructions = 0;
#endif /* !defined(MT_CORE_INSTRS) */
                bc->cycles = task_counts[MT_CORE_CYCLES];
                size = TASK_INSPECT_BASIC_COUNTS_COUNT;
                break;
        }
        default:
                kr = KERN_INVALID_ARGUMENT;
                break;
        }

        /* The out-size is only updated when the request succeeded. */
        if (kr == KERN_SUCCESS) {
                *size_in_out = size;
        }
        return kr;
#else /* MONOTONIC */
#pragma unused(task_insp, flavor, info_out, size_in_out)
        return KERN_NOT_SUPPORTED;
#endif /* !MONOTONIC */
}
7133
7134 #if CONFIG_SECLUDED_MEMORY
7135 int num_tasks_can_use_secluded_mem = 0;
7136
7137 void
7138 task_set_can_use_secluded_mem(
7139         task_t          task,
7140         boolean_t       can_use_secluded_mem)
7141 {
7142         if (!task->task_could_use_secluded_mem) {
7143                 return;
7144         }
7145         task_lock(task);
7146         task_set_can_use_secluded_mem_locked(task, can_use_secluded_mem);
7147         task_unlock(task);
7148 }
7149
/*
 * Update the task's task_can_use_secluded_mem flag and keep the global
 * count num_tasks_can_use_secluded_mem in step with it.
 *
 * The flag is only turned on when the secluded_for_apps boot-arg is set.
 * The counter changes only on an actual transition of the flag, so
 * repeated calls with the same value are harmless.  The task must
 * already be marked task_could_use_secluded_mem (asserted).  The
 * "_locked" suffix suggests the caller holds the task lock, as the
 * wrapper above does.
 */
void
task_set_can_use_secluded_mem_locked(
        task_t          task,
        boolean_t       can_use_secluded_mem)
{
        assert(task->task_could_use_secluded_mem);
        if (can_use_secluded_mem &&
            secluded_for_apps && /* global boot-arg */
            !task->task_can_use_secluded_mem) {
                /* off -> on */
                assert(num_tasks_can_use_secluded_mem >= 0);
                OSAddAtomic(+1,
                    (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
                task->task_can_use_secluded_mem = TRUE;
        } else if (!can_use_secluded_mem &&
            task->task_can_use_secluded_mem) {
                /* on -> off */
                assert(num_tasks_can_use_secluded_mem > 0);
                OSAddAtomic(-1,
                    (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
                task->task_can_use_secluded_mem = FALSE;
        }
}
7171
/*
 * Record whether the task is eligible to use secluded memory
 * (task_could_use_secluded_mem).  No lock is taken here.
 */
void
task_set_could_use_secluded_mem(
        task_t          task,
        boolean_t       could_use_secluded_mem)
{
        task->task_could_use_secluded_mem = could_use_secluded_mem;
}
7179
/*
 * Record the task's task_could_also_use_secluded_mem setting.  No lock
 * is taken here.
 */
void
task_set_could_also_use_secluded_mem(
        task_t          task,
        boolean_t       could_also_use_secluded_mem)
{
        task->task_could_also_use_secluded_mem = could_also_use_secluded_mem;
}
7187
/*
 * Decide whether the task may use secluded memory right now.
 *
 * TRUE is returned when any of these holds:
 *   - the task's task_can_use_secluded_mem flag is set;
 *   - the task is marked task_could_also_use_secluded_mem and at least
 *     one task currently has task_can_use_secluded_mem set;
 *   - `is_alloc` is set, secluded_shutoff_trigger is non-zero, and the
 *     task's physical footprint exceeds that trigger.  In this case
 *     secluded suppression is started for the task as a side effect.
 * Otherwise FALSE is returned.
 */
boolean_t
task_can_use_secluded_mem(
        task_t          task,
        boolean_t       is_alloc)
{
        if (task->task_can_use_secluded_mem) {
                assert(task->task_could_use_secluded_mem);
                assert(num_tasks_can_use_secluded_mem > 0);
                return TRUE;
        }
        if (task->task_could_also_use_secluded_mem &&
            num_tasks_can_use_secluded_mem > 0) {
                assert(num_tasks_can_use_secluded_mem > 0);
                return TRUE;
        }

        /*
         * If a single task is using more than some amount of
         * memory, allow it to dip into secluded and also begin
         * suppression of secluded memory until the tasks exits.
         */
        if (is_alloc && secluded_shutoff_trigger != 0) {
                uint64_t phys_used = get_task_phys_footprint(task);
                if (phys_used > secluded_shutoff_trigger) {
                        start_secluded_suppression(task);
                        return TRUE;
                }
        }

        return FALSE;
}
7219
/*
 * Return the task's task_could_use_secluded_mem setting.
 */
boolean_t
task_could_use_secluded_mem(
        task_t  task)
{
        return task->task_could_use_secluded_mem;
}
7226
/*
 * Return the task's task_could_also_use_secluded_mem setting.
 */
boolean_t
task_could_also_use_secluded_mem(
        task_t  task)
{
        return task->task_could_also_use_secluded_mem;
}
7233 #endif /* CONFIG_SECLUDED_MEMORY */
7234
/*
 * Return a pointer to the task's io_user_clients queue head.  The
 * pointer refers to storage inside the task structure.
 */
queue_head_t *
task_io_user_clients(task_t task)
{
        return &task->io_user_clients;
}
7240
/*
 * Store `enable` in the task's message_app_suspended field.  No lock is
 * taken here.
 */
void
task_set_message_app_suspended(task_t task, boolean_t enable)
{
        task->message_app_suspended = enable;
}
7246
/*
 * Copy the task fields that are carried over from `src_task` to
 * `dst_task` on exec.  Only `vtimers` is copied here.
 */
void
task_copy_fields_for_exec(task_t dst_task, task_t src_task)
{
        dst_task->vtimers = src_task->vtimers;
}
7252
7253 #if DEVELOPMENT || DEBUG
7254 int vm_region_footprint = 0;
7255 #endif /* DEVELOPMENT || DEBUG */
7256
/*
 * Return the current task's task_region_footprint setting.  On
 * DEVELOPMENT and DEBUG builds a non-zero vm_region_footprint forces
 * the answer to TRUE for every task.
 */
boolean_t
task_self_region_footprint(void)
{
#if DEVELOPMENT || DEBUG
        if (vm_region_footprint) {
                /* system-wide override */
                return TRUE;
        }
#endif /* DEVELOPMENT || DEBUG */
        return current_task()->task_region_footprint;
}
7268
7269 void
7270 task_self_region_footprint_set(
7271         boolean_t newval)
7272 {
7273         task_t  curtask;
7274
7275         curtask = current_task();
7276         task_lock(curtask);
7277         if (newval) {
7278                 curtask->task_region_footprint = TRUE;
7279         } else {
7280                 curtask->task_region_footprint = FALSE;
7281         }
7282         task_unlock(curtask);
7283 }
7284
7285 void
7286 task_set_darkwake_mode(task_t task, boolean_t set_mode)
7287 {
7288         assert(task);
7289
7290         task_lock(task);
7291
7292         if (set_mode) {
7293                 task->t_flags |= TF_DARKWAKE_MODE;
7294         } else {
7295                 task->t_flags &= ~(TF_DARKWAKE_MODE);
7296         }
7297
7298         task_unlock(task);
7299 }
7300
7301 boolean_t
7302 task_get_darkwake_mode(task_t task)
7303 {
7304         assert(task);
7305         return (task->t_flags & TF_DARKWAKE_MODE) != 0;
7306 }
7307
7308 kern_return_t
7309 task_get_exc_guard_behavior(
7310         task_t task,
7311         task_exc_guard_behavior_t *behaviorp)
7312 {
7313         if (task == TASK_NULL) {
7314                 return KERN_INVALID_TASK;
7315         }
7316         *behaviorp = task->task_exc_guard;
7317         return KERN_SUCCESS;
7318 }
7319
#ifndef TASK_EXC_GUARD_ALL
/*
 * Temporary define until two branches are merged.
 *
 * Fallback used only when TASK_EXC_GUARD_ALL has not already been
 * defined: every bit of TASK_EXC_GUARD_VM_ALL plus the bits in 0xf0.
 * NOTE(review): the 0xf0 bits presumably cover the non-VM guard
 * behaviors -- confirm against the header that defines them.
 */
#define TASK_EXC_GUARD_ALL (TASK_EXC_GUARD_VM_ALL | 0xf0)
#endif
7324
7325 kern_return_t
7326 task_set_exc_guard_behavior(
7327         task_t task,
7328         task_exc_guard_behavior_t behavior)
7329 {
7330         if (task == TASK_NULL) {
7331                 return KERN_INVALID_TASK;
7332         }
7333         if (behavior & ~TASK_EXC_GUARD_ALL) {
7334                 return KERN_INVALID_VALUE;
7335         }
7336         task->task_exc_guard = behavior;
7337         return KERN_SUCCESS;
7338 }
7339
7340 #if __arm64__
/*
 * Symbols declared here with "extern" rather than defined in this part
 * of the file.  The two memorystatus_act_on_*() routines are called by
 * the footprint-limit setters that follow.
 */
extern int legacy_footprint_entitlement_mode;
extern void memorystatus_act_on_legacy_footprint_entitlement(proc_t, boolean_t);
extern void memorystatus_act_on_ios13extended_footprint_entitlement(proc_t);
7344
7345 void
7346 task_set_legacy_footprint(
7347         task_t task)
7348 {
7349         task_lock(task);
7350         task->task_legacy_footprint = TRUE;
7351         task_unlock(task);
7352 }
7353
7354 void
7355 task_set_extra_footprint_limit(
7356         task_t task)
7357 {
7358         if (task->task_extra_footprint_limit) {
7359                 return;
7360         }
7361         task_lock(task);
7362         if (task->task_extra_footprint_limit) {
7363                 task_unlock(task);
7364                 return;
7365         }
7366         task->task_extra_footprint_limit = TRUE;
7367         task_unlock(task);
7368         memorystatus_act_on_legacy_footprint_entitlement(task->bsd_info, TRUE);
7369 }
7370
7371 void
7372 task_set_ios13extended_footprint_limit(
7373         task_t task)
7374 {
7375         if (task->task_ios13extended_footprint_limit) {
7376                 return;
7377         }
7378         task_lock(task);
7379         if (task->task_ios13extended_footprint_limit) {
7380                 task_unlock(task);
7381                 return;
7382         }
7383         task->task_ios13extended_footprint_limit = TRUE;
7384         task_unlock(task);
7385         memorystatus_act_on_ios13extended_footprint_entitlement(task->bsd_info);
7386 }
7387 #endif /* __arm64__ */
7388
7389 static inline ledger_amount_t
7390 task_ledger_get_balance(
7391         ledger_t        ledger,
7392         int             ledger_idx)
7393 {
7394         ledger_amount_t amount;
7395         amount = 0;
7396         ledger_get_balance(ledger, ledger_idx, &amount);
7397         return amount;
7398 }
7399
7400 /*
7401  * Gather the amount of memory counted in a task's footprint due to
7402  * being in a specific set of ledgers.
7403  */
7404 void
7405 task_ledgers_footprint(
7406         ledger_t        ledger,
7407         ledger_amount_t *ledger_resident,
7408         ledger_amount_t *ledger_compressed)
7409 {
7410         *ledger_resident = 0;
7411         *ledger_compressed = 0;
7412
7413         /* purgeable non-volatile memory */
7414         *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.purgeable_nonvolatile);
7415         *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.purgeable_nonvolatile_compressed);
7416
7417         /* "default" tagged memory */
7418         *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.tagged_footprint);
7419         *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.tagged_footprint_compressed);
7420
7421         /* "network" currently never counts in the footprint... */
7422
7423         /* "media" tagged memory */
7424         *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.media_footprint);
7425         *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.media_footprint_compressed);
7426
7427         /* "graphics" tagged memory */
7428         *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.graphics_footprint);
7429         *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.graphics_footprint_compressed);
7430
7431         /* "neural" tagged memory */
7432         *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.neural_footprint);
7433         *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.neural_footprint_compressed);
7434 }
7435
7436 void
7437 task_set_memory_ownership_transfer(
7438         task_t    task,
7439         boolean_t value)
7440 {
7441         task_lock(task);
7442         task->task_can_transfer_memory_ownership = value;
7443         task_unlock(task);
7444 }
7445
7446 void
7447 task_copy_vmobjects(task_t task, vm_object_query_t query, int len, int64_t* num)
7448 {
7449         vm_object_t find_vmo;
7450         int64_t size = 0;
7451
7452         task_objq_lock(task);
7453         if (query != NULL) {
7454                 queue_iterate(&task->task_objq, find_vmo, vm_object_t, task_objq)
7455                 {
7456                         int byte_size;
7457                         vm_object_query_t p = &query[size++];
7458
7459                         p->object_id = (vm_object_id_t) VM_KERNEL_ADDRPERM(find_vmo);
7460                         p->virtual_size = find_vmo->internal ? find_vmo->vo_size : 0;
7461                         p->resident_size = find_vmo->resident_page_count * PAGE_SIZE;
7462                         p->wired_size = find_vmo->wired_page_count * PAGE_SIZE;
7463                         p->reusable_size = find_vmo->reusable_page_count * PAGE_SIZE;
7464                         p->vo_no_footprint = find_vmo->vo_no_footprint;
7465                         p->vo_ledger_tag = find_vmo->vo_ledger_tag;
7466                         p->purgable = find_vmo->purgable;
7467
7468                         if (find_vmo->internal && find_vmo->pager_created && find_vmo->pager != NULL) {
7469                                 p->compressed_size = vm_compressor_pager_get_count(find_vmo->pager) * PAGE_SIZE;
7470                         } else {
7471                                 p->compressed_size = 0;
7472                         }
7473
7474                         /* make sure to not overrun */
7475                         byte_size = (int) size * sizeof(vm_object_query_data_t);
7476                         if ((int)(byte_size + sizeof(vm_object_query_data_t)) > len) {
7477                                 break;
7478                         }
7479                 }
7480         } else {
7481                 size = task->task_owned_objects;
7482         }
7483         task_objq_unlock(task);
7484
7485         *num = size;
7486 }