apple/xnu (xnu-4570.71.2): osfmk/kern/task.c
1 /*
2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_FREE_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 * File: kern/task.c
58 * Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub,
59 * David Black
60 *
61 * Task management primitives implementation.
62 */
63 /*
64 * Copyright (c) 1993 The University of Utah and
65 * the Computer Systems Laboratory (CSL). All rights reserved.
66 *
67 * Permission to use, copy, modify and distribute this software and its
68 * documentation is hereby granted, provided that both the copyright
69 * notice and this permission notice appear in all copies of the
70 * software, derivative works or modified versions, and any portions
71 * thereof, and that both notices appear in supporting documentation.
72 *
73 * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
74 * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
75 * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
76 *
77 * CSL requests users of this software to return to csl-dist@cs.utah.edu any
78 * improvements that they make and grant CSL redistribution rights.
79 *
80 */
81 /*
82 * NOTICE: This file was modified by McAfee Research in 2004 to introduce
83 * support for mandatory and extensible security protections. This notice
84 * is included in support of clause 2.2 (b) of the Apple Public License,
85 * Version 2.0.
86 * Copyright (c) 2005 SPARTA, Inc.
87 */
88
89 #include <mach/mach_types.h>
90 #include <mach/boolean.h>
91 #include <mach/host_priv.h>
92 #include <mach/machine/vm_types.h>
93 #include <mach/vm_param.h>
94 #include <mach/mach_vm.h>
95 #include <mach/semaphore.h>
96 #include <mach/task_info.h>
97 #include <mach/task_inspect.h>
98 #include <mach/task_special_ports.h>
99 #include <mach/sdt.h>
100
101 #include <ipc/ipc_importance.h>
102 #include <ipc/ipc_types.h>
103 #include <ipc/ipc_space.h>
104 #include <ipc/ipc_entry.h>
105 #include <ipc/ipc_hash.h>
106
107 #include <kern/kern_types.h>
108 #include <kern/mach_param.h>
109 #include <kern/misc_protos.h>
110 #include <kern/task.h>
111 #include <kern/thread.h>
112 #include <kern/coalition.h>
113 #include <kern/zalloc.h>
114 #include <kern/kalloc.h>
115 #include <kern/kern_cdata.h>
116 #include <kern/processor.h>
117 #include <kern/sched_prim.h> /* for thread_wakeup */
118 #include <kern/ipc_tt.h>
119 #include <kern/host.h>
120 #include <kern/clock.h>
121 #include <kern/timer.h>
122 #include <kern/assert.h>
123 #include <kern/sync_lock.h>
124 #include <kern/affinity.h>
125 #include <kern/exc_resource.h>
126 #include <kern/machine.h>
127 #include <kern/policy_internal.h>
128
129 #include <corpses/task_corpse.h>
130 #if CONFIG_TELEMETRY
131 #include <kern/telemetry.h>
132 #endif
133
134 #if MONOTONIC
135 #include <kern/monotonic.h>
136 #include <machine/monotonic.h>
137 #endif /* MONOTONIC */
138
139 #include <os/log.h>
140
141 #include <vm/pmap.h>
142 #include <vm/vm_map.h>
143 #include <vm/vm_kern.h> /* for kernel_map, ipc_kernel_map */
144 #include <vm/vm_pageout.h>
145 #include <vm/vm_protos.h>
146 #include <vm/vm_purgeable_internal.h>
147
148 #include <sys/resource.h>
149 #include <sys/signalvar.h> /* for coredump */
150
151 /*
152 * Exported interfaces
153 */
154
155 #include <mach/task_server.h>
156 #include <mach/mach_host_server.h>
157 #include <mach/host_security_server.h>
158 #include <mach/mach_port_server.h>
159
160 #include <vm/vm_shared_region.h>
161
162 #include <libkern/OSDebug.h>
163 #include <libkern/OSAtomic.h>
164
165 #if CONFIG_ATM
166 #include <atm/atm_internal.h>
167 #endif
168
169 #include <kern/sfi.h> /* picks up ledger.h */
170
171 #if CONFIG_MACF
172 #include <security/mac_mach_internal.h>
173 #endif
174
175 #if KPERF
176 extern int kpc_force_all_ctrs(task_t, int);
177 #endif
178
179 task_t kernel_task;
180 zone_t task_zone;
181 lck_attr_t task_lck_attr;
182 lck_grp_t task_lck_grp;
183 lck_grp_attr_t task_lck_grp_attr;
184
185 extern int exc_via_corpse_forking;
186 extern int corpse_for_fatal_memkill;
187
188 /* Flag set by core audio when audio is playing. Used to stifle EXC_RESOURCE generation when active. */
189 int audio_active = 0;
190
191 zinfo_usage_store_t tasks_tkm_private;
192 zinfo_usage_store_t tasks_tkm_shared;
193
194 /* A container to accumulate statistics for expired tasks */
195 expired_task_statistics_t dead_task_statistics;
196 lck_spin_t dead_task_statistics_lock;
197
198 ledger_template_t task_ledger_template = NULL;
199
200 struct _task_ledger_indices task_ledgers __attribute__((used)) =
201 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
202 #if !CONFIG_EMBEDDED
203 { 0 /* initialized at runtime */},
204 #endif /* !CONFIG_EMBEDDED */
205 -1, -1,
206 -1, -1,
207 -1, -1,
208 };
209
210 /* System sleep state */
211 boolean_t tasks_suspend_state;
212
213
214 void init_task_ledgers(void);
215 void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1);
216 void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1);
217 void task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1);
218 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void);
219 void __attribute__((noinline)) PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, boolean_t is_fatal);
220 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor);
221
222 kern_return_t task_suspend_internal(task_t);
223 kern_return_t task_resume_internal(task_t);
224 static kern_return_t task_start_halt_locked(task_t task, boolean_t should_mark_corpse);
225
226 extern kern_return_t iokit_task_terminate(task_t task);
227
228 extern kern_return_t exception_deliver(thread_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t, struct exception_action *, lck_mtx_t *);
229 extern void bsd_copythreadname(void *dst_uth, void *src_uth);
230 extern kern_return_t thread_resume(thread_t thread);
231
232 // Warn tasks when they hit 80% of their memory limit.
233 #define PHYS_FOOTPRINT_WARNING_LEVEL 80
234
235 #define TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT 150 /* wakeups per second */
236 #define TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL 300 /* in seconds. */
237
238 /*
239 * Level (in terms of percentage of the limit) at which the wakeups monitor triggers telemetry.
240 *
241 * (i.e., when the task's wakeup rate exceeds 70% of the limit, start taking user
242 * stack traces, aka micro-stackshots)
243 */
244 #define TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER 70
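/*
 * Worked example (illustrative, assuming the defaults above): with a limit of
 * 150 wakeups/sec and a 70% trigger, micro-stackshot telemetry would begin once
 * a task's observed wakeup rate crosses roughly 150 * 70 / 100 = 105 wakeups
 * per second over the monitored interval.
 */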
245
246 int task_wakeups_monitor_interval; /* In seconds. Time period over which wakeups rate is observed */
247 int task_wakeups_monitor_rate; /* In hz. Maximum allowable wakeups per task before EXC_RESOURCE is sent */
248
249 int task_wakeups_monitor_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
250
251 int disable_exc_resource; /* Global override to suppress EXC_RESOURCE for resource monitor violations. */
252
253 ledger_amount_t max_task_footprint = 0; /* Per-task limit on physical memory consumption in bytes */
254 int max_task_footprint_warning_level = 0; /* Per-task limit warning percentage */
255 int max_task_footprint_mb = 0; /* Per-task limit on physical memory consumption in megabytes */
256
257 /* I/O Monitor Limits */
258 #define IOMON_DEFAULT_LIMIT (20480ull) /* MB of logical/physical I/O */
259 #define IOMON_DEFAULT_INTERVAL (86400ull) /* in seconds */
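/*
 * For scale (derived from the defaults above): 20480 MB over an 86400-second
 * interval works out to roughly 20 GB of logical/physical I/O per 24-hour
 * window before the I/O monitor would fire.
 */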
260
261 uint64_t task_iomon_limit_mb; /* Per-task I/O monitor limit in MBs */
262 uint64_t task_iomon_interval_secs; /* Per-task I/O monitor interval in secs */
263
264 #define IO_TELEMETRY_DEFAULT_LIMIT (10ll * 1024ll * 1024ll)
265 int64_t io_telemetry_limit; /* Threshold to take a microstackshot (0 indicates I/O telemetry is turned off) */
266 int64_t global_logical_writes_count = 0; /* Global count for logical writes */
267 static boolean_t global_update_logical_writes(int64_t);
268
269 #if MACH_ASSERT
270 int pmap_ledgers_panic = 1;
271 #endif /* MACH_ASSERT */
272
273 int task_max = CONFIG_TASK_MAX; /* Max number of tasks */
274
275 #if CONFIG_COREDUMP
276 int hwm_user_cores = 0; /* high watermark violations generate user core files */
277 #endif
278
279 #ifdef MACH_BSD
280 extern void proc_getexecutableuuid(void *, unsigned char *, unsigned long);
281 extern int proc_pid(struct proc *p);
282 extern int proc_selfpid(void);
283 extern char *proc_name_address(struct proc *p);
284 extern uint64_t get_dispatchqueue_offset_from_proc(void *);
285 extern int kevent_proc_copy_uptrs(void *proc, uint64_t *buf, int bufsize);
286
287 #if CONFIG_MEMORYSTATUS
288 extern void proc_memstat_terminated(struct proc* p, boolean_t set);
289 extern void memorystatus_on_ledger_footprint_exceeded(int warning, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal);
290 extern void memorystatus_log_exception(const int max_footprint_mb, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal);
291 extern boolean_t memorystatus_allowed_vm_map_fork(task_t task);
292
293 #if DEVELOPMENT || DEBUG
294 extern void memorystatus_abort_vm_map_fork(task_t);
295 #endif
296
297 #endif /* CONFIG_MEMORYSTATUS */
298
299 #endif /* MACH_BSD */
300
301 /* Forwards */
302
303 static void task_hold_locked(task_t task);
304 static void task_wait_locked(task_t task, boolean_t until_not_runnable);
305 static void task_release_locked(task_t task);
306
307 static void task_synchronizer_destroy_all(task_t task);
308
309 void
310 task_backing_store_privileged(
311 task_t task)
312 {
313 task_lock(task);
314 task->priv_flags |= VM_BACKING_STORE_PRIV;
315 task_unlock(task);
316 return;
317 }
318
319
320 void
321 task_set_64bit(
322 task_t task,
323 boolean_t is64bit)
324 {
325 #if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
326 thread_t thread;
327 #endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */
328
329 task_lock(task);
330
331 if (is64bit) {
332 if (task_has_64BitAddr(task))
333 goto out;
334 task_set_64BitAddr(task);
335 } else {
336 if ( !task_has_64BitAddr(task))
337 goto out;
338 task_clear_64BitAddr(task);
339 }
340 /* FIXME: On x86, the thread save state flavor can diverge from the
341 * task's 64-bit feature flag due to the 32-bit/64-bit register save
342 * state dichotomy. Since we can be pre-empted in this interval,
343 * certain routines may observe the thread as being in an inconsistent
344 * state with respect to its task's 64-bitness.
345 */
346
347 #if defined(__x86_64__) || defined(__arm64__)
348 queue_iterate(&task->threads, thread, thread_t, task_threads) {
349 thread_mtx_lock(thread);
350 machine_thread_switch_addrmode(thread);
351 thread_mtx_unlock(thread);
352
353 #if defined(__arm64__)
354 /* specifically, if running on H9 */
355 if (thread == current_thread()) {
356 uint64_t arg1, arg2;
357 int urgency;
358 spl_t spl = splsched();
359 /*
360 * This call reports that the current thread changed its 32-bit-ness.
361 * No other threads were on core when the 32-bit-ness changed, but
362 * current_thread() is on core, so the 32-bit-ness reported by the previous
363 * call to machine_thread_going_on_core() is now stale.
364 *
365 * This is needed for bring-up; a different callback should be used
366 * in the future.
367 *
368 * TODO: Remove this callout when we no longer support 32-bit code on H9
369 */
370 thread_lock(thread);
371 urgency = thread_get_urgency(thread, &arg1, &arg2);
372 machine_thread_going_on_core(thread, urgency, 0, 0, mach_approximate_time());
373 thread_unlock(thread);
374 splx(spl);
375 }
376 #endif /* defined(__arm64__) */
377 }
378 #endif /* defined(__x86_64__) || defined(__arm64__) */
379
380 out:
381 task_unlock(task);
382 }
383
384 void
385 task_set_platform_binary(
386 task_t task,
387 boolean_t is_platform)
388 {
389 task_lock(task);
390 if (is_platform) {
391 task->t_flags |= TF_PLATFORM;
392 } else {
393 task->t_flags &= ~(TF_PLATFORM);
394 }
395 task_unlock(task);
396 }
397
398 /*
399 * Set or clear the per-task TF_CA_CLIENT_WI flag according to the specified argument.
400 * Returns "false" if the flag was already set when attempting to set it, and "true" otherwise.
401 */
402 bool
403 task_set_ca_client_wi(
404 task_t task,
405 boolean_t set_or_clear)
406 {
407 bool ret = true;
408 task_lock(task);
409 if (set_or_clear) {
410 /* Tasks can have only one CA_CLIENT work interval */
411 if (task->t_flags & TF_CA_CLIENT_WI)
412 ret = false;
413 else
414 task->t_flags |= TF_CA_CLIENT_WI;
415 } else {
416 task->t_flags &= ~TF_CA_CLIENT_WI;
417 }
418 task_unlock(task);
419 return ret;
420 }
421
422 void
423 task_set_dyld_info(
424 task_t task,
425 mach_vm_address_t addr,
426 mach_vm_size_t size)
427 {
428 task_lock(task);
429 task->all_image_info_addr = addr;
430 task->all_image_info_size = size;
431 task_unlock(task);
432 }
433
434 void
435 task_atm_reset(__unused task_t task) {
436
437 #if CONFIG_ATM
438 if (task->atm_context != NULL) {
439 atm_task_descriptor_destroy(task->atm_context);
440 task->atm_context = NULL;
441 }
442 #endif
443
444 }
445
446 void
447 task_bank_reset(__unused task_t task) {
448
449 if (task->bank_context != NULL) {
450 bank_task_destroy(task);
451 }
452 }
453
454 /*
455 * NOTE: This should only be called when the P_LINTRANSIT
456 * flag is set (the proc_trans lock is held) on the
457 * proc associated with the task.
458 */
459 void
460 task_bank_init(__unused task_t task) {
461
462 if (task->bank_context != NULL) {
463 panic("Task bank init called with non null bank context for task: %p and bank_context: %p", task, task->bank_context);
464 }
465 bank_task_initialize(task);
466 }
467
468 void
469 task_set_did_exec_flag(task_t task)
470 {
471 task->t_procflags |= TPF_DID_EXEC;
472 }
473
474 void
475 task_clear_exec_copy_flag(task_t task)
476 {
477 task->t_procflags &= ~TPF_EXEC_COPY;
478 }
479
480 /*
481 * This wait event is t_procflags instead of t_flags because t_flags is volatile
482 *
483 * TODO: store the flags in the same place as the event
484 * rdar://problem/28501994
485 */
486 event_t
487 task_get_return_wait_event(task_t task)
488 {
489 return (event_t)&task->t_procflags;
490 }
491
492 void
493 task_clear_return_wait(task_t task)
494 {
495 task_lock(task);
496
497 task->t_flags &= ~TF_LRETURNWAIT;
498
499 if (task->t_flags & TF_LRETURNWAITER) {
500 thread_wakeup(task_get_return_wait_event(task));
501 task->t_flags &= ~TF_LRETURNWAITER;
502 }
503
504 task_unlock(task);
505 }
506
507 void __attribute__((noreturn))
508 task_wait_to_return(void)
509 {
510 task_t task;
511
512 task = current_task();
513 task_lock(task);
514
515 if (task->t_flags & TF_LRETURNWAIT) {
516 do {
517 task->t_flags |= TF_LRETURNWAITER;
518 assert_wait(task_get_return_wait_event(task), THREAD_UNINT);
519 task_unlock(task);
520
521 thread_block(THREAD_CONTINUE_NULL);
522
523 task_lock(task);
524 } while (task->t_flags & TF_LRETURNWAIT);
525 }
526
527 task_unlock(task);
528
529 thread_bootstrap_return();
530 }
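/*
 * Sketch of the handshake implemented above (as read from the code): a task
 * created with TF_LRETURNWAIT parks in task_wait_to_return(), setting
 * TF_LRETURNWAITER and blocking on task_get_return_wait_event() until another
 * context calls task_clear_return_wait(), which clears TF_LRETURNWAIT and
 * wakes any waiter; the blocked thread then proceeds via
 * thread_bootstrap_return().
 */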
531
532 #ifdef CONFIG_32BIT_TELEMETRY
533 boolean_t
534 task_consume_32bit_log_flag(task_t task)
535 {
536 if ((task->t_procflags & TPF_LOG_32BIT_TELEMETRY) != 0) {
537 task->t_procflags &= ~TPF_LOG_32BIT_TELEMETRY;
538 return TRUE;
539 } else {
540 return FALSE;
541 }
542 }
543
544 void
545 task_set_32bit_log_flag(task_t task)
546 {
547 task->t_procflags |= TPF_LOG_32BIT_TELEMETRY;
548 }
549 #endif /* CONFIG_32BIT_TELEMETRY */
550
551 boolean_t
552 task_is_exec_copy(task_t task)
553 {
554 return task_is_exec_copy_internal(task);
555 }
556
557 boolean_t
558 task_did_exec(task_t task)
559 {
560 return task_did_exec_internal(task);
561 }
562
563 boolean_t
564 task_is_active(task_t task)
565 {
566 return task->active;
567 }
568
569 boolean_t
570 task_is_halting(task_t task)
571 {
572 return task->halting;
573 }
574
575 #if TASK_REFERENCE_LEAK_DEBUG
576 #include <kern/btlog.h>
577
578 static btlog_t *task_ref_btlog;
579 #define TASK_REF_OP_INCR 0x1
580 #define TASK_REF_OP_DECR 0x2
581
582 #define TASK_REF_NUM_RECORDS 100000
583 #define TASK_REF_BTDEPTH 7
584
585 void
586 task_reference_internal(task_t task)
587 {
588 void * bt[TASK_REF_BTDEPTH];
589 int numsaved = 0;
590
591 numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
592
593 (void)hw_atomic_add(&(task)->ref_count, 1);
594 btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_INCR,
595 bt, numsaved);
596 }
597
598 uint32_t
599 task_deallocate_internal(task_t task)
600 {
601 void * bt[TASK_REF_BTDEPTH];
602 int numsaved = 0;
603
604 numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
605
606 btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_DECR,
607 bt, numsaved);
608 return hw_atomic_sub(&(task)->ref_count, 1);
609 }
610
611 #endif /* TASK_REFERENCE_LEAK_DEBUG */
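/*
 * Reading of the debug machinery above: each task_reference_internal() /
 * task_deallocate_internal() call records a TASK_REF_OP_INCR / TASK_REF_OP_DECR
 * entry with up to TASK_REF_BTDEPTH backtrace frames in task_ref_btlog, so a
 * leaked task reference can be traced back to its call site; the entries for a
 * task are dropped in task_deallocate() via btlog_remove_entries_for_element().
 */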
612
613 void
614 task_init(void)
615 {
616
617 lck_grp_attr_setdefault(&task_lck_grp_attr);
618 lck_grp_init(&task_lck_grp, "task", &task_lck_grp_attr);
619 lck_attr_setdefault(&task_lck_attr);
620 lck_mtx_init(&tasks_threads_lock, &task_lck_grp, &task_lck_attr);
621 lck_mtx_init(&tasks_corpse_lock, &task_lck_grp, &task_lck_attr);
622
623 task_zone = zinit(
624 sizeof(struct task),
625 task_max * sizeof(struct task),
626 TASK_CHUNK * sizeof(struct task),
627 "tasks");
628
629 zone_change(task_zone, Z_NOENCRYPT, TRUE);
630
631 #if CONFIG_EMBEDDED
632 task_watch_init();
633 #endif /* CONFIG_EMBEDDED */
634
635 /*
636 * Configure per-task memory limit.
637 * The boot-arg is interpreted as Megabytes,
638 * and takes precedence over the device tree.
639 * Setting the boot-arg to 0 disables task limits.
640 */
641 if (!PE_parse_boot_argn("max_task_pmem", &max_task_footprint_mb,
642 sizeof (max_task_footprint_mb))) {
643 /*
644 * No limit was found in boot-args, so go look in the device tree.
645 */
646 if (!PE_get_default("kern.max_task_pmem", &max_task_footprint_mb,
647 sizeof(max_task_footprint_mb))) {
648 /*
649 * No limit was found in device tree.
650 */
651 max_task_footprint_mb = 0;
652 }
653 }
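/*
 * Example (illustrative only): booting with "max_task_pmem=1024" would cap each
 * task's physical footprint at 1024 MB, overriding any kern.max_task_pmem value
 * from the device tree; "max_task_pmem=0" disables the per-task limit entirely,
 * per the comment above.
 */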
654
655 if (max_task_footprint_mb != 0) {
656 #if CONFIG_MEMORYSTATUS
657 if (max_task_footprint_mb < 50) {
658 printf("Warning: max_task_pmem %d below minimum.\n",
659 max_task_footprint_mb);
660 max_task_footprint_mb = 50;
661 }
662 printf("Limiting task physical memory footprint to %d MB\n",
663 max_task_footprint_mb);
664
665 max_task_footprint = (ledger_amount_t)max_task_footprint_mb * 1024 * 1024; // Convert MB to bytes
666
667 /*
668 * Configure the per-task memory limit warning level.
669 * This is computed as a percentage.
670 */
671 max_task_footprint_warning_level = 0;
672
673 if (max_mem < 0x40000000) {
674 /*
675 * On devices with < 1GB of memory:
676 * -- set warnings to 50MB below the per-task limit.
677 */
678 if (max_task_footprint_mb > 50) {
679 max_task_footprint_warning_level = ((max_task_footprint_mb - 50) * 100) / max_task_footprint_mb;
680 }
681 } else {
682 /*
683 * On devices with >= 1GB of memory:
684 * -- set warnings to 100MB below the per-task limit.
685 */
686 if (max_task_footprint_mb > 100) {
687 max_task_footprint_warning_level = ((max_task_footprint_mb - 100) * 100) / max_task_footprint_mb;
688 }
689 }
690
691 /*
692 * Never allow warning level to land below the default.
693 */
694 if (max_task_footprint_warning_level < PHYS_FOOTPRINT_WARNING_LEVEL) {
695 max_task_footprint_warning_level = PHYS_FOOTPRINT_WARNING_LEVEL;
696 }
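/*
 * Worked example of the computation above (illustrative numbers): on a device
 * with >= 1GB of memory and max_task_footprint_mb = 500, the warning level is
 * ((500 - 100) * 100) / 500 = 80%, i.e. the warning fires 100MB below the
 * limit; anything that computes below PHYS_FOOTPRINT_WARNING_LEVEL (80) is
 * clamped up to that default.
 */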
697
698 printf("Limiting task physical memory warning to %d%%\n", max_task_footprint_warning_level);
699
700 #else
701 printf("Warning: max_task_pmem specified, but jetsam not configured; ignoring.\n");
702 #endif /* CONFIG_MEMORYSTATUS */
703 }
704
705 #if MACH_ASSERT
706 PE_parse_boot_argn("pmap_ledgers_panic", &pmap_ledgers_panic,
707 sizeof (pmap_ledgers_panic));
708 #endif /* MACH_ASSERT */
709
710 #if CONFIG_COREDUMP
711 if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores,
712 sizeof (hwm_user_cores))) {
713 hwm_user_cores = 0;
714 }
715 #endif
716
717 proc_init_cpumon_params();
718
719 if (!PE_parse_boot_argn("task_wakeups_monitor_rate", &task_wakeups_monitor_rate, sizeof (task_wakeups_monitor_rate))) {
720 task_wakeups_monitor_rate = TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT;
721 }
722
723 if (!PE_parse_boot_argn("task_wakeups_monitor_interval", &task_wakeups_monitor_interval, sizeof (task_wakeups_monitor_interval))) {
724 task_wakeups_monitor_interval = TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL;
725 }
726
727 if (!PE_parse_boot_argn("task_wakeups_monitor_ustackshots_trigger_pct", &task_wakeups_monitor_ustackshots_trigger_pct,
728 sizeof (task_wakeups_monitor_ustackshots_trigger_pct))) {
729 task_wakeups_monitor_ustackshots_trigger_pct = TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER;
730 }
731
732 if (!PE_parse_boot_argn("disable_exc_resource", &disable_exc_resource,
733 sizeof (disable_exc_resource))) {
734 disable_exc_resource = 0;
735 }
736
737 if (!PE_parse_boot_argn("task_iomon_limit_mb", &task_iomon_limit_mb, sizeof (task_iomon_limit_mb))) {
738 task_iomon_limit_mb = IOMON_DEFAULT_LIMIT;
739 }
740
741 if (!PE_parse_boot_argn("task_iomon_interval_secs", &task_iomon_interval_secs, sizeof (task_iomon_interval_secs))) {
742 task_iomon_interval_secs = IOMON_DEFAULT_INTERVAL;
743 }
744
745 if (!PE_parse_boot_argn("io_telemetry_limit", &io_telemetry_limit, sizeof (io_telemetry_limit))) {
746 io_telemetry_limit = IO_TELEMETRY_DEFAULT_LIMIT;
747 }
748
749 /*
750 * If we have coalitions, coalition_init() will call init_task_ledgers() as it
751 * sets up the ledgers for the default coalition. If we don't have coalitions,
752 * then we have to call it now.
753 */
754 #if CONFIG_COALITIONS
755 assert(task_ledger_template);
756 #else /* CONFIG_COALITIONS */
757 init_task_ledgers();
758 #endif /* CONFIG_COALITIONS */
759
760 #if TASK_REFERENCE_LEAK_DEBUG
761 task_ref_btlog = btlog_create(TASK_REF_NUM_RECORDS, TASK_REF_BTDEPTH, TRUE /* caller_will_remove_entries_for_element? */);
762 assert(task_ref_btlog);
763 #endif
764
765 /*
766 * Create the kernel task as the first task.
767 */
768 #ifdef __LP64__
769 if (task_create_internal(TASK_NULL, NULL, FALSE, TRUE, TF_NONE, TPF_NONE, &kernel_task) != KERN_SUCCESS)
770 #else
771 if (task_create_internal(TASK_NULL, NULL, FALSE, FALSE, TF_NONE, TPF_NONE, &kernel_task) != KERN_SUCCESS)
772 #endif
773 panic("task_init\n");
774
775
776 vm_map_deallocate(kernel_task->map);
777 kernel_task->map = kernel_map;
778 lck_spin_init(&dead_task_statistics_lock, &task_lck_grp, &task_lck_attr);
779 }
780
781 /*
782 * Create a task running in the kernel address space. It may
783 * have its own map of size mem_size and may have ipc privileges.
784 */
785 kern_return_t
786 kernel_task_create(
787 __unused task_t parent_task,
788 __unused vm_offset_t map_base,
789 __unused vm_size_t map_size,
790 __unused task_t *child_task)
791 {
792 return (KERN_INVALID_ARGUMENT);
793 }
794
795 kern_return_t
796 task_create(
797 task_t parent_task,
798 __unused ledger_port_array_t ledger_ports,
799 __unused mach_msg_type_number_t num_ledger_ports,
800 __unused boolean_t inherit_memory,
801 __unused task_t *child_task) /* OUT */
802 {
803 if (parent_task == TASK_NULL)
804 return(KERN_INVALID_ARGUMENT);
805
806 /*
807 * No longer supported: too many calls assume that a task has a valid
808 * process attached.
809 */
810 return(KERN_FAILURE);
811 }
812
813 kern_return_t
814 host_security_create_task_token(
815 host_security_t host_security,
816 task_t parent_task,
817 __unused security_token_t sec_token,
818 __unused audit_token_t audit_token,
819 __unused host_priv_t host_priv,
820 __unused ledger_port_array_t ledger_ports,
821 __unused mach_msg_type_number_t num_ledger_ports,
822 __unused boolean_t inherit_memory,
823 __unused task_t *child_task) /* OUT */
824 {
825 if (parent_task == TASK_NULL)
826 return(KERN_INVALID_ARGUMENT);
827
828 if (host_security == HOST_NULL)
829 return(KERN_INVALID_SECURITY);
830
831 /*
832 * No longer supported.
833 */
834 return(KERN_FAILURE);
835 }
836
837 /*
838 * Task ledgers
839 * ------------
840 *
841 * phys_footprint
842 * Physical footprint: This is the sum of:
843 * + (internal - alternate_accounting)
844 * + (internal_compressed - alternate_accounting_compressed)
845 * + iokit_mapped
846 * + purgeable_nonvolatile
847 * + purgeable_nonvolatile_compressed
848 * + page_table
849 *
850 * internal
851 * The task's anonymous memory, which on iOS is always resident.
852 *
853 * internal_compressed
854 * Amount of this task's internal memory which is held by the compressor.
855 * Such memory is no longer actually resident for the task [i.e., resident in its pmap],
856 * and could be either decompressed back into memory, or paged out to storage, depending
857 * on our implementation.
858 *
859 * iokit_mapped
860 * IOKit mappings: The total size of all IOKit mappings in this task, regardless of
861 * clean/dirty or internal/external state.
862 *
863 * alternate_accounting
864 * The number of internal dirty pages which are part of IOKit mappings. By definition, these pages
865 * are counted in both internal *and* iokit_mapped, so we must subtract them from the total to avoid
866 * double counting.
867 */
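/*
 * Worked example of the phys_footprint sum (illustrative numbers): a task with
 * internal = 100MB, internal_compressed = 20MB, iokit_mapped = 30MB, of which
 * alternate_accounting = 10MB and alternate_accounting_compressed = 5MB, plus
 * purgeable_nonvolatile = 15MB, purgeable_nonvolatile_compressed = 5MB and
 * page_table = 2MB, would report (100 - 10) + (20 - 5) + 30 + 15 + 5 + 2 = 157MB,
 * the subtraction preventing IOKit-backed internal pages from being counted twice.
 */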
868 void
869 init_task_ledgers(void)
870 {
871 ledger_template_t t;
872
873 assert(task_ledger_template == NULL);
874 assert(kernel_task == TASK_NULL);
875
876 #if MACH_ASSERT
877 PE_parse_boot_argn("pmap_ledgers_panic", &pmap_ledgers_panic,
878 sizeof (pmap_ledgers_panic));
879 #endif /* MACH_ASSERT */
880
881 if ((t = ledger_template_create("Per-task ledger")) == NULL)
882 panic("couldn't create task ledger template");
883
884 task_ledgers.cpu_time = ledger_entry_add(t, "cpu_time", "sched", "ns");
885 task_ledgers.tkm_private = ledger_entry_add(t, "tkm_private",
886 "physmem", "bytes");
887 task_ledgers.tkm_shared = ledger_entry_add(t, "tkm_shared", "physmem",
888 "bytes");
889 task_ledgers.phys_mem = ledger_entry_add(t, "phys_mem", "physmem",
890 "bytes");
891 task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem",
892 "bytes");
893 task_ledgers.internal = ledger_entry_add(t, "internal", "physmem",
894 "bytes");
895 task_ledgers.iokit_mapped = ledger_entry_add(t, "iokit_mapped", "mappings",
896 "bytes");
897 task_ledgers.alternate_accounting = ledger_entry_add(t, "alternate_accounting", "physmem",
898 "bytes");
899 task_ledgers.alternate_accounting_compressed = ledger_entry_add(t, "alternate_accounting_compressed", "physmem",
900 "bytes");
901 task_ledgers.page_table = ledger_entry_add(t, "page_table", "physmem",
902 "bytes");
903 task_ledgers.phys_footprint = ledger_entry_add(t, "phys_footprint", "physmem",
904 "bytes");
905 task_ledgers.internal_compressed = ledger_entry_add(t, "internal_compressed", "physmem",
906 "bytes");
907 task_ledgers.purgeable_volatile = ledger_entry_add(t, "purgeable_volatile", "physmem", "bytes");
908 task_ledgers.purgeable_nonvolatile = ledger_entry_add(t, "purgeable_nonvolatile", "physmem", "bytes");
909 task_ledgers.purgeable_volatile_compressed = ledger_entry_add(t, "purgeable_volatile_compress", "physmem", "bytes");
910 task_ledgers.purgeable_nonvolatile_compressed = ledger_entry_add(t, "purgeable_nonvolatile_compress", "physmem", "bytes");
911 task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power",
912 "count");
913 task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power",
914 "count");
915
916 #if CONFIG_SCHED_SFI
917 sfi_class_id_t class_id, ledger_alias;
918 for (class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
919 task_ledgers.sfi_wait_times[class_id] = -1;
920 }
921
922 /* don't account for UNSPECIFIED */
923 for (class_id = SFI_CLASS_UNSPECIFIED + 1; class_id < MAX_SFI_CLASS_ID; class_id++) {
924 ledger_alias = sfi_get_ledger_alias_for_class(class_id);
925 if (ledger_alias != SFI_CLASS_UNSPECIFIED) {
926 /* Check to see if alias has been registered yet */
927 if (task_ledgers.sfi_wait_times[ledger_alias] != -1) {
928 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias];
929 } else {
930 /* Otherwise, initialize it first */
931 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias] = sfi_ledger_entry_add(t, ledger_alias);
932 }
933 } else {
934 task_ledgers.sfi_wait_times[class_id] = sfi_ledger_entry_add(t, class_id);
935 }
936
937 if (task_ledgers.sfi_wait_times[class_id] < 0) {
938 panic("couldn't create entries for task ledger template for SFI class 0x%x", class_id);
939 }
940 }
941
942 assert(task_ledgers.sfi_wait_times[MAX_SFI_CLASS_ID -1] != -1);
943 #endif /* CONFIG_SCHED_SFI */
944
945 task_ledgers.cpu_time_billed_to_me = ledger_entry_add(t, "cpu_time_billed_to_me", "sched", "ns");
946 task_ledgers.cpu_time_billed_to_others = ledger_entry_add(t, "cpu_time_billed_to_others", "sched", "ns");
947 task_ledgers.physical_writes = ledger_entry_add(t, "physical_writes", "res", "bytes");
948 task_ledgers.logical_writes = ledger_entry_add(t, "logical_writes", "res", "bytes");
949 task_ledgers.energy_billed_to_me = ledger_entry_add(t, "energy_billed_to_me", "power", "nj");
950 task_ledgers.energy_billed_to_others = ledger_entry_add(t, "energy_billed_to_others", "power", "nj");
951
952 if ((task_ledgers.cpu_time < 0) ||
953 (task_ledgers.tkm_private < 0) ||
954 (task_ledgers.tkm_shared < 0) ||
955 (task_ledgers.phys_mem < 0) ||
956 (task_ledgers.wired_mem < 0) ||
957 (task_ledgers.internal < 0) ||
958 (task_ledgers.iokit_mapped < 0) ||
959 (task_ledgers.alternate_accounting < 0) ||
960 (task_ledgers.alternate_accounting_compressed < 0) ||
961 (task_ledgers.page_table < 0) ||
962 (task_ledgers.phys_footprint < 0) ||
963 (task_ledgers.internal_compressed < 0) ||
964 (task_ledgers.purgeable_volatile < 0) ||
965 (task_ledgers.purgeable_nonvolatile < 0) ||
966 (task_ledgers.purgeable_volatile_compressed < 0) ||
967 (task_ledgers.purgeable_nonvolatile_compressed < 0) ||
968 (task_ledgers.platform_idle_wakeups < 0) ||
969 (task_ledgers.interrupt_wakeups < 0) ||
970 (task_ledgers.cpu_time_billed_to_me < 0) || (task_ledgers.cpu_time_billed_to_others < 0) ||
971 (task_ledgers.physical_writes < 0) ||
972 (task_ledgers.logical_writes < 0) ||
973 (task_ledgers.energy_billed_to_me < 0) ||
974 (task_ledgers.energy_billed_to_others < 0)
975 ) {
976 panic("couldn't create entries for task ledger template");
977 }
978
979 ledger_track_credit_only(t, task_ledgers.phys_footprint);
980 ledger_track_credit_only(t, task_ledgers.page_table);
981 ledger_track_credit_only(t, task_ledgers.internal);
982 ledger_track_credit_only(t, task_ledgers.internal_compressed);
983 ledger_track_credit_only(t, task_ledgers.iokit_mapped);
984 ledger_track_credit_only(t, task_ledgers.alternate_accounting);
985 ledger_track_credit_only(t, task_ledgers.alternate_accounting_compressed);
986 ledger_track_credit_only(t, task_ledgers.purgeable_volatile);
987 ledger_track_credit_only(t, task_ledgers.purgeable_nonvolatile);
988 ledger_track_credit_only(t, task_ledgers.purgeable_volatile_compressed);
989 ledger_track_credit_only(t, task_ledgers.purgeable_nonvolatile_compressed);
990
991 ledger_track_maximum(t, task_ledgers.phys_footprint, 60);
992 #if MACH_ASSERT
993 if (pmap_ledgers_panic) {
994 ledger_panic_on_negative(t, task_ledgers.phys_footprint);
995 ledger_panic_on_negative(t, task_ledgers.page_table);
996 ledger_panic_on_negative(t, task_ledgers.internal);
997 ledger_panic_on_negative(t, task_ledgers.internal_compressed);
998 ledger_panic_on_negative(t, task_ledgers.iokit_mapped);
999 ledger_panic_on_negative(t, task_ledgers.alternate_accounting);
1000 ledger_panic_on_negative(t, task_ledgers.alternate_accounting_compressed);
1001 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile);
1002 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile);
1003 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile_compressed);
1004 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile_compressed);
1005 }
1006 #endif /* MACH_ASSERT */
1007
1008 #if CONFIG_MEMORYSTATUS
1009 ledger_set_callback(t, task_ledgers.phys_footprint, task_footprint_exceeded, NULL, NULL);
1010 #endif /* CONFIG_MEMORYSTATUS */
1011
1012 ledger_set_callback(t, task_ledgers.interrupt_wakeups,
1013 task_wakeups_rate_exceeded, NULL, NULL);
1014 ledger_set_callback(t, task_ledgers.physical_writes, task_io_rate_exceeded, (void *)FLAVOR_IO_PHYSICAL_WRITES, NULL);
1015 ledger_set_callback(t, task_ledgers.logical_writes, task_io_rate_exceeded, (void *)FLAVOR_IO_LOGICAL_WRITES, NULL);
1016
1017 ledger_template_complete(t);
1018 task_ledger_template = t;
1019 }
1020
1021 kern_return_t
1022 task_create_internal(
1023 task_t parent_task,
1024 coalition_t *parent_coalitions __unused,
1025 boolean_t inherit_memory,
1026 __unused boolean_t is_64bit,
1027 uint32_t t_flags,
1028 uint32_t t_procflags,
1029 task_t *child_task) /* OUT */
1030 {
1031 task_t new_task;
1032 vm_shared_region_t shared_region;
1033 ledger_t ledger = NULL;
1034
1035 new_task = (task_t) zalloc(task_zone);
1036
1037 if (new_task == TASK_NULL)
1038 return(KERN_RESOURCE_SHORTAGE);
1039
1040 /* one ref for just being alive; one for our caller */
1041 new_task->ref_count = 2;
1042
1043 /* allocate with active entries */
1044 assert(task_ledger_template != NULL);
1045 if ((ledger = ledger_instantiate(task_ledger_template,
1046 LEDGER_CREATE_ACTIVE_ENTRIES)) == NULL) {
1047 zfree(task_zone, new_task);
1048 return(KERN_RESOURCE_SHORTAGE);
1049 }
1050
1051
1052 new_task->ledger = ledger;
1053
1054 #if defined(CONFIG_SCHED_MULTIQ)
1055 new_task->sched_group = sched_group_create();
1056 #endif
1057
1058 /* if inherit_memory is true, parent_task MUST not be NULL */
1059 if (!(t_flags & TF_CORPSE_FORK) && inherit_memory)
1060 new_task->map = vm_map_fork(ledger, parent_task->map, 0);
1061 else
1062 new_task->map = vm_map_create(pmap_create(ledger, 0, is_64bit),
1063 (vm_map_offset_t)(VM_MIN_ADDRESS),
1064 (vm_map_offset_t)(VM_MAX_ADDRESS), TRUE);
1065
1066 /* Inherit memlock limit from parent */
1067 if (parent_task)
1068 vm_map_set_user_wire_limit(new_task->map, (vm_size_t)parent_task->map->user_wire_limit);
1069
1070 lck_mtx_init(&new_task->lock, &task_lck_grp, &task_lck_attr);
1071 queue_init(&new_task->threads);
1072 new_task->suspend_count = 0;
1073 new_task->thread_count = 0;
1074 new_task->active_thread_count = 0;
1075 new_task->user_stop_count = 0;
1076 new_task->legacy_stop_count = 0;
1077 new_task->active = TRUE;
1078 new_task->halting = FALSE;
1079 new_task->user_data = NULL;
1080 new_task->priv_flags = 0;
1081 new_task->t_flags = t_flags;
1082 new_task->t_procflags = t_procflags;
1083 new_task->importance = 0;
1084 new_task->crashed_thread_id = 0;
1085 new_task->exec_token = 0;
1086
1087 #if CONFIG_ATM
1088 new_task->atm_context = NULL;
1089 #endif
1090 new_task->bank_context = NULL;
1091
1092 #ifdef MACH_BSD
1093 new_task->bsd_info = NULL;
1094 new_task->corpse_info = NULL;
1095 #endif /* MACH_BSD */
1096
1097 #if CONFIG_MACF
1098 new_task->crash_label = NULL;
1099 #endif
1100
1101 #if CONFIG_MEMORYSTATUS
1102 if (max_task_footprint != 0) {
1103 ledger_set_limit(ledger, task_ledgers.phys_footprint, max_task_footprint, PHYS_FOOTPRINT_WARNING_LEVEL);
1104 }
1105 #endif /* CONFIG_MEMORYSTATUS */
1106
1107 if (task_wakeups_monitor_rate != 0) {
1108 uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
1109 int32_t rate; // Ignored because of WAKEMON_SET_DEFAULTS
1110 task_wakeups_monitor_ctl(new_task, &flags, &rate);
1111 }
1112
1113 #if CONFIG_IO_ACCOUNTING
1114 uint32_t flags = IOMON_ENABLE;
1115 task_io_monitor_ctl(new_task, &flags);
1116 #endif /* CONFIG_IO_ACCOUNTING */
1117
1118 machine_task_init(new_task, parent_task, inherit_memory);
1119
1120 new_task->task_debug = NULL;
1121
1122 #if DEVELOPMENT || DEBUG
1123 new_task->task_unnested = FALSE;
1124 new_task->task_disconnected_count = 0;
1125 #endif
1126 queue_init(&new_task->semaphore_list);
1127 new_task->semaphores_owned = 0;
1128
1129 ipc_task_init(new_task, parent_task);
1130
1131 new_task->vtimers = 0;
1132
1133 new_task->shared_region = NULL;
1134
1135 new_task->affinity_space = NULL;
1136
1137 new_task->t_kpc = 0;
1138
1139 new_task->pidsuspended = FALSE;
1140 new_task->frozen = FALSE;
1141 new_task->changing_freeze_state = FALSE;
1142 new_task->rusage_cpu_flags = 0;
1143 new_task->rusage_cpu_percentage = 0;
1144 new_task->rusage_cpu_interval = 0;
1145 new_task->rusage_cpu_deadline = 0;
1146 new_task->rusage_cpu_callt = NULL;
1147 #if MACH_ASSERT
1148 new_task->suspends_outstanding = 0;
1149 #endif
1150
1151 #if HYPERVISOR
1152 new_task->hv_task_target = NULL;
1153 #endif /* HYPERVISOR */
1154
1155 #if CONFIG_EMBEDDED
1156 queue_init(&new_task->task_watchers);
1157 new_task->num_taskwatchers = 0;
1158 new_task->watchapplying = 0;
1159 #endif /* CONFIG_EMBEDDED */
1160
1161 new_task->mem_notify_reserved = 0;
1162 new_task->memlimit_attrs_reserved = 0;
1163 #if IMPORTANCE_INHERITANCE
1164 new_task->task_imp_base = NULL;
1165 #endif /* IMPORTANCE_INHERITANCE */
1166
1167 new_task->requested_policy = default_task_requested_policy;
1168 new_task->effective_policy = default_task_effective_policy;
1169
1170 if (parent_task != TASK_NULL) {
1171 new_task->sec_token = parent_task->sec_token;
1172 new_task->audit_token = parent_task->audit_token;
1173
1174 /* inherit the parent's shared region */
1175 shared_region = vm_shared_region_get(parent_task);
1176 vm_shared_region_set(new_task, shared_region);
1177
1178 if(task_has_64BitAddr(parent_task))
1179 task_set_64BitAddr(new_task);
1180 new_task->all_image_info_addr = parent_task->all_image_info_addr;
1181 new_task->all_image_info_size = parent_task->all_image_info_size;
1182
1183 if (inherit_memory && parent_task->affinity_space)
1184 task_affinity_create(parent_task, new_task);
1185
1186 new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task);
1187
1188 #if IMPORTANCE_INHERITANCE
1189 ipc_importance_task_t new_task_imp = IIT_NULL;
1190 boolean_t inherit_receive = TRUE;
1191
1192 if (task_is_marked_importance_donor(parent_task)) {
1193 new_task_imp = ipc_importance_for_task(new_task, FALSE);
1194 assert(IIT_NULL != new_task_imp);
1195 ipc_importance_task_mark_donor(new_task_imp, TRUE);
1196 }
1197 #if CONFIG_EMBEDDED
1198 /* Embedded only wants to inherit for exec copy task */
1199 if ((t_procflags & TPF_EXEC_COPY) == 0) {
1200 inherit_receive = FALSE;
1201 }
1202 #endif /* CONFIG_EMBEDDED */
1203
1204 if (inherit_receive) {
1205 if (task_is_marked_importance_receiver(parent_task)) {
1206 if (IIT_NULL == new_task_imp)
1207 new_task_imp = ipc_importance_for_task(new_task, FALSE);
1208 assert(IIT_NULL != new_task_imp);
1209 ipc_importance_task_mark_receiver(new_task_imp, TRUE);
1210 }
1211 if (task_is_marked_importance_denap_receiver(parent_task)) {
1212 if (IIT_NULL == new_task_imp)
1213 new_task_imp = ipc_importance_for_task(new_task, FALSE);
1214 assert(IIT_NULL != new_task_imp);
1215 ipc_importance_task_mark_denap_receiver(new_task_imp, TRUE);
1216 }
1217 }
1218
1219 if (IIT_NULL != new_task_imp) {
1220 assert(new_task->task_imp_base == new_task_imp);
1221 ipc_importance_task_release(new_task_imp);
1222 }
1223 #endif /* IMPORTANCE_INHERITANCE */
1224
1225 new_task->priority = BASEPRI_DEFAULT;
1226 new_task->max_priority = MAXPRI_USER;
1227
1228 task_policy_create(new_task, parent_task);
1229 } else {
1230 new_task->sec_token = KERNEL_SECURITY_TOKEN;
1231 new_task->audit_token = KERNEL_AUDIT_TOKEN;
1232 #ifdef __LP64__
1233 if(is_64bit)
1234 task_set_64BitAddr(new_task);
1235 #endif
1236 new_task->all_image_info_addr = (mach_vm_address_t)0;
1237 new_task->all_image_info_size = (mach_vm_size_t)0;
1238
1239 new_task->pset_hint = PROCESSOR_SET_NULL;
1240
1241 if (kernel_task == TASK_NULL) {
1242 new_task->priority = BASEPRI_KERNEL;
1243 new_task->max_priority = MAXPRI_KERNEL;
1244 } else {
1245 new_task->priority = BASEPRI_DEFAULT;
1246 new_task->max_priority = MAXPRI_USER;
1247 }
1248 }
1249
1250 bzero(new_task->coalition, sizeof(new_task->coalition));
1251 for (int i = 0; i < COALITION_NUM_TYPES; i++)
1252 queue_chain_init(new_task->task_coalition[i]);
1253
1254 /* Allocate I/O Statistics */
1255 new_task->task_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info));
1256 assert(new_task->task_io_stats != NULL);
1257 bzero(new_task->task_io_stats, sizeof(struct io_stat_info));
1258
1259 bzero(&(new_task->cpu_time_eqos_stats), sizeof(new_task->cpu_time_eqos_stats));
1260 bzero(&(new_task->cpu_time_rqos_stats), sizeof(new_task->cpu_time_rqos_stats));
1261
1262 bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics));
1263
1264 /* Copy resource accounting info from the parent for a corpse-forked task. */
1265 if (parent_task != NULL && (t_flags & TF_CORPSE_FORK)) {
1266 task_rollup_accounting_info(new_task, parent_task);
1267 } else {
1268 /* Initialize to zero for standard fork/spawn case */
1269 new_task->total_user_time = 0;
1270 new_task->total_system_time = 0;
1271 new_task->total_ptime = 0;
1272 new_task->faults = 0;
1273 new_task->pageins = 0;
1274 new_task->cow_faults = 0;
1275 new_task->messages_sent = 0;
1276 new_task->messages_received = 0;
1277 new_task->syscalls_mach = 0;
1278 new_task->syscalls_unix = 0;
1279 new_task->c_switch = 0;
1280 new_task->p_switch = 0;
1281 new_task->ps_switch = 0;
1282 new_task->low_mem_notified_warn = 0;
1283 new_task->low_mem_notified_critical = 0;
1284 new_task->purged_memory_warn = 0;
1285 new_task->purged_memory_critical = 0;
1286 new_task->low_mem_privileged_listener = 0;
1287 new_task->memlimit_is_active = 0;
1288 new_task->memlimit_is_fatal = 0;
1289 new_task->memlimit_active_exc_resource = 0;
1290 new_task->memlimit_inactive_exc_resource = 0;
1291 new_task->task_timer_wakeups_bin_1 = 0;
1292 new_task->task_timer_wakeups_bin_2 = 0;
1293 new_task->task_gpu_ns = 0;
1294 new_task->task_immediate_writes = 0;
1295 new_task->task_deferred_writes = 0;
1296 new_task->task_invalidated_writes = 0;
1297 new_task->task_metadata_writes = 0;
1298 new_task->task_energy = 0;
1299 #if MONOTONIC
1300 memset(&new_task->task_monotonic, 0, sizeof(new_task->task_monotonic));
1301 #endif /* MONOTONIC */
1302 }
1303
1304
1305 #if CONFIG_COALITIONS
1306 if (!(t_flags & TF_CORPSE_FORK)) {
1307 /* TODO: there is no graceful failure path here... */
1308 if (parent_coalitions && parent_coalitions[COALITION_TYPE_RESOURCE]) {
1309 coalitions_adopt_task(parent_coalitions, new_task);
1310 } else if (parent_task && parent_task->coalition[COALITION_TYPE_RESOURCE]) {
1311 /*
1312 * all tasks at least have a resource coalition, so
1313 * if the parent has one then inherit all coalitions
1314 * the parent is a part of
1315 */
1316 coalitions_adopt_task(parent_task->coalition, new_task);
1317 } else {
1318 /* TODO: assert that new_task will be PID 1 (launchd) */
1319 coalitions_adopt_init_task(new_task);
1320 }
1321 /*
1322 * on exec, we need to transfer the coalition roles from the
1323 * parent task to the exec copy task.
1324 */
1325 if (parent_task && (t_procflags & TPF_EXEC_COPY)) {
1326 int coal_roles[COALITION_NUM_TYPES];
1327 task_coalition_roles(parent_task, coal_roles);
1328 (void)coalitions_set_roles(new_task->coalition, new_task, coal_roles);
1329 }
1330 } else {
1331 coalitions_adopt_corpse_task(new_task);
1332 }
1333
1334 if (new_task->coalition[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
1335 panic("created task is not a member of a resource coalition");
1336 }
1337 #endif /* CONFIG_COALITIONS */
1338
1339 new_task->dispatchqueue_offset = 0;
1340 if (parent_task != NULL) {
1341 new_task->dispatchqueue_offset = parent_task->dispatchqueue_offset;
1342 }
1343
1344 if (vm_backing_store_low && parent_task != NULL)
1345 new_task->priv_flags |= (parent_task->priv_flags&VM_BACKING_STORE_PRIV);
1346
1347 new_task->task_volatile_objects = 0;
1348 new_task->task_nonvolatile_objects = 0;
1349 new_task->task_purgeable_disowning = FALSE;
1350 new_task->task_purgeable_disowned = FALSE;
1351 queue_init(&new_task->task_objq);
1352 task_objq_lock_init(new_task);
1353
1354 new_task->task_region_footprint = FALSE;
1355
1356 #if CONFIG_SECLUDED_MEMORY
1357 new_task->task_can_use_secluded_mem = FALSE;
1358 new_task->task_could_use_secluded_mem = FALSE;
1359 new_task->task_could_also_use_secluded_mem = FALSE;
1360 #endif /* CONFIG_SECLUDED_MEMORY */
1361
1362 queue_init(&new_task->io_user_clients);
1363
1364 ipc_task_enable(new_task);
1365
1366 lck_mtx_lock(&tasks_threads_lock);
1367 queue_enter(&tasks, new_task, task_t, tasks);
1368 tasks_count++;
1369 if (tasks_suspend_state) {
1370 task_suspend_internal(new_task);
1371 }
1372 lck_mtx_unlock(&tasks_threads_lock);
1373
1374 *child_task = new_task;
1375 return(KERN_SUCCESS);
1376 }
1377
1378 /*
1379 * task_rollup_accounting_info
1380 *
1381 * Roll up accounting stats. Used to rollup stats
1382 * for exec copy task and corpse fork.
1383 */
1384 void
1385 task_rollup_accounting_info(task_t to_task, task_t from_task)
1386 {
1387 assert(from_task != to_task);
1388
1389 to_task->total_user_time = from_task->total_user_time;
1390 to_task->total_system_time = from_task->total_system_time;
1391 to_task->total_ptime = from_task->total_ptime;
1392 to_task->faults = from_task->faults;
1393 to_task->pageins = from_task->pageins;
1394 to_task->cow_faults = from_task->cow_faults;
1395 to_task->messages_sent = from_task->messages_sent;
1396 to_task->messages_received = from_task->messages_received;
1397 to_task->syscalls_mach = from_task->syscalls_mach;
1398 to_task->syscalls_unix = from_task->syscalls_unix;
1399 to_task->c_switch = from_task->c_switch;
1400 to_task->p_switch = from_task->p_switch;
1401 to_task->ps_switch = from_task->ps_switch;
1402 to_task->extmod_statistics = from_task->extmod_statistics;
1403 to_task->low_mem_notified_warn = from_task->low_mem_notified_warn;
1404 to_task->low_mem_notified_critical = from_task->low_mem_notified_critical;
1405 to_task->purged_memory_warn = from_task->purged_memory_warn;
1406 to_task->purged_memory_critical = from_task->purged_memory_critical;
1407 to_task->low_mem_privileged_listener = from_task->low_mem_privileged_listener;
1408 *to_task->task_io_stats = *from_task->task_io_stats;
1409 to_task->cpu_time_eqos_stats = from_task->cpu_time_eqos_stats;
1410 to_task->cpu_time_rqos_stats = from_task->cpu_time_rqos_stats;
1411 to_task->task_timer_wakeups_bin_1 = from_task->task_timer_wakeups_bin_1;
1412 to_task->task_timer_wakeups_bin_2 = from_task->task_timer_wakeups_bin_2;
1413 to_task->task_gpu_ns = from_task->task_gpu_ns;
1414 to_task->task_immediate_writes = from_task->task_immediate_writes;
1415 to_task->task_deferred_writes = from_task->task_deferred_writes;
1416 to_task->task_invalidated_writes = from_task->task_invalidated_writes;
1417 to_task->task_metadata_writes = from_task->task_metadata_writes;
1418 to_task->task_energy = from_task->task_energy;
1419
1420 /* Skip ledger roll up for memory accounting entries */
1421 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time);
1422 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.platform_idle_wakeups);
1423 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.interrupt_wakeups);
1424 #if CONFIG_SCHED_SFI
1425 for (sfi_class_id_t class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
1426 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.sfi_wait_times[class_id]);
1427 }
1428 #endif
1429 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_me);
1430 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_others);
1431 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.physical_writes);
1432 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.logical_writes);
1433 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_me);
1434 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_others);
1435 }
1436
1437 int task_dropped_imp_count = 0;
1438
1439 /*
1440 * task_deallocate:
1441 *
1442 * Drop a reference on a task.
1443 */
1444 void
1445 task_deallocate(
1446 task_t task)
1447 {
1448 ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups;
1449 uint32_t refs;
1450
1451 if (task == TASK_NULL)
1452 return;
1453
1454 refs = task_deallocate_internal(task);
1455
1456 #if IMPORTANCE_INHERITANCE
1457 if (refs > 1)
1458 return;
1459
1460 atomic_load_explicit(&task->ref_count, memory_order_acquire);
1461
1462 if (refs == 1) {
1463 /*
1464 * If the last ref potentially comes from the task's importance,
1465 * disconnect it. But more task refs may be added before
1466 * that completes, so wait for the reference to go to zero
1467 * naturally (it may happen on a recursive task_deallocate()
1468 * from the ipc_importance_disconnect_task() call).
1469 */
1470 if (IIT_NULL != task->task_imp_base)
1471 ipc_importance_disconnect_task(task);
1472 return;
1473 }
1474 #else
1475 if (refs > 0)
1476 return;
1477
1478 atomic_load_explicit(&task->ref_count, memory_order_acquire);
1479
1480 #endif /* IMPORTANCE_INHERITANCE */
1481
1482 lck_mtx_lock(&tasks_threads_lock);
1483 queue_remove(&terminated_tasks, task, task_t, tasks);
1484 terminated_tasks_count--;
1485 lck_mtx_unlock(&tasks_threads_lock);
1486
1487 /*
1488 * remove the reference on atm descriptor
1489 */
1490 task_atm_reset(task);
1491
1492 /*
1493 * remove the reference on bank context
1494 */
1495 task_bank_reset(task);
1496
1497 if (task->task_io_stats)
1498 kfree(task->task_io_stats, sizeof(struct io_stat_info));
1499
1500 /*
1501 * Give the machine dependent code a chance
1502 * to perform cleanup before ripping apart
1503 * the task.
1504 */
1505 machine_task_terminate(task);
1506
1507 ipc_task_terminate(task);
1508
1509 /* let iokit know */
1510 iokit_task_terminate(task);
1511
1512 if (task->affinity_space)
1513 task_affinity_deallocate(task);
1514
1515 #if MACH_ASSERT
1516 if (task->ledger != NULL &&
1517 task->map != NULL &&
1518 task->map->pmap != NULL &&
1519 task->map->pmap->ledger != NULL) {
1520 assert(task->ledger == task->map->pmap->ledger);
1521 }
1522 #endif /* MACH_ASSERT */
1523
1524 vm_purgeable_disown(task);
1525 assert(task->task_purgeable_disowned);
1526 if (task->task_volatile_objects != 0 ||
1527 task->task_nonvolatile_objects != 0) {
1528 panic("task_deallocate(%p): "
1529 "volatile_objects=%d nonvolatile_objects=%d\n",
1530 task,
1531 task->task_volatile_objects,
1532 task->task_nonvolatile_objects);
1533 }
1534
1535 vm_map_deallocate(task->map);
1536 is_release(task->itk_space);
1537
1538 ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
1539 &interrupt_wakeups, &debit);
1540 ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
1541 &platform_idle_wakeups, &debit);
1542
1543 #if defined(CONFIG_SCHED_MULTIQ)
1544 sched_group_destroy(task->sched_group);
1545 #endif
1546
1547 /* Accumulate statistics for dead tasks */
1548 lck_spin_lock(&dead_task_statistics_lock);
1549 dead_task_statistics.total_user_time += task->total_user_time;
1550 dead_task_statistics.total_system_time += task->total_system_time;
1551
1552 dead_task_statistics.task_interrupt_wakeups += interrupt_wakeups;
1553 dead_task_statistics.task_platform_idle_wakeups += platform_idle_wakeups;
1554
1555 dead_task_statistics.task_timer_wakeups_bin_1 += task->task_timer_wakeups_bin_1;
1556 dead_task_statistics.task_timer_wakeups_bin_2 += task->task_timer_wakeups_bin_2;
1557 dead_task_statistics.total_ptime += task->total_ptime;
1558 dead_task_statistics.total_pset_switches += task->ps_switch;
1559 dead_task_statistics.task_gpu_ns += task->task_gpu_ns;
1560 dead_task_statistics.task_energy += task->task_energy;
1561
1562 lck_spin_unlock(&dead_task_statistics_lock);
1563 lck_mtx_destroy(&task->lock, &task_lck_grp);
1564
1565 if (!ledger_get_entries(task->ledger, task_ledgers.tkm_private, &credit,
1566 &debit)) {
1567 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc);
1568 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_private.free);
1569 }
1570 if (!ledger_get_entries(task->ledger, task_ledgers.tkm_shared, &credit,
1571 &debit)) {
1572 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_shared.alloc);
1573 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_shared.free);
1574 }
1575 ledger_dereference(task->ledger);
1576
1577 #if TASK_REFERENCE_LEAK_DEBUG
1578 btlog_remove_entries_for_element(task_ref_btlog, task);
1579 #endif
1580
1581 #if CONFIG_COALITIONS
1582 task_release_coalitions(task);
1583 #endif /* CONFIG_COALITIONS */
1584
1585 bzero(task->coalition, sizeof(task->coalition));
1586
1587 #if MACH_BSD
1588 /* clean up collected information since last reference to task is gone */
1589 if (task->corpse_info) {
1590 void *corpse_info_kernel = kcdata_memory_get_begin_addr(task->corpse_info);
1591 task_crashinfo_destroy(task->corpse_info);
1592 task->corpse_info = NULL;
1593 if (corpse_info_kernel) {
1594 kfree(corpse_info_kernel, CORPSEINFO_ALLOCATION_SIZE);
1595 }
1596 }
1597 #endif
1598
1599 #if CONFIG_MACF
1600 if (task->crash_label) {
1601 mac_exc_free_label(task->crash_label);
1602 task->crash_label = NULL;
1603 }
1604 #endif
1605
1606 assert(queue_empty(&task->task_objq));
1607
1608 zfree(task_zone, task);
1609 }
1610
1611 /*
1612 * task_name_deallocate:
1613 *
1614 * Drop a reference on a task name.
1615 */
1616 void
1617 task_name_deallocate(
1618 task_name_t task_name)
1619 {
1620 return(task_deallocate((task_t)task_name));
1621 }
1622
1623 /*
1624 * task_inspect_deallocate:
1625 *
1626 * Drop a task inspection reference.
1627 */
1628 void
1629 task_inspect_deallocate(
1630 task_inspect_t task_inspect)
1631 {
1632 return(task_deallocate((task_t)task_inspect));
1633 }
1634
1635 /*
1636 * task_suspension_token_deallocate:
1637 *
1638 * Drop a reference on a task suspension token.
1639 */
1640 void
1641 task_suspension_token_deallocate(
1642 task_suspension_token_t token)
1643 {
1644 return(task_deallocate((task_t)token));
1645 }
1646
1647
1648 /*
1649 * task_collect_crash_info:
1650 *
1651 * collect crash info from bsd and mach based data
1652 */
1653 kern_return_t
1654 task_collect_crash_info(
1655 task_t task,
1656 #ifdef CONFIG_MACF
1657 struct label *crash_label,
1658 #endif
1659 int is_corpse_fork)
1660 {
1661 kern_return_t kr = KERN_SUCCESS;
1662
1663 kcdata_descriptor_t crash_data = NULL;
1664 kcdata_descriptor_t crash_data_release = NULL;
1665 mach_msg_type_number_t size = CORPSEINFO_ALLOCATION_SIZE;
1666 mach_vm_offset_t crash_data_ptr = 0;
1667 void *crash_data_kernel = NULL;
1668 void *crash_data_kernel_release = NULL;
1669 #if CONFIG_MACF
1670 struct label *label, *free_label;
1671 #endif
1672
1673 if (!corpses_enabled()) {
1674 return KERN_NOT_SUPPORTED;
1675 }
1676
1677 #if CONFIG_MACF
1678 free_label = label = mac_exc_create_label();
1679 #endif
1680
1681 task_lock(task);
1682
1683 assert(is_corpse_fork || task->bsd_info != NULL);
1684 if (task->corpse_info == NULL && (is_corpse_fork || task->bsd_info != NULL)) {
1685 #if CONFIG_MACF
1686 /* Set the crash label, used by the exception delivery mac hook */
1687 free_label = task->crash_label; // Most likely NULL.
1688 task->crash_label = label;
1689 mac_exc_update_task_crash_label(task, crash_label);
1690 #endif
1691 task_unlock(task);
1692
1693 crash_data_kernel = (void *) kalloc(CORPSEINFO_ALLOCATION_SIZE);
1694 if (crash_data_kernel == NULL) {
1695 kr = KERN_RESOURCE_SHORTAGE;
1696 goto out_no_lock;
1697 }
1698 bzero(crash_data_kernel, CORPSEINFO_ALLOCATION_SIZE);
1699 crash_data_ptr = (mach_vm_offset_t) crash_data_kernel;
1700
1701 /* Do not get a corpse ref for corpse fork */
1702 crash_data = task_crashinfo_alloc_init((mach_vm_address_t)crash_data_ptr, size,
1703 is_corpse_fork ? 0 : CORPSE_CRASHINFO_HAS_REF,
1704 KCFLAG_USE_MEMCOPY);
1705 if (crash_data) {
1706 task_lock(task);
1707 crash_data_release = task->corpse_info;
1708 crash_data_kernel_release = kcdata_memory_get_begin_addr(crash_data_release);
1709 task->corpse_info = crash_data;
1710
1711 task_unlock(task);
1712 kr = KERN_SUCCESS;
1713 } else {
1714 kfree(crash_data_kernel, CORPSEINFO_ALLOCATION_SIZE);
1715 kr = KERN_FAILURE;
1716 }
1717
1718 if (crash_data_release != NULL) {
1719 task_crashinfo_destroy(crash_data_release);
1720 }
1721 if (crash_data_kernel_release != NULL) {
1722 kfree(crash_data_kernel_release, CORPSEINFO_ALLOCATION_SIZE);
1723 }
1724 } else {
1725 task_unlock(task);
1726 }
1727
1728 out_no_lock:
1729 #if CONFIG_MACF
1730 if (free_label != NULL) {
1731 mac_exc_free_label(free_label);
1732 }
1733 #endif
1734 return kr;
1735 }
1736
1737 /*
1738 * task_deliver_crash_notification:
1739 *
1740 * Makes outcall to registered host port for a corpse.
1741 */
1742 kern_return_t
1743 task_deliver_crash_notification(
1744 task_t task,
1745 thread_t thread,
1746 exception_type_t etype,
1747 mach_exception_subcode_t subcode)
1748 {
1749 kcdata_descriptor_t crash_info = task->corpse_info;
1750 thread_t th_iter = NULL;
1751 kern_return_t kr = KERN_SUCCESS;
1752 wait_interrupt_t wsave;
1753 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
1754 ipc_port_t task_port, old_notify;
1755
1756 if (crash_info == NULL)
1757 return KERN_FAILURE;
1758
1759 task_lock(task);
1760 if (task_is_a_corpse_fork(task)) {
1761 /* Populate code with EXC_{RESOURCE,GUARD} for corpse fork */
1762 code[0] = etype;
1763 code[1] = subcode;
1764 } else {
1765 /* Populate code with EXC_CRASH for corpses */
1766 code[0] = EXC_CRASH;
1767 code[1] = 0;
1768 /* Update the code[1] if the boot-arg corpse_for_fatal_memkill is set */
1769 if (corpse_for_fatal_memkill) {
1770 code[1] = subcode;
1771 }
1772 }
1773
1774 queue_iterate(&task->threads, th_iter, thread_t, task_threads)
1775 {
1776 if (th_iter->corpse_dup == FALSE) {
1777 ipc_thread_reset(th_iter);
1778 }
1779 }
1780 task_unlock(task);
1781
1782 /* Arm the no-sender notification for taskport */
1783 task_reference(task);
1784 task_port = convert_task_to_port(task);
1785 ip_lock(task_port);
1786 assert(ip_active(task_port));
1787 ipc_port_nsrequest(task_port, task_port->ip_mscount, ipc_port_make_sonce_locked(task_port), &old_notify);
1788 /* port unlocked */
1789 assert(IP_NULL == old_notify);
1790
1791 wsave = thread_interrupt_level(THREAD_UNINT);
1792 kr = exception_triage_thread(EXC_CORPSE_NOTIFY, code, EXCEPTION_CODE_MAX, thread);
1793 if (kr != KERN_SUCCESS) {
1794 printf("Failed to send exception EXC_CORPSE_NOTIFY. error code: %d for pid %d\n", kr, task_pid(task));
1795 }
1796
1797 (void)thread_interrupt_level(wsave);
1798
1799 /*
1800 * Drop the send right on the task port; this will fire the
1801 * no-senders notification if exception delivery failed.
1802 */
1803 ipc_port_release_send(task_port);
1804 return kr;
1805 }
1806
1807 /*
1808 * task_terminate:
1809 *
1810 * Terminate the specified task. See comments on thread_terminate
1811 * (kern/thread.c) about problems with terminating the "current task."
1812 */
1813
1814 kern_return_t
1815 task_terminate(
1816 task_t task)
1817 {
1818 if (task == TASK_NULL)
1819 return (KERN_INVALID_ARGUMENT);
1820
1821 if (task->bsd_info)
1822 return (KERN_FAILURE);
1823
1824 return (task_terminate_internal(task));
1825 }
1826
1827 #if MACH_ASSERT
1828 extern int proc_pid(struct proc *);
1829 extern void proc_name_kdp(task_t t, char *buf, int size);
1830 #endif /* MACH_ASSERT */
1831
1832 #define VM_MAP_PARTIAL_REAP 0x54 /* 0x150 */
1833 static void
1834 __unused task_partial_reap(task_t task, __unused int pid)
1835 {
1836 unsigned int reclaimed_resident = 0;
1837 unsigned int reclaimed_compressed = 0;
1838 uint64_t task_page_count;
1839
1840 task_page_count = (get_task_phys_footprint(task) / PAGE_SIZE_64);
1841
1842 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_START),
1843 pid, task_page_count, 0, 0, 0);
1844
1845 vm_map_partial_reap(task->map, &reclaimed_resident, &reclaimed_compressed);
1846
1847 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_END),
1848 pid, reclaimed_resident, reclaimed_compressed, 0, 0);
1849 }
1850
1851 kern_return_t
1852 task_mark_corpse(task_t task)
1853 {
1854 kern_return_t kr = KERN_SUCCESS;
1855 thread_t self_thread;
1856 (void) self_thread;
1857 wait_interrupt_t wsave;
1858 #if CONFIG_MACF
1859 struct label *crash_label = NULL;
1860 #endif
1861
1862 assert(task != kernel_task);
1863 assert(task == current_task());
1864 assert(!task_is_a_corpse(task));
1865
1866 #if CONFIG_MACF
1867 crash_label = mac_exc_create_label_for_proc((struct proc*)task->bsd_info);
1868 #endif
1869
1870 kr = task_collect_crash_info(task,
1871 #if CONFIG_MACF
1872 crash_label,
1873 #endif
1874 FALSE);
1875 if (kr != KERN_SUCCESS) {
1876 goto out;
1877 }
1878
1879 self_thread = current_thread();
1880
1881 wsave = thread_interrupt_level(THREAD_UNINT);
1882 task_lock(task);
1883
1884 task_set_corpse_pending_report(task);
1885 task_set_corpse(task);
1886 task->crashed_thread_id = thread_tid(self_thread);
1887
1888 kr = task_start_halt_locked(task, TRUE);
1889 assert(kr == KERN_SUCCESS);
1890
1891 ipc_task_reset(task);
1892 /* Remove the naked send right for task port, needed to arm no sender notification */
1893 task_set_special_port(task, TASK_KERNEL_PORT, IPC_PORT_NULL);
1894 ipc_task_enable(task);
1895
1896 task_unlock(task);
1897 /* terminate the ipc space */
1898 ipc_space_terminate(task->itk_space);
1899
1900 /* Add it to global corpse task list */
1901 task_add_to_corpse_task_list(task);
1902
1903 task_start_halt(task);
1904 thread_terminate_internal(self_thread);
1905
1906 (void) thread_interrupt_level(wsave);
1907 assert(task->halting == TRUE);
1908
1909 out:
1910 #if CONFIG_MACF
1911 mac_exc_free_label(crash_label);
1912 #endif
1913 return kr;
1914 }
1915
1916 /*
1917 * task_clear_corpse
1918 *
1919 * Clears the corpse pending bit on task.
1920 * Removes inspection bit on the threads.
1921 */
1922 void
1923 task_clear_corpse(task_t task)
1924 {
1925 thread_t th_iter = NULL;
1926
1927 task_lock(task);
1928 queue_iterate(&task->threads, th_iter, thread_t, task_threads)
1929 {
1930 thread_mtx_lock(th_iter);
1931 th_iter->inspection = FALSE;
1932 thread_mtx_unlock(th_iter);
1933 }
1934
1935 thread_terminate_crashed_threads();
1936 /* remove the pending corpse report flag */
1937 task_clear_corpse_pending_report(task);
1938
1939 task_unlock(task);
1940 }
1941
1942 /*
1943 * task_port_notify
1944 *
1945 * Called whenever the Mach port system detects no-senders on
1946 * the task port of a corpse.
1947 * Each notification that comes in should terminate the task (corpse).
1948 */
1949 void
1950 task_port_notify(mach_msg_header_t *msg)
1951 {
1952 mach_no_senders_notification_t *notification = (void *)msg;
1953 ipc_port_t port = notification->not_header.msgh_remote_port;
1954 task_t task;
1955
1956 assert(ip_active(port));
1957 assert(IKOT_TASK == ip_kotype(port));
1958 task = (task_t) port->ip_kobject;
1959
1960 assert(task_is_a_corpse(task));
1961
1962 /* Remove the task from global corpse task list */
1963 task_remove_from_corpse_task_list(task);
1964
1965 task_clear_corpse(task);
1966 task_terminate_internal(task);
1967 }
1968
1969 /*
1970 * task_wait_till_threads_terminate_locked
1971 *
1972 * Wait till all the threads in the task are terminated.
1973 * Might release the task lock and re-acquire it.
1974 */
1975 void
1976 task_wait_till_threads_terminate_locked(task_t task)
1977 {
1978 /* wait for all the threads in the task to terminate */
1979 while (task->active_thread_count != 0) {
1980 assert_wait((event_t)&task->active_thread_count, THREAD_UNINT);
1981 task_unlock(task);
1982 thread_block(THREAD_CONTINUE_NULL);
1983
1984 task_lock(task);
1985 }
1986 }
1987
1988 /*
1989 * task_duplicate_map_and_threads
1990 *
1991 * Copy vmmap of source task.
1992 * Copy active threads from source task to destination task.
1993 * The source task is suspended during the copy.
1994 */
1995 kern_return_t
1996 task_duplicate_map_and_threads(
1997 task_t task,
1998 void *p,
1999 task_t new_task,
2000 thread_t *thread_ret,
2001 uint64_t **udata_buffer,
2002 int *size,
2003 int *num_udata)
2004 {
2005 kern_return_t kr = KERN_SUCCESS;
2006 int active;
2007 thread_t thread, self, thread_return = THREAD_NULL;
2008 thread_t new_thread = THREAD_NULL;
2009 thread_t *thread_array;
2010 uint32_t active_thread_count = 0, array_count = 0, i;
2011 vm_map_t oldmap;
2012 uint64_t *buffer = NULL;
2013 int buf_size = 0;
2014 int est_knotes = 0, num_knotes = 0;
2015
2016 self = current_thread();
2017
2018 /*
2019 * Suspend the task to copy thread state, use the internal
2020 * variant so that no user-space process can resume
2021 * the task from under us
2022 */
2023 kr = task_suspend_internal(task);
2024 if (kr != KERN_SUCCESS) {
2025 return kr;
2026 }
2027
2028 if (task->map->disable_vmentry_reuse == TRUE) {
2029 /*
2030 * Quite likely GuardMalloc (or some debugging tool)
2031 * is being used on this task and has gone through
2032 * its limit. Making a corpse will likely encounter
2033 * a lot of VM entries that will need COW.
2034 *
2035 * Skip it.
2036 */
2037 #if DEVELOPMENT || DEBUG
2038 memorystatus_abort_vm_map_fork(task);
2039 #endif
2040 task_resume_internal(task);
2041 return KERN_FAILURE;
2042 }
2043
2044 /* Check with VM if vm_map_fork is allowed for this task */
2045 if (memorystatus_allowed_vm_map_fork(task)) {
2046
2047 /* Set up the new task's vmmap, switching from the parent task's map to its COW map */
2048 oldmap = new_task->map;
2049 new_task->map = vm_map_fork(new_task->ledger,
2050 task->map,
2051 (VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
2052 VM_MAP_FORK_PRESERVE_PURGEABLE));
2053 vm_map_deallocate(oldmap);
2054
2055 /* Get all the udata pointers from kqueue */
2056 est_knotes = kevent_proc_copy_uptrs(p, NULL, 0);
2057 if (est_knotes > 0) {
2058 buf_size = (est_knotes + 32) * sizeof(uint64_t);
2059 buffer = (uint64_t *) kalloc(buf_size);
2060 num_knotes = kevent_proc_copy_uptrs(p, buffer, buf_size);
2061 if (num_knotes > est_knotes + 32) {
2062 num_knotes = est_knotes + 32;
2063 }
2064 }
2065 }
2066
2067 active_thread_count = task->active_thread_count;
2068 if (active_thread_count == 0) {
2069 if (buffer != NULL) {
2070 kfree(buffer, buf_size);
2071 }
2072 task_resume_internal(task);
2073 return KERN_FAILURE;
2074 }
2075
2076 thread_array = (thread_t *) kalloc(sizeof(thread_t) * active_thread_count);
2077
2078 /* Iterate all the threads and drop the task lock before calling thread_create_with_continuation */
2079 task_lock(task);
2080 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2081 /* Skip inactive threads */
2082 active = thread->active;
2083 if (!active) {
2084 continue;
2085 }
2086
2087 if (array_count >= active_thread_count) {
2088 break;
2089 }
2090
2091 thread_array[array_count++] = thread;
2092 thread_reference(thread);
2093 }
2094 task_unlock(task);
2095
2096 for (i = 0; i < array_count; i++) {
2097
2098 kr = thread_create_with_continuation(new_task, &new_thread, (thread_continue_t)thread_corpse_continue);
2099 if (kr != KERN_SUCCESS) {
2100 break;
2101 }
2102
2103 /* Equivalent of current thread in corpse */
2104 if (thread_array[i] == self) {
2105 thread_return = new_thread;
2106 new_task->crashed_thread_id = thread_tid(new_thread);
2107 } else {
2108 /* drop the extra ref returned by thread_create_with_continuation */
2109 thread_deallocate(new_thread);
2110 }
2111
2112 kr = thread_dup2(thread_array[i], new_thread);
2113 if (kr != KERN_SUCCESS) {
2114 thread_mtx_lock(new_thread);
2115 new_thread->corpse_dup = TRUE;
2116 thread_mtx_unlock(new_thread);
2117 continue;
2118 }
2119
2120 /* Copy thread name */
2121 bsd_copythreadname(new_thread->uthread, thread_array[i]->uthread);
2122 thread_copy_resource_info(new_thread, thread_array[i]);
2123 }
2124
2125 task_resume_internal(task);
2126
2127 for (i = 0; i < array_count; i++) {
2128 thread_deallocate(thread_array[i]);
2129 }
2130 kfree(thread_array, sizeof(thread_t) * active_thread_count);
2131
2132 if (kr == KERN_SUCCESS) {
2133 *thread_ret = thread_return;
2134 *udata_buffer = buffer;
2135 *size = buf_size;
2136 *num_udata = num_knotes;
2137 } else {
2138 if (thread_return != THREAD_NULL) {
2139 thread_deallocate(thread_return);
2140 }
2141 if (buffer != NULL) {
2142 kfree(buffer, buf_size);
2143 }
2144 }
2145
2146 return kr;
2147 }
2148
2149 #if CONFIG_SECLUDED_MEMORY
2150 extern void task_set_can_use_secluded_mem_locked(
2151 task_t task,
2152 boolean_t can_use_secluded_mem);
2153 #endif /* CONFIG_SECLUDED_MEMORY */
2154
2155 kern_return_t
2156 task_terminate_internal(
2157 task_t task)
2158 {
2159 thread_t thread, self;
2160 task_t self_task;
2161 boolean_t interrupt_save;
2162 int pid = 0;
2163
2164 assert(task != kernel_task);
2165
2166 self = current_thread();
2167 self_task = self->task;
2168
2169 /*
2170 * Get the task locked and make sure that we are not racing
2171 * with someone else trying to terminate us.
2172 */
2173 if (task == self_task)
2174 task_lock(task);
2175 else
2176 if (task < self_task) {
2177 task_lock(task);
2178 task_lock(self_task);
2179 }
2180 else {
2181 task_lock(self_task);
2182 task_lock(task);
2183 }
2184
2185 #if CONFIG_SECLUDED_MEMORY
2186 if (task->task_can_use_secluded_mem) {
2187 task_set_can_use_secluded_mem_locked(task, FALSE);
2188 }
2189 task->task_could_use_secluded_mem = FALSE;
2190 task->task_could_also_use_secluded_mem = FALSE;
2191 #endif /* CONFIG_SECLUDED_MEMORY */
2192
2193 if (!task->active) {
2194 /*
2195 * Task is already being terminated.
2196 * Just return an error. If we are dying, this will
2197 * just get us to our AST special handler and that
2198 * will get us to finalize the termination of ourselves.
2199 */
2200 task_unlock(task);
2201 if (self_task != task)
2202 task_unlock(self_task);
2203
2204 return (KERN_FAILURE);
2205 }
2206
2207 if (task_corpse_pending_report(task)) {
2208 /*
2209 * Task is marked for reporting as corpse.
2210 * Just return an error. This will
2211 * just get us to our AST special handler and that
2212 * will get us to finish the path to death
2213 */
2214 task_unlock(task);
2215 if (self_task != task)
2216 task_unlock(self_task);
2217
2218 return (KERN_FAILURE);
2219 }
2220
2221 if (self_task != task)
2222 task_unlock(self_task);
2223
2224 /*
2225 * Make sure the current thread does not get aborted out of
2226 * the waits inside these operations.
2227 */
2228 interrupt_save = thread_interrupt_level(THREAD_UNINT);
2229
2230 /*
2231 * Indicate that we want all the threads to stop executing
2232 * in user space by holding the task (we would have held
2233 * each thread independently in thread_terminate_internal -
2234 * but this way we may be more likely to already find it
2235 * held there). Mark the task inactive, and prevent
2236 * further task operations via the task port.
2237 */
2238 task_hold_locked(task);
2239 task->active = FALSE;
2240 ipc_task_disable(task);
2241
2242 #if CONFIG_TELEMETRY
2243 /*
2244 * Notify telemetry that this task is going away.
2245 */
2246 telemetry_task_ctl_locked(task, TF_TELEMETRY, 0);
2247 #endif
2248
2249 /*
2250 * Terminate each thread in the task.
2251 */
2252 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2253 thread_terminate_internal(thread);
2254 }
2255
2256 #ifdef MACH_BSD
2257 if (task->bsd_info != NULL && !task_is_exec_copy(task)) {
2258 pid = proc_pid(task->bsd_info);
2259 }
2260 #endif /* MACH_BSD */
2261
2262 task_unlock(task);
2263
2264 proc_set_task_policy(task, TASK_POLICY_ATTRIBUTE,
2265 TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);
2266
2267 /* Early object reap phase */
2268
2269 // PR-17045188: Revisit implementation
2270 // task_partial_reap(task, pid);
2271
2272 #if CONFIG_EMBEDDED
2273 /*
2274 * remove all task watchers
2275 */
2276 task_removewatchers(task);
2277
2278 #endif /* CONFIG_EMBEDDED */
2279
2280 /*
2281 * Destroy all synchronizers owned by the task.
2282 */
2283 task_synchronizer_destroy_all(task);
2284
2285 /*
2286 * Destroy the IPC space, leaving just a reference for it.
2287 */
2288 ipc_space_terminate(task->itk_space);
2289
2290 #if 00
2291 /* if some ledgers go negative on tear-down again... */
2292 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2293 task_ledgers.phys_footprint);
2294 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2295 task_ledgers.internal);
2296 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2297 task_ledgers.internal_compressed);
2298 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2299 task_ledgers.iokit_mapped);
2300 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2301 task_ledgers.alternate_accounting);
2302 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2303 task_ledgers.alternate_accounting_compressed);
2304 #endif
2305
2306 /*
2307 * If the current thread is a member of the task
2308 * being terminated, then the last reference to
2309 * the task will not be dropped until the thread
2310 * is finally reaped. To avoid incurring the
2311 * expense of removing the address space regions
2312 * at reap time, we do it explicitly here.
2313 */
2314
2315 vm_map_lock(task->map);
2316 vm_map_disable_hole_optimization(task->map);
2317 vm_map_unlock(task->map);
2318
2319 #if MACH_ASSERT
2320 /*
2321 * Identify the pmap's process, in case the pmap ledgers drift
2322 * and we have to report it.
2323 */
2324 char procname[17];
2325 if (task->bsd_info && !task_is_exec_copy(task)) {
2326 pid = proc_pid(task->bsd_info);
2327 proc_name_kdp(task, procname, sizeof (procname));
2328 } else {
2329 pid = 0;
2330 strlcpy(procname, "<unknown>", sizeof (procname));
2331 }
2332 pmap_set_process(task->map->pmap, pid, procname);
2333 #endif /* MACH_ASSERT */
2334
2335 vm_map_remove(task->map,
2336 task->map->min_offset,
2337 task->map->max_offset,
2338 /*
2339 * Final cleanup:
2340 * + no unnesting
2341 * + remove immutable mappings
2342 */
2343 (VM_MAP_REMOVE_NO_UNNESTING |
2344 VM_MAP_REMOVE_IMMUTABLE));
2345
2346 /* release our shared region */
2347 vm_shared_region_set(task, NULL);
2348
2349
2350 lck_mtx_lock(&tasks_threads_lock);
2351 queue_remove(&tasks, task, task_t, tasks);
2352 queue_enter(&terminated_tasks, task, task_t, tasks);
2353 tasks_count--;
2354 terminated_tasks_count++;
2355 lck_mtx_unlock(&tasks_threads_lock);
2356
2357 /*
2358 * We no longer need to guard against being aborted, so restore
2359 * the previous interruptible state.
2360 */
2361 thread_interrupt_level(interrupt_save);
2362
2363 #if KPC
2364 /* force the task to release all ctrs */
2365 if (task->t_kpc & TASK_KPC_FORCED_ALL_CTRS)
2366 kpc_force_all_ctrs(task, 0);
2367 #endif /* KPC */
2368
2369 #if CONFIG_COALITIONS
2370 /*
2371 * Leave our coalitions. (drop activation but not reference)
2372 */
2373 coalitions_remove_task(task);
2374 #endif
2375
2376 /*
2377 * Get rid of the task active reference on itself.
2378 */
2379 task_deallocate(task);
2380
2381 return (KERN_SUCCESS);
2382 }
2383
2384 void
2385 tasks_system_suspend(boolean_t suspend)
2386 {
2387 task_t task;
2388
2389 lck_mtx_lock(&tasks_threads_lock);
2390 assert(tasks_suspend_state != suspend);
2391 tasks_suspend_state = suspend;
2392 queue_iterate(&tasks, task, task_t, tasks) {
2393 if (task == kernel_task) {
2394 continue;
2395 }
2396 suspend ? task_suspend_internal(task) : task_resume_internal(task);
2397 }
2398 lck_mtx_unlock(&tasks_threads_lock);
2399 }
2400
2401 /*
2402 * task_start_halt:
2403 *
2404 * Shut the current task down (except for the current thread) in
2405 * preparation for dramatic changes to the task (probably exec).
2406 * We hold the task and mark all other threads in the task for
2407 * termination.
2408 */
2409 kern_return_t
2410 task_start_halt(task_t task)
2411 {
2412 kern_return_t kr = KERN_SUCCESS;
2413 task_lock(task);
2414 kr = task_start_halt_locked(task, FALSE);
2415 task_unlock(task);
2416 return kr;
2417 }
2418
2419 static kern_return_t
2420 task_start_halt_locked(task_t task, boolean_t should_mark_corpse)
2421 {
2422 thread_t thread, self;
2423 uint64_t dispatchqueue_offset;
2424
2425 assert(task != kernel_task);
2426
2427 self = current_thread();
2428
2429 if (task != self->task && !task_is_a_corpse_fork(task))
2430 return (KERN_INVALID_ARGUMENT);
2431
2432 if (task->halting || !task->active || !self->active) {
2433 /*
2434 * Task or current thread is already being terminated.
2435 * Hurry up and return out of the current kernel context
2436 * so that we run our AST special handler to terminate
2437 * ourselves.
2438 */
2439 return (KERN_FAILURE);
2440 }
2441
2442 task->halting = TRUE;
2443
2444 /*
2445 * Mark all the threads to keep them from starting any more
2446 * user-level execution. The thread_terminate_internal code
2447 * would do this on a thread by thread basis anyway, but this
2448 * gives us a better chance of not having to wait there.
2449 */
2450 task_hold_locked(task);
2451 dispatchqueue_offset = get_dispatchqueue_offset_from_proc(task->bsd_info);
2452
2453 /*
2454 * Terminate all the other threads in the task.
2455 */
2456 queue_iterate(&task->threads, thread, thread_t, task_threads)
2457 {
2458 if (should_mark_corpse) {
2459 thread_mtx_lock(thread);
2460 thread->inspection = TRUE;
2461 thread_mtx_unlock(thread);
2462 }
2463 if (thread != self)
2464 thread_terminate_internal(thread);
2465 }
2466 task->dispatchqueue_offset = dispatchqueue_offset;
2467
2468 task_release_locked(task);
2469
2470 return KERN_SUCCESS;
2471 }
2472
2473
2474 /*
2475 * task_complete_halt:
2476 *
2477 * Complete task halt by waiting for threads to terminate, then clean
2478 * up task resources (VM, port namespace, etc...) and then let the
2479 * current thread go in the (practically empty) task context.
2480 *
2481 * Note: task->halting flag is not cleared in order to avoid creation
2482 * of new threads in the old exec'ed task.
2483 */
2484 void
2485 task_complete_halt(task_t task)
2486 {
2487 task_lock(task);
2488 assert(task->halting);
2489 assert(task == current_task());
2490
2491 /*
2492 * Wait for the other threads to get shut down.
2493 * When the last other thread is reaped, we'll be
2494 * woken up.
2495 */
2496 if (task->thread_count > 1) {
2497 assert_wait((event_t)&task->halting, THREAD_UNINT);
2498 task_unlock(task);
2499 thread_block(THREAD_CONTINUE_NULL);
2500 } else {
2501 task_unlock(task);
2502 }
2503
2504 /*
2505 * Give the machine dependent code a chance
2506 * to perform cleanup of task-level resources
2507 * associated with the current thread before
2508 * ripping apart the task.
2509 */
2510 machine_task_terminate(task);
2511
2512 /*
2513 * Destroy all synchronizers owned by the task.
2514 */
2515 task_synchronizer_destroy_all(task);
2516
2517 /*
2518 * Destroy the contents of the IPC space, leaving just
2519 * a reference for it.
2520 */
2521 ipc_space_clean(task->itk_space);
2522
2523 /*
2524 * Clean out the address space, as we are going to be
2525 * getting a new one.
2526 */
2527 vm_map_remove(task->map, task->map->min_offset,
2528 task->map->max_offset,
2529 /*
2530 * Final cleanup:
2531 * + no unnesting
2532 * + remove immutable mappings
2533 */
2534 (VM_MAP_REMOVE_NO_UNNESTING |
2535 VM_MAP_REMOVE_IMMUTABLE));
2536
2537 /*
2538 * Kick out any IOKitUser handles to the task. At best they're stale,
2539 * at worst someone is racing a SUID exec.
2540 */
2541 iokit_task_terminate(task);
2542 }
2543
2544 /*
2545 * task_hold_locked:
2546 *
2547 * Suspend execution of the specified task.
2548 * This is a recursive-style suspension of the task, a count of
2549 * suspends is maintained.
2550 *
2551 * CONDITIONS: the task is locked and active.
2552 */
2553 void
2554 task_hold_locked(
2555 task_t task)
2556 {
2557 thread_t thread;
2558
2559 assert(task->active);
2560
2561 if (task->suspend_count++ > 0)
2562 return;
2563
2564 /*
2565 * Iterate through all the threads and hold them.
2566 */
2567 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2568 thread_mtx_lock(thread);
2569 thread_hold(thread);
2570 thread_mtx_unlock(thread);
2571 }
2572 }
2573
2574 /*
2575 * task_hold:
2576 *
2577 * Same as the internal routine above, except that it must lock
2578 * and verify that the task is active. This differs from task_suspend
2579 * in that it places a kernel hold on the task rather than just a
2580 * user-level hold. This keeps users from over-resuming and setting
2581 * it running out from under the kernel.
2582 *
2583 * CONDITIONS: the caller holds a reference on the task
2584 */
2585 kern_return_t
2586 task_hold(
2587 task_t task)
2588 {
2589 if (task == TASK_NULL)
2590 return (KERN_INVALID_ARGUMENT);
2591
2592 task_lock(task);
2593
2594 if (!task->active) {
2595 task_unlock(task);
2596
2597 return (KERN_FAILURE);
2598 }
2599
2600 task_hold_locked(task);
2601 task_unlock(task);
2602
2603 return (KERN_SUCCESS);
2604 }
2605
2606 kern_return_t
2607 task_wait(
2608 task_t task,
2609 boolean_t until_not_runnable)
2610 {
2611 if (task == TASK_NULL)
2612 return (KERN_INVALID_ARGUMENT);
2613
2614 task_lock(task);
2615
2616 if (!task->active) {
2617 task_unlock(task);
2618
2619 return (KERN_FAILURE);
2620 }
2621
2622 task_wait_locked(task, until_not_runnable);
2623 task_unlock(task);
2624
2625 return (KERN_SUCCESS);
2626 }
2627
2628 /*
2629 * task_wait_locked:
2630 *
2631 * Wait for all threads in task to stop.
2632 *
2633 * Conditions:
2634 * Called with task locked, active, and held.
2635 */
2636 void
2637 task_wait_locked(
2638 task_t task,
2639 boolean_t until_not_runnable)
2640 {
2641 thread_t thread, self;
2642
2643 assert(task->active);
2644 assert(task->suspend_count > 0);
2645
2646 self = current_thread();
2647
2648 /*
2649 * Iterate through all the threads and wait for them to
2650 * stop. Do not wait for the current thread if it is within
2651 * the task.
2652 */
2653 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2654 if (thread != self)
2655 thread_wait(thread, until_not_runnable);
2656 }
2657 }
2658
2659 /*
2660 * task_release_locked:
2661 *
2662 * Release a kernel hold on a task.
2663 *
2664 * CONDITIONS: the task is locked and active
2665 */
2666 void
2667 task_release_locked(
2668 task_t task)
2669 {
2670 thread_t thread;
2671
2672 assert(task->active);
2673 assert(task->suspend_count > 0);
2674
2675 if (--task->suspend_count > 0)
2676 return;
2677
2678 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2679 thread_mtx_lock(thread);
2680 thread_release(thread);
2681 thread_mtx_unlock(thread);
2682 }
2683 }
2684
2685 /*
2686 * task_release:
2687 *
2688 * Same as the internal routine above, except that it must lock
2689 * and verify that the task is active.
2690 *
2691 * CONDITIONS: The caller holds a reference to the task
2692 */
2693 kern_return_t
2694 task_release(
2695 task_t task)
2696 {
2697 if (task == TASK_NULL)
2698 return (KERN_INVALID_ARGUMENT);
2699
2700 task_lock(task);
2701
2702 if (!task->active) {
2703 task_unlock(task);
2704
2705 return (KERN_FAILURE);
2706 }
2707
2708 task_release_locked(task);
2709 task_unlock(task);
2710
2711 return (KERN_SUCCESS);
2712 }
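
/*
 * A minimal, compiled-out sketch of how an in-kernel caller holding a task
 * reference would pair task_hold(), task_wait() and task_release() to
 * quiesce a task, inspect it, and let it run again.  The helper name is
 * hypothetical and shown for illustration only.
 */
#if 0	/* illustrative in-kernel sketch -- never compiled */
static kern_return_t
example_quiesce_task(task_t task)
{
	kern_return_t kr;

	kr = task_hold(task);
	if (kr != KERN_SUCCESS)
		return kr;

	/* wait for every thread in the task to stop running user code */
	task_wait(task, FALSE);

	/* ... examine task/thread state here ... */

	task_release(task);
	return KERN_SUCCESS;
}
#endif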
2713
2714 kern_return_t
2715 task_threads(
2716 task_t task,
2717 thread_act_array_t *threads_out,
2718 mach_msg_type_number_t *count)
2719 {
2720 mach_msg_type_number_t actual;
2721 thread_t *thread_list;
2722 thread_t thread;
2723 vm_size_t size, size_needed;
2724 void *addr;
2725 unsigned int i, j;
2726
2727 if (task == TASK_NULL)
2728 return (KERN_INVALID_ARGUMENT);
2729
2730 size = 0; addr = NULL;
2731
2732 for (;;) {
2733 task_lock(task);
2734 if (!task->active) {
2735 task_unlock(task);
2736
2737 if (size != 0)
2738 kfree(addr, size);
2739
2740 return (KERN_FAILURE);
2741 }
2742
2743 actual = task->thread_count;
2744
2745 /* do we have the memory we need? */
2746 size_needed = actual * sizeof (mach_port_t);
2747 if (size_needed <= size)
2748 break;
2749
2750 /* unlock the task and allocate more memory */
2751 task_unlock(task);
2752
2753 if (size != 0)
2754 kfree(addr, size);
2755
2756 assert(size_needed > 0);
2757 size = size_needed;
2758
2759 addr = kalloc(size);
2760 if (addr == 0)
2761 return (KERN_RESOURCE_SHORTAGE);
2762 }
2763
2764 /* OK, have memory and the task is locked & active */
2765 thread_list = (thread_t *)addr;
2766
2767 i = j = 0;
2768
2769 for (thread = (thread_t)queue_first(&task->threads); i < actual;
2770 ++i, thread = (thread_t)queue_next(&thread->task_threads)) {
2771 thread_reference_internal(thread);
2772 thread_list[j++] = thread;
2773 }
2774
2775 assert(queue_end(&task->threads, (queue_entry_t)thread));
2776
2777 actual = j;
2778 size_needed = actual * sizeof (mach_port_t);
2779
2780 /* can unlock task now that we've got the thread refs */
2781 task_unlock(task);
2782
2783 if (actual == 0) {
2784 /* no threads, so return null pointer and deallocate memory */
2785
2786 *threads_out = NULL;
2787 *count = 0;
2788
2789 if (size != 0)
2790 kfree(addr, size);
2791 }
2792 else {
2793 /* if we allocated too much, must copy */
2794
2795 if (size_needed < size) {
2796 void *newaddr;
2797
2798 newaddr = kalloc(size_needed);
2799 if (newaddr == 0) {
2800 for (i = 0; i < actual; ++i)
2801 thread_deallocate(thread_list[i]);
2802 kfree(addr, size);
2803 return (KERN_RESOURCE_SHORTAGE);
2804 }
2805
2806 bcopy(addr, newaddr, size_needed);
2807 kfree(addr, size);
2808 thread_list = (thread_t *)newaddr;
2809 }
2810
2811 *threads_out = thread_list;
2812 *count = actual;
2813
2814 /* do the conversion that Mig should handle */
2815
2816 for (i = 0; i < actual; ++i)
2817 ((ipc_port_t *) thread_list)[i] = convert_thread_to_port(thread_list[i]);
2818 }
2819
2820 return (KERN_SUCCESS);
2821 }
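
/*
 * A compiled-out sketch of the user-space caller's side of task_threads():
 * the returned array is out-of-line memory owned by the caller, and each
 * element is a thread port right that must be deallocated separately.
 * The helper name is hypothetical and for illustration only.
 */
#if 0	/* illustrative user-space sketch -- never compiled here */
#include <mach/mach.h>

static void
example_list_threads(task_t target)	/* send right to the target task */
{
	thread_act_array_t threads;
	mach_msg_type_number_t count, i;

	if (task_threads(target, &threads, &count) != KERN_SUCCESS)
		return;

	/* drop each thread port right, then the out-of-line array itself */
	for (i = 0; i < count; i++)
		mach_port_deallocate(mach_task_self(), threads[i]);

	(void) vm_deallocate(mach_task_self(), (vm_address_t)threads,
	    count * sizeof(threads[0]));
}
#endif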
2822
2823 #define TASK_HOLD_NORMAL 0
2824 #define TASK_HOLD_PIDSUSPEND 1
2825 #define TASK_HOLD_LEGACY 2
2826 #define TASK_HOLD_LEGACY_ALL 3
2827
2828 static kern_return_t
2829 place_task_hold (
2830 task_t task,
2831 int mode)
2832 {
2833 if (!task->active && !task_is_a_corpse(task)) {
2834 return (KERN_FAILURE);
2835 }
2836
2837 /* Return success for corpse task */
2838 if (task_is_a_corpse(task)) {
2839 return KERN_SUCCESS;
2840 }
2841
2842 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2843 MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_SUSPEND) | DBG_FUNC_NONE,
2844 task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
2845 task->user_stop_count, task->user_stop_count + 1, 0);
2846
2847 #if MACH_ASSERT
2848 current_task()->suspends_outstanding++;
2849 #endif
2850
2851 if (mode == TASK_HOLD_LEGACY)
2852 task->legacy_stop_count++;
2853
2854 if (task->user_stop_count++ > 0) {
2855 /*
2856 * If the stop count was positive, the task is
2857 * already stopped and we can exit.
2858 */
2859 return (KERN_SUCCESS);
2860 }
2861
2862 /*
2863 * Put a kernel-level hold on the threads in the task (all
2864 * user-level task suspensions added together represent a
2865 * single kernel-level hold). We then wait for the threads
2866 * to stop executing user code.
2867 */
2868 task_hold_locked(task);
2869 task_wait_locked(task, FALSE);
2870
2871 return (KERN_SUCCESS);
2872 }
2873
2874 static kern_return_t
2875 release_task_hold (
2876 task_t task,
2877 int mode)
2878 {
2879 boolean_t release = FALSE;
2880
2881 if (!task->active && !task_is_a_corpse(task)) {
2882 return (KERN_FAILURE);
2883 }
2884
2885 /* Return success for corpse task */
2886 if (task_is_a_corpse(task)) {
2887 return KERN_SUCCESS;
2888 }
2889
2890 if (mode == TASK_HOLD_PIDSUSPEND) {
2891 if (task->pidsuspended == FALSE) {
2892 return (KERN_FAILURE);
2893 }
2894 task->pidsuspended = FALSE;
2895 }
2896
2897 if (task->user_stop_count > (task->pidsuspended ? 1 : 0)) {
2898
2899 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2900 MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_RESUME) | DBG_FUNC_NONE,
2901 task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
2902 task->user_stop_count, mode, task->legacy_stop_count);
2903
2904 #if MACH_ASSERT
2905 /*
2906 * This is obviously not robust; if we suspend one task and then resume a different one,
2907 * we'll fly under the radar. This is only meant to catch the common case of a crashed
2908 * or buggy suspender.
2909 */
2910 current_task()->suspends_outstanding--;
2911 #endif
2912
2913 if (mode == TASK_HOLD_LEGACY_ALL) {
2914 if (task->legacy_stop_count >= task->user_stop_count) {
2915 task->user_stop_count = 0;
2916 release = TRUE;
2917 } else {
2918 task->user_stop_count -= task->legacy_stop_count;
2919 }
2920 task->legacy_stop_count = 0;
2921 } else {
2922 if (mode == TASK_HOLD_LEGACY && task->legacy_stop_count > 0)
2923 task->legacy_stop_count--;
2924 if (--task->user_stop_count == 0)
2925 release = TRUE;
2926 }
2927 }
2928 else {
2929 return (KERN_FAILURE);
2930 }
2931
2932 /*
2933 * Release the task if necessary.
2934 */
2935 if (release)
2936 task_release_locked(task);
2937
2938 return (KERN_SUCCESS);
2939 }
2940
2941
2942 /*
2943 * task_suspend:
2944 *
2945 * Implement an (old-fashioned) user-level suspension on a task.
2946 *
2947 * Because the user isn't expecting to have to manage a suspension
2948 * token, we'll track it for him in the kernel in the form of a naked
2949 * send right to the task's resume port. All such send rights
2950 * account for a single suspension against the task (unlike task_suspend2()
2951 * where each caller gets a unique suspension count represented by a
2952 * unique send-once right).
2953 *
2954 * Conditions:
2955 * The caller holds a reference to the task
2956 */
2957 kern_return_t
2958 task_suspend(
2959 task_t task)
2960 {
2961 kern_return_t kr;
2962 mach_port_t port, send, old_notify;
2963 mach_port_name_t name;
2964
2965 if (task == TASK_NULL || task == kernel_task)
2966 return (KERN_INVALID_ARGUMENT);
2967
2968 task_lock(task);
2969
2970 /*
2971 * Claim a send right on the task resume port, and request a no-senders
2972 * notification on that port (if none outstanding).
2973 */
2974 if (task->itk_resume == IP_NULL) {
2975 task->itk_resume = ipc_port_alloc_kernel();
2976 if (!IP_VALID(task->itk_resume))
2977 panic("failed to create resume port");
2978 ipc_kobject_set(task->itk_resume, (ipc_kobject_t)task, IKOT_TASK_RESUME);
2979 }
2980
2981 port = task->itk_resume;
2982 ip_lock(port);
2983 assert(ip_active(port));
2984
2985 send = ipc_port_make_send_locked(port);
2986 assert(IP_VALID(send));
2987
2988 if (port->ip_nsrequest == IP_NULL) {
2989 ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
2990 assert(old_notify == IP_NULL);
2991 /* port unlocked */
2992 } else {
2993 ip_unlock(port);
2994 }
2995
2996 /*
2997 * place a legacy hold on the task.
2998 */
2999 kr = place_task_hold(task, TASK_HOLD_LEGACY);
3000 if (kr != KERN_SUCCESS) {
3001 task_unlock(task);
3002 ipc_port_release_send(send);
3003 return kr;
3004 }
3005
3006 task_unlock(task);
3007
3008 /*
3009 * Copyout the send right into the calling task's IPC space. It won't know it is there,
3010 * but we'll look it up when calling a traditional resume. Any IPC operations that
3011 * deallocate the send right will auto-release the suspension.
3012 */
3013 if ((kr = ipc_kmsg_copyout_object(current_task()->itk_space, (ipc_object_t)send,
3014 MACH_MSG_TYPE_MOVE_SEND, &name)) != KERN_SUCCESS) {
3015 printf("warning: %s(%d) failed to copyout suspension token for pid %d with error: %d\n",
3016 proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
3017 task_pid(task), kr);
3018 return (kr);
3019 }
3020
3021 return (kr);
3022 }
3023
3024 /*
3025 * task_resume:
3026 * Release a user hold on a task.
3027 *
3028 * Conditions:
3029 * The caller holds a reference to the task
3030 */
3031 kern_return_t
3032 task_resume(
3033 task_t task)
3034 {
3035 kern_return_t kr;
3036 mach_port_name_t resume_port_name;
3037 ipc_entry_t resume_port_entry;
3038 ipc_space_t space = current_task()->itk_space;
3039
3040 if (task == TASK_NULL || task == kernel_task )
3041 return (KERN_INVALID_ARGUMENT);
3042
3043 /* release a legacy task hold */
3044 task_lock(task);
3045 kr = release_task_hold(task, TASK_HOLD_LEGACY);
3046 task_unlock(task);
3047
3048 is_write_lock(space);
3049 if (is_active(space) && IP_VALID(task->itk_resume) &&
3050 ipc_hash_lookup(space, (ipc_object_t)task->itk_resume, &resume_port_name, &resume_port_entry) == TRUE) {
3051 /*
3052 * We found a suspension token in the caller's IPC space. Release a send right to indicate that
3053 * we are holding one less legacy hold on the task from this caller. If the release failed,
3054 * go ahead and drop all the rights, as someone either already released our holds or the task
3055 * is gone.
3056 */
3057 if (kr == KERN_SUCCESS)
3058 ipc_right_dealloc(space, resume_port_name, resume_port_entry);
3059 else
3060 ipc_right_destroy(space, resume_port_name, resume_port_entry, FALSE, 0);
3061 /* space unlocked */
3062 } else {
3063 is_write_unlock(space);
3064 if (kr == KERN_SUCCESS)
3065 printf("warning: %s(%d) performed out-of-band resume on pid %d\n",
3066 proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
3067 task_pid(task));
3068 }
3069
3070 return kr;
3071 }
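
/*
 * A compiled-out, user-space sketch of the legacy pairing described above:
 * each successful task_suspend() stashes a naked send right to the task's
 * resume port in the caller's IPC space, and a matching task_resume()
 * releases it.  The helper name is hypothetical and for illustration only.
 */
#if 0	/* illustrative user-space sketch -- never compiled here */
#include <mach/mach.h>

static kern_return_t
example_stop_and_go(task_t target)	/* send right to the target task */
{
	kern_return_t kr;

	kr = task_suspend(target);	/* kernel tracks the suspension for us */
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... the target is stopped; inspect or modify it here ... */

	return task_resume(target);	/* drops the matching legacy hold */
}
#endif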
3072
3073 /*
3074 * Suspend the target task.
3075 * Making/holding a token/reference/port is the caller's responsibility.
3076 */
3077 kern_return_t
3078 task_suspend_internal(task_t task)
3079 {
3080 kern_return_t kr;
3081
3082 if (task == TASK_NULL || task == kernel_task)
3083 return (KERN_INVALID_ARGUMENT);
3084
3085 task_lock(task);
3086 kr = place_task_hold(task, TASK_HOLD_NORMAL);
3087 task_unlock(task);
3088 return (kr);
3089 }
3090
3091 /*
3092 * Suspend the target task, and return a suspension token. The token
3093 * represents a reference on the suspended task.
3094 */
3095 kern_return_t
3096 task_suspend2(
3097 task_t task,
3098 task_suspension_token_t *suspend_token)
3099 {
3100 kern_return_t kr;
3101
3102 kr = task_suspend_internal(task);
3103 if (kr != KERN_SUCCESS) {
3104 *suspend_token = TASK_NULL;
3105 return (kr);
3106 }
3107
3108 /*
3109 * Take a reference on the target task and return that to the caller
3110 * as a "suspension token," which can be converted into an SO right to
3111 * the now-suspended task's resume port.
3112 */
3113 task_reference_internal(task);
3114 *suspend_token = task;
3115
3116 return (KERN_SUCCESS);
3117 }
3118
3119 /*
3120 * Resume the task
3121 * (reference/token/port management is caller's responsibility).
3122 */
3123 kern_return_t
3124 task_resume_internal(
3125 task_suspension_token_t task)
3126 {
3127 kern_return_t kr;
3128
3129 if (task == TASK_NULL || task == kernel_task)
3130 return (KERN_INVALID_ARGUMENT);
3131
3132 task_lock(task);
3133 kr = release_task_hold(task, TASK_HOLD_NORMAL);
3134 task_unlock(task);
3135 return (kr);
3136 }
3137
3138 /*
3139 * Resume the task using a suspension token. Consumes the token's ref.
3140 */
3141 kern_return_t
3142 task_resume2(
3143 task_suspension_token_t task)
3144 {
3145 kern_return_t kr;
3146
3147 kr = task_resume_internal(task);
3148 task_suspension_token_deallocate(task);
3149
3150 return (kr);
3151 }
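
/*
 * A compiled-out, in-kernel sketch of the token-based variant above:
 * task_suspend2() returns a suspension token carrying a task reference,
 * and task_resume2() both resumes the task and consumes that token.
 * The helper name is hypothetical and for illustration only.
 */
#if 0	/* illustrative in-kernel sketch -- never compiled */
static kern_return_t
example_suspend_with_token(task_t task)
{
	task_suspension_token_t token;
	kern_return_t kr;

	kr = task_suspend2(task, &token);
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... the task stays suspended while we hold the token ... */

	return task_resume2(token);	/* resumes and consumes the token's reference */
}
#endif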
3152
3153 boolean_t
3154 task_suspension_notify(mach_msg_header_t *request_header)
3155 {
3156 ipc_port_t port = (ipc_port_t) request_header->msgh_remote_port;
3157 task_t task = convert_port_to_task_suspension_token(port);
3158 mach_msg_type_number_t not_count;
3159
3160 if (task == TASK_NULL || task == kernel_task)
3161 return TRUE; /* nothing to do */
3162
3163 switch (request_header->msgh_id) {
3164
3165 case MACH_NOTIFY_SEND_ONCE:
3166 /* release the hold held by this specific send-once right */
3167 task_lock(task);
3168 release_task_hold(task, TASK_HOLD_NORMAL);
3169 task_unlock(task);
3170 break;
3171
3172 case MACH_NOTIFY_NO_SENDERS:
3173 not_count = ((mach_no_senders_notification_t *)request_header)->not_count;
3174
3175 task_lock(task);
3176 ip_lock(port);
3177 if (port->ip_mscount == not_count) {
3178
3179 /* release all the [remaining] outstanding legacy holds */
3180 assert(port->ip_nsrequest == IP_NULL);
3181 ip_unlock(port);
3182 release_task_hold(task, TASK_HOLD_LEGACY_ALL);
3183 task_unlock(task);
3184
3185 } else if (port->ip_nsrequest == IP_NULL) {
3186 ipc_port_t old_notify;
3187
3188 task_unlock(task);
3189 /* new send rights, re-arm notification at current make-send count */
3190 ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
3191 assert(old_notify == IP_NULL);
3192 /* port unlocked */
3193 } else {
3194 ip_unlock(port);
3195 task_unlock(task);
3196 }
3197 break;
3198
3199 default:
3200 break;
3201 }
3202
3203 task_suspension_token_deallocate(task); /* drop token reference */
3204 return TRUE;
3205 }
3206
3207 kern_return_t
3208 task_pidsuspend_locked(task_t task)
3209 {
3210 kern_return_t kr;
3211
3212 if (task->pidsuspended) {
3213 kr = KERN_FAILURE;
3214 goto out;
3215 }
3216
3217 task->pidsuspended = TRUE;
3218
3219 kr = place_task_hold(task, TASK_HOLD_PIDSUSPEND);
3220 if (kr != KERN_SUCCESS) {
3221 task->pidsuspended = FALSE;
3222 }
3223 out:
3224 return(kr);
3225 }
3226
3227
3228 /*
3229 * task_pidsuspend:
3230 *
3231 * Suspends a task by placing a hold on its threads.
3232 *
3233 * Conditions:
3234 * The caller holds a reference to the task
3235 */
3236 kern_return_t
3237 task_pidsuspend(
3238 task_t task)
3239 {
3240 kern_return_t kr;
3241
3242 if (task == TASK_NULL || task == kernel_task)
3243 return (KERN_INVALID_ARGUMENT);
3244
3245 task_lock(task);
3246
3247 kr = task_pidsuspend_locked(task);
3248
3249 task_unlock(task);
3250
3251 return (kr);
3252 }
3253
3254 /*
3255 * task_pidresume:
3256 * Resumes a previously suspended task.
3257 *
3258 * Conditions:
3259 * The caller holds a reference to the task
3260 */
3261 kern_return_t
3262 task_pidresume(
3263 task_t task)
3264 {
3265 kern_return_t kr;
3266
3267 if (task == TASK_NULL || task == kernel_task)
3268 return (KERN_INVALID_ARGUMENT);
3269
3270 task_lock(task);
3271
3272 #if CONFIG_FREEZE
3273
3274 while (task->changing_freeze_state) {
3275
3276 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
3277 task_unlock(task);
3278 thread_block(THREAD_CONTINUE_NULL);
3279
3280 task_lock(task);
3281 }
3282 task->changing_freeze_state = TRUE;
3283 #endif
3284
3285 kr = release_task_hold(task, TASK_HOLD_PIDSUSPEND);
3286
3287 task_unlock(task);
3288
3289 #if CONFIG_FREEZE
3290
3291 task_lock(task);
3292
3293 if (kr == KERN_SUCCESS)
3294 task->frozen = FALSE;
3295 task->changing_freeze_state = FALSE;
3296 thread_wakeup(&task->changing_freeze_state);
3297
3298 task_unlock(task);
3299 #endif
3300
3301 return (kr);
3302 }
3303
3304
3305 #if DEVELOPMENT || DEBUG
3306
3307 extern void IOSleep(int);
3308
3309 kern_return_t
3310 task_disconnect_page_mappings(task_t task)
3311 {
3312 int n;
3313
3314 if (task == TASK_NULL || task == kernel_task)
3315 return (KERN_INVALID_ARGUMENT);
3316
3317 /*
3318 * This function strips all of the mappings from the pmap for
3319 * the specified task, forcing the task to re-fault all of the
3320 * pages it is actively using; this lets us approximate the
3321 * true working set of the specified task. We only engage if at
3322 * least one of the threads in the task is runnable, but we keep
3323 * sweeping (at least for a while - the limit is arbitrarily set
3324 * at 100 sweeps, to be revisited as we gain experience) to get
3325 * a better view of which areas within a page are being visited,
3326 * as opposed to only seeing the first fault of a page after the
3327 * task becomes runnable. In the future this may block until
3328 * awakened by a thread in this task being made runnable, but
3329 * for now we are periodically polled from the user-level debug
3330 * tool driving the sysctl.
3332 */
3333 for (n = 0; n < 100; n++) {
3334 thread_t thread;
3335 boolean_t runnable;
3336 boolean_t do_unnest;
3337 int page_count;
3338
3339 runnable = FALSE;
3340 do_unnest = FALSE;
3341
3342 task_lock(task);
3343
3344 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3345
3346 if (thread->state & TH_RUN) {
3347 runnable = TRUE;
3348 break;
3349 }
3350 }
3351 if (n == 0)
3352 task->task_disconnected_count++;
3353
3354 if (task->task_unnested == FALSE) {
3355 if (runnable == TRUE) {
3356 task->task_unnested = TRUE;
3357 do_unnest = TRUE;
3358 }
3359 }
3360 task_unlock(task);
3361
3362 if (runnable == FALSE)
3363 break;
3364
3365 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_START,
3366 task, do_unnest, task->task_disconnected_count, 0, 0);
3367
3368 page_count = vm_map_disconnect_page_mappings(task->map, do_unnest);
3369
3370 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_END,
3371 task, page_count, 0, 0, 0);
3372
3373 if ((n % 5) == 4)
3374 IOSleep(1);
3375 }
3376 return (KERN_SUCCESS);
3377 }
3378
3379 #endif
3380
3381
3382 #if CONFIG_FREEZE
3383
3384 /*
3385 * task_freeze:
3386 *
3387 * Freeze a task.
3388 *
3389 * Conditions:
3390 * The caller holds a reference to the task
3391 */
3392 extern void vm_wake_compactor_swapper(void);
3393 extern queue_head_t c_swapout_list_head;
3394
3395 kern_return_t
3396 task_freeze(
3397 task_t task,
3398 uint32_t *purgeable_count,
3399 uint32_t *wired_count,
3400 uint32_t *clean_count,
3401 uint32_t *dirty_count,
3402 uint32_t dirty_budget,
3403 boolean_t *shared,
3404 boolean_t walk_only)
3405 {
3406 kern_return_t kr = KERN_SUCCESS;
3407
3408 if (task == TASK_NULL || task == kernel_task)
3409 return (KERN_INVALID_ARGUMENT);
3410
3411 task_lock(task);
3412
3413 while (task->changing_freeze_state) {
3414
3415 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
3416 task_unlock(task);
3417 thread_block(THREAD_CONTINUE_NULL);
3418
3419 task_lock(task);
3420 }
3421 if (task->frozen) {
3422 task_unlock(task);
3423 return (KERN_FAILURE);
3424 }
3425 task->changing_freeze_state = TRUE;
3426
3427 task_unlock(task);
3428
3429 if (walk_only) {
3430 panic("task_freeze - walk_only == TRUE");
3431 } else {
3432 kr = vm_map_freeze(task->map, purgeable_count, wired_count, clean_count, dirty_count, dirty_budget, shared);
3433 }
3434
3435 task_lock(task);
3436
3437 if (walk_only == FALSE && kr == KERN_SUCCESS)
3438 task->frozen = TRUE;
3439 task->changing_freeze_state = FALSE;
3440 thread_wakeup(&task->changing_freeze_state);
3441
3442 task_unlock(task);
3443
3444 if (VM_CONFIG_COMPRESSOR_IS_PRESENT) {
3445 vm_wake_compactor_swapper();
3446 /*
3447 * We do an explicit wakeup of the swapout thread here
3448 * because the compact_and_swap routines don't have
3449 * knowledge about these kinds of "per-task packed c_segs"
3450 * and so will not be evaluating whether we need to do
3451 * a wakeup there.
3452 */
3453 thread_wakeup((event_t)&c_swapout_list_head);
3454 }
3455
3456 return (kr);
3457 }
3458
3459 /*
3460 * task_thaw:
3461 *
3462 * Thaw a currently frozen task.
3463 *
3464 * Conditions:
3465 * The caller holds a reference to the task
3466 */
3467 kern_return_t
3468 task_thaw(
3469 task_t task)
3470 {
3471 if (task == TASK_NULL || task == kernel_task)
3472 return (KERN_INVALID_ARGUMENT);
3473
3474 task_lock(task);
3475
3476 while (task->changing_freeze_state) {
3477
3478 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
3479 task_unlock(task);
3480 thread_block(THREAD_CONTINUE_NULL);
3481
3482 task_lock(task);
3483 }
3484 if (!task->frozen) {
3485 task_unlock(task);
3486 return (KERN_FAILURE);
3487 }
3488 task->frozen = FALSE;
3489
3490 task_unlock(task);
3491
3492 return (KERN_SUCCESS);
3493 }
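
/*
 * A compiled-out, in-kernel sketch of pairing task_freeze() with
 * task_thaw().  The helper name and the dirty_budget parameter passed by
 * the caller are assumptions for illustration; real callers (e.g. the
 * memorystatus freezer) choose a budget appropriate to their policy.
 */
#if 0	/* illustrative in-kernel sketch -- never compiled */
static kern_return_t
example_freeze_then_thaw(task_t task, uint32_t dirty_budget)
{
	uint32_t purgeable, wired, clean, dirty;
	boolean_t shared;
	kern_return_t kr;

	kr = task_freeze(task, &purgeable, &wired, &clean, &dirty,
	    dirty_budget, &shared, FALSE);
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... later, when the task needs to run (or be inspected) again ... */

	return task_thaw(task);
}
#endif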
3494
3495 #endif /* CONFIG_FREEZE */
3496
3497 kern_return_t
3498 host_security_set_task_token(
3499 host_security_t host_security,
3500 task_t task,
3501 security_token_t sec_token,
3502 audit_token_t audit_token,
3503 host_priv_t host_priv)
3504 {
3505 ipc_port_t host_port;
3506 kern_return_t kr;
3507
3508 if (task == TASK_NULL)
3509 return(KERN_INVALID_ARGUMENT);
3510
3511 if (host_security == HOST_NULL)
3512 return(KERN_INVALID_SECURITY);
3513
3514 task_lock(task);
3515 task->sec_token = sec_token;
3516 task->audit_token = audit_token;
3517
3518 task_unlock(task);
3519
3520 if (host_priv != HOST_PRIV_NULL) {
3521 kr = host_get_host_priv_port(host_priv, &host_port);
3522 } else {
3523 kr = host_get_host_port(host_priv_self(), &host_port);
3524 }
3525 assert(kr == KERN_SUCCESS);
3526 kr = task_set_special_port(task, TASK_HOST_PORT, host_port);
3527 return(kr);
3528 }
3529
3530 kern_return_t
3531 task_send_trace_memory(
3532 task_t target_task,
3533 __unused uint32_t pid,
3534 __unused uint64_t uniqueid)
3535 {
3536 kern_return_t kr = KERN_INVALID_ARGUMENT;
3537 if (target_task == TASK_NULL)
3538 return (KERN_INVALID_ARGUMENT);
3539
3540 #if CONFIG_ATM
3541 kr = atm_send_proc_inspect_notification(target_task,
3542 pid,
3543 uniqueid);
3544
3545 #endif
3546 return (kr);
3547 }
3548 /*
3549 * This routine was added, pretty much exclusively, for registering the
3550 * RPC glue vector for in-kernel short circuited tasks. Rather than
3551 * removing it completely, I have only disabled that feature (which was
3552 * the only feature at the time). It just appears that we are going to
3553 * want to add some user data to tasks in the future (i.e. bsd info,
3554 * task names, etc...), so I left it in the formal task interface.
3555 */
3556 kern_return_t
3557 task_set_info(
3558 task_t task,
3559 task_flavor_t flavor,
3560 __unused task_info_t task_info_in, /* pointer to IN array */
3561 __unused mach_msg_type_number_t task_info_count)
3562 {
3563 if (task == TASK_NULL)
3564 return(KERN_INVALID_ARGUMENT);
3565
3566 switch (flavor) {
3567
3568 #if CONFIG_ATM
3569 case TASK_TRACE_MEMORY_INFO:
3570 {
3571 if (task_info_count != TASK_TRACE_MEMORY_INFO_COUNT)
3572 return (KERN_INVALID_ARGUMENT);
3573
3574 assert(task_info_in != NULL);
3575 task_trace_memory_info_t mem_info;
3576 mem_info = (task_trace_memory_info_t) task_info_in;
3577 kern_return_t kr = atm_register_trace_memory(task,
3578 mem_info->user_memory_address,
3579 mem_info->buffer_size);
3580 return kr;
3581 }
3582
3583 #endif
3584 default:
3585 return (KERN_INVALID_ARGUMENT);
3586 }
3587 return (KERN_SUCCESS);
3588 }
3589
3590 int radar_20146450 = 1;
3591 kern_return_t
3592 task_info(
3593 task_t task,
3594 task_flavor_t flavor,
3595 task_info_t task_info_out,
3596 mach_msg_type_number_t *task_info_count)
3597 {
3598 kern_return_t error = KERN_SUCCESS;
3599 mach_msg_type_number_t original_task_info_count;
3600
3601 if (task == TASK_NULL)
3602 return (KERN_INVALID_ARGUMENT);
3603
3604 original_task_info_count = *task_info_count;
3605 task_lock(task);
3606
3607 if ((task != current_task()) && (!task->active)) {
3608 task_unlock(task);
3609 return (KERN_INVALID_ARGUMENT);
3610 }
3611
3612 switch (flavor) {
3613
3614 case TASK_BASIC_INFO_32:
3615 case TASK_BASIC2_INFO_32:
3616 #if defined(__arm__) || defined(__arm64__)
3617 case TASK_BASIC_INFO_64:
3618 #endif
3619 {
3620 task_basic_info_32_t basic_info;
3621 vm_map_t map;
3622 clock_sec_t secs;
3623 clock_usec_t usecs;
3624
3625 if (*task_info_count < TASK_BASIC_INFO_32_COUNT) {
3626 error = KERN_INVALID_ARGUMENT;
3627 break;
3628 }
3629
3630 basic_info = (task_basic_info_32_t)task_info_out;
3631
3632 map = (task == kernel_task)? kernel_map: task->map;
3633 basic_info->virtual_size = (typeof(basic_info->virtual_size))map->size;
3634 if (flavor == TASK_BASIC2_INFO_32) {
3635 /*
3636 * The "BASIC2" flavor gets the maximum resident
3637 * size instead of the current resident size...
3638 */
3639 basic_info->resident_size = pmap_resident_max(map->pmap);
3640 } else {
3641 basic_info->resident_size = pmap_resident_count(map->pmap);
3642 }
3643 basic_info->resident_size *= PAGE_SIZE;
3644
3645 basic_info->policy = ((task != kernel_task)?
3646 POLICY_TIMESHARE: POLICY_RR);
3647 basic_info->suspend_count = task->user_stop_count;
3648
3649 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
3650 basic_info->user_time.seconds =
3651 (typeof(basic_info->user_time.seconds))secs;
3652 basic_info->user_time.microseconds = usecs;
3653
3654 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
3655 basic_info->system_time.seconds =
3656 (typeof(basic_info->system_time.seconds))secs;
3657 basic_info->system_time.microseconds = usecs;
3658
3659 *task_info_count = TASK_BASIC_INFO_32_COUNT;
3660 break;
3661 }
3662
3663 #if defined(__arm__) || defined(__arm64__)
3664 case TASK_BASIC_INFO_64_2:
3665 {
3666 task_basic_info_64_2_t basic_info;
3667 vm_map_t map;
3668 clock_sec_t secs;
3669 clock_usec_t usecs;
3670
3671 if (*task_info_count < TASK_BASIC_INFO_64_2_COUNT) {
3672 error = KERN_INVALID_ARGUMENT;
3673 break;
3674 }
3675
3676 basic_info = (task_basic_info_64_2_t)task_info_out;
3677
3678 map = (task == kernel_task)? kernel_map: task->map;
3679 basic_info->virtual_size = map->size;
3680 basic_info->resident_size =
3681 (mach_vm_size_t)(pmap_resident_count(map->pmap))
3682 * PAGE_SIZE_64;
3683
3684 basic_info->policy = ((task != kernel_task)?
3685 POLICY_TIMESHARE: POLICY_RR);
3686 basic_info->suspend_count = task->user_stop_count;
3687
3688 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
3689 basic_info->user_time.seconds =
3690 (typeof(basic_info->user_time.seconds))secs;
3691 basic_info->user_time.microseconds = usecs;
3692
3693 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
3694 basic_info->system_time.seconds =
3695 (typeof(basic_info->system_time.seconds))secs;
3696 basic_info->system_time.microseconds = usecs;
3697
3698 *task_info_count = TASK_BASIC_INFO_64_2_COUNT;
3699 break;
3700 }
3701
3702 #else /* defined(__arm__) || defined(__arm64__) */
3703 case TASK_BASIC_INFO_64:
3704 {
3705 task_basic_info_64_t basic_info;
3706 vm_map_t map;
3707 clock_sec_t secs;
3708 clock_usec_t usecs;
3709
3710 if (*task_info_count < TASK_BASIC_INFO_64_COUNT) {
3711 error = KERN_INVALID_ARGUMENT;
3712 break;
3713 }
3714
3715 basic_info = (task_basic_info_64_t)task_info_out;
3716
3717 map = (task == kernel_task)? kernel_map: task->map;
3718 basic_info->virtual_size = map->size;
3719 basic_info->resident_size =
3720 (mach_vm_size_t)(pmap_resident_count(map->pmap))
3721 * PAGE_SIZE_64;
3722
3723 basic_info->policy = ((task != kernel_task)?
3724 POLICY_TIMESHARE: POLICY_RR);
3725 basic_info->suspend_count = task->user_stop_count;
3726
3727 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
3728 basic_info->user_time.seconds =
3729 (typeof(basic_info->user_time.seconds))secs;
3730 basic_info->user_time.microseconds = usecs;
3731
3732 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
3733 basic_info->system_time.seconds =
3734 (typeof(basic_info->system_time.seconds))secs;
3735 basic_info->system_time.microseconds = usecs;
3736
3737 *task_info_count = TASK_BASIC_INFO_64_COUNT;
3738 break;
3739 }
3740 #endif /* defined(__arm__) || defined(__arm64__) */
3741
3742 case MACH_TASK_BASIC_INFO:
3743 {
3744 mach_task_basic_info_t basic_info;
3745 vm_map_t map;
3746 clock_sec_t secs;
3747 clock_usec_t usecs;
3748
3749 if (*task_info_count < MACH_TASK_BASIC_INFO_COUNT) {
3750 error = KERN_INVALID_ARGUMENT;
3751 break;
3752 }
3753
3754 basic_info = (mach_task_basic_info_t)task_info_out;
3755
3756 map = (task == kernel_task) ? kernel_map : task->map;
3757
3758 basic_info->virtual_size = map->size;
3759
3760 basic_info->resident_size =
3761 (mach_vm_size_t)(pmap_resident_count(map->pmap));
3762 basic_info->resident_size *= PAGE_SIZE_64;
3763
3764 basic_info->resident_size_max =
3765 (mach_vm_size_t)(pmap_resident_max(map->pmap));
3766 basic_info->resident_size_max *= PAGE_SIZE_64;
3767
3768 basic_info->policy = ((task != kernel_task) ?
3769 POLICY_TIMESHARE : POLICY_RR);
3770
3771 basic_info->suspend_count = task->user_stop_count;
3772
3773 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
3774 basic_info->user_time.seconds =
3775 (typeof(basic_info->user_time.seconds))secs;
3776 basic_info->user_time.microseconds = usecs;
3777
3778 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
3779 basic_info->system_time.seconds =
3780 (typeof(basic_info->system_time.seconds))secs;
3781 basic_info->system_time.microseconds = usecs;
3782
3783 *task_info_count = MACH_TASK_BASIC_INFO_COUNT;
3784 break;
3785 }
3786
3787 case TASK_THREAD_TIMES_INFO:
3788 {
3789 task_thread_times_info_t times_info;
3790 thread_t thread;
3791
3792 if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) {
3793 error = KERN_INVALID_ARGUMENT;
3794 break;
3795 }
3796
3797 times_info = (task_thread_times_info_t) task_info_out;
3798 times_info->user_time.seconds = 0;
3799 times_info->user_time.microseconds = 0;
3800 times_info->system_time.seconds = 0;
3801 times_info->system_time.microseconds = 0;
3802
3803
3804 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3805 time_value_t user_time, system_time;
3806
3807 if (thread->options & TH_OPT_IDLE_THREAD)
3808 continue;
3809
3810 thread_read_times(thread, &user_time, &system_time);
3811
3812 time_value_add(&times_info->user_time, &user_time);
3813 time_value_add(&times_info->system_time, &system_time);
3814 }
3815
3816 *task_info_count = TASK_THREAD_TIMES_INFO_COUNT;
3817 break;
3818 }
3819
3820 case TASK_ABSOLUTETIME_INFO:
3821 {
3822 task_absolutetime_info_t info;
3823 thread_t thread;
3824
3825 if (*task_info_count < TASK_ABSOLUTETIME_INFO_COUNT) {
3826 error = KERN_INVALID_ARGUMENT;
3827 break;
3828 }
3829
3830 info = (task_absolutetime_info_t)task_info_out;
3831 info->threads_user = info->threads_system = 0;
3832
3833
3834 info->total_user = task->total_user_time;
3835 info->total_system = task->total_system_time;
3836
3837 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3838 uint64_t tval;
3839 spl_t x;
3840
3841 if (thread->options & TH_OPT_IDLE_THREAD)
3842 continue;
3843
3844 x = splsched();
3845 thread_lock(thread);
3846
3847 tval = timer_grab(&thread->user_timer);
3848 info->threads_user += tval;
3849 info->total_user += tval;
3850
3851 tval = timer_grab(&thread->system_timer);
3852 if (thread->precise_user_kernel_time) {
3853 info->threads_system += tval;
3854 info->total_system += tval;
3855 } else {
3856 /* system_timer may represent either sys or user */
3857 info->threads_user += tval;
3858 info->total_user += tval;
3859 }
3860
3861 thread_unlock(thread);
3862 splx(x);
3863 }
3864
3865
3866 *task_info_count = TASK_ABSOLUTETIME_INFO_COUNT;
3867 break;
3868 }
3869
3870 case TASK_DYLD_INFO:
3871 {
3872 task_dyld_info_t info;
3873
3874 /*
3875 * We added the format field to TASK_DYLD_INFO output. For
3876 * temporary backward compatibility, accept the fact that
3877 * clients may ask for the old version - distinguished by the
3878 * size of the expected result structure.
3879 */
3880 #define TASK_LEGACY_DYLD_INFO_COUNT \
3881 offsetof(struct task_dyld_info, all_image_info_format)/sizeof(natural_t)
3882
3883 if (*task_info_count < TASK_LEGACY_DYLD_INFO_COUNT) {
3884 error = KERN_INVALID_ARGUMENT;
3885 break;
3886 }
3887
3888 info = (task_dyld_info_t)task_info_out;
3889 info->all_image_info_addr = task->all_image_info_addr;
3890 info->all_image_info_size = task->all_image_info_size;
3891
3892 /* only set format on output for those expecting it */
3893 if (*task_info_count >= TASK_DYLD_INFO_COUNT) {
3894 info->all_image_info_format = task_has_64BitAddr(task) ?
3895 TASK_DYLD_ALL_IMAGE_INFO_64 :
3896 TASK_DYLD_ALL_IMAGE_INFO_32 ;
3897 *task_info_count = TASK_DYLD_INFO_COUNT;
3898 } else {
3899 *task_info_count = TASK_LEGACY_DYLD_INFO_COUNT;
3900 }
3901 break;
3902 }
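
/*
 * A minimal user-space sketch of the compatibility rule above (assuming
 * <mach/mach.h>; names other than the Mach constants are illustrative):
 *
 *	task_dyld_info_data_t dyld_info;
 *	mach_msg_type_number_t count = TASK_DYLD_INFO_COUNT;
 *
 *	if (task_info(port, TASK_DYLD_INFO,
 *	    (task_info_t)&dyld_info, &count) == KERN_SUCCESS &&
 *	    count >= TASK_DYLD_INFO_COUNT &&
 *	    dyld_info.all_image_info_format == TASK_DYLD_ALL_IMAGE_INFO_64) {
 *		read_64bit_all_image_infos(dyld_info.all_image_info_addr,
 *		    dyld_info.all_image_info_size);
 *	}
 *
 * A legacy caller that passes only TASK_LEGACY_DYLD_INFO_COUNT still gets
 * the address and size back, but never sees all_image_info_format.
 */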
3903
3904 case TASK_EXTMOD_INFO:
3905 {
3906 task_extmod_info_t info;
3907 void *p;
3908
3909 if (*task_info_count < TASK_EXTMOD_INFO_COUNT) {
3910 error = KERN_INVALID_ARGUMENT;
3911 break;
3912 }
3913
3914 info = (task_extmod_info_t)task_info_out;
3915
3916 p = get_bsdtask_info(task);
3917 if (p) {
3918 proc_getexecutableuuid(p, info->task_uuid, sizeof(info->task_uuid));
3919 } else {
3920 bzero(info->task_uuid, sizeof(info->task_uuid));
3921 }
3922 info->extmod_statistics = task->extmod_statistics;
3923 *task_info_count = TASK_EXTMOD_INFO_COUNT;
3924
3925 break;
3926 }
3927
3928 case TASK_KERNELMEMORY_INFO:
3929 {
3930 task_kernelmemory_info_t tkm_info;
3931 ledger_amount_t credit, debit;
3932
3933 if (*task_info_count < TASK_KERNELMEMORY_INFO_COUNT) {
3934 error = KERN_INVALID_ARGUMENT;
3935 break;
3936 }
3937
3938 tkm_info = (task_kernelmemory_info_t) task_info_out;
3939 tkm_info->total_palloc = 0;
3940 tkm_info->total_pfree = 0;
3941 tkm_info->total_salloc = 0;
3942 tkm_info->total_sfree = 0;
3943
3944 if (task == kernel_task) {
3945 /*
3946 * All shared allocs/frees from other tasks count against
3947 * the kernel private memory usage. If we are looking up
3948 * info for the kernel task, gather from everywhere.
3949 */
3950 task_unlock(task);
3951
3952 /* start by accounting for all the terminated tasks against the kernel */
3953 tkm_info->total_palloc = tasks_tkm_private.alloc + tasks_tkm_shared.alloc;
3954 tkm_info->total_pfree = tasks_tkm_private.free + tasks_tkm_shared.free;
3955
3956 /* count all other task/thread shared alloc/free against the kernel */
3957 lck_mtx_lock(&tasks_threads_lock);
3958
3959 /* XXX this really shouldn't be using the function parameter 'task' as a local var! */
3960 queue_iterate(&tasks, task, task_t, tasks) {
3961 if (task == kernel_task) {
3962 if (ledger_get_entries(task->ledger,
3963 task_ledgers.tkm_private, &credit,
3964 &debit) == KERN_SUCCESS) {
3965 tkm_info->total_palloc += credit;
3966 tkm_info->total_pfree += debit;
3967 }
3968 }
3969 if (!ledger_get_entries(task->ledger,
3970 task_ledgers.tkm_shared, &credit, &debit)) {
3971 tkm_info->total_palloc += credit;
3972 tkm_info->total_pfree += debit;
3973 }
3974 }
3975 lck_mtx_unlock(&tasks_threads_lock);
3976 } else {
3977 if (!ledger_get_entries(task->ledger,
3978 task_ledgers.tkm_private, &credit, &debit)) {
3979 tkm_info->total_palloc = credit;
3980 tkm_info->total_pfree = debit;
3981 }
3982 if (!ledger_get_entries(task->ledger,
3983 task_ledgers.tkm_shared, &credit, &debit)) {
3984 tkm_info->total_salloc = credit;
3985 tkm_info->total_sfree = debit;
3986 }
3987 task_unlock(task);
3988 }
3989
3990 *task_info_count = TASK_KERNELMEMORY_INFO_COUNT;
3991 return KERN_SUCCESS;
3992 }
3993
3994 /* OBSOLETE */
3995 case TASK_SCHED_FIFO_INFO:
3996 {
3997
3998 if (*task_info_count < POLICY_FIFO_BASE_COUNT) {
3999 error = KERN_INVALID_ARGUMENT;
4000 break;
4001 }
4002
4003 error = KERN_INVALID_POLICY;
4004 break;
4005 }
4006
4007 /* OBSOLETE */
4008 case TASK_SCHED_RR_INFO:
4009 {
4010 policy_rr_base_t rr_base;
4011 uint32_t quantum_time;
4012 uint64_t quantum_ns;
4013
4014 if (*task_info_count < POLICY_RR_BASE_COUNT) {
4015 error = KERN_INVALID_ARGUMENT;
4016 break;
4017 }
4018
4019 rr_base = (policy_rr_base_t) task_info_out;
4020
4021 if (task != kernel_task) {
4022 error = KERN_INVALID_POLICY;
4023 break;
4024 }
4025
4026 rr_base->base_priority = task->priority;
4027
4028 quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
4029 absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
4030
4031 rr_base->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
4032
4033 *task_info_count = POLICY_RR_BASE_COUNT;
4034 break;
4035 }
4036
4037 /* OBSOLETE */
4038 case TASK_SCHED_TIMESHARE_INFO:
4039 {
4040 policy_timeshare_base_t ts_base;
4041
4042 if (*task_info_count < POLICY_TIMESHARE_BASE_COUNT) {
4043 error = KERN_INVALID_ARGUMENT;
4044 break;
4045 }
4046
4047 ts_base = (policy_timeshare_base_t) task_info_out;
4048
4049 if (task == kernel_task) {
4050 error = KERN_INVALID_POLICY;
4051 break;
4052 }
4053
4054 ts_base->base_priority = task->priority;
4055
4056 *task_info_count = POLICY_TIMESHARE_BASE_COUNT;
4057 break;
4058 }
4059
4060 case TASK_SECURITY_TOKEN:
4061 {
4062 security_token_t *sec_token_p;
4063
4064 if (*task_info_count < TASK_SECURITY_TOKEN_COUNT) {
4065 error = KERN_INVALID_ARGUMENT;
4066 break;
4067 }
4068
4069 sec_token_p = (security_token_t *) task_info_out;
4070
4071 *sec_token_p = task->sec_token;
4072
4073 *task_info_count = TASK_SECURITY_TOKEN_COUNT;
4074 break;
4075 }
4076
4077 case TASK_AUDIT_TOKEN:
4078 {
4079 audit_token_t *audit_token_p;
4080
4081 if (*task_info_count < TASK_AUDIT_TOKEN_COUNT) {
4082 error = KERN_INVALID_ARGUMENT;
4083 break;
4084 }
4085
4086 audit_token_p = (audit_token_t *) task_info_out;
4087
4088 *audit_token_p = task->audit_token;
4089
4090 *task_info_count = TASK_AUDIT_TOKEN_COUNT;
4091 break;
4092 }
4093
4094 case TASK_SCHED_INFO:
4095 error = KERN_INVALID_ARGUMENT;
4096 break;
4097
4098 case TASK_EVENTS_INFO:
4099 {
4100 task_events_info_t events_info;
4101 thread_t thread;
4102
4103 if (*task_info_count < TASK_EVENTS_INFO_COUNT) {
4104 error = KERN_INVALID_ARGUMENT;
4105 break;
4106 }
4107
4108 events_info = (task_events_info_t) task_info_out;
4109
4110
4111 events_info->faults = task->faults;
4112 events_info->pageins = task->pageins;
4113 events_info->cow_faults = task->cow_faults;
4114 events_info->messages_sent = task->messages_sent;
4115 events_info->messages_received = task->messages_received;
4116 events_info->syscalls_mach = task->syscalls_mach;
4117 events_info->syscalls_unix = task->syscalls_unix;
4118
4119 events_info->csw = task->c_switch;
4120
4121 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4122 events_info->csw += thread->c_switch;
4123 events_info->syscalls_mach += thread->syscalls_mach;
4124 events_info->syscalls_unix += thread->syscalls_unix;
4125 }
4126
4127
4128 *task_info_count = TASK_EVENTS_INFO_COUNT;
4129 break;
4130 }
4131 case TASK_AFFINITY_TAG_INFO:
4132 {
4133 if (*task_info_count < TASK_AFFINITY_TAG_INFO_COUNT) {
4134 error = KERN_INVALID_ARGUMENT;
4135 break;
4136 }
4137
4138 error = task_affinity_info(task, task_info_out, task_info_count);
4139 break;
4140 }
4141 case TASK_POWER_INFO:
4142 {
4143 if (*task_info_count < TASK_POWER_INFO_COUNT) {
4144 error = KERN_INVALID_ARGUMENT;
4145 break;
4146 }
4147
4148 task_power_info_locked(task, (task_power_info_t)task_info_out, NULL, NULL);
4149 break;
4150 }
4151
4152 case TASK_POWER_INFO_V2:
4153 {
4154 if (*task_info_count < TASK_POWER_INFO_V2_COUNT_OLD) {
4155 error = KERN_INVALID_ARGUMENT;
4156 break;
4157 }
4158 task_power_info_v2_t tpiv2 = (task_power_info_v2_t) task_info_out;
4159 task_power_info_locked(task, &tpiv2->cpu_energy, &tpiv2->gpu_energy, tpiv2);
4160 break;
4161 }
4162
4163 case TASK_VM_INFO:
4164 case TASK_VM_INFO_PURGEABLE:
4165 {
4166 task_vm_info_t vm_info;
4167 vm_map_t map;
4168
4169 if (*task_info_count < TASK_VM_INFO_REV0_COUNT) {
4170 error = KERN_INVALID_ARGUMENT;
4171 break;
4172 }
4173
4174 vm_info = (task_vm_info_t)task_info_out;
4175
4176 if (task == kernel_task) {
4177 map = kernel_map;
4178 /* no lock */
4179 } else {
4180 map = task->map;
4181 vm_map_lock_read(map);
4182 }
4183
4184 vm_info->virtual_size = (typeof(vm_info->virtual_size))map->size;
4185 vm_info->region_count = map->hdr.nentries;
4186 vm_info->page_size = vm_map_page_size(map);
4187
4188 vm_info->resident_size = pmap_resident_count(map->pmap);
4189 vm_info->resident_size *= PAGE_SIZE;
4190 vm_info->resident_size_peak = pmap_resident_max(map->pmap);
4191 vm_info->resident_size_peak *= PAGE_SIZE;
4192
4193 #define _VM_INFO(_name) \
4194 vm_info->_name = ((mach_vm_size_t) map->pmap->stats._name) * PAGE_SIZE
4195
4196 _VM_INFO(device);
4197 _VM_INFO(device_peak);
4198 _VM_INFO(external);
4199 _VM_INFO(external_peak);
4200 _VM_INFO(internal);
4201 _VM_INFO(internal_peak);
4202 _VM_INFO(reusable);
4203 _VM_INFO(reusable_peak);
4204 _VM_INFO(compressed);
4205 _VM_INFO(compressed_peak);
4206 _VM_INFO(compressed_lifetime);
4207
4208 vm_info->purgeable_volatile_pmap = 0;
4209 vm_info->purgeable_volatile_resident = 0;
4210 vm_info->purgeable_volatile_virtual = 0;
4211 if (task == kernel_task) {
4212 /*
4213 * We do not maintain the detailed stats for the
4214 * kernel_pmap, so just count everything as
4215 * "internal"...
4216 */
4217 vm_info->internal = vm_info->resident_size;
4218 /*
4219 * ... but since the memory held by the VM compressor
4220 * in the kernel address space ought to be attributed
4221 * to user-space tasks, we subtract it from "internal"
4222 * to give memory reporting tools a more accurate idea
4223 * of what the kernel itself is actually using, instead
4224 * of making it look like the kernel is leaking memory
4225 * when the system is under memory pressure.
4226 */
4227 vm_info->internal -= (VM_PAGE_COMPRESSOR_COUNT *
4228 PAGE_SIZE);
4229 } else {
4230 mach_vm_size_t volatile_virtual_size;
4231 mach_vm_size_t volatile_resident_size;
4232 mach_vm_size_t volatile_compressed_size;
4233 mach_vm_size_t volatile_pmap_size;
4234 mach_vm_size_t volatile_compressed_pmap_size;
4235 kern_return_t kr;
4236
4237 if (flavor == TASK_VM_INFO_PURGEABLE) {
4238 kr = vm_map_query_volatile(
4239 map,
4240 &volatile_virtual_size,
4241 &volatile_resident_size,
4242 &volatile_compressed_size,
4243 &volatile_pmap_size,
4244 &volatile_compressed_pmap_size);
4245 if (kr == KERN_SUCCESS) {
4246 vm_info->purgeable_volatile_pmap =
4247 volatile_pmap_size;
4248 if (radar_20146450) {
4249 vm_info->compressed -=
4250 volatile_compressed_pmap_size;
4251 }
4252 vm_info->purgeable_volatile_resident =
4253 volatile_resident_size;
4254 vm_info->purgeable_volatile_virtual =
4255 volatile_virtual_size;
4256 }
4257 }
4258 }
4259 *task_info_count = TASK_VM_INFO_REV0_COUNT;
4260
4261 if (original_task_info_count >= TASK_VM_INFO_REV1_COUNT) {
4262 vm_info->phys_footprint =
4263 (mach_vm_size_t) get_task_phys_footprint(task);
4264 *task_info_count = TASK_VM_INFO_REV1_COUNT;
4265 }
4266 if (original_task_info_count >= TASK_VM_INFO_REV2_COUNT) {
4267 vm_info->min_address = map->min_offset;
4268 vm_info->max_address = map->max_offset;
4269 *task_info_count = TASK_VM_INFO_REV2_COUNT;
4270 }
4271
4272 if (task != kernel_task) {
4273 vm_map_unlock_read(map);
4274 }
4275
4276 break;
4277 }
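
/*
 * A minimal user-space sketch of the revision handling above: the in/out
 * count controls how much of task_vm_info the caller gets back (assuming
 * <mach/mach.h>; error handling kept minimal):
 *
 *	task_vm_info_data_t vm_info;
 *	mach_msg_type_number_t count = TASK_VM_INFO_COUNT;
 *	mach_vm_size_t footprint = 0, span = 0;
 *
 *	if (task_info(mach_task_self(), TASK_VM_INFO,
 *	    (task_info_t)&vm_info, &count) == KERN_SUCCESS) {
 *		if (count >= TASK_VM_INFO_REV1_COUNT)
 *			footprint = vm_info.phys_footprint;
 *		if (count >= TASK_VM_INFO_REV2_COUNT)
 *			span = vm_info.max_address - vm_info.min_address;
 *	}
 *
 * Callers that only reserve TASK_VM_INFO_REV0_COUNT words get the original
 * fields and nothing more.
 */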
4278
4279 case TASK_WAIT_STATE_INFO:
4280 {
4281 /*
4282 * Deprecated flavor. Currently allowing some results until all users
4283 * stop calling it. The results may not be accurate.
4284 */
4285 task_wait_state_info_t wait_state_info;
4286 uint64_t total_sfi_ledger_val = 0;
4287
4288 if (*task_info_count < TASK_WAIT_STATE_INFO_COUNT) {
4289 error = KERN_INVALID_ARGUMENT;
4290 break;
4291 }
4292
4293 wait_state_info = (task_wait_state_info_t) task_info_out;
4294
4295 wait_state_info->total_wait_state_time = 0;
4296 bzero(wait_state_info->_reserved, sizeof(wait_state_info->_reserved));
4297
4298 #if CONFIG_SCHED_SFI
4299 int i, prev_lentry = -1;
4300 int64_t val_credit, val_debit;
4301
4302 for (i = 0; i < MAX_SFI_CLASS_ID; i++){
4303 val_credit = 0;
4304 /*
4305 * Checking prev_lentry != entry ensures that adjacent classes
4306 * which share the same ledger do not add their wait times twice.
4307 * Note: use the ledger call to get data for each individual SFI class.
4308 */
4309 if (prev_lentry != task_ledgers.sfi_wait_times[i] &&
4310 KERN_SUCCESS == ledger_get_entries(task->ledger,
4311 task_ledgers.sfi_wait_times[i], &val_credit, &val_debit)) {
4312 total_sfi_ledger_val += val_credit;
4313 }
4314 prev_lentry = task_ledgers.sfi_wait_times[i];
4315 }
4316
4317 #endif /* CONFIG_SCHED_SFI */
4318 wait_state_info->total_wait_sfi_state_time = total_sfi_ledger_val;
4319 *task_info_count = TASK_WAIT_STATE_INFO_COUNT;
4320
4321 break;
4322 }
4323 case TASK_VM_INFO_PURGEABLE_ACCOUNT:
4324 {
4325 #if DEVELOPMENT || DEBUG
4326 pvm_account_info_t acnt_info;
4327
4328 if (*task_info_count < PVM_ACCOUNT_INFO_COUNT) {
4329 error = KERN_INVALID_ARGUMENT;
4330 break;
4331 }
4332
4333 if (task_info_out == NULL) {
4334 error = KERN_INVALID_ARGUMENT;
4335 break;
4336 }
4337
4338 acnt_info = (pvm_account_info_t) task_info_out;
4339
4340 error = vm_purgeable_account(task, acnt_info);
4341
4342 *task_info_count = PVM_ACCOUNT_INFO_COUNT;
4343
4344 break;
4345 #else /* DEVELOPMENT || DEBUG */
4346 error = KERN_NOT_SUPPORTED;
4347 break;
4348 #endif /* DEVELOPMENT || DEBUG */
4349 }
4350 case TASK_FLAGS_INFO:
4351 {
4352 task_flags_info_t flags_info;
4353
4354 if (*task_info_count < TASK_FLAGS_INFO_COUNT) {
4355 error = KERN_INVALID_ARGUMENT;
4356 break;
4357 }
4358
4359 flags_info = (task_flags_info_t)task_info_out;
4360
4361 /* only publish the 64-bit flag of the task */
4362 flags_info->flags = task->t_flags & TF_64B_ADDR;
4363
4364 *task_info_count = TASK_FLAGS_INFO_COUNT;
4365 break;
4366 }
4367
4368 case TASK_DEBUG_INFO_INTERNAL:
4369 {
4370 #if DEVELOPMENT || DEBUG
4371 task_debug_info_internal_t dbg_info;
4372 if (*task_info_count < TASK_DEBUG_INFO_INTERNAL_COUNT) {
4373 error = KERN_NOT_SUPPORTED;
4374 break;
4375 }
4376
4377 if (task_info_out == NULL) {
4378 error = KERN_INVALID_ARGUMENT;
4379 break;
4380 }
4381 dbg_info = (task_debug_info_internal_t) task_info_out;
4382 dbg_info->ipc_space_size = 0;
4383 if (task->itk_space){
4384 dbg_info->ipc_space_size = task->itk_space->is_table_size;
4385 }
4386
4387 dbg_info->suspend_count = task->suspend_count;
4388
4389 error = KERN_SUCCESS;
4390 *task_info_count = TASK_DEBUG_INFO_INTERNAL_COUNT;
4391 break;
4392 #else /* DEVELOPMENT || DEBUG */
4393 error = KERN_NOT_SUPPORTED;
4394 break;
4395 #endif /* DEVELOPMENT || DEBUG */
4396 }
4397 default:
4398 error = KERN_INVALID_ARGUMENT;
4399 }
4400
4401 task_unlock(task);
4402 return (error);
4403 }
4404
4405 /*
4406 * task_info_from_user
4407 *
4408 * When task_info is called from user space, this function is
4409 * executed as the MIG server-side routine instead of calling
4410 * directly into task_info. This makes it possible to perform
4411 * additional security checks on the supplied task port before
4412 * task_info proper runs.
4413 *
4414 * In the case of TASK_DYLD_INFO, we require the more
4415 * privileged task_port, not the less-privileged task_name_port.
4416 *
4417 */
4418 kern_return_t
4419 task_info_from_user(
4420 mach_port_t task_port,
4421 task_flavor_t flavor,
4422 task_info_t task_info_out,
4423 mach_msg_type_number_t *task_info_count)
4424 {
4425 task_t task;
4426 kern_return_t ret;
4427
4428 if (flavor == TASK_DYLD_INFO)
4429 task = convert_port_to_task(task_port);
4430 else
4431 task = convert_port_to_task_name(task_port);
4432
4433 ret = task_info(task, flavor, task_info_out, task_info_count);
4434
4435 task_deallocate(task);
4436
4437 return ret;
4438 }
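
/*
 * A minimal user-space sketch of the calling side of the routine above
 * (assuming <mach/mach.h> and <stdio.h>; not compiled in the kernel).
 * mach_task_self() is a full task control port, so it satisfies both the
 * task_name_port path and the TASK_DYLD_INFO control-port requirement.
 */
#if 0
#include <mach/mach.h>
#include <stdio.h>

static void
print_basic_info(void)
{
	mach_task_basic_info_data_t info;
	mach_msg_type_number_t count = MACH_TASK_BASIC_INFO_COUNT;

	/* count is in/out: the kernel clips it to what was actually filled in */
	if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO,
	    (task_info_t)&info, &count) == KERN_SUCCESS) {
		printf("resident %llu bytes, virtual %llu bytes, suspend count %d\n",
		    (unsigned long long)info.resident_size,
		    (unsigned long long)info.virtual_size,
		    info.suspend_count);
	}
}
#endif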
4439
4440 /*
4441 * task_power_info
4442 *
4443 * Returns power stats for the task.
4444 * Note: Called with task locked.
4445 */
4446 void
4447 task_power_info_locked(
4448 task_t task,
4449 task_power_info_t info,
4450 gpu_energy_data_t ginfo,
4451 task_power_info_v2_t infov2)
4452 {
4453 thread_t thread;
4454 ledger_amount_t tmp;
4455
4456 task_lock_assert_owned(task);
4457
4458 ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
4459 (ledger_amount_t *)&info->task_interrupt_wakeups, &tmp);
4460 ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
4461 (ledger_amount_t *)&info->task_platform_idle_wakeups, &tmp);
4462
4463 info->task_timer_wakeups_bin_1 = task->task_timer_wakeups_bin_1;
4464 info->task_timer_wakeups_bin_2 = task->task_timer_wakeups_bin_2;
4465
4466 info->total_user = task->total_user_time;
4467 info->total_system = task->total_system_time;
4468
4469 #if CONFIG_EMBEDDED
4470 if (infov2) {
4471 infov2->task_energy = task->task_energy;
4472 }
4473 #endif
4474
4475 if (ginfo) {
4476 ginfo->task_gpu_utilisation = task->task_gpu_ns;
4477 }
4478
4479 if (infov2) {
4480 infov2->task_ptime = task->total_ptime;
4481 infov2->task_pset_switches = task->ps_switch;
4482 }
4483
4484 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4485 uint64_t tval;
4486 spl_t x;
4487
4488 if (thread->options & TH_OPT_IDLE_THREAD)
4489 continue;
4490
4491 x = splsched();
4492 thread_lock(thread);
4493
4494 info->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
4495 info->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
4496
4497 #if CONFIG_EMBEDDED
4498 if (infov2) {
4499 infov2->task_energy += ml_energy_stat(thread);
4500 }
4501 #endif
4502
4503 tval = timer_grab(&thread->user_timer);
4504 info->total_user += tval;
4505
4506 if (infov2) {
4507 tval = timer_grab(&thread->ptime);
4508 infov2->task_ptime += tval;
4509 infov2->task_pset_switches += thread->ps_switch;
4510 }
4511
4512 tval = timer_grab(&thread->system_timer);
4513 if (thread->precise_user_kernel_time) {
4514 info->total_system += tval;
4515 } else {
4516 /* system_timer may represent either sys or user */
4517 info->total_user += tval;
4518 }
4519
4520 if (ginfo) {
4521 ginfo->task_gpu_utilisation += ml_gpu_stat(thread);
4522 }
4523 thread_unlock(thread);
4524 splx(x);
4525 }
4526 }
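
/*
 * A minimal kernel-side sketch of the locking contract noted above: callers
 * hold the task lock across the call, and the GPU/v2 out-parameters may be
 * NULL when only the basic counters are wanted. The helper name is
 * illustrative.
 */
#if 0
static void
sample_power_info(task_t t, task_power_info_data_t *out)
{
	task_lock(t);
	task_power_info_locked(t, out, NULL, NULL);	/* no GPU or v2 data */
	task_unlock(t);
}
#endif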
4527
4528 /*
4529 * task_gpu_utilisation
4530 *
4531 * Returns the total gpu time used by all the threads of the task
4532 * (both dead and alive)
4533 */
4534 uint64_t
4535 task_gpu_utilisation(
4536 task_t task)
4537 {
4538 uint64_t gpu_time = 0;
4539 #if !CONFIG_EMBEDDED
4540 thread_t thread;
4541
4542 task_lock(task);
4543 gpu_time += task->task_gpu_ns;
4544
4545 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4546 spl_t x;
4547 x = splsched();
4548 thread_lock(thread);
4549 gpu_time += ml_gpu_stat(thread);
4550 thread_unlock(thread);
4551 splx(x);
4552 }
4553
4554 task_unlock(task);
4555 #else /* CONFIG_EMBEDDED */
4556 /* silence compiler warning */
4557 (void)task;
4558 #endif /* !CONFIG_EMBEDDED */
4559 return gpu_time;
4560 }
4561
4562 /*
4563 * task_energy
4564 *
4565 * Returns the total energy used by all the threads of the task
4566 * (both dead and alive)
4567 */
4568 uint64_t
4569 task_energy(
4570 task_t task)
4571 {
4572 uint64_t energy = 0;
4573 thread_t thread;
4574
4575 task_lock(task);
4576 energy += task->task_energy;
4577
4578 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4579 spl_t x;
4580 x = splsched();
4581 thread_lock(thread);
4582 energy += ml_energy_stat(thread);
4583 thread_unlock(thread);
4584 splx(x);
4585 }
4586
4587 task_unlock(task);
4588 return energy;
4589 }
4590
4591
4592 uint64_t
4593 task_cpu_ptime(
4594 __unused task_t task)
4595 {
4596 return 0;
4597 }
4598
4599
4600 /* This function updates the cpu time in the arrays for each
4601 * effective and requested QoS class
4602 */
4603 void
4604 task_update_cpu_time_qos_stats(
4605 task_t task,
4606 uint64_t *eqos_stats,
4607 uint64_t *rqos_stats)
4608 {
4609 if (!eqos_stats && !rqos_stats) {
4610 return;
4611 }
4612
4613 task_lock(task);
4614 thread_t thread;
4615 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4616 if (thread->options & TH_OPT_IDLE_THREAD) {
4617 continue;
4618 }
4619
4620 thread_update_qos_cpu_time(thread);
4621 }
4622
4623 if (eqos_stats) {
4624 eqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_eqos_stats.cpu_time_qos_default;
4625 eqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_eqos_stats.cpu_time_qos_maintenance;
4626 eqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_eqos_stats.cpu_time_qos_background;
4627 eqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_eqos_stats.cpu_time_qos_utility;
4628 eqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_eqos_stats.cpu_time_qos_legacy;
4629 eqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_eqos_stats.cpu_time_qos_user_initiated;
4630 eqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_eqos_stats.cpu_time_qos_user_interactive;
4631 }
4632
4633 if (rqos_stats) {
4634 rqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_rqos_stats.cpu_time_qos_default;
4635 rqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_rqos_stats.cpu_time_qos_maintenance;
4636 rqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_rqos_stats.cpu_time_qos_background;
4637 rqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_rqos_stats.cpu_time_qos_utility;
4638 rqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_rqos_stats.cpu_time_qos_legacy;
4639 rqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_rqos_stats.cpu_time_qos_user_initiated;
4640 rqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_rqos_stats.cpu_time_qos_user_interactive;
4641 }
4642
4643 task_unlock(task);
4644 }
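
/*
 * A minimal sketch of the expected calling convention: the optional arrays
 * are indexed by QoS class and accumulated into, so callers zero them first.
 * THREAD_QOS_LAST is assumed here to bound the indices used above; the
 * helper name is illustrative.
 */
#if 0
static void
snapshot_qos_cpu_time(task_t t)
{
	uint64_t eqos[THREAD_QOS_LAST] = { 0 };	/* effective QoS buckets */
	uint64_t rqos[THREAD_QOS_LAST] = { 0 };	/* requested QoS buckets */

	task_update_cpu_time_qos_stats(t, eqos, rqos);
	/* eqos[THREAD_QOS_UTILITY] etc. now hold accumulated cpu time */
}
#endif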
4645
4646 kern_return_t
4647 task_purgable_info(
4648 task_t task,
4649 task_purgable_info_t *stats)
4650 {
4651 if (task == TASK_NULL || stats == NULL)
4652 return KERN_INVALID_ARGUMENT;
4653 /* Take task reference */
4654 task_reference(task);
4655 vm_purgeable_stats((vm_purgeable_info_t)stats, task);
4656 /* Drop task reference */
4657 task_deallocate(task);
4658 return KERN_SUCCESS;
4659 }
4660
4661 void
4662 task_vtimer_set(
4663 task_t task,
4664 integer_t which)
4665 {
4666 thread_t thread;
4667 spl_t x;
4668
4669 task_lock(task);
4670
4671 task->vtimers |= which;
4672
4673 switch (which) {
4674
4675 case TASK_VTIMER_USER:
4676 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4677 x = splsched();
4678 thread_lock(thread);
4679 if (thread->precise_user_kernel_time)
4680 thread->vtimer_user_save = timer_grab(&thread->user_timer);
4681 else
4682 thread->vtimer_user_save = timer_grab(&thread->system_timer);
4683 thread_unlock(thread);
4684 splx(x);
4685 }
4686 break;
4687
4688 case TASK_VTIMER_PROF:
4689 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4690 x = splsched();
4691 thread_lock(thread);
4692 thread->vtimer_prof_save = timer_grab(&thread->user_timer);
4693 thread->vtimer_prof_save += timer_grab(&thread->system_timer);
4694 thread_unlock(thread);
4695 splx(x);
4696 }
4697 break;
4698
4699 case TASK_VTIMER_RLIM:
4700 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4701 x = splsched();
4702 thread_lock(thread);
4703 thread->vtimer_rlim_save = timer_grab(&thread->user_timer);
4704 thread->vtimer_rlim_save += timer_grab(&thread->system_timer);
4705 thread_unlock(thread);
4706 splx(x);
4707 }
4708 break;
4709 }
4710
4711 task_unlock(task);
4712 }
4713
4714 void
4715 task_vtimer_clear(
4716 task_t task,
4717 integer_t which)
4718 {
4719 assert(task == current_task());
4720
4721 task_lock(task);
4722
4723 task->vtimers &= ~which;
4724
4725 task_unlock(task);
4726 }
4727
4728 void
4729 task_vtimer_update(
4730 __unused
4731 task_t task,
4732 integer_t which,
4733 uint32_t *microsecs)
4734 {
4735 thread_t thread = current_thread();
4736 uint32_t tdelt = 0;
4737 clock_sec_t secs = 0;
4738 uint64_t tsum;
4739
4740 assert(task == current_task());
4741
4742 spl_t s = splsched();
4743 thread_lock(thread);
4744
4745 if ((task->vtimers & which) != (uint32_t)which) {
4746 thread_unlock(thread);
4747 splx(s);
4748 return;
4749 }
4750
4751 switch (which) {
4752
4753 case TASK_VTIMER_USER:
4754 if (thread->precise_user_kernel_time) {
4755 tdelt = (uint32_t)timer_delta(&thread->user_timer,
4756 &thread->vtimer_user_save);
4757 } else {
4758 tdelt = (uint32_t)timer_delta(&thread->system_timer,
4759 &thread->vtimer_user_save);
4760 }
4761 absolutetime_to_microtime(tdelt, &secs, microsecs);
4762 break;
4763
4764 case TASK_VTIMER_PROF:
4765 tsum = timer_grab(&thread->user_timer);
4766 tsum += timer_grab(&thread->system_timer);
4767 tdelt = (uint32_t)(tsum - thread->vtimer_prof_save);
4768 absolutetime_to_microtime(tdelt, &secs, microsecs);
4769 /* if the time delta is smaller than a usec, ignore */
4770 if (*microsecs != 0)
4771 thread->vtimer_prof_save = tsum;
4772 break;
4773
4774 case TASK_VTIMER_RLIM:
4775 tsum = timer_grab(&thread->user_timer);
4776 tsum += timer_grab(&thread->system_timer);
4777 tdelt = (uint32_t)(tsum - thread->vtimer_rlim_save);
4778 thread->vtimer_rlim_save = tsum;
4779 absolutetime_to_microtime(tdelt, &secs, microsecs);
4780 break;
4781 }
4782
4783 thread_unlock(thread);
4784 splx(s);
4785 }
4786
4787 /*
4788 * task_assign:
4789 *
4790 * Change the assigned processor set for the task
4791 */
4792 kern_return_t
4793 task_assign(
4794 __unused task_t task,
4795 __unused processor_set_t new_pset,
4796 __unused boolean_t assign_threads)
4797 {
4798 return(KERN_FAILURE);
4799 }
4800
4801 /*
4802 * task_assign_default:
4803 *
4804 * Version of task_assign to assign to default processor set.
4805 */
4806 kern_return_t
4807 task_assign_default(
4808 task_t task,
4809 boolean_t assign_threads)
4810 {
4811 return (task_assign(task, &pset0, assign_threads));
4812 }
4813
4814 /*
4815 * task_get_assignment
4816 *
4817 * Return name of processor set that task is assigned to.
4818 */
4819 kern_return_t
4820 task_get_assignment(
4821 task_t task,
4822 processor_set_t *pset)
4823 {
4824 if (!task || !task->active)
4825 return KERN_FAILURE;
4826
4827 *pset = &pset0;
4828
4829 return KERN_SUCCESS;
4830 }
4831
4832 uint64_t
4833 get_task_dispatchqueue_offset(
4834 task_t task)
4835 {
4836 return task->dispatchqueue_offset;
4837 }
4838
4839 /*
4840 * task_policy
4841 *
4842 * Set scheduling policy and parameters, both base and limit, for
4843 * the given task. Policy must be a policy which is enabled for the
4844 * processor set. Change contained threads if requested.
4845 */
4846 kern_return_t
4847 task_policy(
4848 __unused task_t task,
4849 __unused policy_t policy_id,
4850 __unused policy_base_t base,
4851 __unused mach_msg_type_number_t count,
4852 __unused boolean_t set_limit,
4853 __unused boolean_t change)
4854 {
4855 return(KERN_FAILURE);
4856 }
4857
4858 /*
4859 * task_set_policy
4860 *
4861 * Set scheduling policy and parameters, both base and limit, for
4862 * the given task. Policy can be any policy implemented by the
4863 * processor set, whether enabled or not. Change contained threads
4864 * if requested.
4865 */
4866 kern_return_t
4867 task_set_policy(
4868 __unused task_t task,
4869 __unused processor_set_t pset,
4870 __unused policy_t policy_id,
4871 __unused policy_base_t base,
4872 __unused mach_msg_type_number_t base_count,
4873 __unused policy_limit_t limit,
4874 __unused mach_msg_type_number_t limit_count,
4875 __unused boolean_t change)
4876 {
4877 return(KERN_FAILURE);
4878 }
4879
4880 kern_return_t
4881 task_set_ras_pc(
4882 __unused task_t task,
4883 __unused vm_offset_t pc,
4884 __unused vm_offset_t endpc)
4885 {
4886 return KERN_FAILURE;
4887 }
4888
4889 void
4890 task_synchronizer_destroy_all(task_t task)
4891 {
4892 /*
4893 * Destroy owned semaphores
4894 */
4895 semaphore_destroy_all(task);
4896 }
4897
4898 /*
4899 * Install default (machine-dependent) initial thread state
4900 * on the task. Subsequent thread creation will have this initial
4901 * state set on the thread by machine_thread_inherit_taskwide().
4902 * Flavors and structures are exactly the same as those passed to thread_set_state().
4903 */
4904 kern_return_t
4905 task_set_state(
4906 task_t task,
4907 int flavor,
4908 thread_state_t state,
4909 mach_msg_type_number_t state_count)
4910 {
4911 kern_return_t ret;
4912
4913 if (task == TASK_NULL) {
4914 return (KERN_INVALID_ARGUMENT);
4915 }
4916
4917 task_lock(task);
4918
4919 if (!task->active) {
4920 task_unlock(task);
4921 return (KERN_FAILURE);
4922 }
4923
4924 ret = machine_task_set_state(task, flavor, state, state_count);
4925
4926 task_unlock(task);
4927 return ret;
4928 }
4929
4930 /*
4931 * Examine the default (machine-dependent) initial thread state
4932 * on the task, as set by task_set_state(). Flavors and structures
4933 * are exactly the same as those passed to thread_get_state().
4934 */
4935 kern_return_t
4936 task_get_state(
4937 task_t task,
4938 int flavor,
4939 thread_state_t state,
4940 mach_msg_type_number_t *state_count)
4941 {
4942 kern_return_t ret;
4943
4944 if (task == TASK_NULL) {
4945 return (KERN_INVALID_ARGUMENT);
4946 }
4947
4948 task_lock(task);
4949
4950 if (!task->active) {
4951 task_unlock(task);
4952 return (KERN_FAILURE);
4953 }
4954
4955 ret = machine_task_get_state(task, flavor, state, state_count);
4956
4957 task_unlock(task);
4958 return ret;
4959 }
4960
4961
4962 static kern_return_t __attribute__((noinline,not_tail_called))
4963 PROC_VIOLATED_GUARD__SEND_EXC_GUARD_AND_SUSPEND(
4964 mach_exception_code_t code,
4965 mach_exception_subcode_t subcode,
4966 void *reason)
4967 {
4968 #ifdef MACH_BSD
4969 if (1 == proc_selfpid())
4970 return KERN_NOT_SUPPORTED; // initproc is immune
4971 #endif
4972 mach_exception_data_type_t codes[EXCEPTION_CODE_MAX] = {
4973 [0] = code,
4974 [1] = subcode,
4975 };
4976 task_t task = current_task();
4977 kern_return_t kr;
4978
4979 /* (See jetsam-related comments below) */
4980
4981 proc_memstat_terminated(task->bsd_info, TRUE);
4982 kr = task_enqueue_exception_with_corpse(task, EXC_GUARD, codes, 2, reason);
4983 proc_memstat_terminated(task->bsd_info, FALSE);
4984 return kr;
4985 }
4986
4987 extern kern_return_t
4988 task_violated_guard(mach_exception_code_t, mach_exception_subcode_t, void *);
4989
4990 kern_return_t
4991 task_violated_guard(
4992 mach_exception_code_t code,
4993 mach_exception_subcode_t subcode,
4994 void *reason)
4995 {
4996 return PROC_VIOLATED_GUARD__SEND_EXC_GUARD_AND_SUSPEND(code, subcode, reason);
4997 }
4998
4999
5000 #if CONFIG_MEMORYSTATUS
5001
5002 boolean_t
5003 task_get_memlimit_is_active(task_t task)
5004 {
5005 assert (task != NULL);
5006
5007 if (task->memlimit_is_active == 1) {
5008 return(TRUE);
5009 } else {
5010 return (FALSE);
5011 }
5012 }
5013
5014 void
5015 task_set_memlimit_is_active(task_t task, boolean_t memlimit_is_active)
5016 {
5017 assert (task != NULL);
5018
5019 if (memlimit_is_active) {
5020 task->memlimit_is_active = 1;
5021 } else {
5022 task->memlimit_is_active = 0;
5023 }
5024 }
5025
5026 boolean_t
5027 task_get_memlimit_is_fatal(task_t task)
5028 {
5029 assert(task != NULL);
5030
5031 if (task->memlimit_is_fatal == 1) {
5032 return(TRUE);
5033 } else {
5034 return(FALSE);
5035 }
5036 }
5037
5038 void
5039 task_set_memlimit_is_fatal(task_t task, boolean_t memlimit_is_fatal)
5040 {
5041 assert (task != NULL);
5042
5043 if (memlimit_is_fatal) {
5044 task->memlimit_is_fatal = 1;
5045 } else {
5046 task->memlimit_is_fatal = 0;
5047 }
5048 }
5049
5050 boolean_t
5051 task_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
5052 {
5053 boolean_t triggered = FALSE;
5054
5055 assert(task == current_task());
5056
5057 /*
5058 * Returns TRUE if the task has already triggered an exc_resource exception.
5059 */
5060
5061 if (memlimit_is_active) {
5062 triggered = (task->memlimit_active_exc_resource ? TRUE : FALSE);
5063 } else {
5064 triggered = (task->memlimit_inactive_exc_resource ? TRUE : FALSE);
5065 }
5066
5067 return(triggered);
5068 }
5069
5070 void
5071 task_mark_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
5072 {
5073 assert(task == current_task());
5074
5075 /*
5076 * We allow one exc_resource per process per active/inactive limit.
5077 * The limit's fatal attribute does not come into play.
5078 */
5079
5080 if (memlimit_is_active) {
5081 task->memlimit_active_exc_resource = 1;
5082 } else {
5083 task->memlimit_inactive_exc_resource = 1;
5084 }
5085 }
5086
5087 #define HWM_USERCORE_MINSPACE 250 // free space (in MB) required *after* core file creation
5088
5089 void __attribute__((noinline))
5090 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, boolean_t is_fatal)
5091 {
5092 task_t task = current_task();
5093 int pid = 0;
5094 const char *procname = "unknown";
5095 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
5096
5097 #ifdef MACH_BSD
5098 pid = proc_selfpid();
5099
5100 if (pid == 1) {
5101 /*
5102 * Cannot have ReportCrash analyzing
5103 * a suspended initproc.
5104 */
5105 return;
5106 }
5107
5108 if (task->bsd_info != NULL)
5109 procname = proc_name_address(current_task()->bsd_info);
5110 #endif
5111 #if CONFIG_COREDUMP
5112 if (hwm_user_cores) {
5113 int error;
5114 uint64_t starttime, end;
5115 clock_sec_t secs = 0;
5116 uint32_t microsecs = 0;
5117
5118 starttime = mach_absolute_time();
5119 /*
5120 * Trigger a coredump of this process. Don't proceed unless we know we won't
5121 * be filling up the disk, and ignore the core size resource limit for this
5122 * core file.
5123 */
5124 if ((error = coredump(current_task()->bsd_info, HWM_USERCORE_MINSPACE, COREDUMP_IGNORE_ULIMIT)) != 0) {
5125 printf("couldn't take coredump of %s[%d]: %d\n", procname, pid, error);
5126 }
5127 /*
5128 * coredump() leaves the task suspended.
5129 */
5130 task_resume_internal(current_task());
5131
5132 end = mach_absolute_time();
5133 absolutetime_to_microtime(end - starttime, &secs, &microsecs);
5134 printf("coredump of %s[%d] taken in %d secs %d microsecs\n",
5135 proc_name_address(current_task()->bsd_info), pid, (int)secs, microsecs);
5136 }
5137 #endif /* CONFIG_COREDUMP */
5138
5139 if (disable_exc_resource) {
5140 printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
5141 "supressed by a boot-arg.\n", procname, pid, max_footprint_mb);
5142 return;
5143 }
5144
5145 /*
5146 * A task that has triggered an EXC_RESOURCE should not be
5147 * jetsammed when the device is under memory pressure. Here
5148 * we set the P_MEMSTAT_TERMINATED flag so that the process
5149 * will be skipped if the memorystatus_thread wakes up.
5150 */
5151 proc_memstat_terminated(current_task()->bsd_info, TRUE);
5152
5153 code[0] = code[1] = 0;
5154 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY);
5155 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK);
5156 EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb);
5157
5158 /* Do not generate a corpse fork if the violation is a fatal one */
5159 if (is_fatal || exc_via_corpse_forking == 0) {
5160 /* Do not send an EXC_RESOURCE if corpse_for_fatal_memkill is set */
5161 if (corpse_for_fatal_memkill == 0) {
5162 /*
5163 * Use the _internal_ variant so that no user-space
5164 * process can resume our task from under us.
5165 */
5166 task_suspend_internal(task);
5167 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
5168 task_resume_internal(task);
5169 }
5170 } else {
5171 if (audio_active) {
5172 printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
5173 "supressed due to audio playback.\n", procname, pid, max_footprint_mb);
5174 } else {
5175 task_enqueue_exception_with_corpse(task, EXC_RESOURCE,
5176 code, EXCEPTION_CODE_MAX, NULL);
5177 }
5178 }
5179
5180 /*
5181 * After the EXC_RESOURCE has been handled, we must clear the
5182 * P_MEMSTAT_TERMINATED flag so that the process can again be
5183 * considered for jetsam if the memorystatus_thread wakes up.
5184 */
5185 proc_memstat_terminated(current_task()->bsd_info, FALSE); /* clear the flag */
5186 }
5187
5188 /*
5189 * Callback invoked when a task exceeds its physical footprint limit.
5190 */
5191 void
5192 task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1)
5193 {
5194 ledger_amount_t max_footprint, max_footprint_mb;
5195 task_t task;
5196 boolean_t is_warning;
5197 boolean_t memlimit_is_active;
5198 boolean_t memlimit_is_fatal;
5199
5200 if (warning == LEDGER_WARNING_DIPPED_BELOW) {
5201 /*
5202 * Task memory limits only provide a warning on the way up.
5203 */
5204 return;
5205 } else if (warning == LEDGER_WARNING_ROSE_ABOVE) {
5206 /*
5207 * This task is in danger of violating a memory limit:
5208 * it has exceeded a percentage level of the limit.
5209 */
5210 is_warning = TRUE;
5211 } else {
5212 /*
5213 * The task has exceeded the physical footprint limit.
5214 * This is not a warning but a true limit violation.
5215 */
5216 is_warning = FALSE;
5217 }
5218
5219 task = current_task();
5220
5221 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &max_footprint);
5222 max_footprint_mb = max_footprint >> 20;
5223
5224 memlimit_is_active = task_get_memlimit_is_active(task);
5225 memlimit_is_fatal = task_get_memlimit_is_fatal(task);
5226
5227 /*
5228 * If this is an actual violation (not a warning), then generate EXC_RESOURCE exception.
5229 * We only generate the exception once per process per memlimit (active/inactive limit).
5230 * To enforce this, we monitor state based on the memlimit's active/inactive attribute
5231 * and we disable it by marking that memlimit as exception triggered.
5232 */
5233 if ((is_warning == FALSE) && (!task_has_triggered_exc_resource(task, memlimit_is_active))) {
5234 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND((int)max_footprint_mb, memlimit_is_fatal);
5235 memorystatus_log_exception((int)max_footprint_mb, memlimit_is_active, memlimit_is_fatal);
5236 task_mark_has_triggered_exc_resource(task, memlimit_is_active);
5237 }
5238
5239 memorystatus_on_ledger_footprint_exceeded(is_warning, memlimit_is_active, memlimit_is_fatal);
5240 }
5241
5242 extern int proc_check_footprint_priv(void);
5243
5244 kern_return_t
5245 task_set_phys_footprint_limit(
5246 task_t task,
5247 int new_limit_mb,
5248 int *old_limit_mb)
5249 {
5250 kern_return_t error;
5251
5252 boolean_t memlimit_is_active;
5253 boolean_t memlimit_is_fatal;
5254
5255 if ((error = proc_check_footprint_priv())) {
5256 return (KERN_NO_ACCESS);
5257 }
5258
5259 /*
5260 * This call should probably be obsoleted.
5261 * But for now, we default to current state.
5262 */
5263 memlimit_is_active = task_get_memlimit_is_active(task);
5264 memlimit_is_fatal = task_get_memlimit_is_fatal(task);
5265
5266 return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, memlimit_is_active, memlimit_is_fatal);
5267 }
5268
5269 kern_return_t
5270 task_convert_phys_footprint_limit(
5271 int limit_mb,
5272 int *converted_limit_mb)
5273 {
5274 if (limit_mb == -1) {
5275 /*
5276 * No limit
5277 */
5278 if (max_task_footprint != 0) {
5279 *converted_limit_mb = (int)(max_task_footprint / 1024 / 1024); /* bytes to MB */
5280 } else {
5281 *converted_limit_mb = (int)(LEDGER_LIMIT_INFINITY >> 20);
5282 }
5283 } else {
5284 /* nothing to convert */
5285 *converted_limit_mb = limit_mb;
5286 }
5287 return (KERN_SUCCESS);
5288 }
5289
5290
5291 kern_return_t
5292 task_set_phys_footprint_limit_internal(
5293 task_t task,
5294 int new_limit_mb,
5295 int *old_limit_mb,
5296 boolean_t memlimit_is_active,
5297 boolean_t memlimit_is_fatal)
5298 {
5299 ledger_amount_t old;
5300
5301 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &old);
5302
5303 /*
5304 * Check that limit >> 20 will not give an "unexpected" 32-bit
5305 * result. There are, however, implicit assumptions that a -1 MB limit
5306 * equates to LEDGER_LIMIT_INFINITY.
5307 */
5308 assert(((old & 0xFFF0000000000000LL) == 0) || (old == LEDGER_LIMIT_INFINITY));
5309
5310 if (old_limit_mb) {
5311 *old_limit_mb = (int)(old >> 20);
5312 }
5313
5314 if (new_limit_mb == -1) {
5315 /*
5316 * Caller wishes to remove the limit.
5317 */
5318 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
5319 max_task_footprint ? max_task_footprint : LEDGER_LIMIT_INFINITY,
5320 max_task_footprint ? max_task_footprint_warning_level : 0);
5321
5322 task_lock(task);
5323 task_set_memlimit_is_active(task, memlimit_is_active);
5324 task_set_memlimit_is_fatal(task, memlimit_is_fatal);
5325 task_unlock(task);
5326
5327 return (KERN_SUCCESS);
5328 }
5329
5330 #ifdef CONFIG_NOMONITORS
5331 return (KERN_SUCCESS);
5332 #endif /* CONFIG_NOMONITORS */
5333
5334 task_lock(task);
5335
5336 if ((memlimit_is_active == task_get_memlimit_is_active(task)) &&
5337 (memlimit_is_fatal == task_get_memlimit_is_fatal(task)) &&
5338 (((ledger_amount_t)new_limit_mb << 20) == old)) {
5339 /*
5340 * memlimit state is not changing
5341 */
5342 task_unlock(task);
5343 return(KERN_SUCCESS);
5344 }
5345
5346 task_set_memlimit_is_active(task, memlimit_is_active);
5347 task_set_memlimit_is_fatal(task, memlimit_is_fatal);
5348
5349 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
5350 (ledger_amount_t)new_limit_mb << 20, PHYS_FOOTPRINT_WARNING_LEVEL);
5351
5352 if (task == current_task()) {
5353 ledger_check_new_balance(current_thread(), task->ledger,
5354 task_ledgers.phys_footprint);
5355 }
5356
5357 task_unlock(task);
5358
5359 return (KERN_SUCCESS);
5360 }
5361
5362 kern_return_t
5363 task_get_phys_footprint_limit(
5364 task_t task,
5365 int *limit_mb)
5366 {
5367 ledger_amount_t limit;
5368
5369 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &limit);
5370 /*
5371 * Check that limit >> 20 will not give an "unexpected" signed, 32-bit
5372 * result. There are, however, implicit assumptions that a -1 MB limit
5373 * equates to LEDGER_LIMIT_INFINITY.
5374 */
5375 assert(((limit & 0xFFF0000000000000LL) == 0) || (limit == LEDGER_LIMIT_INFINITY));
5376 *limit_mb = (int)(limit >> 20);
5377
5378 return (KERN_SUCCESS);
5379 }
5380 #else /* CONFIG_MEMORYSTATUS */
5381 kern_return_t
5382 task_set_phys_footprint_limit(
5383 __unused task_t task,
5384 __unused int new_limit_mb,
5385 __unused int *old_limit_mb)
5386 {
5387 return (KERN_FAILURE);
5388 }
5389
5390 kern_return_t
5391 task_get_phys_footprint_limit(
5392 __unused task_t task,
5393 __unused int *limit_mb)
5394 {
5395 return (KERN_FAILURE);
5396 }
5397 #endif /* CONFIG_MEMORYSTATUS */
5398
5399 /*
5400 * We need to export some functions to other components that
5401 * are currently implemented in macros within the osfmk
5402 * component. Just export them as functions of the same name.
5403 */
5404 boolean_t is_kerneltask(task_t t)
5405 {
5406 if (t == kernel_task)
5407 return (TRUE);
5408
5409 return (FALSE);
5410 }
5411
5412 boolean_t is_corpsetask(task_t t)
5413 {
5414 return (task_is_a_corpse(t));
5415 }
5416
5417 #undef current_task
5418 task_t current_task(void);
5419 task_t current_task(void)
5420 {
5421 return (current_task_fast());
5422 }
5423
5424 #undef task_reference
5425 void task_reference(task_t task);
5426 void
5427 task_reference(
5428 task_t task)
5429 {
5430 if (task != TASK_NULL)
5431 task_reference_internal(task);
5432 }
5433
5434 /* defined in bsd/kern/kern_prot.c */
5435 extern int get_audit_token_pid(audit_token_t *audit_token);
5436
5437 int task_pid(task_t task)
5438 {
5439 if (task)
5440 return get_audit_token_pid(&task->audit_token);
5441 return -1;
5442 }
5443
5444
5445 /*
5446 * This routine finds a thread in a task by its unique id
5447 * Returns a referenced thread or THREAD_NULL if the thread was not found
5448 *
5449 * TODO: This is super inefficient - it's an O(threads in task) list walk!
5450 * We should make a tid hash, or transition all tid clients to thread ports
5451 *
5452 * Precondition: No locks held (will take task lock)
5453 */
5454 thread_t
5455 task_findtid(task_t task, uint64_t tid)
5456 {
5457 thread_t self = current_thread();
5458 thread_t found_thread = THREAD_NULL;
5459 thread_t iter_thread = THREAD_NULL;
5460
5461 /* Short-circuit the lookup if we're looking up ourselves */
5462 if (tid == self->thread_id || tid == TID_NULL) {
5463 assert(self->task == task);
5464
5465 thread_reference(self);
5466
5467 return self;
5468 }
5469
5470 task_lock(task);
5471
5472 queue_iterate(&task->threads, iter_thread, thread_t, task_threads) {
5473 if (iter_thread->thread_id == tid) {
5474 found_thread = iter_thread;
5475 thread_reference(found_thread);
5476 break;
5477 }
5478 }
5479
5480 task_unlock(task);
5481
5482 return (found_thread);
5483 }
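
/*
 * A minimal sketch of the reference contract documented above: a non-NULL
 * result carries a reference that the caller must drop. The helper name is
 * illustrative.
 */
#if 0
static boolean_t
task_tid_is_live(task_t t, uint64_t tid)
{
	thread_t thread = task_findtid(t, tid);

	if (thread == THREAD_NULL)
		return FALSE;

	thread_deallocate(thread);	/* drop the reference task_findtid took */
	return TRUE;
}
#endif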
5484
5485 int pid_from_task(task_t task)
5486 {
5487 int pid = -1;
5488
5489 if (task->bsd_info) {
5490 pid = proc_pid(task->bsd_info);
5491 } else {
5492 pid = task_pid(task);
5493 }
5494
5495 return pid;
5496 }
5497
5498 /*
5499 * Control the CPU usage monitor for a task.
5500 */
5501 kern_return_t
5502 task_cpu_usage_monitor_ctl(task_t task, uint32_t *flags)
5503 {
5504 int error = KERN_SUCCESS;
5505
5506 if (*flags & CPUMON_MAKE_FATAL) {
5507 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_CPUMON;
5508 } else {
5509 error = KERN_INVALID_ARGUMENT;
5510 }
5511
5512 return error;
5513 }
5514
5515 /*
5516 * Control the wakeups monitor for a task.
5517 */
5518 kern_return_t
5519 task_wakeups_monitor_ctl(task_t task, uint32_t *flags, int32_t *rate_hz)
5520 {
5521 ledger_t ledger = task->ledger;
5522
5523 task_lock(task);
5524 if (*flags & WAKEMON_GET_PARAMS) {
5525 ledger_amount_t limit;
5526 uint64_t period;
5527
5528 ledger_get_limit(ledger, task_ledgers.interrupt_wakeups, &limit);
5529 ledger_get_period(ledger, task_ledgers.interrupt_wakeups, &period);
5530
5531 if (limit != LEDGER_LIMIT_INFINITY) {
5532 /*
5533 * An active limit means the wakeups monitor is enabled.
5534 */
5535 *rate_hz = (int32_t)(limit / (int64_t)(period / NSEC_PER_SEC));
5536 *flags = WAKEMON_ENABLE;
5537 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
5538 *flags |= WAKEMON_MAKE_FATAL;
5539 }
5540 } else {
5541 *flags = WAKEMON_DISABLE;
5542 *rate_hz = -1;
5543 }
5544
5545 /*
5546 * If WAKEMON_GET_PARAMS is present in flags, all other flags are ignored.
5547 */
5548 task_unlock(task);
5549 return KERN_SUCCESS;
5550 }
5551
5552 if (*flags & WAKEMON_ENABLE) {
5553 if (*flags & WAKEMON_SET_DEFAULTS) {
5554 *rate_hz = task_wakeups_monitor_rate;
5555 }
5556
5557 #ifndef CONFIG_NOMONITORS
5558 if (*flags & WAKEMON_MAKE_FATAL) {
5559 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
5560 }
5561 #endif /* CONFIG_NOMONITORS */
5562
5563 if (*rate_hz <= 0) {
5564 task_unlock(task);
5565 return KERN_INVALID_ARGUMENT;
5566 }
5567
5568 #ifndef CONFIG_NOMONITORS
5569 ledger_set_limit(ledger, task_ledgers.interrupt_wakeups, *rate_hz * task_wakeups_monitor_interval,
5570 task_wakeups_monitor_ustackshots_trigger_pct);
5571 ledger_set_period(ledger, task_ledgers.interrupt_wakeups, task_wakeups_monitor_interval * NSEC_PER_SEC);
5572 ledger_enable_callback(ledger, task_ledgers.interrupt_wakeups);
5573 #endif /* CONFIG_NOMONITORS */
5574 } else if (*flags & WAKEMON_DISABLE) {
5575 /*
5576 * Caller wishes to disable wakeups monitor on the task.
5577 *
5578 * Disable telemetry if it was triggered by the wakeups monitor, and
5579 * remove the limit & callback on the wakeups ledger entry.
5580 */
5581 #if CONFIG_TELEMETRY
5582 telemetry_task_ctl_locked(task, TF_WAKEMON_WARNING, 0);
5583 #endif
5584 ledger_disable_refill(ledger, task_ledgers.interrupt_wakeups);
5585 ledger_disable_callback(ledger, task_ledgers.interrupt_wakeups);
5586 }
5587
5588 task_unlock(task);
5589 return KERN_SUCCESS;
5590 }
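
/*
 * A minimal sketch of driving the wakeups monitor through the flag protocol
 * above: WAKEMON_SET_DEFAULTS picks up task_wakeups_monitor_rate, and a
 * later WAKEMON_GET_PARAMS call reads back what is now in effect. The
 * helper name is illustrative.
 */
#if 0
static void
enable_default_wakeups_monitor(task_t t)
{
	uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
	int32_t rate_hz = 0;		/* filled in from the defaults */

	task_wakeups_monitor_ctl(t, &flags, &rate_hz);

	flags = WAKEMON_GET_PARAMS;	/* report the effective limit back */
	task_wakeups_monitor_ctl(t, &flags, &rate_hz);
}
#endif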
5591
5592 void
5593 task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1)
5594 {
5595 if (warning == LEDGER_WARNING_ROSE_ABOVE) {
5596 #if CONFIG_TELEMETRY
5597 /*
5598 * This task is in danger of violating the wakeups monitor. Enable telemetry on this task
5599 * so there are micro-stackshots available if and when EXC_RESOURCE is triggered.
5600 */
5601 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 1);
5602 #endif
5603 return;
5604 }
5605
5606 #if CONFIG_TELEMETRY
5607 /*
5608 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
5609 * exceeded the limit, turn telemetry off for the task.
5610 */
5611 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 0);
5612 #endif
5613
5614 if (warning == 0) {
5615 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS();
5616 }
5617 }
5618
5619 void __attribute__((noinline))
5620 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void)
5621 {
5622 task_t task = current_task();
5623 int pid = 0;
5624 const char *procname = "unknown";
5625 boolean_t fatal;
5626 kern_return_t kr;
5627 #ifdef EXC_RESOURCE_MONITORS
5628 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
5629 #endif /* EXC_RESOURCE_MONITORS */
5630 struct ledger_entry_info lei;
5631
5632 #ifdef MACH_BSD
5633 pid = proc_selfpid();
5634 if (task->bsd_info != NULL)
5635 procname = proc_name_address(current_task()->bsd_info);
5636 #endif
5637
5638 ledger_get_entry_info(task->ledger, task_ledgers.interrupt_wakeups, &lei);
5639
5640 /*
5641 * Disable the exception notification so we don't overwhelm
5642 * the listener with an endless stream of redundant exceptions.
5643 * TODO: detect whether another thread is already reporting the violation.
5644 */
5645 uint32_t flags = WAKEMON_DISABLE;
5646 task_wakeups_monitor_ctl(task, &flags, NULL);
5647
5648 fatal = task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
5649 trace_resource_violation(RMON_CPUWAKES_VIOLATED, &lei);
5650 os_log(OS_LOG_DEFAULT, "process %s[%d] caught waking the CPU %llu times "
5651 "over ~%llu seconds, averaging %llu wakes / second and "
5652 "violating a %slimit of %llu wakes over %llu seconds.\n",
5653 procname, pid,
5654 lei.lei_balance, lei.lei_last_refill / NSEC_PER_SEC,
5655 lei.lei_last_refill == 0 ? 0 :
5656 (NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill),
5657 fatal ? "FATAL " : "",
5658 lei.lei_limit, lei.lei_refill_period / NSEC_PER_SEC);
5659
5660 kr = send_resource_violation(send_cpu_wakes_violation, task, &lei,
5661 fatal ? kRNFatalLimitFlag : 0);
5662 if (kr) {
5663 printf("send_resource_violation(CPU wakes, ...): error %#x\n", kr);
5664 }
5665
5666 #ifdef EXC_RESOURCE_MONITORS
5667 if (disable_exc_resource) {
5668 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
5669 "supressed by a boot-arg\n", procname, pid);
5670 return;
5671 }
5672 if (audio_active) {
5673 os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
5674 "supressed due to audio playback\n", procname, pid);
5675 return;
5676 }
5677 if (lei.lei_last_refill == 0) {
5678 os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
5679 "supressed due to lei.lei_last_refill = 0 \n", procname, pid);
5680 }
5681
5682 code[0] = code[1] = 0;
5683 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_WAKEUPS);
5684 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_WAKEUPS_MONITOR);
5685 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_PERMITTED(code[0],
5686 NSEC_PER_SEC * lei.lei_limit / lei.lei_refill_period);
5687 EXC_RESOURCE_CPUMONITOR_ENCODE_OBSERVATION_INTERVAL(code[0],
5688 lei.lei_last_refill);
5689 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_OBSERVED(code[1],
5690 NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill);
5691 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
5692 #endif /* EXC_RESOURCE_MONITORS */
5693
5694 if (fatal) {
5695 task_terminate_internal(task);
5696 }
5697 }
5698
5699 static boolean_t
5700 global_update_logical_writes(int64_t io_delta)
5701 {
5702 int64_t old_count, new_count;
5703 boolean_t needs_telemetry;
5704
5705 do {
5706 new_count = old_count = global_logical_writes_count;
5707 new_count += io_delta;
5708 if (new_count >= io_telemetry_limit) {
5709 new_count = 0;
5710 needs_telemetry = TRUE;
5711 } else {
5712 needs_telemetry = FALSE;
5713 }
5714 } while(!OSCompareAndSwap64(old_count, new_count, &global_logical_writes_count));
5715 return needs_telemetry;
5716 }
5717
5718 void task_update_logical_writes(task_t task, uint32_t io_size, int flags, void *vp)
5719 {
5720 int64_t io_delta = 0;
5721 boolean_t needs_telemetry = FALSE;
5722
5723 if ((!task) || (!io_size) || (!vp))
5724 return;
5725
5726 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_DATA_WRITE)) | DBG_FUNC_NONE,
5727 task_pid(task), io_size, flags, (uintptr_t)VM_KERNEL_ADDRPERM(vp), 0);
5728 DTRACE_IO4(logical_writes, struct task *, task, uint32_t, io_size, int, flags, vnode *, vp);
5729 switch(flags) {
5730 case TASK_WRITE_IMMEDIATE:
5731 OSAddAtomic64(io_size, (SInt64 *)&(task->task_immediate_writes));
5732 ledger_credit(task->ledger, task_ledgers.logical_writes, io_size);
5733 break;
5734 case TASK_WRITE_DEFERRED:
5735 OSAddAtomic64(io_size, (SInt64 *)&(task->task_deferred_writes));
5736 ledger_credit(task->ledger, task_ledgers.logical_writes, io_size);
5737 break;
5738 case TASK_WRITE_INVALIDATED:
5739 OSAddAtomic64(io_size, (SInt64 *)&(task->task_invalidated_writes));
5740 ledger_debit(task->ledger, task_ledgers.logical_writes, io_size);
5741 break;
5742 case TASK_WRITE_METADATA:
5743 OSAddAtomic64(io_size, (SInt64 *)&(task->task_metadata_writes));
5744 ledger_credit(task->ledger, task_ledgers.logical_writes, io_size);
5745 break;
5746 }
5747
5748 io_delta = (flags == TASK_WRITE_INVALIDATED) ? ((int64_t)io_size * -1ll) : ((int64_t)io_size);
5749 if (io_telemetry_limit != 0) {
5750 /* If io_telemetry_limit is 0, disable global updates and I/O telemetry */
5751 needs_telemetry = global_update_logical_writes(io_delta);
5752 if (needs_telemetry) {
5753 act_set_io_telemetry_ast(current_thread());
5754 }
5755 }
5756 }
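
/*
 * A minimal sketch of how the write flags above pair up: a buffered write is
 * first accounted as deferred, and if its pages are later invalidated before
 * reaching disk the same size is reported with TASK_WRITE_INVALIDATED, which
 * debits the logical_writes ledger again. The helper name is illustrative.
 */
#if 0
static void
account_buffered_write(task_t t, uint32_t io_size, void *vp)
{
	task_update_logical_writes(t, io_size, TASK_WRITE_DEFERRED, vp);
	/* ... later, if the dirty pages never make it to disk ... */
	task_update_logical_writes(t, io_size, TASK_WRITE_INVALIDATED, vp);
}
#endif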
5757
5758 /*
5759 * Control the I/O monitor for a task.
5760 */
5761 kern_return_t
5762 task_io_monitor_ctl(task_t task, uint32_t *flags)
5763 {
5764 ledger_t ledger = task->ledger;
5765
5766 task_lock(task);
5767 if (*flags & IOMON_ENABLE) {
5768 /* Configure the physical I/O ledger */
5769 ledger_set_limit(ledger, task_ledgers.physical_writes, (task_iomon_limit_mb * 1024 * 1024), 0);
5770 ledger_set_period(ledger, task_ledgers.physical_writes, (task_iomon_interval_secs * NSEC_PER_SEC));
5771
5772 /* Configure the logical I/O ledger */
5773 ledger_set_limit(ledger, task_ledgers.logical_writes, (task_iomon_limit_mb * 1024 * 1024), 0);
5774 ledger_set_period(ledger, task_ledgers.logical_writes, (task_iomon_interval_secs * NSEC_PER_SEC));
5775
5776 } else if (*flags & IOMON_DISABLE) {
5777 /*
5778 * Caller wishes to disable I/O monitor on the task.
5779 */
5780 ledger_disable_refill(ledger, task_ledgers.physical_writes);
5781 ledger_disable_callback(ledger, task_ledgers.physical_writes);
5782 ledger_disable_refill(ledger, task_ledgers.logical_writes);
5783 ledger_disable_callback(ledger, task_ledgers.logical_writes);
5784 }
5785
5786 task_unlock(task);
5787 return KERN_SUCCESS;
5788 }
5789
5790 void
5791 task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1)
5792 {
5793 if (warning == 0) {
5794 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO((int)param0);
5795 }
5796 }
5797
5798 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor)
5799 {
5800 int pid = 0;
5801 task_t task = current_task();
5802 #ifdef EXC_RESOURCE_MONITORS
5803 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
5804 #endif /* EXC_RESOURCE_MONITORS */
5805 struct ledger_entry_info lei;
5806 kern_return_t kr;
5807
5808 #ifdef MACH_BSD
5809 pid = proc_selfpid();
5810 #endif
5811 /*
5812 * Get the ledger entry info. We need to do this before disabling the exception
5813 * to get correct values for all fields.
5814 */
5815 switch(flavor) {
5816 case FLAVOR_IO_PHYSICAL_WRITES:
5817 ledger_get_entry_info(task->ledger, task_ledgers.physical_writes, &lei);
5818 break;
5819 case FLAVOR_IO_LOGICAL_WRITES:
5820 ledger_get_entry_info(task->ledger, task_ledgers.logical_writes, &lei);
5821 break;
5822 }
5823
5824
5825 /*
5826 * Disable the exception notification so we don't overwhelm
5827 * the listener with an endless stream of redundant exceptions.
5828 * TODO: detect whether another thread is already reporting the violation.
5829 */
5830 uint32_t flags = IOMON_DISABLE;
5831 task_io_monitor_ctl(task, &flags);
5832
5833 if (flavor == FLAVOR_IO_LOGICAL_WRITES) {
5834 trace_resource_violation(RMON_LOGWRITES_VIOLATED, &lei);
5835 }
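/*
 * The ledger tracks bytes and nanoseconds; the log line and the
 * EXC_RESOURCE code below report megabytes and seconds.  Illustrative
 * arithmetic (assumed numbers, not taken from this file): a balance of
 * 3 GiB against a 1 GiB limit refilled every 86400 s would log
 * "Task I/O: 3072 MB. [Limit : 1024 MB per 86400 secs]".
 */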
5836 os_log(OS_LOG_DEFAULT, "process [%d] caught causing excessive I/O (flavor: %d). Task I/O: %lld MB. [Limit : %lld MB per %lld secs]\n",
5837 pid, flavor, (lei.lei_balance / (1024 * 1024)), (lei.lei_limit / (1024 * 1024)), (lei.lei_refill_period / NSEC_PER_SEC));
5838
5839 kr = send_resource_violation(send_disk_writes_violation, task, &lei, kRNFlagsNone);
5840 if (kr) {
5841 printf("send_resource_violation(disk_writes, ...): error %#x\n", kr);
5842 }
5843
5844 #ifdef EXC_RESOURCE_MONITORS
5845 code[0] = code[1] = 0;
5846 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_IO);
5847 EXC_RESOURCE_ENCODE_FLAVOR(code[0], flavor);
5848 EXC_RESOURCE_IO_ENCODE_INTERVAL(code[0], (lei.lei_refill_period / NSEC_PER_SEC));
5849 EXC_RESOURCE_IO_ENCODE_LIMIT(code[0], (lei.lei_limit / (1024 * 1024)));
5850 EXC_RESOURCE_IO_ENCODE_OBSERVED(code[1], (lei.lei_balance / (1024 * 1024)));
5851 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
5852 #endif /* EXC_RESOURCE_MONITORS */
5853 }
5854
5855 /* Placeholders for the task set/get voucher interfaces */
5856 kern_return_t
5857 task_get_mach_voucher(
5858 task_t task,
5859 mach_voucher_selector_t __unused which,
5860 ipc_voucher_t *voucher)
5861 {
5862 if (TASK_NULL == task)
5863 return KERN_INVALID_TASK;
5864
5865 *voucher = NULL;
5866 return KERN_SUCCESS;
5867 }
5868
5869 kern_return_t
5870 task_set_mach_voucher(
5871 task_t task,
5872 ipc_voucher_t __unused voucher)
5873 {
5874 if (TASK_NULL == task)
5875 return KERN_INVALID_TASK;
5876
5877 return KERN_SUCCESS;
5878 }
5879
5880 kern_return_t
5881 task_swap_mach_voucher(
5882 task_t task,
5883 ipc_voucher_t new_voucher,
5884 ipc_voucher_t *in_out_old_voucher)
5885 {
5886 if (TASK_NULL == task)
5887 return KERN_INVALID_TASK;
5888
5889 *in_out_old_voucher = new_voucher;
5890 return KERN_SUCCESS;
5891 }
5892
5893 void task_set_gpu_denied(task_t task, boolean_t denied)
5894 {
5895 task_lock(task);
5896
5897 if (denied) {
5898 task->t_flags |= TF_GPU_DENIED;
5899 } else {
5900 task->t_flags &= ~TF_GPU_DENIED;
5901 }
5902
5903 task_unlock(task);
5904 }
5905
5906 boolean_t task_is_gpu_denied(task_t task)
5907 {
5908 /* We don't need the lock to read this flag */
5909 return (task->t_flags & TF_GPU_DENIED) ? TRUE : FALSE;
5910 }
5911
5912
5913 uint64_t get_task_memory_region_count(task_t task)
5914 {
5915 vm_map_t map;
5916 map = (task == kernel_task) ? kernel_map: task->map;
5917 return((uint64_t)get_map_nentries(map));
5918 }
5919
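/*
 * Emit the kdebug events describing one dyld image.  On LP64 the 16-byte
 * UUID travels as two 64-bit arguments of the first event, followed by the
 * load address and the fsid packed into a single 64-bit value; a second
 * event carries the fsobjid (object number in the low 32 bits, generation
 * in the high 32).  The 32-bit path spreads the same information across
 * three events.
 */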
5920 static void
5921 kdebug_trace_dyld_internal(uint32_t base_code,
5922 struct dyld_kernel_image_info *info)
5923 {
5924 static_assert(sizeof(info->uuid) >= 16);
5925
5926 #if defined(__LP64__)
5927 uint64_t *uuid = (uint64_t *)&(info->uuid);
5928
5929 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
5930 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code), uuid[0],
5931 uuid[1], info->load_addr,
5932 (uint64_t)info->fsid.val[0] | ((uint64_t)info->fsid.val[1] << 32),
5933 0);
5934 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
5935 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 1),
5936 (uint64_t)info->fsobjid.fid_objno |
5937 ((uint64_t)info->fsobjid.fid_generation << 32),
5938 0, 0, 0, 0);
5939 #else /* defined(__LP64__) */
5940 uint32_t *uuid = (uint32_t *)&(info->uuid);
5941
5942 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
5943 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 2), uuid[0],
5944 uuid[1], uuid[2], uuid[3], 0);
5945 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
5946 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 3),
5947 (uint32_t)info->load_addr, info->fsid.val[0], info->fsid.val[1],
5948 info->fsobjid.fid_objno, 0);
5949 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
5950 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 4),
5951 info->fsobjid.fid_generation, 0, 0, 0, 0);
5952 #endif /* !defined(__LP64__) */
5953 }
5954
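/*
 * Common helper for the task_register/unregister_dyld_image_infos entry
 * points below.  The helper consumes infos_copy: when dyld tracing is
 * disabled the copy is discarded and KERN_SUCCESS returned; otherwise the
 * copy is copied out into ipc_kernel_map, each entry is traced, and the
 * mapping is deallocated.
 */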
5955 static kern_return_t
5956 kdebug_trace_dyld(task_t task, uint32_t base_code,
5957 vm_map_copy_t infos_copy, mach_msg_type_number_t infos_len)
5958 {
5959 kern_return_t kr;
5960 dyld_kernel_image_info_array_t infos;
5961 vm_map_offset_t map_data;
5962 vm_offset_t data;
5963
5964 if (!infos_copy) {
5965 return KERN_INVALID_ADDRESS;
5966 }
5967
5968 if (!kdebug_enable ||
5969 !kdebug_debugid_enabled(KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, 0)))
5970 {
5971 vm_map_copy_discard(infos_copy);
5972 return KERN_SUCCESS;
5973 }
5974
5975 if (task == NULL || task != current_task()) {
5976 return KERN_INVALID_TASK;
5977 }
5978
5979 kr = vm_map_copyout(ipc_kernel_map, &map_data, (vm_map_copy_t)infos_copy);
5980 if (kr != KERN_SUCCESS) {
5981 return kr;
5982 }
5983
5984 infos = CAST_DOWN(dyld_kernel_image_info_array_t, map_data);
5985
5986 for (mach_msg_type_number_t i = 0; i < infos_len; i++) {
5987 kdebug_trace_dyld_internal(base_code, &(infos[i]));
5988 }
5989
5990 data = CAST_DOWN(vm_offset_t, map_data);
5991 mach_vm_deallocate(ipc_kernel_map, data, infos_len * sizeof(infos[0]));
5992 return KERN_SUCCESS;
5993 }
5994
5995 kern_return_t
5996 task_register_dyld_image_infos(task_t task,
5997 dyld_kernel_image_info_array_t infos_copy,
5998 mach_msg_type_number_t infos_len)
5999 {
6000 return kdebug_trace_dyld(task, DBG_DYLD_UUID_MAP_A,
6001 (vm_map_copy_t)infos_copy, infos_len);
6002 }
6003
6004 kern_return_t
6005 task_unregister_dyld_image_infos(task_t task,
6006 dyld_kernel_image_info_array_t infos_copy,
6007 mach_msg_type_number_t infos_len)
6008 {
6009 return kdebug_trace_dyld(task, DBG_DYLD_UUID_UNMAP_A,
6010 (vm_map_copy_t)infos_copy, infos_len);
6011 }
6012
6013 kern_return_t
6014 task_get_dyld_image_infos(__unused task_t task,
6015 __unused dyld_kernel_image_info_array_t * dyld_images,
6016 __unused mach_msg_type_number_t * dyld_imagesCnt)
6017 {
6018 return KERN_NOT_SUPPORTED;
6019 }
6020
6021 kern_return_t
6022 task_register_dyld_shared_cache_image_info(task_t task,
6023 dyld_kernel_image_info_t cache_img,
6024 __unused boolean_t no_cache,
6025 __unused boolean_t private_cache)
6026 {
6027 if (task == NULL || task != current_task()) {
6028 return KERN_INVALID_TASK;
6029 }
6030
6031 kdebug_trace_dyld_internal(DBG_DYLD_UUID_SHARED_CACHE_A, &cache_img);
6032 return KERN_SUCCESS;
6033 }
6034
6035 kern_return_t
6036 task_register_dyld_set_dyld_state(__unused task_t task,
6037 __unused uint8_t dyld_state)
6038 {
6039 return KERN_NOT_SUPPORTED;
6040 }
6041
6042 kern_return_t
6043 task_register_dyld_get_process_state(__unused task_t task,
6044 __unused dyld_kernel_process_info_t * dyld_process_state)
6045 {
6046 return KERN_NOT_SUPPORTED;
6047 }
6048
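/*
 * Illustrative sketch of a hypothetical in-kernel caller (not part of this
 * file), assuming a MONOTONIC kernel:
 *
 *	struct task_inspect_basic_counts bc;
 *	mach_msg_type_number_t cnt = TASK_INSPECT_BASIC_COUNTS_COUNT;
 *	kern_return_t kr = task_inspect((task_inspect_t)current_task(),
 *	    TASK_INSPECT_BASIC_COUNTS, (task_inspect_info_t)&bc, &cnt);
 *
 * On success bc.cycles (and bc.instructions, where MT_CORE_INSTRS is
 * defined) hold the task's fixed-counter totals and cnt is set to
 * TASK_INSPECT_BASIC_COUNTS_COUNT.
 */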
6049 kern_return_t
6050 task_inspect(task_inspect_t task_insp, task_inspect_flavor_t flavor,
6051 task_inspect_info_t info_out, mach_msg_type_number_t *size_in_out)
6052 {
6053 #if MONOTONIC
6054 task_t task = (task_t)task_insp;
6055 kern_return_t kr = KERN_SUCCESS;
6056 mach_msg_type_number_t size;
6057
6058 if (task == TASK_NULL) {
6059 return KERN_INVALID_ARGUMENT;
6060 }
6061
6062 size = *size_in_out;
6063
6064 switch (flavor) {
6065 case TASK_INSPECT_BASIC_COUNTS: {
6066 struct task_inspect_basic_counts *bc;
6067 uint64_t task_counts[MT_CORE_NFIXED];
6068
6069 if (size < TASK_INSPECT_BASIC_COUNTS_COUNT) {
6070 kr = KERN_INVALID_ARGUMENT;
6071 break;
6072 }
6073
6074 mt_fixed_task_counts(task, task_counts);
6075 bc = (struct task_inspect_basic_counts *)info_out;
6076 #ifdef MT_CORE_INSTRS
6077 bc->instructions = task_counts[MT_CORE_INSTRS];
6078 #else /* defined(MT_CORE_INSTRS) */
6079 bc->instructions = 0;
6080 #endif /* !defined(MT_CORE_INSTRS) */
6081 bc->cycles = task_counts[MT_CORE_CYCLES];
6082 size = TASK_INSPECT_BASIC_COUNTS_COUNT;
6083 break;
6084 }
6085 default:
6086 kr = KERN_INVALID_ARGUMENT;
6087 break;
6088 }
6089
6090 if (kr == KERN_SUCCESS) {
6091 *size_in_out = size;
6092 }
6093 return kr;
6094 #else /* MONOTONIC */
6095 #pragma unused(task_insp, flavor, info_out, size_in_out)
6096 return KERN_NOT_SUPPORTED;
6097 #endif /* !MONOTONIC */
6098 }
6099
6100 #if CONFIG_SECLUDED_MEMORY
6101 int num_tasks_can_use_secluded_mem = 0;
6102
6103 void
6104 task_set_can_use_secluded_mem(
6105 task_t task,
6106 boolean_t can_use_secluded_mem)
6107 {
6108 if (!task->task_could_use_secluded_mem) {
6109 return;
6110 }
6111 task_lock(task);
6112 task_set_can_use_secluded_mem_locked(task, can_use_secluded_mem);
6113 task_unlock(task);
6114 }
6115
6116 void
6117 task_set_can_use_secluded_mem_locked(
6118 task_t task,
6119 boolean_t can_use_secluded_mem)
6120 {
6121 assert(task->task_could_use_secluded_mem);
6122 if (can_use_secluded_mem &&
6123 secluded_for_apps && /* global boot-arg */
6124 !task->task_can_use_secluded_mem) {
6125 assert(num_tasks_can_use_secluded_mem >= 0);
6126 OSAddAtomic(+1,
6127 (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
6128 task->task_can_use_secluded_mem = TRUE;
6129 } else if (!can_use_secluded_mem &&
6130 task->task_can_use_secluded_mem) {
6131 assert(num_tasks_can_use_secluded_mem > 0);
6132 OSAddAtomic(-1,
6133 (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
6134 task->task_can_use_secluded_mem = FALSE;
6135 }
6136 }
6137
6138 void
6139 task_set_could_use_secluded_mem(
6140 task_t task,
6141 boolean_t could_use_secluded_mem)
6142 {
6143 task->task_could_use_secluded_mem = could_use_secluded_mem;
6144 }
6145
6146 void
6147 task_set_could_also_use_secluded_mem(
6148 task_t task,
6149 boolean_t could_also_use_secluded_mem)
6150 {
6151 task->task_could_also_use_secluded_mem = could_also_use_secluded_mem;
6152 }
6153
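/*
 * A task may draw from the secluded pool either because it has been
 * explicitly opted in (task_can_use_secluded_mem, counted in
 * num_tasks_can_use_secluded_mem) or because it is marked "could also use"
 * and at least one opted-in task currently exists.
 */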
6154 boolean_t
6155 task_can_use_secluded_mem(
6156 task_t task)
6157 {
6158 if (task->task_can_use_secluded_mem) {
6159 assert(task->task_could_use_secluded_mem);
6160 assert(num_tasks_can_use_secluded_mem > 0);
6161 return TRUE;
6162 }
6163 if (task->task_could_also_use_secluded_mem &&
6164 num_tasks_can_use_secluded_mem > 0) {
6165 assert(num_tasks_can_use_secluded_mem > 0);
6166 return TRUE;
6167 }
6168 return FALSE;
6169 }
6170
6171 boolean_t
6172 task_could_use_secluded_mem(
6173 task_t task)
6174 {
6175 return task->task_could_use_secluded_mem;
6176 }
6177 #endif /* CONFIG_SECLUDED_MEMORY */
6178
6179 queue_head_t *
6180 task_io_user_clients(task_t task)
6181 {
6182 return (&task->io_user_clients);
6183 }
6184
6185 void
6186 task_copy_fields_for_exec(task_t dst_task, task_t src_task)
6187 {
6188 dst_task->vtimers = src_task->vtimers;
6189 }
6190
6191 #if DEVELOPMENT || DEBUG
6192 int vm_region_footprint = 0;
6193 #endif /* DEVELOPMENT || DEBUG */
6194
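/*
 * Whether VM region queries made by the current task should report
 * footprint accounting: on DEVELOPMENT/DEBUG kernels the global
 * vm_region_footprint flag overrides the per-task setting maintained by
 * task_self_region_footprint_set() below.
 */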
6195 boolean_t
6196 task_self_region_footprint(void)
6197 {
6198 #if DEVELOPMENT || DEBUG
6199 if (vm_region_footprint) {
6200 /* system-wide override */
6201 return TRUE;
6202 }
6203 #endif /* DEVELOPMENT || DEBUG */
6204 return current_task()->task_region_footprint;
6205 }
6206
6207 void
6208 task_self_region_footprint_set(
6209 boolean_t newval)
6210 {
6211 task_t curtask;
6212
6213 curtask = current_task();
6214 task_lock(curtask);
6215 if (newval) {
6216 curtask->task_region_footprint = TRUE;
6217 } else {
6218 curtask->task_region_footprint = FALSE;
6219 }
6220 task_unlock(curtask);
6221 }