osfmk/kern/task.c

   1 /*
   2  * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * @OSF_FREE_COPYRIGHT@
  30  */
  31 /*
  32  * Mach Operating System
  33  * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
  34  * All Rights Reserved.
  35  *
  36  * Permission to use, copy, modify and distribute this software and its
  37  * documentation is hereby granted, provided that both the copyright
  38  * notice and this permission notice appear in all copies of the
  39  * software, derivative works or modified versions, and any portions
  40  * thereof, and that both notices appear in supporting documentation.
  41  *
  42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
  44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  45  *
  46  * Carnegie Mellon requests users of this software to return to
  47  *
  48  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  49  *  School of Computer Science
  50  *  Carnegie Mellon University
  51  *  Pittsburgh PA 15213-3890
  52  *
  53  * any improvements or extensions that they make and grant Carnegie Mellon
  54  * the rights to redistribute these changes.
  55  */
  56 /*
  57  *      File:   kern/task.c
  58  *      Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub,
  59  *              David Black
  60  *
  61  *      Task management primitives implementation.
  62  */
  63 /*
  64  * Copyright (c) 1993 The University of Utah and
  65  * the Computer Systems Laboratory (CSL).  All rights reserved.
  66  *
  67  * Permission to use, copy, modify and distribute this software and its
  68  * documentation is hereby granted, provided that both the copyright
  69  * notice and this permission notice appear in all copies of the
  70  * software, derivative works or modified versions, and any portions
  71  * thereof, and that both notices appear in supporting documentation.
  72  *
  73  * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
  74  * IS" CONDITION.  THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
  75  * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  76  *
  77  * CSL requests users of this software to return to csl-dist@cs.utah.edu any
  78  * improvements that they make and grant CSL redistribution rights.
  79  *
  80  */
  81 /*
  82  * NOTICE: This file was modified by McAfee Research in 2004 to introduce
  83  * support for mandatory and extensible security protections.  This notice
  84  * is included in support of clause 2.2 (b) of the Apple Public License,
  85  * Version 2.0.
  86  * Copyright (c) 2005 SPARTA, Inc.
  87  */
  88
  89 #include <mach/mach_types.h>
  90 #include <mach/boolean.h>
  91 #include <mach/host_priv.h>
  92 #include <mach/machine/vm_types.h>
  93 #include <mach/vm_param.h>
  94 #include <mach/mach_vm.h>
  95 #include <mach/semaphore.h>
  96 #include <mach/task_info.h>
  97 #include <mach/task_special_ports.h>
  98 #include <mach/sdt.h>
  99
 100 #include <ipc/ipc_importance.h>
 101 #include <ipc/ipc_types.h>
 102 #include <ipc/ipc_space.h>
 103 #include <ipc/ipc_entry.h>
 104 #include <ipc/ipc_hash.h>
 105
 106 #include <kern/kern_types.h>
 107 #include <kern/mach_param.h>
 108 #include <kern/misc_protos.h>
 109 #include <kern/task.h>
 110 #include <kern/thread.h>
 111 #include <kern/coalition.h>
 112 #include <kern/zalloc.h>
 113 #include <kern/kalloc.h>
 114 #include <kern/kern_cdata.h>
 115 #include <kern/processor.h>
 116 #include <kern/sched_prim.h>    /* for thread_wakeup */
 117 #include <kern/ipc_tt.h>
 118 #include <kern/host.h>
 119 #include <kern/clock.h>
 120 #include <kern/timer.h>
 121 #include <kern/assert.h>
 122 #include <kern/sync_lock.h>
 123 #include <kern/affinity.h>
 124 #include <kern/exc_resource.h>
 125 #include <kern/machine.h>
 126 #include <kern/policy_internal.h>
 127
 128 #include <corpses/task_corpse.h>
 129 #if CONFIG_TELEMETRY
 130 #include <kern/telemetry.h>
 131 #endif
 132
 133 #include <vm/pmap.h>
 134 #include <vm/vm_map.h>
 135 #include <vm/vm_kern.h>         /* for kernel_map, ipc_kernel_map */
 136 #include <vm/vm_pageout.h>
 137 #include <vm/vm_protos.h>
 138 #include <vm/vm_purgeable_internal.h>
 139
 140 #include <sys/resource.h>
 141 #include <sys/signalvar.h> /* for coredump */
 142
 143 /*
 144  * Exported interfaces
 145  */
 146
 147 #include <mach/task_server.h>
 148 #include <mach/mach_host_server.h>
 149 #include <mach/host_security_server.h>
 150 #include <mach/mach_port_server.h>
 151
 152 #include <vm/vm_shared_region.h>
 153
 154 #include <libkern/OSDebug.h>
 155 #include <libkern/OSAtomic.h>
 156
 157 #if CONFIG_ATM
 158 #include <atm/atm_internal.h>
 159 #endif
 160
 161 #include <kern/sfi.h>           /* picks up ledger.h */
 162
 163 #if CONFIG_MACF
 164 #include <security/mac_mach_internal.h>
 165 #endif
 166
  167 #if KPERF
  168 extern int kpc_force_all_ctrs(task_t, int);
  169 #endif
  170
      /*
       * Core task globals.  task_init() assigns kernel_task (the first task it
       * creates), creates task_zone with zinit(), and initializes the lock
       * group and attribute objects below.
       */
  171 task_t                  kernel_task;
  172 zone_t                  task_zone;
  173 lck_attr_t      task_lck_attr;
  174 lck_grp_t       task_lck_grp;
  175 lck_grp_attr_t  task_lck_grp_attr;
  176
      /* Corpse-related flags defined outside this file. */
  177 extern int exc_via_corpse_forking;
  178 extern int unify_corpse_blob_alloc;
  179 extern int corpse_for_fatal_memkill;
  180
  181 /* Flag set by core audio when audio is playing. Used to stifle EXC_RESOURCE generation when active. */
  182 int audio_active = 0;
  183
  184 zinfo_usage_store_t tasks_tkm_private;
  185 zinfo_usage_store_t tasks_tkm_shared;
  186
  187 /* A container to accumulate statistics for expired tasks */
  188 expired_task_statistics_t               dead_task_statistics;
      /* Spin lock initialized at the end of task_init(). */
  189 lck_spin_t              dead_task_statistics_lock;
  190
      /* init_task_ledgers() asserts that this is still NULL when it starts. */
  191 ledger_template_t task_ledger_template = NULL;
  192
      /*
       * Ledger entry indices.  Every scalar slot starts at -1 and is filled in
       * by init_task_ledgers() with the value returned by ledger_entry_add().
       */
  193 struct _task_ledger_indices task_ledgers __attribute__((used)) =
  194         {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  195          { 0 /* initialized at runtime */},
  196 #ifdef CONFIG_BANK
  197          -1, -1,
  198 #endif
  199          -1, -1,
  200         };
  201
  202 /* System sleep state */
  203 boolean_t tasks_suspend_state;
 204
  205
      /*
       * Prototypes and extern declarations for routines referenced in this
       * file.  init_task_ledgers() is defined further down.
       */
  206 void init_task_ledgers(void);
  207 void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1);
  208 void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1);
  209 void task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1);
      /* Deliberately noinline (see the attribute on each declaration). */
  210 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void);
  211 void __attribute__((noinline)) PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, boolean_t is_fatal);
  212 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor);
  213
  214 kern_return_t task_suspend_internal(task_t);
  215 kern_return_t task_resume_internal(task_t);
  216 static kern_return_t task_start_halt_locked(task_t task, boolean_t should_mark_corpse);
  217 int proc_list_uptrs(void *p, uint64_t *udata_buffer, int size);
  218
  219 extern kern_return_t iokit_task_terminate(task_t task);
  220
  221 extern kern_return_t exception_deliver(thread_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t, struct exception_action *, lck_mtx_t *);
  222 extern void bsd_copythreadname(void *dst_uth, void *src_uth);
 223
  224 // Warn tasks when they hit 80% of their memory limit.
      // task_init() also uses this as the floor for max_task_footprint_warning_level.
  225 #define PHYS_FOOTPRINT_WARNING_LEVEL 80
  226
      /* Defaults applied by task_init() when the matching boot-arg is absent. */
  227 #define TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT              150 /* wakeups per second */
  228 #define TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL   300 /* in seconds. */
  229
  230 /*
  231  * Level (in terms of percentage of the limit) at which the wakeups monitor triggers telemetry.
  232  *
  233  * (i.e. when the task's wakeups rate exceeds 70% of the limit, start taking user
  234  *  stacktraces, aka micro-stackshots)
  235  */
  236 #define TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER        70
  237
  238 int task_wakeups_monitor_interval; /* In seconds. Time period over which wakeups rate is observed */
  239 int task_wakeups_monitor_rate;     /* In hz. Maximum allowable wakeups per task before EXC_RESOURCE is sent */
  240
  241 int task_wakeups_monitor_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
  242
  243 int disable_exc_resource; /* Global override to suppress EXC_RESOURCE for resource monitor violations. */
  244
      /* The three footprint values below are computed in task_init(). */
  245 ledger_amount_t max_task_footprint = 0;  /* Per-task limit on physical memory consumption in bytes     */
  246 int max_task_footprint_warning_level = 0;  /* Per-task limit warning percentage */
  247 int max_task_footprint_mb = 0;  /* Per-task limit on physical memory consumption in megabytes */
  248
  249 /* I/O Monitor Limits */
  250 #define IOMON_DEFAULT_LIMIT                     (20480ull)      /* MB of logical/physical I/O */
  251 #define IOMON_DEFAULT_INTERVAL                  (86400ull)      /* in seconds */
  252
  253 uint64_t task_iomon_limit_mb;           /* Per-task I/O monitor limit in MBs */
  254 uint64_t task_iomon_interval_secs;      /* Per-task I/O monitor interval in secs */
  255
  256 #define IO_TELEMETRY_DEFAULT_LIMIT              (10ll * 1024ll * 1024ll)
  257 int64_t io_telemetry_limit;                     /* Threshold to take a microstackshot (0 indicates I/O telemetry is turned off) */
  258 int64_t global_logical_writes_count = 0;        /* Global count for logical writes */
  259 static boolean_t global_update_logical_writes(int64_t);
  260
  261 #if MACH_ASSERT
      /* Defaults to 1; overridable through the "pmap_ledgers_panic" boot-arg. */
  262 int pmap_ledgers_panic = 1;
  263 #endif /* MACH_ASSERT */
  264
      /* task_init() sizes the task zone as task_max * sizeof(struct task). */
  265 int task_max = CONFIG_TASK_MAX; /* Max number of tasks */
  266
  267 #if CONFIG_COREDUMP
  268 int hwm_user_cores = 0; /* high watermark violations generate user core files */
  269 #endif
 270
  271 #ifdef MACH_BSD
      /* External routines declared only when MACH_BSD is defined. */
  272 extern void     proc_getexecutableuuid(void *, unsigned char *, unsigned long);
  273 extern int      proc_pid(struct proc *p);
  274 extern int      proc_selfpid(void);
  275 extern char     *proc_name_address(struct proc *p);
  276 extern uint64_t get_dispatchqueue_offset_from_proc(void *);
  277
  278 #if CONFIG_MEMORYSTATUS
  279 extern void     proc_memstat_terminated(struct proc* p, boolean_t set);
  280 extern boolean_t memorystatus_turnoff_exception_and_get_fatalness(boolean_t warning, const int max_footprint_mb);
  281 extern void     memorystatus_on_ledger_footprint_exceeded(int warning, boolean_t is_fatal);
  282 #endif /* CONFIG_MEMORYSTATUS */
  283
  284 #endif /* MACH_BSD */
  285
  286 /* Forward declarations of file-local (static) helpers */
  287
  288 static void task_hold_locked(task_t task);
  289 static void task_wait_locked(task_t task, boolean_t until_not_runnable);
  290 static void task_release_locked(task_t task);
  291
  292 static void task_synchronizer_destroy_all(task_t task);
  293
      /*
       *      task_backing_store_privileged:
       *
       *      Set the VM_BACKING_STORE_PRIV bit in task->priv_flags.  The
       *      update is made with the task lock held.
       */
  294 void
  295 task_backing_store_privileged(
  296                         task_t task)
  297 {
  298         task_lock(task);
  299         task->priv_flags |= VM_BACKING_STORE_PRIV;
  300         task_unlock(task);
  301         return;
  302 }
 303
  304
      /*
       *      task_set_64bit:
       *
       *      Set or clear the task's 64-bit address flag according to is64bit.
       *      If the task is already in the requested state nothing else is done.
       *      Otherwise, on i386/x86_64/arm64 builds, every thread on the task's
       *      thread queue has its address mode switched as well.
       *
       *      The task lock is held for the whole operation.
       */
  305 void
  306 task_set_64bit(
  307                 task_t task,
  308                 boolean_t is64bit)
  309 {
  310 #if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
  311         thread_t thread;
  312 #endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */
  313
  314         task_lock(task);
  315
              /* Flip the flag only if it actually changes; otherwise bail out. */
  316         if (is64bit) {
  317                 if (task_has_64BitAddr(task))
  318                         goto out;
  319                 task_set_64BitAddr(task);
  320         } else {
  321                 if ( !task_has_64BitAddr(task))
  322                         goto out;
  323                 task_clear_64BitAddr(task);
  324         }
  325         /* FIXME: On x86, the thread save state flavor can diverge from the
  326          * task's 64-bit feature flag due to the 32-bit/64-bit register save
  327          * state dichotomy. Since we can be pre-empted in this interval,
  328          * certain routines may observe the thread as being in an inconsistent
  329          * state with respect to its task's 64-bitness.
  330          */
  331
  332 #if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
  333         queue_iterate(&task->threads, thread, thread_t, task_threads) {
                      /* Switch each thread's address mode under its thread mutex. */
  334                 thread_mtx_lock(thread);
  335                 machine_thread_switch_addrmode(thread);
  336                 thread_mtx_unlock(thread);
  337
  338                 if (thread == current_thread()) {
  339                         uint64_t arg1, arg2;
  340                         int urgency;
  341                         spl_t spl = splsched();
  342                         /*
  343                          * Tell the machine layer that the current thread's 32/64-bitness
  344                          * has changed.  The other threads were off core when the address
  345                          * mode was switched, but current_thread() is on core, and the value
  346                          * given to its earlier machine_thread_going_on_core() call is now stale.
  347                          *
  348                          * This is needed for bring-up, a different callback should be used
  349                          * in the future.
  350                          */
  351                         thread_lock(thread);
  352                         urgency = thread_get_urgency(thread, &arg1, &arg2);
  353                         machine_thread_going_on_core(thread, urgency, 0);
  354                         thread_unlock(thread);
  355                         splx(spl);
  356                 }
  357         }
  358 #endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */
  359
  360 out:
  361         task_unlock(task);
  362 }
 363
  364
      /*
       *      task_set_dyld_info:
       *
       *      Store addr and size into the task's all_image_info_addr and
       *      all_image_info_size fields, under the task lock.
       */
  365 void
  366 task_set_dyld_info(task_t task, mach_vm_address_t addr, mach_vm_size_t size)
  367 {
  368         task_lock(task);
  369         task->all_image_info_addr = addr;
  370         task->all_image_info_size = size;
  371         task_unlock(task);
  372 }
  373
      /*
       *      task_atm_reset:
       *
       *      With CONFIG_ATM, destroy the task's ATM descriptor (if it has one)
       *      and clear task->atm_context.  Without CONFIG_ATM this is a no-op.
       *      No lock is taken in this routine.
       */
  374 void
  375 task_atm_reset(__unused task_t task) {
  376
  377 #if CONFIG_ATM
  378         if (task->atm_context != NULL) {
  379                  atm_task_descriptor_destroy(task->atm_context);
  380                  task->atm_context = NULL;
  381         }
  382 #endif
  383
  384 }
  385
      /*
       *      task_bank_reset:
       *
       *      With CONFIG_BANK, call bank_task_destroy() on the task if it has a
       *      non-NULL bank_context.  Without CONFIG_BANK this is a no-op.
       *      Unlike task_atm_reset(), this routine does not clear the context
       *      pointer itself.
       *      NOTE(review): presumably bank_task_destroy() does so; confirm.
       */
  386 void
  387 task_bank_reset(__unused task_t task) {
  388
  389 #if CONFIG_BANK
  390         if (task->bank_context != NULL) {
  391                  bank_task_destroy(task);
  392         }
  393 #endif
  394
  395 }
 396
  397 /*
       *      task_bank_init:
       *
       *      With CONFIG_BANK, initialize the task's bank state through
       *      bank_task_initialize().  Panics if the task already has a non-NULL
       *      bank_context.  Without CONFIG_BANK this is a no-op.
       *
  398  * NOTE: This should only be called when the P_LINTRANSIT
  399  *       flag is set (the proc_trans lock is held) on the
  400  *       proc associated with the task.
  401  */
  402 void
  403 task_bank_init(__unused task_t task) {
  404
  405 #if CONFIG_BANK
  406         if (task->bank_context != NULL) {
  407                 panic("Task bank init called with non null bank context for task: %p and bank_context: %p", task, task->bank_context);
  408         }
  409         bank_task_initialize(task);
  410 #endif
  411
  412 }
 413
 414 #if TASK_REFERENCE_LEAK_DEBUG
 415 #include <kern/btlog.h>
  416
      /*
       * Backtrace log of task reference count operations.  Created in
       * task_init() with TASK_REF_NUM_RECORDS records of depth TASK_REF_BTDEPTH.
       */
  417 static btlog_t *task_ref_btlog;
      /* Operation tags passed to btlog_add_entry(). */
  418 #define TASK_REF_OP_INCR        0x1
  419 #define TASK_REF_OP_DECR        0x2
  420
  421 #define TASK_REF_NUM_RECORDS    100000
  422 #define TASK_REF_BTDEPTH        7
  423
      /*
       *      task_reference_internal (TASK_REFERENCE_LEAK_DEBUG variant):
       *
       *      Atomically add one to task->ref_count, then record the caller's
       *      backtrace in task_ref_btlog tagged TASK_REF_OP_INCR.
       */
  424 void
  425 task_reference_internal(task_t task)
  426 {
  427         void *       bt[TASK_REF_BTDEPTH];
  428         int             numsaved = 0;
  429
              /* Capture up to TASK_REF_BTDEPTH frames before touching the count. */
  430         numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
  431
  432         (void)hw_atomic_add(&(task)->ref_count, 1);
  433         btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_INCR,
  434                                         bt, numsaved);
  435 }
  436
      /*
       *      task_deallocate_internal (TASK_REFERENCE_LEAK_DEBUG variant):
       *
       *      Record the caller's backtrace in task_ref_btlog tagged
       *      TASK_REF_OP_DECR, then atomically subtract one from
       *      task->ref_count.
       *
       *      Returns the value produced by hw_atomic_sub().
       *      NOTE(review): presumably the post-decrement count; confirm.
       */
  437 uint32_t
  438 task_deallocate_internal(task_t task)
  439 {
  440         void *       bt[TASK_REF_BTDEPTH];
  441         int             numsaved = 0;
  442
  443         numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
  444
              /* The log entry is added before the decrement, while the task is still referenced. */
  445         btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_DECR,
  446                                         bt, numsaved);
  447         return hw_atomic_sub(&(task)->ref_count, 1);
  448 }
 449
 450 #endif /* TASK_REFERENCE_LEAK_DEBUG */
  451
      /*
       *      task_init:
       *
       *      One-time initialization of the task subsystem.  In order:
       *        - set up the task lock group/attributes and the two global mutexes;
       *        - create the zone from which struct task is allocated;
       *        - read the per-task memory limit, resource-monitor and I/O
       *          tunables from boot-args (falling back to defaults);
       *        - make sure the task ledger template exists;
       *        - create kernel_task and point its map at kernel_map.
       *
       *      Panics if the kernel task cannot be created.
       */
  452 void
  453 task_init(void)
  454 {
  455
              /* Locking infrastructure shared by all tasks. */
  456         lck_grp_attr_setdefault(&task_lck_grp_attr);
  457         lck_grp_init(&task_lck_grp, "task", &task_lck_grp_attr);
  458         lck_attr_setdefault(&task_lck_attr);
  459         lck_mtx_init(&tasks_threads_lock, &task_lck_grp, &task_lck_attr);
  460         lck_mtx_init(&tasks_corpse_lock, &task_lck_grp, &task_lck_attr);
  461
              /* Zone for struct task, sized for at most task_max tasks. */
  462         task_zone = zinit(
  463                         sizeof(struct task),
  464                         task_max * sizeof(struct task),
  465                         TASK_CHUNK * sizeof(struct task),
  466                         "tasks");
  467
  468         zone_change(task_zone, Z_NOENCRYPT, TRUE);
  469
  470
  471         /*
  472          * Configure per-task memory limit.
  473          * The boot-arg is interpreted as Megabytes,
  474          * and takes precedence over the device tree.
  475          * Setting the boot-arg to 0 disables task limits.
  476          */
  477         if (!PE_parse_boot_argn("max_task_pmem", &max_task_footprint_mb,
  478                         sizeof (max_task_footprint_mb))) {
  479                 /*
  480                  * No limit was found in boot-args, so go look in the device tree.
  481                  */
  482                 if (!PE_get_default("kern.max_task_pmem", &max_task_footprint_mb,
  483                                 sizeof(max_task_footprint_mb))) {
  484                         /*
  485                          * No limit was found in device tree.
  486                          */
  487                         max_task_footprint_mb = 0;
  488                 }
  489         }
  490
  491         if (max_task_footprint_mb != 0) {
  492 #if CONFIG_MEMORYSTATUS
                      /* Clamp any non-zero limit to a 50MB minimum. */
  493                 if (max_task_footprint_mb < 50) {
  494                                 printf("Warning: max_task_pmem %d below minimum.\n",
  495                                 max_task_footprint_mb);
  496                                 max_task_footprint_mb = 50;
  497                 }
  498                 printf("Limiting task physical memory footprint to %d MB\n",
  499                         max_task_footprint_mb);
  500
  501                 max_task_footprint = (ledger_amount_t)max_task_footprint_mb * 1024 * 1024; // Convert MB to bytes
  502
  503                 /*
  504                  * Configure the per-task memory limit warning level.
  505                  * This is computed as a percentage.
  506                  */
  507                 max_task_footprint_warning_level = 0;
  508
                      /* 0x40000000 bytes == 1GB. */
  509                 if (max_mem < 0x40000000) {
  510                         /*
  511                          * On devices with < 1GB of memory:
  512                          *    -- set warnings to 50MB below the per-task limit.
  513                          */
  514                         if (max_task_footprint_mb > 50) {
  515                                 max_task_footprint_warning_level = ((max_task_footprint_mb - 50) * 100) / max_task_footprint_mb;
  516                         }
  517                 } else {
  518                         /*
  519                          * On devices with >= 1GB of memory:
  520                          *    -- set warnings to 100MB below the per-task limit.
  521                          */
  522                         if (max_task_footprint_mb > 100) {
  523                                 max_task_footprint_warning_level = ((max_task_footprint_mb - 100) * 100) / max_task_footprint_mb;
  524                         }
  525                 }
  526
  527                 /*
  528                  * Never allow warning level to land below the default.
  529                  */
  530                 if (max_task_footprint_warning_level < PHYS_FOOTPRINT_WARNING_LEVEL) {
  531                         max_task_footprint_warning_level = PHYS_FOOTPRINT_WARNING_LEVEL;
  532                 }
  533
  534                 printf("Limiting task physical memory warning to %d%%\n", max_task_footprint_warning_level);
  535
  536 #else
  537                 printf("Warning: max_task_pmem specified, but jetsam not configured; ignoring.\n");
  538 #endif /* CONFIG_MEMORYSTATUS */
  539         }
  540
  541 #if MACH_ASSERT
              /*
               * NOTE(review): init_task_ledgers() parses this same boot-arg;
               * one of the two parses looks redundant -- confirm before removing.
               */
  542         PE_parse_boot_argn("pmap_ledgers_panic", &pmap_ledgers_panic,
  543                           sizeof (pmap_ledgers_panic));
  544 #endif /* MACH_ASSERT */
  545
  546 #if CONFIG_COREDUMP
  547         if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores,
  548                         sizeof (hwm_user_cores))) {
  549                 hwm_user_cores = 0;
  550         }
  551 #endif
  552
  553         proc_init_cpumon_params();
  554
              /*
               * Wakeups monitor, EXC_RESOURCE override and I/O monitor tunables:
               * each one takes its boot-arg value if present, else its default.
               */
  555         if (!PE_parse_boot_argn("task_wakeups_monitor_rate", &task_wakeups_monitor_rate, sizeof (task_wakeups_monitor_rate))) {
  556                 task_wakeups_monitor_rate = TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT;
  557         }
  558
  559         if (!PE_parse_boot_argn("task_wakeups_monitor_interval", &task_wakeups_monitor_interval, sizeof (task_wakeups_monitor_interval))) {
  560                 task_wakeups_monitor_interval = TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL;
  561         }
  562
  563         if (!PE_parse_boot_argn("task_wakeups_monitor_ustackshots_trigger_pct", &task_wakeups_monitor_ustackshots_trigger_pct,
  564                 sizeof (task_wakeups_monitor_ustackshots_trigger_pct))) {
  565                 task_wakeups_monitor_ustackshots_trigger_pct = TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER;
  566         }
  567
  568         if (!PE_parse_boot_argn("disable_exc_resource", &disable_exc_resource,
  569                 sizeof (disable_exc_resource))) {
  570                 disable_exc_resource = 0;
  571         }
  572
  573         if (!PE_parse_boot_argn("task_iomon_limit_mb", &task_iomon_limit_mb, sizeof (task_iomon_limit_mb))) {
  574                 task_iomon_limit_mb = IOMON_DEFAULT_LIMIT;
  575         }
  576
  577         if (!PE_parse_boot_argn("task_iomon_interval_secs", &task_iomon_interval_secs, sizeof (task_iomon_interval_secs))) {
  578                 task_iomon_interval_secs = IOMON_DEFAULT_INTERVAL;
  579         }
  580
  581         if (!PE_parse_boot_argn("io_telemetry_limit", &io_telemetry_limit, sizeof (io_telemetry_limit))) {
  582                 io_telemetry_limit = IO_TELEMETRY_DEFAULT_LIMIT;
  583         }
  584
  585 /*
  586  * If we have coalitions, coalition_init() will call init_task_ledgers() as it
  587  * sets up the ledgers for the default coalition. If we don't have coalitions,
  588  * then we have to call it now.
  589  */
  590 #if CONFIG_COALITIONS
  591         assert(task_ledger_template);
  592 #else /* CONFIG_COALITIONS */
  593         init_task_ledgers();
  594 #endif /* CONFIG_COALITIONS */
  595
  596 #if TASK_REFERENCE_LEAK_DEBUG
  597         task_ref_btlog = btlog_create(TASK_REF_NUM_RECORDS, TASK_REF_BTDEPTH, TRUE /* caller_will_remove_entries_for_element? */);
  598         assert(task_ref_btlog);
  599 #endif
  600
  601         /*
  602          * Create the kernel task as the first task.
               * The fourth argument is TRUE on LP64 builds and FALSE otherwise.
  603          */
  604 #ifdef __LP64__
  605         if (task_create_internal(TASK_NULL, NULL, FALSE, TRUE, TF_NONE, &kernel_task) != KERN_SUCCESS)
  606 #else
  607         if (task_create_internal(TASK_NULL, NULL, FALSE, FALSE, TF_NONE, &kernel_task) != KERN_SUCCESS)
  608 #endif
  609                 panic("task_init\n");
  610
              /* Drop the map the new task came with and use kernel_map instead. */
  611         vm_map_deallocate(kernel_task->map);
  612         kernel_task->map = kernel_map;
  613         lck_spin_init(&dead_task_statistics_lock, &task_lck_grp, &task_lck_attr);
  614 }
 615
  616 /*
  617  * Nominally: create a task running in the kernel address space, which may
  618  * have its own map of size map_size and may have ipc privileges.
       *
       * This implementation is a stub: every argument is unused and the call
       * always returns KERN_INVALID_ARGUMENT.
  619  */
  620 kern_return_t
  621 kernel_task_create(
  622         __unused task_t         parent_task,
  623         __unused vm_offset_t            map_base,
  624         __unused vm_size_t              map_size,
  625         __unused task_t         *child_task)
  626 {
  627         return (KERN_INVALID_ARGUMENT);
  628 }
  629
      /*
       *      task_create:
       *
       *      No longer supported; never creates a task and never writes to
       *      child_task.
       *
       *      Returns KERN_INVALID_ARGUMENT if parent_task is TASK_NULL,
       *      KERN_FAILURE otherwise.
       */
  630 kern_return_t
  631 task_create(
  632         task_t                          parent_task,
  633         __unused ledger_port_array_t    ledger_ports,
  634         __unused mach_msg_type_number_t num_ledger_ports,
  635         __unused boolean_t              inherit_memory,
  636         __unused task_t                 *child_task)    /* OUT */
  637 {
  638         if (parent_task == TASK_NULL)
  639                 return(KERN_INVALID_ARGUMENT);
  640
  641         /*
  642          * No longer supported: too many calls assume that a task has a valid
  643          * process attached.
  644          */
  645         return(KERN_FAILURE);
  646 }
  647
      /*
       *      host_security_create_task_token:
       *
       *      No longer supported; never creates a task and never writes to
       *      child_task.
       *
       *      Returns, checked in this order:
       *        KERN_INVALID_ARGUMENT   parent_task is TASK_NULL
       *        KERN_INVALID_SECURITY   host_security is HOST_NULL
       *        KERN_FAILURE            in every other case
       */
  648 kern_return_t
  649 host_security_create_task_token(
  650         host_security_t                 host_security,
  651         task_t                          parent_task,
  652         __unused security_token_t       sec_token,
  653         __unused audit_token_t          audit_token,
  654         __unused host_priv_t            host_priv,
  655         __unused ledger_port_array_t    ledger_ports,
  656         __unused mach_msg_type_number_t num_ledger_ports,
  657         __unused boolean_t              inherit_memory,
  658         __unused task_t                 *child_task)    /* OUT */
  659 {
  660         if (parent_task == TASK_NULL)
  661                 return(KERN_INVALID_ARGUMENT);
  662
  663         if (host_security == HOST_NULL)
  664                 return(KERN_INVALID_SECURITY);
  665
  666         /*
  667          * No longer supported.
  668          */
  669         return(KERN_FAILURE);
  670 }
 671
 672 /*
 673  * Task ledgers
 674  * ------------
 675  *
 676  * phys_footprint
 677  *   Physical footprint: This is the sum of:
 678  *     + (internal - alternate_accounting)
 679  *     + (internal_compressed - alternate_accounting_compressed)
 680  *     + iokit_mapped
 681  *     + purgeable_nonvolatile
 682  *     + purgeable_nonvolatile_compressed
 683  *     + page_table
 684  *
 685  * internal
 686  *   The task's anonymous memory, which on iOS is always resident.
 687  *
 688  * internal_compressed
 689  *   Amount of this task's internal memory which is held by the compressor.
 690  *   Such memory is no longer actually resident for the task [i.e., resident in its pmap],
 691  *   and could be either decompressed back into memory, or paged out to storage, depending
 692  *   on our implementation.
 693  *
 694  * iokit_mapped
  695  *   IOKit mappings: The total size of all IOKit mappings in this task, regardless of
  696  *   clean/dirty or internal/external state.
 697  *
 698  * alternate_accounting
 699  *   The number of internal dirty pages which are part of IOKit mappings. By definition, these pages
 700  *   are counted in both internal *and* iokit_mapped, so we must subtract them from the total to avoid
 701  *   double counting.
 702  */
 703 void
 704 init_task_ledgers(void)
 705 {
 706         ledger_template_t t;
 707
 708         assert(task_ledger_template == NULL);
 709         assert(kernel_task == TASK_NULL);
 710
 711 #if MACH_ASSERT
 712         PE_parse_boot_argn("pmap_ledgers_panic", &pmap_ledgers_panic,
 713                           sizeof (pmap_ledgers_panic));
 714 #endif /* MACH_ASSERT */
 715
 716         if ((t = ledger_template_create("Per-task ledger")) == NULL)
 717                 panic("couldn't create task ledger template");
 718
 719         task_ledgers.cpu_time = ledger_entry_add(t, "cpu_time", "sched", "ns");
 720         task_ledgers.tkm_private = ledger_entry_add(t, "tkm_private",
 721             "physmem", "bytes");
 722         task_ledgers.tkm_shared = ledger_entry_add(t, "tkm_shared", "physmem",
 723             "bytes");
 724         task_ledgers.phys_mem = ledger_entry_add(t, "phys_mem", "physmem",
 725             "bytes");
 726         task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem",
 727             "bytes");
 728         task_ledgers.internal = ledger_entry_add(t, "internal", "physmem",
 729             "bytes");
 730         task_ledgers.iokit_mapped = ledger_entry_add(t, "iokit_mapped", "mappings",
 731             "bytes");
 732         task_ledgers.alternate_accounting = ledger_entry_add(t, "alternate_accounting", "physmem",
 733             "bytes");
 734         task_ledgers.alternate_accounting_compressed = ledger_entry_add(t, "alternate_accounting_compressed", "physmem",
 735             "bytes");
 736         task_ledgers.page_table = ledger_entry_add(t, "page_table", "physmem",
 737             "bytes");
 738         task_ledgers.phys_footprint = ledger_entry_add(t, "phys_footprint", "physmem",
 739             "bytes");
 740         task_ledgers.internal_compressed = ledger_entry_add(t, "internal_compressed", "physmem",
 741             "bytes");
 742         task_ledgers.purgeable_volatile = ledger_entry_add(t, "purgeable_volatile", "physmem", "bytes");
 743         task_ledgers.purgeable_nonvolatile = ledger_entry_add(t, "purgeable_nonvolatile", "physmem", "bytes");
 744         task_ledgers.purgeable_volatile_compressed = ledger_entry_add(t, "purgeable_volatile_compress", "physmem", "bytes");
 745         task_ledgers.purgeable_nonvolatile_compressed = ledger_entry_add(t, "purgeable_nonvolatile_compress", "physmem", "bytes");
 746         task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power",
 747             "count");
 748         task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power",
 749             "count");
 750
 751 #if CONFIG_SCHED_SFI
 752         sfi_class_id_t class_id, ledger_alias;
 753         for (class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
 754                 task_ledgers.sfi_wait_times[class_id] = -1;
 755         }
 756
 757         /* don't account for UNSPECIFIED */
 758         for (class_id = SFI_CLASS_UNSPECIFIED + 1; class_id < MAX_SFI_CLASS_ID; class_id++) {
 759                 ledger_alias = sfi_get_ledger_alias_for_class(class_id);
 760                 if (ledger_alias != SFI_CLASS_UNSPECIFIED) {
 761                         /* Check to see if alias has been registered yet */
 762                         if (task_ledgers.sfi_wait_times[ledger_alias] != -1) {
 763                                 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias];
 764                         } else {
 765                                 /* Otherwise, initialize it first */
 766                                 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias] = sfi_ledger_entry_add(t, ledger_alias);
 767                         }
 768                 } else {
 769                         task_ledgers.sfi_wait_times[class_id] = sfi_ledger_entry_add(t, class_id);
 770                 }
 771
 772                 if (task_ledgers.sfi_wait_times[class_id] < 0) {
 773                         panic("couldn't create entries for task ledger template for SFI class 0x%x", class_id);
 774                 }
 775         }
 776
 777         assert(task_ledgers.sfi_wait_times[MAX_SFI_CLASS_ID -1] != -1);
 778 #endif /* CONFIG_SCHED_SFI */
 779
 780 #ifdef CONFIG_BANK
 781         task_ledgers.cpu_time_billed_to_me = ledger_entry_add(t, "cpu_time_billed_to_me", "sched", "ns");
 782         task_ledgers.cpu_time_billed_to_others = ledger_entry_add(t, "cpu_time_billed_to_others", "sched", "ns");
 783 #endif
 784         task_ledgers.physical_writes = ledger_entry_add(t, "physical_writes", "res", "bytes");
 785         task_ledgers.logical_writes = ledger_entry_add(t, "logical_writes", "res", "bytes");
 786
 787         if ((task_ledgers.cpu_time < 0) ||
 788             (task_ledgers.tkm_private < 0) ||
 789             (task_ledgers.tkm_shared < 0) ||
 790             (task_ledgers.phys_mem < 0) ||
 791             (task_ledgers.wired_mem < 0) ||
 792             (task_ledgers.internal < 0) ||
 793             (task_ledgers.iokit_mapped < 0) ||
 794             (task_ledgers.alternate_accounting < 0) ||
 795             (task_ledgers.alternate_accounting_compressed < 0) ||
 796             (task_ledgers.page_table < 0) ||
 797             (task_ledgers.phys_footprint < 0) ||
 798             (task_ledgers.internal_compressed < 0) ||
 799             (task_ledgers.purgeable_volatile < 0) ||
 800             (task_ledgers.purgeable_nonvolatile < 0) ||
 801             (task_ledgers.purgeable_volatile_compressed < 0) ||
 802             (task_ledgers.purgeable_nonvolatile_compressed < 0) ||
 803             (task_ledgers.platform_idle_wakeups < 0) ||
 804             (task_ledgers.interrupt_wakeups < 0) ||
 805 #ifdef CONFIG_BANK
 806             (task_ledgers.cpu_time_billed_to_me < 0) || (task_ledgers.cpu_time_billed_to_others < 0) ||
 807 #endif
 808             (task_ledgers.physical_writes < 0) ||
 809             (task_ledgers.logical_writes < 0)
 810             ) {
 811                 panic("couldn't create entries for task ledger template");
 812         }
 813
 814         ledger_track_credit_only(t, task_ledgers.phys_footprint);
 815         ledger_track_credit_only(t, task_ledgers.internal);
 816         ledger_track_credit_only(t, task_ledgers.internal_compressed);
 817         ledger_track_credit_only(t, task_ledgers.iokit_mapped);
 818         ledger_track_credit_only(t, task_ledgers.alternate_accounting);
 819         ledger_track_credit_only(t, task_ledgers.alternate_accounting_compressed);
 820         ledger_track_credit_only(t, task_ledgers.purgeable_volatile);
 821         ledger_track_credit_only(t, task_ledgers.purgeable_nonvolatile);
 822         ledger_track_credit_only(t, task_ledgers.purgeable_volatile_compressed);
 823         ledger_track_credit_only(t, task_ledgers.purgeable_nonvolatile_compressed);
 824
 825         ledger_track_maximum(t, task_ledgers.phys_footprint, 60);
 826 #if MACH_ASSERT
 827         if (pmap_ledgers_panic) {
 828                 ledger_panic_on_negative(t, task_ledgers.phys_footprint);
 829                 ledger_panic_on_negative(t, task_ledgers.page_table);
 830                 ledger_panic_on_negative(t, task_ledgers.internal);
 831                 ledger_panic_on_negative(t, task_ledgers.internal_compressed);
 832                 ledger_panic_on_negative(t, task_ledgers.iokit_mapped);
 833                 ledger_panic_on_negative(t, task_ledgers.alternate_accounting);
 834                 ledger_panic_on_negative(t, task_ledgers.alternate_accounting_compressed);
 835                 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile);
 836                 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile);
 837                 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile_compressed);
 838                 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile_compressed);
 839         }
 840 #endif /* MACH_ASSERT */
 841
 842 #if CONFIG_MEMORYSTATUS
 843         ledger_set_callback(t, task_ledgers.phys_footprint, task_footprint_exceeded, NULL, NULL);
 844 #endif /* CONFIG_MEMORYSTATUS */
 845
 846         ledger_set_callback(t, task_ledgers.interrupt_wakeups,
 847                 task_wakeups_rate_exceeded, NULL, NULL);
 848         ledger_set_callback(t, task_ledgers.physical_writes, task_io_rate_exceeded, (void *)FLAVOR_IO_PHYSICAL_WRITES, NULL);
 849         ledger_set_callback(t, task_ledgers.logical_writes, task_io_rate_exceeded, (void *)FLAVOR_IO_LOGICAL_WRITES, NULL);
 850         task_ledger_template = t;
 851 }
 852
     /*
      *      task_create_internal:
      *
      *      Allocate and initialize a new task, returning it in *child_task.
      *
      *      parent_task        Task to inherit from, or TASK_NULL.  With a parent
      *                         the new task copies its security/audit tokens,
      *                         shared region, 64-bit address flag, image-info
      *                         address/size, user wire limit and
      *                         dispatchqueue_offset; without one it gets the
      *                         kernel security/audit tokens.
      *      parent_coalitions  Only consulted when CONFIG_COALITIONS is built in
      *                         (hence __unused): coalitions to adopt in
      *                         preference to the parent task's own.
      *      inherit_memory     If TRUE (and TF_CORPSE_FORK is not set in t_flags)
      *                         the address space is forked from parent_task->map,
      *                         which is dereferenced without a NULL check;
      *                         otherwise a fresh map is created.
      *      is_64bit           Passed to pmap_create() for a fresh map; also marks
      *                         a parentless task as 64-bit (under __LP64__).
      *      t_flags            Stored in new_task->t_flags.  TF_CORPSE_FORK also
      *                         selects copying the parent's accounting counters
      *                         and the corpse coalition path.
      *      child_task         OUT: the new task.  Written only on success.
      *
      *      Returns KERN_RESOURCE_SHORTAGE if the task structure or its ledger
      *      cannot be allocated, KERN_SUCCESS otherwise.  On success the task
      *      holds two references (one for being alive, one for the caller) and
      *      has been entered on the global tasks queue.
      */
 853 kern_return_t
 854 task_create_internal(
 855         task_t          parent_task,
 856         coalition_t     *parent_coalitions __unused,
 857         boolean_t       inherit_memory,
 858         boolean_t       is_64bit,
 859         uint32_t        t_flags,
 860         task_t          *child_task)            /* OUT */
 861 {
 862         task_t                  new_task;
 863         vm_shared_region_t      shared_region;
 864         ledger_t                ledger = NULL;
 865
 866         new_task = (task_t) zalloc(task_zone);
 867
 868         if (new_task == TASK_NULL)
 869                 return(KERN_RESOURCE_SHORTAGE);
 870
 871         /* one ref for just being alive; one for our caller */
 872         new_task->ref_count = 2;
 873
 874         /* allocate with active entries */
 875         assert(task_ledger_template != NULL);
 876         if ((ledger = ledger_instantiate(task_ledger_template,
 877                         LEDGER_CREATE_ACTIVE_ENTRIES)) == NULL) {
 878                 zfree(task_zone, new_task);
 879                 return(KERN_RESOURCE_SHORTAGE);
 880         }
 881
 882         new_task->ledger = ledger;
 883
 884 #if defined(CONFIG_SCHED_MULTIQ)
 885         new_task->sched_group = sched_group_create();
 886 #endif
 887
 888         /* if inherit_memory is true, parent_task MUST not be NULL */
 889         if (!(t_flags & TF_CORPSE_FORK) && inherit_memory)
 890                 new_task->map = vm_map_fork(ledger, parent_task->map, 0);
 891         else
 892                 new_task->map = vm_map_create(pmap_create(ledger, 0, is_64bit),
 893                                 (vm_map_offset_t)(VM_MIN_ADDRESS),
 894                                 (vm_map_offset_t)(VM_MAX_ADDRESS), TRUE);
 895
 896         /* Inherit memlock limit from parent */
 897         if (parent_task)
 898                 vm_map_set_user_wire_limit(new_task->map, (vm_size_t)parent_task->map->user_wire_limit);
 899
 900         lck_mtx_init(&new_task->lock, &task_lck_grp, &task_lck_attr);
 901         queue_init(&new_task->threads);
 902         new_task->suspend_count = 0;
 903         new_task->thread_count = 0;
 904         new_task->active_thread_count = 0;
 905         new_task->user_stop_count = 0;
 906         new_task->legacy_stop_count = 0;
 907         new_task->active = TRUE;
 908         new_task->halting = FALSE;
 909         new_task->user_data = NULL;
 910         new_task->priv_flags = 0;
 911         new_task->t_flags = t_flags;
 912         new_task->importance = 0;
 913         new_task->corpse_info_kernel = NULL;
 914         new_task->exec_token = 0;
 915
 916 #if CONFIG_ATM
 917         new_task->atm_context = NULL;
 918 #endif
 919 #if CONFIG_BANK
 920         new_task->bank_context = NULL;
 921 #endif
 922
 923 #ifdef MACH_BSD
 924         new_task->bsd_info = NULL;
 925         new_task->corpse_info = NULL;
 926 #endif /* MACH_BSD */
 927
 928 #if CONFIG_MACF
 929         new_task->crash_label = NULL;
 930 #endif
 931
 932 #if CONFIG_MEMORYSTATUS
             /* apply the global footprint limit, if one is configured, to this task's ledger */
 933         if (max_task_footprint != 0) {
 934                 ledger_set_limit(ledger, task_ledgers.phys_footprint, max_task_footprint, PHYS_FOOTPRINT_WARNING_LEVEL);
 935         }
 936 #endif /* CONFIG_MEMORYSTATUS */
 937
             /* enable the wakeups monitor with its default settings when a default rate is configured */
 938         if (task_wakeups_monitor_rate != 0) {
 939                 uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
 940                 int32_t  rate; // Ignored because of WAKEMON_SET_DEFAULTS
 941                 task_wakeups_monitor_ctl(new_task, &flags, &rate);
 942         }
 943
 944 #if CONFIG_IO_ACCOUNTING
 945         uint32_t flags = IOMON_ENABLE;
 946         task_io_monitor_ctl(new_task, &flags);
 947 #endif /* CONFIG_IO_ACCOUNTING */
 948
 949 #if defined(__i386__) || defined(__x86_64__)
 950         new_task->i386_ldt = 0;
 951 #endif
 952
 953         new_task->task_debug = NULL;
 954
 955 #if DEVELOPMENT || DEBUG
 956         new_task->task_unnested = FALSE;
 957         new_task->task_disconnected_count = 0;
 958 #endif
 959         queue_init(&new_task->semaphore_list);
 960         new_task->semaphores_owned = 0;
 961
 962         ipc_task_init(new_task, parent_task);
 963
 964         new_task->vtimers = 0;
 965
 966         new_task->shared_region = NULL;
 967
 968         new_task->affinity_space = NULL;
 969
 970         new_task->pidsuspended = FALSE;
 971         new_task->frozen = FALSE;
 972         new_task->changing_freeze_state = FALSE;
 973         new_task->rusage_cpu_flags = 0;
 974         new_task->rusage_cpu_percentage = 0;
 975         new_task->rusage_cpu_interval = 0;
 976         new_task->rusage_cpu_deadline = 0;
 977         new_task->rusage_cpu_callt = NULL;
 978 #if MACH_ASSERT
 979         new_task->suspends_outstanding = 0;
 980 #endif
 981
 982 #if HYPERVISOR
 983         new_task->hv_task_target = NULL;
 984 #endif /* HYPERVISOR */
 985
 986
 987         new_task->mem_notify_reserved = 0;
 988 #if IMPORTANCE_INHERITANCE
 989         new_task->task_imp_base = NULL;
 990 #endif /* IMPORTANCE_INHERITANCE */
 991
 992 #if     defined(__x86_64__)
 993         new_task->uexc_range_start = new_task->uexc_range_size = new_task->uexc_handler = 0;
 994 #endif
 995
 996         new_task->requested_policy = default_task_requested_policy;
 997         new_task->effective_policy = default_task_effective_policy;
 998
 999         if (parent_task != TASK_NULL) {
1000                 new_task->sec_token = parent_task->sec_token;
1001                 new_task->audit_token = parent_task->audit_token;
1002
1003                 /* inherit the parent's shared region */
1004                 shared_region = vm_shared_region_get(parent_task);
1005                 vm_shared_region_set(new_task, shared_region);
1006
1007                 if(task_has_64BitAddr(parent_task))
1008                         task_set_64BitAddr(new_task);
1009                 new_task->all_image_info_addr = parent_task->all_image_info_addr;
1010                 new_task->all_image_info_size = parent_task->all_image_info_size;
1011
1012 #if defined(__i386__) || defined(__x86_64__)
1013                 if (inherit_memory && parent_task->i386_ldt)
1014                         new_task->i386_ldt = user_ldt_copy(parent_task->i386_ldt);
1015 #endif
1016                 if (inherit_memory && parent_task->affinity_space)
1017                         task_affinity_create(parent_task, new_task);
1018
                     /* the freshly chosen pset is written back to the parent's hint as well */
1019                 new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task);
1020
1021 #if IMPORTANCE_INHERITANCE
                     /*
                      * Mirror the parent's importance donor / receiver /
                      * de-nap receiver marks onto the new task.  The
                      * importance object is looked up at most once and is
                      * released again after the marks have been applied.
                      */
1022                 ipc_importance_task_t new_task_imp = IIT_NULL;
1023
1024                 if (task_is_marked_importance_donor(parent_task)) {
1025                         new_task_imp = ipc_importance_for_task(new_task, FALSE);
1026                         assert(IIT_NULL != new_task_imp);
1027                         ipc_importance_task_mark_donor(new_task_imp, TRUE);
1028                 }
1029                 /* Embedded doesn't want this to inherit */
1030                 if (task_is_marked_importance_receiver(parent_task)) {
1031                         if (IIT_NULL == new_task_imp)
1032                                 new_task_imp = ipc_importance_for_task(new_task, FALSE);
1033                         assert(IIT_NULL != new_task_imp);
1034                         ipc_importance_task_mark_receiver(new_task_imp, TRUE);
1035                 }
1036                 if (task_is_marked_importance_denap_receiver(parent_task)) {
1037                         if (IIT_NULL == new_task_imp)
1038                                 new_task_imp = ipc_importance_for_task(new_task, FALSE);
1039                         assert(IIT_NULL != new_task_imp);
1040                         ipc_importance_task_mark_denap_receiver(new_task_imp, TRUE);
1041                 }
1042
1043                 if (IIT_NULL != new_task_imp) {
1044                         assert(new_task->task_imp_base == new_task_imp);
1045                         ipc_importance_task_release(new_task_imp);
1046                 }
1047 #endif /* IMPORTANCE_INHERITANCE */
1048
1049                 new_task->priority = BASEPRI_DEFAULT;
1050                 new_task->max_priority = MAXPRI_USER;
1051
1052                 task_policy_create(new_task, parent_task);
1053         } else {
1054                 new_task->sec_token = KERNEL_SECURITY_TOKEN;
1055                 new_task->audit_token = KERNEL_AUDIT_TOKEN;
1056 #ifdef __LP64__
1057                 if(is_64bit)
1058                         task_set_64BitAddr(new_task);
1059 #endif
1060                 new_task->all_image_info_addr = (mach_vm_address_t)0;
1061                 new_task->all_image_info_size = (mach_vm_size_t)0;
1062
1063                 new_task->pset_hint = PROCESSOR_SET_NULL;
1064
                     /*
                      * NOTE(review): kernel_task not being set yet presumably
                      * means this is the kernel task itself being created,
                      * which is why it gets kernel priorities -- confirm.
                      */
1065                 if (kernel_task == TASK_NULL) {
1066                         new_task->priority = BASEPRI_KERNEL;
1067                         new_task->max_priority = MAXPRI_KERNEL;
1068                 } else {
1069                         new_task->priority = BASEPRI_DEFAULT;
1070                         new_task->max_priority = MAXPRI_USER;
1071                 }
1072         }
1073
1074         bzero(new_task->coalition, sizeof(new_task->coalition));
1075         for (int i = 0; i < COALITION_NUM_TYPES; i++)
1076                 queue_chain_init(new_task->task_coalition[i]);
1077
1078         /* Allocate I/O Statistics */
             /*
              * NOTE(review): an allocation failure is only caught by the
              * assert(); where assert() compiles to nothing the bzero()
              * below would be handed NULL.  Confirm whether kalloc() can
              * fail here.
              */
1079         new_task->task_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info));
1080         assert(new_task->task_io_stats != NULL);
1081         bzero(new_task->task_io_stats, sizeof(struct io_stat_info));
1082
1083         bzero(&(new_task->cpu_time_qos_stats), sizeof(struct _cpu_time_qos_stats));
1084
1085         bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics));
1086
1087         /* Copy resource accounting info from the parent for a corpse-forked task. */
1088         if (parent_task != NULL && (t_flags & TF_CORPSE_FORK)) {
1089                 new_task->total_user_time = parent_task->total_user_time;
1090                 new_task->total_system_time = parent_task->total_system_time;
1091                 ledger_rollup(new_task->ledger, parent_task->ledger);
1092                 new_task->faults = parent_task->faults;
1093                 new_task->pageins = parent_task->pageins;
1094                 new_task->cow_faults = parent_task->cow_faults;
1095                 new_task->messages_sent = parent_task->messages_sent;
1096                 new_task->messages_received = parent_task->messages_received;
1097                 new_task->syscalls_mach = parent_task->syscalls_mach;
1098                 new_task->syscalls_unix = parent_task->syscalls_unix;
1099                 new_task->c_switch = parent_task->c_switch;
1100                 new_task->p_switch = parent_task->p_switch;
1101                 new_task->ps_switch = parent_task->ps_switch;
1102                 new_task->extmod_statistics = parent_task->extmod_statistics;
1103                 new_task->low_mem_notified_warn = parent_task->low_mem_notified_warn;
1104                 new_task->low_mem_notified_critical = parent_task->low_mem_notified_critical;
1105                 new_task->purged_memory_warn = parent_task->purged_memory_warn;
1106                 new_task->purged_memory_critical = parent_task->purged_memory_critical;
1107                 new_task->low_mem_privileged_listener = parent_task->low_mem_privileged_listener;
1108                 *new_task->task_io_stats = *parent_task->task_io_stats;
1109                 new_task->cpu_time_qos_stats = parent_task->cpu_time_qos_stats;
1110                 new_task->task_timer_wakeups_bin_1 = parent_task->task_timer_wakeups_bin_1;
1111                 new_task->task_timer_wakeups_bin_2 = parent_task->task_timer_wakeups_bin_2;
1112                 new_task->task_gpu_ns = parent_task->task_gpu_ns;
1113                 new_task->task_immediate_writes = parent_task->task_immediate_writes;
1114                 new_task->task_deferred_writes = parent_task->task_deferred_writes;
1115                 new_task->task_invalidated_writes = parent_task->task_invalidated_writes;
1116                 new_task->task_metadata_writes = parent_task->task_metadata_writes;
1117                 new_task->task_energy = parent_task->task_energy;
1118         } else {
1119                 /* Initialize to zero for standard fork/spawn case */
1120                 new_task->total_user_time = 0;
1121                 new_task->total_system_time = 0;
1122                 new_task->faults = 0;
1123                 new_task->pageins = 0;
1124                 new_task->cow_faults = 0;
1125                 new_task->messages_sent = 0;
1126                 new_task->messages_received = 0;
1127                 new_task->syscalls_mach = 0;
1128                 new_task->syscalls_unix = 0;
1129                 new_task->c_switch = 0;
1130                 new_task->p_switch = 0;
1131                 new_task->ps_switch = 0;
1132                 new_task->low_mem_notified_warn = 0;
1133                 new_task->low_mem_notified_critical = 0;
1134                 new_task->purged_memory_warn = 0;
1135                 new_task->purged_memory_critical = 0;
1136                 new_task->low_mem_privileged_listener = 0;
1137                 new_task->task_timer_wakeups_bin_1 = 0;
1138                 new_task->task_timer_wakeups_bin_2 = 0;
1139                 new_task->task_gpu_ns = 0;
1140                 new_task->task_immediate_writes = 0;
1141                 new_task->task_deferred_writes = 0;
1142                 new_task->task_invalidated_writes = 0;
1143                 new_task->task_metadata_writes = 0;
1144                 new_task->task_energy = 0;
1145         }
1146
1147
1148 #if CONFIG_COALITIONS
             /*
              * Coalition membership: explicit parent_coalitions win, then the
              * parent task's coalitions, then the init-task path; corpse
              * forks take their own adoption path.
              */
1149         if (!(t_flags & TF_CORPSE_FORK)) {
1150                 /* TODO: there is no graceful failure path here... */
1151                 if (parent_coalitions && parent_coalitions[COALITION_TYPE_RESOURCE]) {
1152                         coalitions_adopt_task(parent_coalitions, new_task);
1153                 } else if (parent_task && parent_task->coalition[COALITION_TYPE_RESOURCE]) {
1154                         /*
1155                          * all tasks at least have a resource coalition, so
1156                          * if the parent has one then inherit all coalitions
1157                          * the parent is a part of
1158                          */
1159                         coalitions_adopt_task(parent_task->coalition, new_task);
1160                 } else {
1161                         /* TODO: assert that new_task will be PID 1 (launchd) */
1162                         coalitions_adopt_init_task(new_task);
1163                 }
1164         } else {
1165                 coalitions_adopt_corpse_task(new_task);
1166         }
1167
1168         if (new_task->coalition[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
1169                 panic("created task is not a member of a resource coalition");
1170         }
1171 #endif /* CONFIG_COALITIONS */
1172
1173         new_task->dispatchqueue_offset = 0;
1174         if (parent_task != NULL) {
1175                 new_task->dispatchqueue_offset = parent_task->dispatchqueue_offset;
1176         }
1177
             /* carry over only the parent's VM_BACKING_STORE_PRIV bit, and only while vm_backing_store_low is set */
1178         if (vm_backing_store_low && parent_task != NULL)
1179                 new_task->priv_flags |= (parent_task->priv_flags&VM_BACKING_STORE_PRIV);
1180
1181         new_task->task_volatile_objects = 0;
1182         new_task->task_nonvolatile_objects = 0;
1183         new_task->task_purgeable_disowning = FALSE;
1184         new_task->task_purgeable_disowned = FALSE;
1185
1186 #if CONFIG_SECLUDED_MEMORY
1187         new_task->task_can_use_secluded_mem = FALSE;
1188         new_task->task_could_use_secluded_mem = FALSE;
1189         new_task->task_could_also_use_secluded_mem = FALSE;
1190 #endif /* CONFIG_SECLUDED_MEMORY */
1191
1192         queue_init(&new_task->io_user_clients);
1193
1194         ipc_task_enable(new_task);
1195
             /*
              * Enter the task on the global tasks queue.  If tasks_suspend_state
              * is set the new task is suspended while the list lock is still held.
              */
1196         lck_mtx_lock(&tasks_threads_lock);
1197         queue_enter(&tasks, new_task, task_t, tasks);
1198         tasks_count++;
1199         if (tasks_suspend_state) {
1200             task_suspend_internal(new_task);
1201         }
1202         lck_mtx_unlock(&tasks_threads_lock);
1203
1204         *child_task = new_task;
1205         return(KERN_SUCCESS);
1206 }
1207
1208 int task_dropped_imp_count = 0;     /* NOTE(review): not read or written in the visible part of this file; purpose unconfirmed */
1209
1210 /*
1211  *      task_deallocate:
1212  *
1213  *      Drop a reference on a task.
      *
      *      A TASK_NULL argument is ignored.  While other references remain
      *      the function returns right after the drop.  With
      *      IMPORTANCE_INHERITANCE, reaching exactly one remaining reference
      *      disconnects the task's importance object (if any) and returns.
      *      Once no references remain the task is unlinked from the
      *      terminated_tasks queue, its per-task resources are released, its
      *      statistics are folded into the global dead-task counters and the
      *      task structure is returned to task_zone.
1214  */
1215 void
1216 task_deallocate(
1217         task_t          task)
1218 {
1219         ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups;
1220         uint32_t refs;
1221
1222         if (task == TASK_NULL)
1223             return;
1224
             /* refs is treated below as the number of references still outstanding after this drop */
1225         refs = task_deallocate_internal(task);
1226
1227 #if IMPORTANCE_INHERITANCE
1228         if (refs > 1)
1229                 return;
1230
1231         if (refs == 1) {
1232                 /*
1233                  * If last ref potentially comes from the task's importance,
1234                  * disconnect it.  But more task refs may be added before
1235                  * that completes, so wait for the reference to go to zero
1236                  * naturally (it may happen on a recursive task_deallocate()
1237                  * from the ipc_importance_disconnect_task() call).
1238                  */
1239                 if (IIT_NULL != task->task_imp_base)
1240                         ipc_importance_disconnect_task(task);
1241                 return;
1242         }
1243 #else
1244         if (refs > 0)
1245                 return;
1246 #endif /* IMPORTANCE_INHERITANCE */
1247
             /* no references left: unlink the task from the terminated_tasks queue */
1248         lck_mtx_lock(&tasks_threads_lock);
1249         queue_remove(&terminated_tasks, task, task_t, tasks);
1250         terminated_tasks_count--;
1251         lck_mtx_unlock(&tasks_threads_lock);
1252
1253         /*
1254          * remove the reference on atm descriptor
1255          */
1256         task_atm_reset(task);
1257
1258         /*
1259          * remove the reference on bank context
1260          */
1261         task_bank_reset(task);
1262
1263         if (task->task_io_stats)
1264                 kfree(task->task_io_stats, sizeof(struct io_stat_info));
1265
1266         /*
1267          *      Give the machine dependent code a chance
1268          *      to perform cleanup before ripping apart
1269          *      the task.
1270          */
1271         machine_task_terminate(task);
1272
1273         ipc_task_terminate(task);
1274
1275         /* let iokit know */
1276         iokit_task_terminate(task);
1277
1278         if (task->affinity_space)
1279                 task_affinity_deallocate(task);
1280
1281 #if MACH_ASSERT
             /* sanity check: the task and its pmap must be charging the same ledger */
1282         if (task->ledger != NULL &&
1283             task->map != NULL &&
1284             task->map->pmap != NULL &&
1285             task->map->pmap->ledger != NULL) {
1286                 assert(task->ledger == task->map->pmap->ledger);
1287         }
1288 #endif /* MACH_ASSERT */
1289
             /* after disowning, no purgeable objects may remain accounted to the task */
1290         vm_purgeable_disown(task);
1291         assert(task->task_purgeable_disowned);
1292         if (task->task_volatile_objects != 0 ||
1293             task->task_nonvolatile_objects != 0) {
1294                 panic("task_deallocate(%p): "
1295                       "volatile_objects=%d nonvolatile_objects=%d\n",
1296                       task,
1297                       task->task_volatile_objects,
1298                       task->task_nonvolatile_objects);
1299         }
1300
1301         vm_map_deallocate(task->map);
1302         is_release(task->itk_space);
1303
             /*
              * Only the first output of each lookup is used; debit is a
              * scratch value here.
              * NOTE(review): unlike the tkm_* lookups further down, these
              * return values are not checked, so interrupt_wakeups and
              * platform_idle_wakeups would be read uninitialized if a
              * lookup failed -- confirm that it cannot.
              */
1304         ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
1305                            &interrupt_wakeups, &debit);
1306         ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
1307                            &platform_idle_wakeups, &debit);
1308
1309 #if defined(CONFIG_SCHED_MULTIQ)
1310         sched_group_destroy(task->sched_group);
1311 #endif
1312
1313         /* Accumulate statistics for dead tasks */
1314         lck_spin_lock(&dead_task_statistics_lock);
1315         dead_task_statistics.total_user_time += task->total_user_time;
1316         dead_task_statistics.total_system_time += task->total_system_time;
1317
1318         dead_task_statistics.task_interrupt_wakeups += interrupt_wakeups;
1319         dead_task_statistics.task_platform_idle_wakeups += platform_idle_wakeups;
1320
1321         dead_task_statistics.task_timer_wakeups_bin_1 += task->task_timer_wakeups_bin_1;
1322         dead_task_statistics.task_timer_wakeups_bin_2 += task->task_timer_wakeups_bin_2;
1323
1324         lck_spin_unlock(&dead_task_statistics_lock);
1325         lck_mtx_destroy(&task->lock, &task_lck_grp);
1326
             /*
              * Fold the task's tkm_private / tkm_shared ledger totals into
              * the global counters; a zero return from ledger_get_entries()
              * is taken as success.
              */
1327         if (!ledger_get_entries(task->ledger, task_ledgers.tkm_private, &credit,
1328             &debit)) {
1329                 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc);
1330                 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_private.free);
1331         }
1332         if (!ledger_get_entries(task->ledger, task_ledgers.tkm_shared, &credit,
1333             &debit)) {
1334                 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_shared.alloc);
1335                 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_shared.free);
1336         }
1337         ledger_dereference(task->ledger);
1338
1339 #if TASK_REFERENCE_LEAK_DEBUG
1340         btlog_remove_entries_for_element(task_ref_btlog, task);
1341 #endif
1342
1343 #if CONFIG_COALITIONS
1344         task_release_coalitions(task);
1345 #endif /* CONFIG_COALITIONS */
1346
1347         bzero(task->coalition, sizeof(task->coalition));
1348
1349 #if MACH_BSD
1350         /* clean up collected information since last reference to task is gone */
1351         if (task->corpse_info) {
1352                 task_crashinfo_destroy(task->corpse_info, RELEASE_CORPSE_REF);
1353                 task->corpse_info = NULL;
1354         }
1355 #endif
             /* kernel-side corpse blob; freed with the same size constant task_collect_crash_info() allocates it with */
1356         if (task->corpse_info_kernel) {
1357                 kfree(task->corpse_info_kernel, CORPSEINFO_ALLOCATION_SIZE);
1358         }
1359
1360 #if CONFIG_MACF
1361         if (task->crash_label) {
1362                 mac_exc_action_label_task_destroy(task);
1363         }
1364 #endif
1365
1366         zfree(task_zone, task);
1367 }
1368
1369 /*
1370  *      task_name_deallocate:
1371  *
1372  *      Drop a reference on a task name.
1373  */
1374 void
1375 task_name_deallocate(
1376         task_name_t             task_name)
1377 {
1378         return(task_deallocate((task_t)task_name));
1379 }
1380
1381 /*
1382  *      task_suspension_token_deallocate:
1383  *
1384  *      Drop a reference on a task suspension token.
1385  */
1386 void
1387 task_suspension_token_deallocate(
1388         task_suspension_token_t         token)
1389 {
1390         return(task_deallocate((task_t)token));
1391 }
1392
1393
1394 /*
1395  * task_collect_crash_info:
1396  *
1397  * collect crash info from bsd and mach based data
1398  */
1399 kern_return_t
1400 task_collect_crash_info(task_t task, struct proc *proc, int is_corpse_fork)
1401 {
1402         kern_return_t kr = KERN_SUCCESS;
1403
1404         kcdata_descriptor_t crash_data = NULL;
1405         kcdata_descriptor_t crash_data_release = NULL;
1406         mach_msg_type_number_t size = CORPSEINFO_ALLOCATION_SIZE;
1407         mach_vm_offset_t crash_data_ptr = 0;
1408         void *crash_data_kernel = NULL;
1409         void *crash_data_kernel_release = NULL;
1410         int corpse_blob_kernel_alloc = (is_corpse_fork || unify_corpse_blob_alloc);
1411
1412         if (!corpses_enabled()) {
1413                 return KERN_NOT_SUPPORTED;
1414         }
1415
1416         task_lock(task);
1417
1418         assert(is_corpse_fork || task->bsd_info != NULL);
1419         if (task->corpse_info == NULL && (is_corpse_fork || task->bsd_info != NULL)) {
1420 #if CONFIG_MACF
1421                 /* Update the corpse label, used by the exception delivery mac hook */
1422                 mac_exc_action_label_task_update(task, proc);
1423 #endif
1424                 task_unlock(task);
1425
1426                 if (!corpse_blob_kernel_alloc) {
1427                         /* map crash data memory in task's vm map */
1428                         kr = mach_vm_allocate(task->map, &crash_data_ptr, size, (VM_MAKE_TAG(VM_MEMORY_CORPSEINFO) | VM_FLAGS_ANYWHERE));
1429                 } else {
1430                         crash_data_kernel = (void *) kalloc(CORPSEINFO_ALLOCATION_SIZE);
1431                         if (crash_data_kernel == 0)
1432                                 kr = KERN_RESOURCE_SHORTAGE;
1433                         bzero(crash_data_kernel, CORPSEINFO_ALLOCATION_SIZE);
1434                         crash_data_ptr = (mach_vm_offset_t) crash_data_kernel;
1435                 }
1436                 if (kr != KERN_SUCCESS)
1437                         goto out_no_lock;
1438
1439                 /* Do not get a corpse ref for corpse fork */
1440                 crash_data = task_crashinfo_alloc_init((mach_vm_address_t)crash_data_ptr, size, is_corpse_fork ? !GET_CORPSE_REF : GET_CORPSE_REF, corpse_blob_kernel_alloc ? KCFLAG_USE_MEMCOPY: KCFLAG_USE_COPYOUT);
1441                 if (crash_data) {
1442                         task_lock(task);
1443                         crash_data_release = task->corpse_info;
1444                         crash_data_kernel_release = task->corpse_info_kernel;
1445                         task->corpse_info = crash_data;
1446                         task->corpse_info_kernel = crash_data_kernel;
1447
1448                         task_unlock(task);
1449                         kr = KERN_SUCCESS;
1450                 } else {
1451                         /* if failed to create corpse info, free the mapping */
1452                         if (!corpse_blob_kernel_alloc) {
1453                                 if (KERN_SUCCESS != mach_vm_deallocate(task->map, crash_data_ptr, size)) {
1454                                         printf("mach_vm_deallocate failed to clear corpse_data for pid %d.\n", task_pid(task));
1455                                 }
1456                         } else {
1457                                 kfree(crash_data_kernel, CORPSEINFO_ALLOCATION_SIZE);
1458                         }
1459                         kr = KERN_FAILURE;
1460                 }
1461
1462                 if (crash_data_release != NULL) {
1463                         task_crashinfo_destroy(crash_data_release, is_corpse_fork ? !RELEASE_CORPSE_REF : RELEASE_CORPSE_REF);
1464                 }
1465                 if (crash_data_kernel_release != NULL) {
1466                         kfree(crash_data_kernel_release, CORPSEINFO_ALLOCATION_SIZE);
1467                 }
1468         } else {
1469                 task_unlock(task);
1470         }
1471
1472 out_no_lock:
1473         return kr;
1474 }
1475
1476 /*
1477  * task_deliver_crash_notification:
1478  *
1479  * Makes outcall to registered host port for a corpse.
1480  */
1481 kern_return_t
1482 task_deliver_crash_notification(task_t task, thread_t thread, mach_exception_data_type_t subcode)
1483 {
1484         kcdata_descriptor_t crash_info = task->corpse_info;
1485         thread_t th_iter = NULL;
1486         kern_return_t kr = KERN_SUCCESS;
1487         wait_interrupt_t wsave;
1488         mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
1489         ipc_port_t task_port, old_notify;
1490
1491         if (crash_info == NULL)
1492                 return KERN_FAILURE;
1493
1494         task_lock(task);
1495         if (task_is_a_corpse_fork(task)) {
1496                 /* Populate code with EXC_RESOURCE for corpse fork */
1497                 code[0] = EXC_RESOURCE;
1498                 code[1] = subcode;
1499         } else if (unify_corpse_blob_alloc) {
1500                 /* Populate code with EXC_CRASH for corpses */
1501                 code[0] = EXC_CRASH;
1502                 code[1] = 0;
1503                 /* Update the code[1] if the boot-arg corpse_for_fatal_memkill is set */
1504                 if (corpse_for_fatal_memkill) {
1505                         code[1] = subcode;
1506                 }
1507         } else {
1508                 /* Populate code with address and length for EXC_CRASH */
1509                 code[0] = crash_info->kcd_addr_begin;
1510                 code[1] = crash_info->kcd_length;
1511         }
1512         queue_iterate(&task->threads, th_iter, thread_t, task_threads)
1513         {
1514                 if (th_iter->corpse_dup == FALSE) {
1515                         ipc_thread_reset(th_iter);
1516                 }
1517         }
1518         task_unlock(task);
1519
1520         /* Arm the no-sender notification for taskport */
1521         task_reference(task);
1522         task_port = convert_task_to_port(task);
1523         ip_lock(task_port);
1524         assert(ip_active(task_port));
1525         ipc_port_nsrequest(task_port, task_port->ip_mscount, ipc_port_make_sonce_locked(task_port), &old_notify);
1526         /* port unlocked */
1527         assert(IP_NULL == old_notify);
1528
1529         wsave = thread_interrupt_level(THREAD_UNINT);
1530         kr = exception_triage_thread(EXC_CORPSE_NOTIFY, code, EXCEPTION_CODE_MAX, thread);
1531         if (kr != KERN_SUCCESS) {
1532                 printf("Failed to send exception EXC_CORPSE_NOTIFY. error code: %d for pid %d\n", kr, task_pid(task));
1533         }
1534
1535         (void)thread_interrupt_level(wsave);
1536
1537         /*
1538          * Drop the send right on task port, will fire the
1539          * no-sender notification if exception deliver failed.
1540          */
1541         ipc_port_release_send(task_port);
1542         return kr;
1543 }
1544
1545 /*
1546  *      task_terminate:
1547  *
1548  *      Terminate the specified task.  See comments on thread_terminate
1549  *      (kern/thread.c) about problems with terminating the "current task."
1550  */
1551
1552 kern_return_t
1553 task_terminate(
1554         task_t          task)
1555 {
1556         if (task == TASK_NULL)
1557                 return (KERN_INVALID_ARGUMENT);
1558
1559         if (task->bsd_info)
1560                 return (KERN_FAILURE);
1561
1562         return (task_terminate_internal(task));
1563 }
1564
#if MACH_ASSERT
/* Prototypes for helpers referenced by MACH_ASSERT-only code in this file. */
extern int	proc_pid(struct proc *p);
extern void	proc_name_kdp(task_t task, char *name_buf, int name_buf_size);
#endif /* MACH_ASSERT */
1569
1570 #define VM_MAP_PARTIAL_REAP 0x54  /* 0x150 */
1571 static void
1572 __unused task_partial_reap(task_t task, __unused int pid)
1573 {
1574         unsigned int    reclaimed_resident = 0;
1575         unsigned int    reclaimed_compressed = 0;
1576         uint64_t        task_page_count;
1577
1578         task_page_count = (get_task_phys_footprint(task) / PAGE_SIZE_64);
1579
1580         KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_START),
1581                               pid, task_page_count, 0, 0, 0);
1582
1583         vm_map_partial_reap(task->map, &reclaimed_resident, &reclaimed_compressed);
1584
1585         KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_END),
1586                               pid, reclaimed_resident, reclaimed_compressed, 0, 0);
1587 }
1588
1589 kern_return_t
1590 task_mark_corpse(task_t task)
1591 {
1592         kern_return_t kr = KERN_SUCCESS;
1593         thread_t self_thread;
1594         (void) self_thread;
1595         wait_interrupt_t wsave;
1596
1597         assert(task != kernel_task);
1598         assert(task == current_task());
1599         assert(!task_is_a_corpse(task));
1600
1601         kr = task_collect_crash_info(task, (struct proc*)task->bsd_info, FALSE);
1602         if (kr != KERN_SUCCESS) {
1603                 return kr;
1604         }
1605
1606         self_thread = current_thread();
1607
1608         wsave = thread_interrupt_level(THREAD_UNINT);
1609         task_lock(task);
1610
1611         task_set_corpse_pending_report(task);
1612         task_set_corpse(task);
1613
1614         kr = task_start_halt_locked(task, TRUE);
1615         assert(kr == KERN_SUCCESS);
1616
1617         ipc_task_reset(task);
1618         /* Remove the naked send right for task port, needed to arm no sender notification */
1619         task_set_special_port(task, TASK_KERNEL_PORT, IPC_PORT_NULL);
1620         ipc_task_enable(task);
1621
1622         task_unlock(task);
1623         /* terminate the ipc space */
1624         ipc_space_terminate(task->itk_space);
1625
1626         /* Add it to global corpse task list */
1627         task_add_to_corpse_task_list(task);
1628
1629         task_start_halt(task);
1630         thread_terminate_internal(self_thread);
1631
1632         (void) thread_interrupt_level(wsave);
1633         assert(task->halting == TRUE);
1634         return kr;
1635 }
1636
1637 /*
1638  *      task_clear_corpse
1639  *
1640  *      Clears the corpse pending bit on task.
1641  *      Removes inspection bit on the threads.
1642  */
1643 void
1644 task_clear_corpse(task_t task)
1645 {
1646         thread_t th_iter = NULL;
1647
1648         task_lock(task);
1649         queue_iterate(&task->threads, th_iter, thread_t, task_threads)
1650         {
1651                 thread_mtx_lock(th_iter);
1652                 th_iter->inspection = FALSE;
1653                 thread_mtx_unlock(th_iter);
1654         }
1655
1656         thread_terminate_crashed_threads();
1657         /* remove the pending corpse report flag */
1658         task_clear_corpse_pending_report(task);
1659
1660         task_unlock(task);
1661 }
1662
1663 /*
1664  *      task_port_notify
1665  *
1666  *      Called whenever the Mach port system detects no-senders on
1667  *      the task port of a corpse.
1668  *      Each notification that comes in should terminate the task (corpse).
1669  */
1670 void
1671 task_port_notify(mach_msg_header_t *msg)
1672 {
1673         mach_no_senders_notification_t *notification = (void *)msg;
1674         ipc_port_t port = notification->not_header.msgh_remote_port;
1675         task_t task;
1676
1677         assert(ip_active(port));
1678         assert(IKOT_TASK == ip_kotype(port));
1679         task = (task_t) port->ip_kobject;
1680
1681         assert(task_is_a_corpse(task));
1682
1683         /* Remove the task from global corpse task list */
1684         task_remove_from_corpse_task_list(task);
1685
1686         task_clear_corpse(task);
1687         task_terminate_internal(task);
1688 }
1689
1690 /*
1691  *      task_wait_till_threads_terminate_locked
1692  *
1693  *      Wait till all the threads in the task are terminated.
1694  *      Might release the task lock and re-acquire it.
1695  */
1696 void
1697 task_wait_till_threads_terminate_locked(task_t task)
1698 {
1699         /* wait for all the threads in the task to terminate */
1700         while (task->active_thread_count != 0) {
1701                 assert_wait((event_t)&task->active_thread_count, THREAD_UNINT);
1702                 task_unlock(task);
1703                 thread_block(THREAD_CONTINUE_NULL);
1704
1705                 task_lock(task);
1706         }
1707 }
1708
1709 /*
1710  *      task_duplicate_map_and_threads
1711  *
1712  *      Copy vmmap of source task.
1713  *      Copy active threads from source task to destination task.
1714  *      Source task would be suspended during the copy.
1715  */
1716 kern_return_t
1717 task_duplicate_map_and_threads(
1718                 task_t task,
1719                 void *p,
1720                 task_t new_task,
1721                 thread_t *thread_ret,
1722                 int is64bit,
1723                 uint64_t **udata_buffer,
1724                 int *size,
1725                 int *num_udata)
1726 {
1727         kern_return_t kr = KERN_SUCCESS;
1728         int active;
1729         thread_t thread, self, thread_return = THREAD_NULL;
1730         thread_t new_thread = THREAD_NULL;
1731         thread_t *thread_array;
1732         uint32_t active_thread_count = 0, array_count = 0, i;
1733         vm_map_t oldmap;
1734         uint64_t *buffer = NULL;
1735         int buf_size = 0;
1736         int est_knotes = 0, num_knotes = 0;
1737
1738         self = current_thread();
1739
1740         /*
1741          * Suspend the task to copy thread state, use the internal
1742          * variant so that no user-space process can resume
1743          * the task from under us
1744          */
1745         kr = task_suspend_internal(task);
1746         if (kr != KERN_SUCCESS) {
1747                 return kr;
1748         }
1749
1750         if (task->map->disable_vmentry_reuse == TRUE) {
1751                 /*
1752                  * Quite likely GuardMalloc (or some debugging tool)
1753                  * is being used on this task. And it has gone through
1754                  * its limit. Making a corpse will likely encounter
1755                  * a lot of VM entries that will need COW.
1756                  *
1757                  * Skip it.
1758                  */
1759                 task_resume_internal(task);
1760                 return KERN_FAILURE;
1761         }
1762
1763         /* Setup new task's vmmap, switch from parent task's map to it COW map */
1764         oldmap = new_task->map;
1765         new_task->map = vm_map_fork(new_task->ledger,
1766                                     task->map,
1767                                     (VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
1768                                      VM_MAP_FORK_PRESERVE_PURGEABLE));
1769         vm_map_deallocate(oldmap);
1770
1771         if (is64bit) {
1772                 vm_map_set_64bit(get_task_map(new_task));
1773         } else {
1774                 vm_map_set_32bit(get_task_map(new_task));
1775         }
1776
1777         /* Get all the udata pointers from kqueue */
1778         est_knotes = proc_list_uptrs(p, NULL, 0);
1779         if (est_knotes > 0) {
1780                 buf_size = (est_knotes + 32) * sizeof(uint64_t);
1781                 buffer = (uint64_t *) kalloc(buf_size);
1782                 num_knotes = proc_list_uptrs(p, buffer, buf_size);
1783                 if (num_knotes > est_knotes + 32) {
1784                         num_knotes = est_knotes + 32;
1785                 }
1786         }
1787
1788         active_thread_count = task->active_thread_count;
1789         if (active_thread_count == 0) {
1790                 if (buffer != NULL) {
1791                         kfree(buffer, buf_size);
1792                 }
1793                 task_resume_internal(task);
1794                 return KERN_FAILURE;
1795         }
1796
1797         thread_array = (thread_t *) kalloc(sizeof(thread_t) * active_thread_count);
1798
1799         /* Iterate all the threads and drop the task lock before calling thread_create_with_continuation */
1800         task_lock(task);
1801         queue_iterate(&task->threads, thread, thread_t, task_threads) {
1802                 /* Skip inactive threads */
1803                 active = thread->active;
1804                 if (!active) {
1805                         continue;
1806                 }
1807
1808                 if (array_count >= active_thread_count) {
1809                         break;
1810                 }
1811
1812                 thread_array[array_count++] = thread;
1813                 thread_reference(thread);
1814         }
1815         task_unlock(task);
1816
1817         for (i = 0; i < array_count; i++) {
1818
1819                 kr = thread_create_with_continuation(new_task, &new_thread, (thread_continue_t)thread_corpse_continue);
1820                 if (kr != KERN_SUCCESS) {
1821                         break;
1822                 }
1823
1824                 /* Equivalent of current thread in corpse */
1825                 if (thread_array[i] == self) {
1826                         thread_return = new_thread;
1827                 } else {
1828                         /* drop the extra ref returned by thread_create_with_continuation */
1829                         thread_deallocate(new_thread);
1830                 }
1831
1832                 kr = thread_dup2(thread_array[i], new_thread);
1833                 if (kr != KERN_SUCCESS) {
1834                         thread_mtx_lock(new_thread);
1835                         new_thread->corpse_dup = TRUE;
1836                         thread_mtx_unlock(new_thread);
1837                         continue;
1838                 }
1839
1840                 /* Copy thread name */
1841                 bsd_copythreadname(new_thread->uthread, thread_array[i]->uthread);
1842                 thread_copy_resource_info(new_thread, thread_array[i]);
1843         }
1844
1845         task_resume_internal(task);
1846
1847         for (i = 0; i < array_count; i++) {
1848                 thread_deallocate(thread_array[i]);
1849         }
1850         kfree(thread_array, sizeof(thread_t) * active_thread_count);
1851
1852         if (kr == KERN_SUCCESS) {
1853                 *thread_ret = thread_return;
1854                 *udata_buffer = buffer;
1855                 *size = buf_size;
1856                 *num_udata = num_knotes;
1857         } else {
1858                 if (thread_return != THREAD_NULL) {
1859                         thread_deallocate(thread_return);
1860                 }
1861                 if (buffer != NULL) {
1862                         kfree(buffer, buf_size);
1863                 }
1864         }
1865
1866         return kr;
1867 }
1868
#if CONFIG_SECLUDED_MEMORY
/* Prototype only; the definition is not in this part of the file. */
extern void task_set_can_use_secluded_mem_locked(task_t task, boolean_t can_use_secluded_mem);
#endif /* CONFIG_SECLUDED_MEMORY */
1874
1875 kern_return_t
1876 task_terminate_internal(
1877         task_t                  task)
1878 {
1879         thread_t                        thread, self;
1880         task_t                          self_task;
1881         boolean_t                       interrupt_save;
1882         int                             pid = 0;
1883
1884         assert(task != kernel_task);
1885
1886         self = current_thread();
1887         self_task = self->task;
1888
1889         /*
1890          *      Get the task locked and make sure that we are not racing
1891          *      with someone else trying to terminate us.
1892          */
1893         if (task == self_task)
1894                 task_lock(task);
1895         else
1896         if (task < self_task) {
1897                 task_lock(task);
1898                 task_lock(self_task);
1899         }
1900         else {
1901                 task_lock(self_task);
1902                 task_lock(task);
1903         }
1904
1905 #if CONFIG_SECLUDED_MEMORY
1906         if (task->task_can_use_secluded_mem) {
1907                 task_set_can_use_secluded_mem_locked(task, FALSE);
1908         }
1909         task->task_could_use_secluded_mem = FALSE;
1910         task->task_could_also_use_secluded_mem = FALSE;
1911 #endif /* CONFIG_SECLUDED_MEMORY */
1912
1913         if (!task->active) {
1914                 /*
1915                  *      Task is already being terminated.
1916                  *      Just return an error. If we are dying, this will
1917                  *      just get us to our AST special handler and that
1918                  *      will get us to finalize the termination of ourselves.
1919                  */
1920                 task_unlock(task);
1921                 if (self_task != task)
1922                         task_unlock(self_task);
1923
1924                 return (KERN_FAILURE);
1925         }
1926
1927         if (task_corpse_pending_report(task)) {
1928                 /*
1929                  *      Task is marked for reporting as corpse.
1930                  *      Just return an error. This will
1931                  *      just get us to our AST special handler and that
1932                  *      will get us to finish the path to death
1933                  */
1934                 task_unlock(task);
1935                 if (self_task != task)
1936                         task_unlock(self_task);
1937
1938                 return (KERN_FAILURE);
1939         }
1940
1941         if (self_task != task)
1942                 task_unlock(self_task);
1943
1944         /*
1945          * Make sure the current thread does not get aborted out of
1946          * the waits inside these operations.
1947          */
1948         interrupt_save = thread_interrupt_level(THREAD_UNINT);
1949
1950         /*
1951          *      Indicate that we want all the threads to stop executing
1952          *      at user space by holding the task (we would have held
1953          *      each thread independently in thread_terminate_internal -
1954          *      but this way we may be more likely to already find it
1955          *      held there).  Mark the task inactive, and prevent
1956          *      further task operations via the task port.
1957          */
1958         task_hold_locked(task);
1959         task->active = FALSE;
1960         ipc_task_disable(task);
1961
1962 #if CONFIG_TELEMETRY
1963         /*
1964          * Notify telemetry that this task is going away.
1965          */
1966         telemetry_task_ctl_locked(task, TF_TELEMETRY, 0);
1967 #endif
1968
1969         /*
1970          *      Terminate each thread in the task.
1971          */
1972         queue_iterate(&task->threads, thread, thread_t, task_threads) {
1973                         thread_terminate_internal(thread);
1974         }
1975
1976 #ifdef MACH_BSD
1977         if (task->bsd_info != NULL) {
1978                 pid = proc_pid(task->bsd_info);
1979         }
1980 #endif /* MACH_BSD */
1981
1982         task_unlock(task);
1983
1984         proc_set_task_policy(task, TASK_POLICY_ATTRIBUTE,
1985                              TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);
1986
1987         /* Early object reap phase */
1988
1989 // PR-17045188: Revisit implementation
1990 //        task_partial_reap(task, pid);
1991
1992
1993         /*
1994          *      Destroy all synchronizers owned by the task.
1995          */
1996         task_synchronizer_destroy_all(task);
1997
1998         /*
1999          *      Destroy the IPC space, leaving just a reference for it.
2000          */
2001         ipc_space_terminate(task->itk_space);
2002
2003 #if 00
2004         /* if some ledgers go negative on tear-down again... */
2005         ledger_disable_panic_on_negative(task->map->pmap->ledger,
2006                                          task_ledgers.phys_footprint);
2007         ledger_disable_panic_on_negative(task->map->pmap->ledger,
2008                                          task_ledgers.internal);
2009         ledger_disable_panic_on_negative(task->map->pmap->ledger,
2010                                          task_ledgers.internal_compressed);
2011         ledger_disable_panic_on_negative(task->map->pmap->ledger,
2012                                          task_ledgers.iokit_mapped);
2013         ledger_disable_panic_on_negative(task->map->pmap->ledger,
2014                                          task_ledgers.alternate_accounting);
2015         ledger_disable_panic_on_negative(task->map->pmap->ledger,
2016                                          task_ledgers.alternate_accounting_compressed);
2017 #endif
2018
2019         /*
2020          * If the current thread is a member of the task
2021          * being terminated, then the last reference to
2022          * the task will not be dropped until the thread
2023          * is finally reaped.  To avoid incurring the
2024          * expense of removing the address space regions
2025          * at reap time, we do it explictly here.
2026          */
2027
2028         vm_map_lock(task->map);
2029         vm_map_disable_hole_optimization(task->map);
2030         vm_map_unlock(task->map);
2031
2032         vm_map_remove(task->map,
2033                       task->map->min_offset,
2034                       task->map->max_offset,
2035                       /* no unnesting on final cleanup: */
2036                       VM_MAP_REMOVE_NO_UNNESTING);
2037
2038         /* release our shared region */
2039         vm_shared_region_set(task, NULL);
2040
2041
2042 #if MACH_ASSERT
2043         /*
2044          * Identify the pmap's process, in case the pmap ledgers drift
2045          * and we have to report it.
2046          */
2047         char procname[17];
2048         if (task->bsd_info) {
2049                 pid = proc_pid(task->bsd_info);
2050                 proc_name_kdp(task, procname, sizeof (procname));
2051         } else {
2052                 pid = 0;
2053                 strlcpy(procname, "<unknown>", sizeof (procname));
2054         }
2055         pmap_set_process(task->map->pmap, pid, procname);
2056 #endif /* MACH_ASSERT */
2057
2058         lck_mtx_lock(&tasks_threads_lock);
2059         queue_remove(&tasks, task, task_t, tasks);
2060         queue_enter(&terminated_tasks, task, task_t, tasks);
2061         tasks_count--;
2062         terminated_tasks_count++;
2063         lck_mtx_unlock(&tasks_threads_lock);
2064
2065         /*
2066          * We no longer need to guard against being aborted, so restore
2067          * the previous interruptible state.
2068          */
2069         thread_interrupt_level(interrupt_save);
2070
2071 #if KPERF
2072         /* force the task to release all ctrs */
2073         if (task->t_chud & TASK_KPC_FORCED_ALL_CTRS)
2074                 kpc_force_all_ctrs(task, 0);
2075 #endif
2076
2077 #if CONFIG_COALITIONS
2078         /*
2079          * Leave our coalitions. (drop activation but not reference)
2080          */
2081         coalitions_remove_task(task);
2082 #endif
2083
2084         /*
2085          * Get rid of the task active reference on itself.
2086          */
2087         task_deallocate(task);
2088
2089         return (KERN_SUCCESS);
2090 }
2091
2092 void
2093 tasks_system_suspend(boolean_t suspend)
2094 {
2095         task_t task;
2096
2097         lck_mtx_lock(&tasks_threads_lock);
2098         assert(tasks_suspend_state != suspend);
2099         tasks_suspend_state = suspend;
2100         queue_iterate(&tasks, task, task_t, tasks) {
2101                 if (task == kernel_task) {
2102                         continue;
2103                 }
2104                 suspend ? task_suspend_internal(task) : task_resume_internal(task);
2105         }
2106         lck_mtx_unlock(&tasks_threads_lock);
2107 }
2108
2109 /*
2110  * task_start_halt:
2111  *
2112  *      Shut the current task down (except for the current thread) in
2113  *      preparation for dramatic changes to the task (probably exec).
2114  *      We hold the task and mark all other threads in the task for
2115  *      termination.
2116  */
2117 kern_return_t
2118 task_start_halt(task_t task)
2119 {
2120         kern_return_t kr = KERN_SUCCESS;
2121         task_lock(task);
2122         kr = task_start_halt_locked(task, FALSE);
2123         task_unlock(task);
2124         return kr;
2125 }
2126
2127 static kern_return_t
2128 task_start_halt_locked(task_t task, boolean_t should_mark_corpse)
2129 {
2130         thread_t thread, self;
2131         uint64_t dispatchqueue_offset;
2132
2133         assert(task != kernel_task);
2134
2135         self = current_thread();
2136
2137         if (task != self->task && !task_is_a_corpse_fork(task))
2138                 return (KERN_INVALID_ARGUMENT);
2139
2140         if (task->halting || !task->active || !self->active) {
2141                 /*
2142                  * Task or current thread is already being terminated.
2143                  * Hurry up and return out of the current kernel context
2144                  * so that we run our AST special handler to terminate
2145                  * ourselves.
2146                  */
2147                 return (KERN_FAILURE);
2148         }
2149
2150         task->halting = TRUE;
2151
2152         /*
2153          * Mark all the threads to keep them from starting any more
2154          * user-level execution.  The thread_terminate_internal code
2155          * would do this on a thread by thread basis anyway, but this
2156          * gives us a better chance of not having to wait there.
2157          */
2158         task_hold_locked(task);
2159         dispatchqueue_offset = get_dispatchqueue_offset_from_proc(task->bsd_info);
2160
2161         /*
2162          * Terminate all the other threads in the task.
2163          */
2164         queue_iterate(&task->threads, thread, thread_t, task_threads)
2165         {
2166                 if (should_mark_corpse) {
2167                         thread_mtx_lock(thread);
2168                         thread->inspection = TRUE;
2169                         thread_mtx_unlock(thread);
2170                 }
2171                 if (thread != self)
2172                         thread_terminate_internal(thread);
2173         }
2174         task->dispatchqueue_offset = dispatchqueue_offset;
2175
2176         task_release_locked(task);
2177
2178         return KERN_SUCCESS;
2179 }
2180
2181
2182 /*
2183  * task_complete_halt:
2184  *
2185  *      Complete task halt by waiting for threads to terminate, then clean
2186  *      up task resources (VM, port namespace, etc...) and then let the
2187  *      current thread go in the (practically empty) task context.
2188  */
2189 void
2190 task_complete_halt(task_t task)
2191 {
2192         task_lock(task);
2193         assert(task->halting);
2194         assert(task == current_task());
2195
2196         /*
2197          *      Wait for the other threads to get shut down.
2198          *      When the last other thread is reaped, we'll be
2199          *      woken up.
2200          */
2201         if (task->thread_count > 1) {
2202                 assert_wait((event_t)&task->halting, THREAD_UNINT);
2203                 task_unlock(task);
2204                 thread_block(THREAD_CONTINUE_NULL);
2205         } else {
2206                 task_unlock(task);
2207         }
2208
2209         /*
2210          *      Give the machine dependent code a chance
2211          *      to perform cleanup of task-level resources
2212          *      associated with the current thread before
2213          *      ripping apart the task.
2214          */
2215         machine_task_terminate(task);
2216
2217         /*
2218          *      Destroy all synchronizers owned by the task.
2219          */
2220         task_synchronizer_destroy_all(task);
2221
2222         /*
2223          *      Destroy the contents of the IPC space, leaving just
2224          *      a reference for it.
2225          */
2226         ipc_space_clean(task->itk_space);
2227
2228         /*
2229          * Clean out the address space, as we are going to be
2230          * getting a new one.
2231          */
2232         vm_map_remove(task->map, task->map->min_offset,
2233                       task->map->max_offset,
2234                       /* no unnesting on final cleanup: */
2235                       VM_MAP_REMOVE_NO_UNNESTING);
2236
2237         /*
2238          * Kick out any IOKitUser handles to the task. At best they're stale,
2239          * at worst someone is racing a SUID exec.
2240          */
2241         iokit_task_terminate(task);
2242
2243         task->halting = FALSE;
2244 }
2245
2246 /*
2247  *      task_hold_locked:
2248  *
2249  *      Suspend execution of the specified task.
2250  *      This is a recursive-style suspension of the task, a count of
2251  *      suspends is maintained.
2252  *
2253  *      CONDITIONS: the task is locked and active.
2254  */
2255 void
2256 task_hold_locked(
2257         task_t          task)
2258 {
2259         thread_t        thread;
2260
2261         assert(task->active);
2262
2263         if (task->suspend_count++ > 0)
2264                 return;
2265
2266         /*
2267          *      Iterate through all the threads and hold them.
2268          */
2269         queue_iterate(&task->threads, thread, thread_t, task_threads) {
2270                 thread_mtx_lock(thread);
2271                 thread_hold(thread);
2272                 thread_mtx_unlock(thread);
2273         }
2274 }
2275
2276 /*
2277  *      task_hold:
2278  *
2279  *      Same as the internal routine above, except that is must lock
2280  *      and verify that the task is active.  This differs from task_suspend
2281  *      in that it places a kernel hold on the task rather than just a
2282  *      user-level hold.  This keeps users from over resuming and setting
2283  *      it running out from under the kernel.
2284  *
2285  *      CONDITIONS: the caller holds a reference on the task
2286  */
2287 kern_return_t
2288 task_hold(
2289         task_t          task)
2290 {
2291         if (task == TASK_NULL)
2292                 return (KERN_INVALID_ARGUMENT);
2293
2294         task_lock(task);
2295
2296         if (!task->active) {
2297                 task_unlock(task);
2298
2299                 return (KERN_FAILURE);
2300         }
2301
2302         task_hold_locked(task);
2303         task_unlock(task);
2304
2305         return (KERN_SUCCESS);
2306 }
2307
2308 kern_return_t
2309 task_wait(
2310                 task_t          task,
2311                 boolean_t       until_not_runnable)
2312 {
2313         if (task == TASK_NULL)
2314                 return (KERN_INVALID_ARGUMENT);
2315
2316         task_lock(task);
2317
2318         if (!task->active) {
2319                 task_unlock(task);
2320
2321                 return (KERN_FAILURE);
2322         }
2323
2324         task_wait_locked(task, until_not_runnable);
2325         task_unlock(task);
2326
2327         return (KERN_SUCCESS);
2328 }
2329
2330 /*
2331  *      task_wait_locked:
2332  *
2333  *      Wait for all threads in task to stop.
2334  *
2335  * Conditions:
2336  *      Called with task locked, active, and held.
2337  */
2338 void
2339 task_wait_locked(
2340         task_t          task,
2341         boolean_t               until_not_runnable)
2342 {
2343         thread_t        thread, self;
2344
2345         assert(task->active);
2346         assert(task->suspend_count > 0);
2347
2348         self = current_thread();
2349
2350         /*
2351          *      Iterate through all the threads and wait for them to
2352          *      stop.  Do not wait for the current thread if it is within
2353          *      the task.
2354          */
2355         queue_iterate(&task->threads, thread, thread_t, task_threads) {
2356                 if (thread != self)
2357                         thread_wait(thread, until_not_runnable);
2358         }
2359 }
2360
2361 /*
2362  *      task_release_locked:
2363  *
2364  *      Release a kernel hold on a task.
2365  *
2366  *      CONDITIONS: the task is locked and active
2367  */
2368 void
2369 task_release_locked(
2370         task_t          task)
2371 {
2372         thread_t        thread;
2373
2374         assert(task->active);
2375         assert(task->suspend_count > 0);
2376
2377         if (--task->suspend_count > 0)
2378                 return;
2379
2380         queue_iterate(&task->threads, thread, thread_t, task_threads) {
2381                 thread_mtx_lock(thread);
2382                 thread_release(thread);
2383                 thread_mtx_unlock(thread);
2384         }
2385 }
2386
2387 /*
2388  *      task_release:
2389  *
2390  *      Same as the internal routine above, except that it must lock
2391  *      and verify that the task is active.
2392  *
2393  *      CONDITIONS: The caller holds a reference to the task
2394  */
2395 kern_return_t
2396 task_release(
2397         task_t          task)
2398 {
2399         if (task == TASK_NULL)
2400                 return (KERN_INVALID_ARGUMENT);
2401
2402         task_lock(task);
2403
2404         if (!task->active) {
2405                 task_unlock(task);
2406
2407                 return (KERN_FAILURE);
2408         }
2409
2410         task_release_locked(task);
2411         task_unlock(task);
2412
2413         return (KERN_SUCCESS);
2414 }
2415
2416 kern_return_t
2417 task_threads(
2418         task_t                                  task,
2419         thread_act_array_t              *threads_out,
2420         mach_msg_type_number_t  *count)
2421 {
2422         mach_msg_type_number_t  actual;
2423         thread_t                                *thread_list;
2424         thread_t                                thread;
2425         vm_size_t                               size, size_needed;
2426         void                                    *addr;
2427         unsigned int                    i, j;
2428
2429         if (task == TASK_NULL)
2430                 return (KERN_INVALID_ARGUMENT);
2431
2432         size = 0; addr = NULL;
2433
2434         for (;;) {
2435                 task_lock(task);
2436                 if (!task->active) {
2437                         task_unlock(task);
2438
2439                         if (size != 0)
2440                                 kfree(addr, size);
2441
2442                         return (KERN_FAILURE);
2443                 }
2444
2445                 actual = task->thread_count;
2446
2447                 /* do we have the memory we need? */
2448                 size_needed = actual * sizeof (mach_port_t);
2449                 if (size_needed <= size)
2450                         break;
2451
2452                 /* unlock the task and allocate more memory */
2453                 task_unlock(task);
2454
2455                 if (size != 0)
2456                         kfree(addr, size);
2457
2458                 assert(size_needed > 0);
2459                 size = size_needed;
2460
2461                 addr = kalloc(size);
2462                 if (addr == 0)
2463                         return (KERN_RESOURCE_SHORTAGE);
2464         }
2465
2466         /* OK, have memory and the task is locked & active */
2467         thread_list = (thread_t *)addr;
2468
2469         i = j = 0;
2470
2471         for (thread = (thread_t)queue_first(&task->threads); i < actual;
2472                                 ++i, thread = (thread_t)queue_next(&thread->task_threads)) {
2473                 thread_reference_internal(thread);
2474                 thread_list[j++] = thread;
2475         }
2476
2477         assert(queue_end(&task->threads, (queue_entry_t)thread));
2478
2479         actual = j;
2480         size_needed = actual * sizeof (mach_port_t);
2481
2482         /* can unlock task now that we've got the thread refs */
2483         task_unlock(task);
2484
2485         if (actual == 0) {
2486                 /* no threads, so return null pointer and deallocate memory */
2487
2488                 *threads_out = NULL;
2489                 *count = 0;
2490
2491                 if (size != 0)
2492                         kfree(addr, size);
2493         }
2494         else {
2495                 /* if we allocated too much, must copy */
2496
2497                 if (size_needed < size) {
2498                         void *newaddr;
2499
2500                         newaddr = kalloc(size_needed);
2501                         if (newaddr == 0) {
2502                                 for (i = 0; i < actual; ++i)
2503                                         thread_deallocate(thread_list[i]);
2504                                 kfree(addr, size);
2505                                 return (KERN_RESOURCE_SHORTAGE);
2506                         }
2507
2508                         bcopy(addr, newaddr, size_needed);
2509                         kfree(addr, size);
2510                         thread_list = (thread_t *)newaddr;
2511                 }
2512
2513                 *threads_out = thread_list;
2514                 *count = actual;
2515
2516                 /* do the conversion that Mig should handle */
2517
2518                 for (i = 0; i < actual; ++i)
2519                         ((ipc_port_t *) thread_list)[i] = convert_thread_to_port(thread_list[i]);
2520         }
2521
2522         return (KERN_SUCCESS);
2523 }
2524
2525 #define TASK_HOLD_NORMAL        0
2526 #define TASK_HOLD_PIDSUSPEND    1
2527 #define TASK_HOLD_LEGACY        2
2528 #define TASK_HOLD_LEGACY_ALL    3
2529
2530 static kern_return_t
2531 place_task_hold    (
2532         task_t task,
2533         int mode)
2534 {
2535         if (!task->active && !task_is_a_corpse(task)) {
2536                 return (KERN_FAILURE);
2537         }
2538
2539         /* Return success for corpse task */
2540         if (task_is_a_corpse(task)) {
2541                 return KERN_SUCCESS;
2542         }
2543
2544         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2545             MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_SUSPEND) | DBG_FUNC_NONE,
2546             task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
2547             task->user_stop_count, task->user_stop_count + 1, 0);
2548
2549 #if MACH_ASSERT
2550         current_task()->suspends_outstanding++;
2551 #endif
2552
2553         if (mode == TASK_HOLD_LEGACY)
2554                 task->legacy_stop_count++;
2555
2556         if (task->user_stop_count++ > 0) {
2557                 /*
2558                  *      If the stop count was positive, the task is
2559                  *      already stopped and we can exit.
2560                  */
2561                 return (KERN_SUCCESS);
2562         }
2563
2564         /*
2565          * Put a kernel-level hold on the threads in the task (all
2566          * user-level task suspensions added together represent a
2567          * single kernel-level hold).  We then wait for the threads
2568          * to stop executing user code.
2569          */
2570         task_hold_locked(task);
2571         task_wait_locked(task, FALSE);
2572
2573         return (KERN_SUCCESS);
2574 }
2575
2576 static kern_return_t
2577 release_task_hold    (
2578         task_t          task,
2579         int                     mode)
2580 {
2581         boolean_t release = FALSE;
2582
2583         if (!task->active && !task_is_a_corpse(task)) {
2584                 return (KERN_FAILURE);
2585         }
2586
2587         /* Return success for corpse task */
2588         if (task_is_a_corpse(task)) {
2589                 return KERN_SUCCESS;
2590         }
2591
2592         if (mode == TASK_HOLD_PIDSUSPEND) {
2593             if (task->pidsuspended == FALSE) {
2594                     return (KERN_FAILURE);
2595             }
2596             task->pidsuspended = FALSE;
2597         }
2598
2599         if (task->user_stop_count > (task->pidsuspended ? 1 : 0)) {
2600
2601                 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2602                     MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_RESUME) | DBG_FUNC_NONE,
2603                     task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
2604                     task->user_stop_count, mode, task->legacy_stop_count);
2605
2606 #if MACH_ASSERT
2607                 /*
2608                  * This is obviously not robust; if we suspend one task and then resume a different one,
2609                  * we'll fly under the radar. This is only meant to catch the common case of a crashed
2610                  * or buggy suspender.
2611                  */
2612                 current_task()->suspends_outstanding--;
2613 #endif
2614
2615                 if (mode == TASK_HOLD_LEGACY_ALL) {
2616                         if (task->legacy_stop_count >= task->user_stop_count) {
2617                                 task->user_stop_count = 0;
2618                                 release = TRUE;
2619                         } else {
2620                                 task->user_stop_count -= task->legacy_stop_count;
2621                         }
2622                         task->legacy_stop_count = 0;
2623                 } else {
2624                         if (mode == TASK_HOLD_LEGACY && task->legacy_stop_count > 0)
2625                                 task->legacy_stop_count--;
2626                         if (--task->user_stop_count == 0)
2627                                 release = TRUE;
2628                 }
2629         }
2630         else {
2631                 return (KERN_FAILURE);
2632         }
2633
2634         /*
2635          *      Release the task if necessary.
2636          */
2637         if (release)
2638                 task_release_locked(task);
2639
2640     return (KERN_SUCCESS);
2641 }
2642
2643
2644 /*
2645  *      task_suspend:
2646  *
2647  *      Implement an (old-fashioned) user-level suspension on a task.
2648  *
2649  *      Because the user isn't expecting to have to manage a suspension
2650  *      token, we'll track it for him in the kernel in the form of a naked
2651  *      send right to the task's resume port.  All such send rights
2652  *      account for a single suspension against the task (unlike task_suspend2()
2653  *      where each caller gets a unique suspension count represented by a
2654  *      unique send-once right).
2655  *
2656  * Conditions:
2657  *      The caller holds a reference to the task
2658  */
2659 kern_return_t
2660 task_suspend(
2661         task_t          task)
2662 {
2663         kern_return_t                   kr;
2664         mach_port_t                     port, send, old_notify;
2665         mach_port_name_t                name;
2666
2667         if (task == TASK_NULL || task == kernel_task)
2668                 return (KERN_INVALID_ARGUMENT);
2669
2670         task_lock(task);
2671
2672         /*
2673          * Claim a send right on the task resume port, and request a no-senders
2674          * notification on that port (if none outstanding).
2675          */
2676         if (task->itk_resume == IP_NULL) {
2677                 task->itk_resume = ipc_port_alloc_kernel();
2678                 if (!IP_VALID(task->itk_resume))
2679                         panic("failed to create resume port");
2680                 ipc_kobject_set(task->itk_resume, (ipc_kobject_t)task, IKOT_TASK_RESUME);
2681         }
2682
2683         port = task->itk_resume;
2684         ip_lock(port);
2685         assert(ip_active(port));
2686
2687         send = ipc_port_make_send_locked(port);
2688         assert(IP_VALID(send));
2689
2690         if (port->ip_nsrequest == IP_NULL) {
2691                 ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
2692                 assert(old_notify == IP_NULL);
2693                 /* port unlocked */
2694         } else {
2695                 ip_unlock(port);
2696         }
2697
2698         /*
2699          * place a legacy hold on the task.
2700          */
2701         kr = place_task_hold(task, TASK_HOLD_LEGACY);
2702         if (kr != KERN_SUCCESS) {
2703                 task_unlock(task);
2704                 ipc_port_release_send(send);
2705                 return kr;
2706         }
2707
2708         task_unlock(task);
2709
2710         /*
2711          * Copyout the send right into the calling task's IPC space.  It won't know it is there,
2712          * but we'll look it up when calling a traditional resume.  Any IPC operations that
2713          * deallocate the send right will auto-release the suspension.
2714          */
2715         if ((kr = ipc_kmsg_copyout_object(current_task()->itk_space, (ipc_object_t)send,
2716                 MACH_MSG_TYPE_MOVE_SEND, &name)) != KERN_SUCCESS) {
2717                 printf("warning: %s(%d) failed to copyout suspension token for pid %d with error: %d\n",
2718                                 proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
2719                                 task_pid(task), kr);
2720                 return (kr);
2721         }
2722
2723         return (kr);
2724 }
2725
2726 /*
2727  *      task_resume:
2728  *              Release a user hold on a task.
2729  *
2730  * Conditions:
2731  *              The caller holds a reference to the task
2732  */
2733 kern_return_t
2734 task_resume(
2735         task_t  task)
2736 {
2737         kern_return_t    kr;
2738         mach_port_name_t resume_port_name;
2739         ipc_entry_t              resume_port_entry;
2740         ipc_space_t              space = current_task()->itk_space;
2741
2742         if (task == TASK_NULL || task == kernel_task )
2743                 return (KERN_INVALID_ARGUMENT);
2744
2745         /* release a legacy task hold */
2746         task_lock(task);
2747         kr = release_task_hold(task, TASK_HOLD_LEGACY);
2748         task_unlock(task);
2749
2750         is_write_lock(space);
2751         if (is_active(space) && IP_VALID(task->itk_resume) &&
2752             ipc_hash_lookup(space, (ipc_object_t)task->itk_resume, &resume_port_name, &resume_port_entry) == TRUE) {
2753                 /*
2754                  * We found a suspension token in the caller's IPC space. Release a send right to indicate that
2755                  * we are holding one less legacy hold on the task from this caller.  If the release failed,
2756                  * go ahead and drop all the rights, as someone either already released our holds or the task
2757                  * is gone.
2758                  */
2759                 if (kr == KERN_SUCCESS)
2760                         ipc_right_dealloc(space, resume_port_name, resume_port_entry);
2761                 else
2762                         ipc_right_destroy(space, resume_port_name, resume_port_entry, FALSE, 0);
2763                 /* space unlocked */
2764         } else {
2765                 is_write_unlock(space);
2766                 if (kr == KERN_SUCCESS)
2767                         printf("warning: %s(%d) performed out-of-band resume on pid %d\n",
2768                                proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
2769                                task_pid(task));
2770         }
2771
2772         return kr;
2773 }
2774
2775 /*
2776  * Suspend the target task.
2777  * Making/holding a token/reference/port is the callers responsibility.
2778  */
2779 kern_return_t
2780 task_suspend_internal(task_t task)
2781 {
2782         kern_return_t    kr;
2783
2784         if (task == TASK_NULL || task == kernel_task)
2785                 return (KERN_INVALID_ARGUMENT);
2786
2787         task_lock(task);
2788         kr = place_task_hold(task, TASK_HOLD_NORMAL);
2789         task_unlock(task);
2790         return (kr);
2791 }
2792
2793 /*
2794  * Suspend the target task, and return a suspension token. The token
2795  * represents a reference on the suspended task.
2796  */
2797 kern_return_t
2798 task_suspend2(
2799         task_t                  task,
2800         task_suspension_token_t *suspend_token)
2801 {
2802         kern_return_t    kr;
2803
2804         kr = task_suspend_internal(task);
2805         if (kr != KERN_SUCCESS) {
2806                 *suspend_token = TASK_NULL;
2807                 return (kr);
2808         }
2809
2810         /*
2811          * Take a reference on the target task and return that to the caller
2812          * as a "suspension token," which can be converted into an SO right to
2813          * the now-suspended task's resume port.
2814          */
2815         task_reference_internal(task);
2816         *suspend_token = task;
2817
2818         return (KERN_SUCCESS);
2819 }
2820
2821 /*
2822  * Resume the task
2823  * (reference/token/port management is caller's responsibility).
2824  */
2825 kern_return_t
2826 task_resume_internal(
2827         task_suspension_token_t         task)
2828 {
2829         kern_return_t kr;
2830
2831         if (task == TASK_NULL || task == kernel_task)
2832                 return (KERN_INVALID_ARGUMENT);
2833
2834         task_lock(task);
2835         kr = release_task_hold(task, TASK_HOLD_NORMAL);
2836         task_unlock(task);
2837         return (kr);
2838 }
2839
2840 /*
2841  * Resume the task using a suspension token. Consumes the token's ref.
2842  */
2843 kern_return_t
2844 task_resume2(
2845         task_suspension_token_t         task)
2846 {
2847         kern_return_t kr;
2848
2849         kr = task_resume_internal(task);
2850         task_suspension_token_deallocate(task);
2851
2852         return (kr);
2853 }
2854
2855 boolean_t
2856 task_suspension_notify(mach_msg_header_t *request_header)
2857 {
2858         ipc_port_t port = (ipc_port_t) request_header->msgh_remote_port;
2859         task_t task = convert_port_to_task_suspension_token(port);
2860         mach_msg_type_number_t not_count;
2861
2862         if (task == TASK_NULL || task == kernel_task)
2863                 return TRUE;  /* nothing to do */
2864
2865         switch (request_header->msgh_id) {
2866
2867         case MACH_NOTIFY_SEND_ONCE:
2868                 /* release the hold held by this specific send-once right */
2869                 task_lock(task);
2870                 release_task_hold(task, TASK_HOLD_NORMAL);
2871                 task_unlock(task);
2872                 break;
2873
2874         case MACH_NOTIFY_NO_SENDERS:
2875                 not_count = ((mach_no_senders_notification_t *)request_header)->not_count;
2876
2877                 task_lock(task);
2878                 ip_lock(port);
2879                 if (port->ip_mscount == not_count) {
2880
2881                         /* release all the [remaining] outstanding legacy holds */
2882                         assert(port->ip_nsrequest == IP_NULL);
2883                         ip_unlock(port);
2884                         release_task_hold(task, TASK_HOLD_LEGACY_ALL);
2885                         task_unlock(task);
2886
2887                 } else if (port->ip_nsrequest == IP_NULL) {
2888                         ipc_port_t old_notify;
2889
2890                         task_unlock(task);
2891                         /* new send rights, re-arm notification at current make-send count */
2892                         ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
2893                         assert(old_notify == IP_NULL);
2894                         /* port unlocked */
2895                 } else {
2896                         ip_unlock(port);
2897                         task_unlock(task);
2898                 }
2899                 break;
2900
2901         default:
2902                 break;
2903         }
2904
2905         task_suspension_token_deallocate(task); /* drop token reference */
2906         return TRUE;
2907 }
2908
2909 kern_return_t
2910 task_pidsuspend_locked(task_t task)
2911 {
2912         kern_return_t kr;
2913
2914         if (task->pidsuspended) {
2915                 kr = KERN_FAILURE;
2916                 goto out;
2917         }
2918
2919         task->pidsuspended = TRUE;
2920
2921         kr = place_task_hold(task, TASK_HOLD_PIDSUSPEND);
2922         if (kr != KERN_SUCCESS) {
2923                 task->pidsuspended = FALSE;
2924         }
2925 out:
2926         return(kr);
2927 }
2928
2929
2930 /*
2931  *      task_pidsuspend:
2932  *
2933  *      Suspends a task by placing a hold on its threads.
2934  *
2935  * Conditions:
2936  *      The caller holds a reference to the task
2937  */
2938 kern_return_t
2939 task_pidsuspend(
2940         task_t          task)
2941 {
2942         kern_return_t    kr;
2943
2944         if (task == TASK_NULL || task == kernel_task)
2945                 return (KERN_INVALID_ARGUMENT);
2946
2947         task_lock(task);
2948
2949         kr = task_pidsuspend_locked(task);
2950
2951         task_unlock(task);
2952
2953         return (kr);
2954 }
2955
2956 /*
2957  *      task_pidresume:
2958  *              Resumes a previously suspended task.
2959  *
2960  * Conditions:
2961  *              The caller holds a reference to the task
2962  */
2963 kern_return_t
2964 task_pidresume(
2965         task_t  task)
2966 {
2967         kern_return_t    kr;
2968
2969         if (task == TASK_NULL || task == kernel_task)
2970                 return (KERN_INVALID_ARGUMENT);
2971
2972         task_lock(task);
2973
2974 #if CONFIG_FREEZE
2975
2976         while (task->changing_freeze_state) {
2977
2978                 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
2979                 task_unlock(task);
2980                 thread_block(THREAD_CONTINUE_NULL);
2981
2982                 task_lock(task);
2983         }
2984         task->changing_freeze_state = TRUE;
2985 #endif
2986
2987         kr = release_task_hold(task, TASK_HOLD_PIDSUSPEND);
2988
2989         task_unlock(task);
2990
2991 #if CONFIG_FREEZE
2992
2993         task_lock(task);
2994
2995         if (kr == KERN_SUCCESS)
2996                 task->frozen = FALSE;
2997         task->changing_freeze_state = FALSE;
2998         thread_wakeup(&task->changing_freeze_state);
2999
3000         task_unlock(task);
3001 #endif
3002
3003         return (kr);
3004 }
3005
3006
3007 #if DEVELOPMENT || DEBUG
3008
3009 extern void IOSleep(int);
3010
3011 kern_return_t
3012 task_disconnect_page_mappings(task_t task)
3013 {
3014         int     n;
3015
3016         if (task == TASK_NULL || task == kernel_task)
3017                 return (KERN_INVALID_ARGUMENT);
3018
3019         /*
3020          * this function is used to strip all of the mappings from
3021          * the pmap for the specified task to force the task to
3022          * re-fault all of the pages it is actively using... this
3023          * allows us to approximate the true working set of the
3024          * specified task.  We only engage if at least 1 of the
3025          * threads in the task is runnable, but we want to continuously
3026          * sweep (at least for a while - I've arbitrarily set the limit at
3027          * 100 sweeps to be re-looked at as we gain experience) to get a better
3028          * view into what areas within a page are being visited (as opposed to only
3029          * seeing the first fault of a page after the task becomes
3030          * runnable)...  in the future I may
3031          * try to block until awakened by a thread in this task
3032          * being made runnable, but for now we'll periodically poll from the
3033          * user level debug tool driving the sysctl
3034          */
3035         for (n = 0; n < 100; n++) {
3036                 thread_t        thread;
3037                 boolean_t       runnable;
3038                 boolean_t       do_unnest;
3039                 int             page_count;
3040
3041                 runnable = FALSE;
3042                 do_unnest = FALSE;
3043
3044                 task_lock(task);
3045
3046                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3047
3048                         if (thread->state & TH_RUN) {
3049                                 runnable = TRUE;
3050                                 break;
3051                         }
3052                 }
3053                 if (n == 0)
3054                         task->task_disconnected_count++;
3055
3056                 if (task->task_unnested == FALSE) {
3057                         if (runnable == TRUE) {
3058                                 task->task_unnested = TRUE;
3059                                 do_unnest = TRUE;
3060                         }
3061                 }
3062                 task_unlock(task);
3063
3064                 if (runnable == FALSE)
3065                         break;
3066
3067                 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_START,
3068                                           task, do_unnest, task->task_disconnected_count, 0, 0);
3069
3070                 page_count = vm_map_disconnect_page_mappings(task->map, do_unnest);
3071
3072                 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_END,
3073                                           task, page_count, 0, 0, 0);
3074
3075                 if ((n % 5) == 4)
3076                         IOSleep(1);
3077         }
3078         return (KERN_SUCCESS);
3079 }
3080
3081 #endif
3082
3083
3084 #if CONFIG_FREEZE
3085
3086 /*
3087  *      task_freeze:
3088  *
3089  *      Freeze a task.
3090  *
3091  * Conditions:
3092  *      The caller holds a reference to the task
3093  */
3094 extern void             vm_wake_compactor_swapper();
3095 extern queue_head_t     c_swapout_list_head;
3096
3097 kern_return_t
3098 task_freeze(
3099         task_t    task,
3100         uint32_t           *purgeable_count,
3101         uint32_t           *wired_count,
3102         uint32_t           *clean_count,
3103         uint32_t           *dirty_count,
3104         uint32_t           dirty_budget,
3105         boolean_t          *shared,
3106         boolean_t          walk_only)
3107 {
3108         kern_return_t kr = KERN_SUCCESS;
3109
3110         if (task == TASK_NULL || task == kernel_task)
3111                 return (KERN_INVALID_ARGUMENT);
3112
3113         task_lock(task);
3114
3115         while (task->changing_freeze_state) {
3116
3117                 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
3118                 task_unlock(task);
3119                 thread_block(THREAD_CONTINUE_NULL);
3120
3121                 task_lock(task);
3122         }
3123         if (task->frozen) {
3124                 task_unlock(task);
3125                 return (KERN_FAILURE);
3126         }
3127         task->changing_freeze_state = TRUE;
3128
3129         task_unlock(task);
3130
3131         if (walk_only) {
3132                 panic("task_freeze - walk_only == TRUE");
3133         } else {
3134                 kr = vm_map_freeze(task->map, purgeable_count, wired_count, clean_count, dirty_count, dirty_budget, shared);
3135         }
3136
3137         task_lock(task);
3138
3139         if (walk_only == FALSE && kr == KERN_SUCCESS)
3140                 task->frozen = TRUE;
3141         task->changing_freeze_state = FALSE;
3142         thread_wakeup(&task->changing_freeze_state);
3143
3144         task_unlock(task);
3145
3146         if (VM_CONFIG_COMPRESSOR_IS_PRESENT) {
3147                 vm_wake_compactor_swapper();
3148                 /*
3149                  * We do an explicit wakeup of the swapout thread here
3150                  * because the compact_and_swap routines don't have
3151                  * knowledge about these kind of "per-task packed c_segs"
3152                  * and so will not be evaluating whether we need to do
3153                  * a wakeup there.
3154                  */
3155                 thread_wakeup((event_t)&c_swapout_list_head);
3156         }
3157
3158         return (kr);
3159 }
3160
3161 /*
3162  *      task_thaw:
3163  *
3164  *      Thaw a currently frozen task.
3165  *
3166  * Conditions:
3167  *      The caller holds a reference to the task
3168  */
3169 kern_return_t
3170 task_thaw(
3171         task_t          task)
3172 {
3173         if (task == TASK_NULL || task == kernel_task)
3174                 return (KERN_INVALID_ARGUMENT);
3175
3176         task_lock(task);
3177
3178         while (task->changing_freeze_state) {
3179
3180                 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
3181                 task_unlock(task);
3182                 thread_block(THREAD_CONTINUE_NULL);
3183
3184                 task_lock(task);
3185         }
3186         if (!task->frozen) {
3187                 task_unlock(task);
3188                 return (KERN_FAILURE);
3189         }
3190         task->frozen = FALSE;
3191
3192         task_unlock(task);
3193
3194         return (KERN_SUCCESS);
3195 }
3196
3197 #endif /* CONFIG_FREEZE */
3198
3199 kern_return_t
3200 host_security_set_task_token(
3201         host_security_t  host_security,
3202         task_t           task,
3203         security_token_t sec_token,
3204         audit_token_t    audit_token,
3205         host_priv_t      host_priv)
3206 {
3207         ipc_port_t       host_port;
3208         kern_return_t    kr;
3209
3210         if (task == TASK_NULL)
3211                 return(KERN_INVALID_ARGUMENT);
3212
3213         if (host_security == HOST_NULL)
3214                 return(KERN_INVALID_SECURITY);
3215
3216         task_lock(task);
3217         task->sec_token = sec_token;
3218         task->audit_token = audit_token;
3219
3220         task_unlock(task);
3221
3222         if (host_priv != HOST_PRIV_NULL) {
3223                 kr = host_get_host_priv_port(host_priv, &host_port);
3224         } else {
3225                 kr = host_get_host_port(host_priv_self(), &host_port);
3226         }
3227         assert(kr == KERN_SUCCESS);
3228         kr = task_set_special_port(task, TASK_HOST_PORT, host_port);
3229         return(kr);
3230 }
3231
3232 kern_return_t
3233 task_send_trace_memory(
3234         task_t        target_task,
3235         __unused uint32_t pid,
3236         __unused uint64_t uniqueid)
3237 {
3238         kern_return_t kr = KERN_INVALID_ARGUMENT;
3239         if (target_task == TASK_NULL)
3240                 return (KERN_INVALID_ARGUMENT);
3241
3242 #if CONFIG_ATM
3243         kr = atm_send_proc_inspect_notification(target_task,
3244                                   pid,
3245                                   uniqueid);
3246
3247 #endif
3248         return (kr);
3249 }
3250 /*
3251  * This routine was added, pretty much exclusively, for registering the
3252  * RPC glue vector for in-kernel short circuited tasks.  Rather than
3253  * removing it completely, I have only disabled that feature (which was
3254  * the only feature at the time).  It just appears that we are going to
3255  * want to add some user data to tasks in the future (i.e. bsd info,
3256  * task names, etc...), so I left it in the formal task interface.
3257  */
3258 kern_return_t
3259 task_set_info(
3260         task_t          task,
3261         task_flavor_t   flavor,
3262         __unused task_info_t    task_info_in,           /* pointer to IN array */
3263         __unused mach_msg_type_number_t task_info_count)
3264 {
3265         if (task == TASK_NULL)
3266                 return(KERN_INVALID_ARGUMENT);
3267
3268         switch (flavor) {
3269
3270 #if CONFIG_ATM
3271                 case TASK_TRACE_MEMORY_INFO:
3272                 {
3273                         if (task_info_count != TASK_TRACE_MEMORY_INFO_COUNT)
3274                                 return (KERN_INVALID_ARGUMENT);
3275
3276                         assert(task_info_in != NULL);
3277                         task_trace_memory_info_t mem_info;
3278                         mem_info = (task_trace_memory_info_t) task_info_in;
3279                         kern_return_t kr = atm_register_trace_memory(task,
3280                                                 mem_info->user_memory_address,
3281                                                 mem_info->buffer_size);
3282                         return kr;
3283                 }
3284
3285 #endif
3286             default:
3287                 return (KERN_INVALID_ARGUMENT);
3288         }
3289         return (KERN_SUCCESS);
3290 }
3291
/*
 * Switch consulted by task_info() for the TASK_VM_INFO_PURGEABLE flavor:
 * when non-zero (the default), the volatile compressed pmap size reported
 * by vm_map_query_volatile() is subtracted from the "compressed" value
 * returned to the caller.  Not static, so it may also be referenced from
 * outside this file.
 * NOTE(review): presumably named after the bug report that introduced the
 * adjustment -- confirm.
 */
3292 int radar_20146450 = 1;
3293 kern_return_t
3294 task_info(
3295         task_t                  task,
3296         task_flavor_t           flavor,
3297         task_info_t             task_info_out,
3298         mach_msg_type_number_t  *task_info_count)
3299 {
3300         kern_return_t error = KERN_SUCCESS;
3301         mach_msg_type_number_t  original_task_info_count;
3302
3303         if (task == TASK_NULL)
3304                 return (KERN_INVALID_ARGUMENT);
3305
3306         original_task_info_count = *task_info_count;
3307         task_lock(task);
3308
3309         if ((task != current_task()) && (!task->active)) {
3310                 task_unlock(task);
3311                 return (KERN_INVALID_ARGUMENT);
3312         }
3313
3314         switch (flavor) {
3315
3316         case TASK_BASIC_INFO_32:
3317         case TASK_BASIC2_INFO_32:
3318         {
3319                 task_basic_info_32_t    basic_info;
3320                 vm_map_t                                map;
3321                 clock_sec_t                             secs;
3322                 clock_usec_t                    usecs;
3323
3324                 if (*task_info_count < TASK_BASIC_INFO_32_COUNT) {
3325                     error = KERN_INVALID_ARGUMENT;
3326                     break;
3327                 }
3328
3329                 basic_info = (task_basic_info_32_t)task_info_out;
3330
3331                 map = (task == kernel_task)? kernel_map: task->map;
3332                 basic_info->virtual_size = (typeof(basic_info->virtual_size))map->size;
3333                 if (flavor == TASK_BASIC2_INFO_32) {
3334                         /*
3335                          * The "BASIC2" flavor gets the maximum resident
3336                          * size instead of the current resident size...
3337                          */
3338                         basic_info->resident_size = pmap_resident_max(map->pmap);
3339                 } else {
3340                         basic_info->resident_size = pmap_resident_count(map->pmap);
3341                 }
3342                 basic_info->resident_size *= PAGE_SIZE;
3343
3344                 basic_info->policy = ((task != kernel_task)?
3345                                                                                   POLICY_TIMESHARE: POLICY_RR);
3346                 basic_info->suspend_count = task->user_stop_count;
3347
3348                 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
3349                 basic_info->user_time.seconds =
3350                         (typeof(basic_info->user_time.seconds))secs;
3351                 basic_info->user_time.microseconds = usecs;
3352
3353                 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
3354                 basic_info->system_time.seconds =
3355                         (typeof(basic_info->system_time.seconds))secs;
3356                 basic_info->system_time.microseconds = usecs;
3357
3358                 *task_info_count = TASK_BASIC_INFO_32_COUNT;
3359                 break;
3360         }
3361
3362         case TASK_BASIC_INFO_64:
3363         {
3364                 task_basic_info_64_t    basic_info;
3365                 vm_map_t                                map;
3366                 clock_sec_t                             secs;
3367                 clock_usec_t                    usecs;
3368
3369                 if (*task_info_count < TASK_BASIC_INFO_64_COUNT) {
3370                     error = KERN_INVALID_ARGUMENT;
3371                     break;
3372                 }
3373
3374                 basic_info = (task_basic_info_64_t)task_info_out;
3375
3376                 map = (task == kernel_task)? kernel_map: task->map;
3377                 basic_info->virtual_size  = map->size;
3378                 basic_info->resident_size =
3379                         (mach_vm_size_t)(pmap_resident_count(map->pmap))
3380                         * PAGE_SIZE_64;
3381
3382                 basic_info->policy = ((task != kernel_task)?
3383                                                                                   POLICY_TIMESHARE: POLICY_RR);
3384                 basic_info->suspend_count = task->user_stop_count;
3385
3386                 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
3387                 basic_info->user_time.seconds =
3388                         (typeof(basic_info->user_time.seconds))secs;
3389                 basic_info->user_time.microseconds = usecs;
3390
3391                 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
3392                 basic_info->system_time.seconds =
3393                         (typeof(basic_info->system_time.seconds))secs;
3394                 basic_info->system_time.microseconds = usecs;
3395
3396                 *task_info_count = TASK_BASIC_INFO_64_COUNT;
3397                 break;
3398         }
3399
3400         case MACH_TASK_BASIC_INFO:
3401         {
3402                 mach_task_basic_info_t  basic_info;
3403                 vm_map_t                map;
3404                 clock_sec_t             secs;
3405                 clock_usec_t            usecs;
3406
3407                 if (*task_info_count < MACH_TASK_BASIC_INFO_COUNT) {
3408                     error = KERN_INVALID_ARGUMENT;
3409                     break;
3410                 }
3411
3412                 basic_info = (mach_task_basic_info_t)task_info_out;
3413
3414                 map = (task == kernel_task) ? kernel_map : task->map;
3415
3416                 basic_info->virtual_size  = map->size;
3417
3418                 basic_info->resident_size =
3419                     (mach_vm_size_t)(pmap_resident_count(map->pmap));
3420                 basic_info->resident_size *= PAGE_SIZE_64;
3421
3422                 basic_info->resident_size_max =
3423                     (mach_vm_size_t)(pmap_resident_max(map->pmap));
3424                 basic_info->resident_size_max *= PAGE_SIZE_64;
3425
3426                 basic_info->policy = ((task != kernel_task) ?
3427                                       POLICY_TIMESHARE : POLICY_RR);
3428
3429                 basic_info->suspend_count = task->user_stop_count;
3430
3431                 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
3432                 basic_info->user_time.seconds =
3433                     (typeof(basic_info->user_time.seconds))secs;
3434                 basic_info->user_time.microseconds = usecs;
3435
3436                 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
3437                 basic_info->system_time.seconds =
3438                     (typeof(basic_info->system_time.seconds))secs;
3439                 basic_info->system_time.microseconds = usecs;
3440
3441                 *task_info_count = MACH_TASK_BASIC_INFO_COUNT;
3442                 break;
3443         }
3444
3445         case TASK_THREAD_TIMES_INFO:
3446         {
3447                 task_thread_times_info_t        times_info;
3448                 thread_t                                        thread;
3449
3450                 if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) {
3451                     error = KERN_INVALID_ARGUMENT;
3452                     break;
3453                 }
3454
3455                 times_info = (task_thread_times_info_t) task_info_out;
3456                 times_info->user_time.seconds = 0;
3457                 times_info->user_time.microseconds = 0;
3458                 times_info->system_time.seconds = 0;
3459                 times_info->system_time.microseconds = 0;
3460
3461
3462                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3463                         time_value_t    user_time, system_time;
3464
3465                         if (thread->options & TH_OPT_IDLE_THREAD)
3466                                 continue;
3467
3468                         thread_read_times(thread, &user_time, &system_time);
3469
3470                         time_value_add(&times_info->user_time, &user_time);
3471                         time_value_add(&times_info->system_time, &system_time);
3472                 }
3473
3474                 *task_info_count = TASK_THREAD_TIMES_INFO_COUNT;
3475                 break;
3476         }
3477
3478         case TASK_ABSOLUTETIME_INFO:
3479         {
3480                 task_absolutetime_info_t        info;
3481                 thread_t                        thread;
3482
3483                 if (*task_info_count < TASK_ABSOLUTETIME_INFO_COUNT) {
3484                         error = KERN_INVALID_ARGUMENT;
3485                         break;
3486                 }
3487
3488                 info = (task_absolutetime_info_t)task_info_out;
3489                 info->threads_user = info->threads_system = 0;
3490
3491
3492                 info->total_user = task->total_user_time;
3493                 info->total_system = task->total_system_time;
3494
3495                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3496                         uint64_t        tval;
3497                         spl_t           x;
3498
3499                         if (thread->options & TH_OPT_IDLE_THREAD)
3500                                 continue;
3501
3502                         x = splsched();
3503                         thread_lock(thread);
3504
3505                         tval = timer_grab(&thread->user_timer);
3506                         info->threads_user += tval;
3507                         info->total_user += tval;
3508
3509                         tval = timer_grab(&thread->system_timer);
3510                         if (thread->precise_user_kernel_time) {
3511                                 info->threads_system += tval;
3512                                 info->total_system += tval;
3513                         } else {
3514                                 /* system_timer may represent either sys or user */
3515                                 info->threads_user += tval;
3516                                 info->total_user += tval;
3517                         }
3518
3519                         thread_unlock(thread);
3520                         splx(x);
3521                 }
3522
3523
3524                 *task_info_count = TASK_ABSOLUTETIME_INFO_COUNT;
3525                 break;
3526         }
3527
3528         case TASK_DYLD_INFO:
3529         {
3530                 task_dyld_info_t info;
3531
3532                 /*
3533                  * We added the format field to TASK_DYLD_INFO output.  For
3534                  * temporary backward compatibility, accept the fact that
3535                  * clients may ask for the old version - distinguished by the
3536                  * size of the expected result structure.
3537                  */
3538 #define TASK_LEGACY_DYLD_INFO_COUNT \
3539                 offsetof(struct task_dyld_info, all_image_info_format)/sizeof(natural_t)
3540
3541                 if (*task_info_count < TASK_LEGACY_DYLD_INFO_COUNT) {
3542                         error = KERN_INVALID_ARGUMENT;
3543                         break;
3544                 }
3545
3546                 info = (task_dyld_info_t)task_info_out;
3547                 info->all_image_info_addr = task->all_image_info_addr;
3548                 info->all_image_info_size = task->all_image_info_size;
3549
3550                 /* only set format on output for those expecting it */
3551                 if (*task_info_count >= TASK_DYLD_INFO_COUNT) {
3552                         info->all_image_info_format = task_has_64BitAddr(task) ?
3553                                                  TASK_DYLD_ALL_IMAGE_INFO_64 :
3554                                                  TASK_DYLD_ALL_IMAGE_INFO_32 ;
3555                         *task_info_count = TASK_DYLD_INFO_COUNT;
3556                 } else {
3557                         *task_info_count = TASK_LEGACY_DYLD_INFO_COUNT;
3558                 }
3559                 break;
3560         }
3561
3562         case TASK_EXTMOD_INFO:
3563         {
3564                 task_extmod_info_t info;
3565                 void *p;
3566
3567                 if (*task_info_count < TASK_EXTMOD_INFO_COUNT) {
3568                         error = KERN_INVALID_ARGUMENT;
3569                         break;
3570                 }
3571
3572                 info = (task_extmod_info_t)task_info_out;
3573
3574                 p = get_bsdtask_info(task);
3575                 if (p) {
3576                         proc_getexecutableuuid(p, info->task_uuid, sizeof(info->task_uuid));
3577                 } else {
3578                         bzero(info->task_uuid, sizeof(info->task_uuid));
3579                 }
3580                 info->extmod_statistics = task->extmod_statistics;
3581                 *task_info_count = TASK_EXTMOD_INFO_COUNT;
3582
3583                 break;
3584         }
3585
3586         case TASK_KERNELMEMORY_INFO:
3587         {
3588                 task_kernelmemory_info_t        tkm_info;
3589                 ledger_amount_t                 credit, debit;
3590
3591                 if (*task_info_count < TASK_KERNELMEMORY_INFO_COUNT) {
3592                    error = KERN_INVALID_ARGUMENT;
3593                    break;
3594                 }
3595
3596                 tkm_info = (task_kernelmemory_info_t) task_info_out;
3597                 tkm_info->total_palloc = 0;
3598                 tkm_info->total_pfree = 0;
3599                 tkm_info->total_salloc = 0;
3600                 tkm_info->total_sfree = 0;
3601
3602                 if (task == kernel_task) {
3603                         /*
3604                          * All shared allocs/frees from other tasks count against
3605                          * the kernel private memory usage.  If we are looking up
3606                          * info for the kernel task, gather from everywhere.
3607                          */
3608                         task_unlock(task);
3609
3610                         /* start by accounting for all the terminated tasks against the kernel */
3611                         tkm_info->total_palloc = tasks_tkm_private.alloc + tasks_tkm_shared.alloc;
3612                         tkm_info->total_pfree = tasks_tkm_private.free + tasks_tkm_shared.free;
3613
3614                         /* count all other task/thread shared alloc/free against the kernel */
3615                         lck_mtx_lock(&tasks_threads_lock);
3616
3617                         /* XXX this really shouldn't be using the function parameter 'task' as a local var! */
3618                         queue_iterate(&tasks, task, task_t, tasks) {
3619                                 if (task == kernel_task) {
3620                                         if (ledger_get_entries(task->ledger,
3621                                             task_ledgers.tkm_private, &credit,
3622                                             &debit) == KERN_SUCCESS) {
3623                                                 tkm_info->total_palloc += credit;
3624                                                 tkm_info->total_pfree += debit;
3625                                         }
3626                                 }
3627                                 if (!ledger_get_entries(task->ledger,
3628                                     task_ledgers.tkm_shared, &credit, &debit)) {
3629                                         tkm_info->total_palloc += credit;
3630                                         tkm_info->total_pfree += debit;
3631                                 }
3632                         }
3633                         lck_mtx_unlock(&tasks_threads_lock);
3634                 } else {
3635                         if (!ledger_get_entries(task->ledger,
3636                             task_ledgers.tkm_private, &credit, &debit)) {
3637                                 tkm_info->total_palloc = credit;
3638                                 tkm_info->total_pfree = debit;
3639                         }
3640                         if (!ledger_get_entries(task->ledger,
3641                             task_ledgers.tkm_shared, &credit, &debit)) {
3642                                 tkm_info->total_salloc = credit;
3643                                 tkm_info->total_sfree = debit;
3644                         }
3645                         task_unlock(task);
3646                 }
3647
3648                 *task_info_count = TASK_KERNELMEMORY_INFO_COUNT;
3649                 return KERN_SUCCESS;
3650         }
3651
3652         /* OBSOLETE */
3653         case TASK_SCHED_FIFO_INFO:
3654         {
3655
3656                 if (*task_info_count < POLICY_FIFO_BASE_COUNT) {
3657                         error = KERN_INVALID_ARGUMENT;
3658                         break;
3659                 }
3660
3661                 error = KERN_INVALID_POLICY;
3662                 break;
3663         }
3664
3665         /* OBSOLETE */
3666         case TASK_SCHED_RR_INFO:
3667         {
3668                 policy_rr_base_t        rr_base;
3669                 uint32_t quantum_time;
3670                 uint64_t quantum_ns;
3671
3672                 if (*task_info_count < POLICY_RR_BASE_COUNT) {
3673                         error = KERN_INVALID_ARGUMENT;
3674                         break;
3675                 }
3676
3677                 rr_base = (policy_rr_base_t) task_info_out;
3678
3679                 if (task != kernel_task) {
3680                         error = KERN_INVALID_POLICY;
3681                         break;
3682                 }
3683
3684                 rr_base->base_priority = task->priority;
3685
3686                 quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
3687                 absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
3688
3689                 rr_base->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
3690
3691                 *task_info_count = POLICY_RR_BASE_COUNT;
3692                 break;
3693         }
3694
3695         /* OBSOLETE */
3696         case TASK_SCHED_TIMESHARE_INFO:
3697         {
3698                 policy_timeshare_base_t ts_base;
3699
3700                 if (*task_info_count < POLICY_TIMESHARE_BASE_COUNT) {
3701                         error = KERN_INVALID_ARGUMENT;
3702                         break;
3703                 }
3704
3705                 ts_base = (policy_timeshare_base_t) task_info_out;
3706
3707                 if (task == kernel_task) {
3708                         error = KERN_INVALID_POLICY;
3709                         break;
3710                 }
3711
3712                 ts_base->base_priority = task->priority;
3713
3714                 *task_info_count = POLICY_TIMESHARE_BASE_COUNT;
3715                 break;
3716         }
3717
3718         case TASK_SECURITY_TOKEN:
3719         {
3720                 security_token_t        *sec_token_p;
3721
3722                 if (*task_info_count < TASK_SECURITY_TOKEN_COUNT) {
3723                     error = KERN_INVALID_ARGUMENT;
3724                     break;
3725                 }
3726
3727                 sec_token_p = (security_token_t *) task_info_out;
3728
3729                 *sec_token_p = task->sec_token;
3730
3731                 *task_info_count = TASK_SECURITY_TOKEN_COUNT;
3732                 break;
3733         }
3734
3735         case TASK_AUDIT_TOKEN:
3736         {
3737                 audit_token_t   *audit_token_p;
3738
3739                 if (*task_info_count < TASK_AUDIT_TOKEN_COUNT) {
3740                     error = KERN_INVALID_ARGUMENT;
3741                     break;
3742                 }
3743
3744                 audit_token_p = (audit_token_t *) task_info_out;
3745
3746                 *audit_token_p = task->audit_token;
3747
3748                 *task_info_count = TASK_AUDIT_TOKEN_COUNT;
3749                 break;
3750         }
3751
3752         case TASK_SCHED_INFO:
3753                 error = KERN_INVALID_ARGUMENT;
3754                 break;
3755
3756         case TASK_EVENTS_INFO:
3757         {
3758                 task_events_info_t      events_info;
3759                 thread_t                        thread;
3760
3761                 if (*task_info_count < TASK_EVENTS_INFO_COUNT) {
3762                    error = KERN_INVALID_ARGUMENT;
3763                    break;
3764                 }
3765
3766                 events_info = (task_events_info_t) task_info_out;
3767
3768
3769                 events_info->faults = task->faults;
3770                 events_info->pageins = task->pageins;
3771                 events_info->cow_faults = task->cow_faults;
3772                 events_info->messages_sent = task->messages_sent;
3773                 events_info->messages_received = task->messages_received;
3774                 events_info->syscalls_mach = task->syscalls_mach;
3775                 events_info->syscalls_unix = task->syscalls_unix;
3776
3777                 events_info->csw = task->c_switch;
3778
3779                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3780                         events_info->csw           += thread->c_switch;
3781                         events_info->syscalls_mach += thread->syscalls_mach;
3782                         events_info->syscalls_unix += thread->syscalls_unix;
3783                 }
3784
3785
3786                 *task_info_count = TASK_EVENTS_INFO_COUNT;
3787                 break;
3788         }
3789         case TASK_AFFINITY_TAG_INFO:
3790         {
3791                 if (*task_info_count < TASK_AFFINITY_TAG_INFO_COUNT) {
3792                     error = KERN_INVALID_ARGUMENT;
3793                     break;
3794                 }
3795
3796                 error = task_affinity_info(task, task_info_out, task_info_count);
3797                 break;
3798         }
3799         case TASK_POWER_INFO:
3800         {
3801                 if (*task_info_count < TASK_POWER_INFO_COUNT) {
3802                         error = KERN_INVALID_ARGUMENT;
3803                         break;
3804                 }
3805
3806                 task_power_info_locked(task, (task_power_info_t)task_info_out, NULL, NULL);
3807                 break;
3808         }
3809
3810         case TASK_POWER_INFO_V2:
3811         {
3812                 if (*task_info_count < TASK_POWER_INFO_V2_COUNT) {
3813                         error = KERN_INVALID_ARGUMENT;
3814                         break;
3815                 }
3816                 task_power_info_v2_t tpiv2 = (task_power_info_v2_t) task_info_out;
3817
3818                 uint64_t *task_energy = NULL;
3819                 task_power_info_locked(task, &tpiv2->cpu_energy, &tpiv2->gpu_energy, task_energy);
3820                 break;
3821         }
3822
3823         case TASK_VM_INFO:
3824         case TASK_VM_INFO_PURGEABLE:
3825         {
3826                 task_vm_info_t          vm_info;
3827                 vm_map_t                map;
3828
3829                 if (*task_info_count < TASK_VM_INFO_REV0_COUNT) {
3830                     error = KERN_INVALID_ARGUMENT;
3831                     break;
3832                 }
3833
3834                 vm_info = (task_vm_info_t)task_info_out;
3835
3836                 if (task == kernel_task) {
3837                         map = kernel_map;
3838                         /* no lock */
3839                 } else {
3840                         map = task->map;
3841                         vm_map_lock_read(map);
3842                 }
3843
3844                 vm_info->virtual_size = (typeof(vm_info->virtual_size))map->size;
3845                 vm_info->region_count = map->hdr.nentries;
3846                 vm_info->page_size = vm_map_page_size(map);
3847
3848                 vm_info->resident_size = pmap_resident_count(map->pmap);
3849                 vm_info->resident_size *= PAGE_SIZE;
3850                 vm_info->resident_size_peak = pmap_resident_max(map->pmap);
3851                 vm_info->resident_size_peak *= PAGE_SIZE;
3852
3853 #define _VM_INFO(_name) \
3854         vm_info->_name = ((mach_vm_size_t) map->pmap->stats._name) * PAGE_SIZE
3855
3856                 _VM_INFO(device);
3857                 _VM_INFO(device_peak);
3858                 _VM_INFO(external);
3859                 _VM_INFO(external_peak);
3860                 _VM_INFO(internal);
3861                 _VM_INFO(internal_peak);
3862                 _VM_INFO(reusable);
3863                 _VM_INFO(reusable_peak);
3864                 _VM_INFO(compressed);
3865                 _VM_INFO(compressed_peak);
3866                 _VM_INFO(compressed_lifetime);
3867
3868                 vm_info->purgeable_volatile_pmap = 0;
3869                 vm_info->purgeable_volatile_resident = 0;
3870                 vm_info->purgeable_volatile_virtual = 0;
3871                 if (task == kernel_task) {
3872                         /*
3873                          * We do not maintain the detailed stats for the
3874                          * kernel_pmap, so just count everything as
3875                          * "internal"...
3876                          */
3877                         vm_info->internal = vm_info->resident_size;
3878                         /*
3879                          * ... but since the memory held by the VM compressor
3880                          * in the kernel address space ought to be attributed
3881                          * to user-space tasks, we subtract it from "internal"
3882                          * to give memory reporting tools a more accurate idea
3883                          * of what the kernel itself is actually using, instead
3884                          * of making it look like the kernel is leaking memory
3885                          * when the system is under memory pressure.
3886                          */
3887                         vm_info->internal -= (VM_PAGE_COMPRESSOR_COUNT *
3888                                               PAGE_SIZE);
3889                 } else {
3890                         mach_vm_size_t  volatile_virtual_size;
3891                         mach_vm_size_t  volatile_resident_size;
3892                         mach_vm_size_t  volatile_compressed_size;
3893                         mach_vm_size_t  volatile_pmap_size;
3894                         mach_vm_size_t  volatile_compressed_pmap_size;
3895                         kern_return_t   kr;
3896
3897                         if (flavor == TASK_VM_INFO_PURGEABLE) {
3898                                 kr = vm_map_query_volatile(
3899                                         map,
3900                                         &volatile_virtual_size,
3901                                         &volatile_resident_size,
3902                                         &volatile_compressed_size,
3903                                         &volatile_pmap_size,
3904                                         &volatile_compressed_pmap_size);
3905                                 if (kr == KERN_SUCCESS) {
3906                                         vm_info->purgeable_volatile_pmap =
3907                                                 volatile_pmap_size;
3908                                         if (radar_20146450) {
3909                                         vm_info->compressed -=
3910                                                 volatile_compressed_pmap_size;
3911                                         }
3912                                         vm_info->purgeable_volatile_resident =
3913                                                 volatile_resident_size;
3914                                         vm_info->purgeable_volatile_virtual =
3915                                                 volatile_virtual_size;
3916                                 }
3917                         }
3918                 }
3919                 *task_info_count = TASK_VM_INFO_REV0_COUNT;
3920
3921                 if (original_task_info_count >= TASK_VM_INFO_REV1_COUNT) {
3922                         vm_info->phys_footprint =
3923                                 (mach_vm_size_t) get_task_phys_footprint(task);
3924                         *task_info_count = TASK_VM_INFO_REV1_COUNT;
3925                 }
3926                 if (original_task_info_count >= TASK_VM_INFO_REV2_COUNT) {
3927                         vm_info->min_address = map->min_offset;
3928                         vm_info->max_address = map->max_offset;
3929                         *task_info_count = TASK_VM_INFO_REV2_COUNT;
3930                 }
3931
3932                 if (task != kernel_task) {
3933                         vm_map_unlock_read(map);
3934                 }
3935
3936                 break;
3937         }
3938
3939         case TASK_WAIT_STATE_INFO:
3940         {
3941                 /*
3942                  * Deprecated flavor. Currently allowing some results until all users
3943                  * stop calling it. The results may not be accurate.
3944          */
3945                 task_wait_state_info_t  wait_state_info;
3946                 uint64_t total_sfi_ledger_val = 0;
3947
3948                 if (*task_info_count < TASK_WAIT_STATE_INFO_COUNT) {
3949                    error = KERN_INVALID_ARGUMENT;
3950                    break;
3951                 }
3952
3953                 wait_state_info = (task_wait_state_info_t) task_info_out;
3954
3955                 wait_state_info->total_wait_state_time = 0;
3956                 bzero(wait_state_info->_reserved, sizeof(wait_state_info->_reserved));
3957
3958 #if CONFIG_SCHED_SFI
3959                 int i, prev_lentry = -1;
3960                 int64_t  val_credit, val_debit;
3961
3962                 for (i = 0; i < MAX_SFI_CLASS_ID; i++){
3963                         val_credit =0;
3964                         /*
3965                          * checking with prev_lentry != entry ensures adjacent classes
3966                          * which share the same ledger do not add wait times twice.
3967                          * Note: Use ledger() call to get data for each individual sfi class.
3968                          */
3969                         if (prev_lentry != task_ledgers.sfi_wait_times[i] &&
3970                                 KERN_SUCCESS == ledger_get_entries(task->ledger,
3971                                                 task_ledgers.sfi_wait_times[i], &val_credit, &val_debit)) {
3972                                 total_sfi_ledger_val += val_credit;
3973                         }
3974                         prev_lentry = task_ledgers.sfi_wait_times[i];
3975                 }
3976
3977 #endif /* CONFIG_SCHED_SFI */
3978                 wait_state_info->total_wait_sfi_state_time = total_sfi_ledger_val;
3979                 *task_info_count = TASK_WAIT_STATE_INFO_COUNT;
3980
3981                 break;
3982         }
3983         case TASK_VM_INFO_PURGEABLE_ACCOUNT:
3984         {
3985 #if DEVELOPMENT || DEBUG
3986                 pvm_account_info_t      acnt_info;
3987
3988                 if (*task_info_count < PVM_ACCOUNT_INFO_COUNT) {
3989                         error = KERN_INVALID_ARGUMENT;
3990                         break;
3991                 }
3992
3993                 if (task_info_out == NULL) {
3994                         error = KERN_INVALID_ARGUMENT;
3995                         break;
3996                 }
3997
3998                 acnt_info = (pvm_account_info_t) task_info_out;
3999
4000                 error = vm_purgeable_account(task, acnt_info);
4001
4002                 *task_info_count = PVM_ACCOUNT_INFO_COUNT;
4003
4004                 break;
4005 #else /* DEVELOPMENT || DEBUG */
4006                 error = KERN_NOT_SUPPORTED;
4007                 break;
4008 #endif /* DEVELOPMENT || DEBUG */
4009         }
4010         case TASK_FLAGS_INFO:
4011         {
4012                 task_flags_info_t               flags_info;
4013
4014                 if (*task_info_count < TASK_FLAGS_INFO_COUNT) {
4015                     error = KERN_INVALID_ARGUMENT;
4016                     break;
4017                 }
4018
4019                 flags_info = (task_flags_info_t)task_info_out;
4020
4021                 /* only publish the 64-bit flag of the task */
4022                 flags_info->flags = task->t_flags & TF_64B_ADDR;
4023
4024                 *task_info_count = TASK_FLAGS_INFO_COUNT;
4025                 break;
4026         }
4027
4028         case TASK_DEBUG_INFO_INTERNAL:
4029         {
4030 #if DEVELOPMENT || DEBUG
4031                 task_debug_info_internal_t dbg_info;
4032                 if (*task_info_count < TASK_DEBUG_INFO_INTERNAL_COUNT) {
4033                         error = KERN_NOT_SUPPORTED;
4034                         break;
4035                 }
4036
4037                 if (task_info_out == NULL) {
4038                         error = KERN_INVALID_ARGUMENT;
4039                         break;
4040                 }
4041                 dbg_info = (task_debug_info_internal_t) task_info_out;
4042                 dbg_info->ipc_space_size = 0;
4043                 if (task->itk_space){
4044                         dbg_info->ipc_space_size = task->itk_space->is_table_size;
4045                 }
4046
4047                 error = KERN_SUCCESS;
4048                 *task_info_count = TASK_DEBUG_INFO_INTERNAL_COUNT;
4049                 break;
4050 #else /* DEVELOPMENT || DEBUG */
4051                 error = KERN_NOT_SUPPORTED;
4052                 break;
4053 #endif /* DEVELOPMENT || DEBUG */
4054         }
4055         default:
4056                 error = KERN_INVALID_ARGUMENT;
4057         }
4058
4059         task_unlock(task);
4060         return (error);
4061 }
4062
4063 /*
4064  *      task_power_info
4065  *
4066  *      Returns power stats for the task.
4067  *      Note: Called with task locked.
4068  */
4069 void
4070 task_power_info_locked(
4071         task_t                  task,
4072         task_power_info_t       info,
4073         gpu_energy_data_t       ginfo,
4074         uint64_t *task_energy)
4075 {
4076         thread_t                thread;
4077         ledger_amount_t         tmp;
4078
4079         task_lock_assert_owned(task);
4080
4081         ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
4082                 (ledger_amount_t *)&info->task_interrupt_wakeups, &tmp);
4083         ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
4084                 (ledger_amount_t *)&info->task_platform_idle_wakeups, &tmp);
4085
4086         info->task_timer_wakeups_bin_1 = task->task_timer_wakeups_bin_1;
4087         info->task_timer_wakeups_bin_2 = task->task_timer_wakeups_bin_2;
4088
4089         info->total_user = task->total_user_time;
4090         info->total_system = task->total_system_time;
4091
4092         if (task_energy) {
4093                 *task_energy = task->task_energy;
4094         }
4095
4096         if (ginfo) {
4097                 ginfo->task_gpu_utilisation = task->task_gpu_ns;
4098         }
4099
4100         queue_iterate(&task->threads, thread, thread_t, task_threads) {
4101                 uint64_t        tval;
4102                 spl_t           x;
4103
4104                 if (thread->options & TH_OPT_IDLE_THREAD)
4105                         continue;
4106
4107                 x = splsched();
4108                 thread_lock(thread);
4109
4110                 info->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
4111                 info->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
4112
4113                 if (task_energy) {
4114                         *task_energy += ml_energy_stat(thread);
4115                 }
4116
4117                 tval = timer_grab(&thread->user_timer);
4118                 info->total_user += tval;
4119
4120                 tval = timer_grab(&thread->system_timer);
4121                 if (thread->precise_user_kernel_time) {
4122                         info->total_system += tval;
4123                 } else {
4124                         /* system_timer may represent either sys or user */
4125                         info->total_user += tval;
4126                 }
4127
4128                 if (ginfo) {
4129                         ginfo->task_gpu_utilisation += ml_gpu_stat(thread);
4130                 }
4131                 thread_unlock(thread);
4132                 splx(x);
4133         }
4134 }
4135
4136 /*
4137  *      task_gpu_utilisation
4138  *
4139  *      Returns the total gpu time used by the all the threads of the task
4140  *  (both dead and alive)
4141  */
4142 uint64_t
4143 task_gpu_utilisation(
4144         task_t  task)
4145 {
4146         uint64_t gpu_time = 0;
4147         thread_t thread;
4148
4149         task_lock(task);
4150         gpu_time += task->task_gpu_ns;
4151
4152         queue_iterate(&task->threads, thread, thread_t, task_threads) {
4153                 spl_t x;
4154                 x = splsched();
4155                 thread_lock(thread);
4156                 gpu_time += ml_gpu_stat(thread);
4157                 thread_unlock(thread);
4158                 splx(x);
4159         }
4160
4161         task_unlock(task);
4162         return gpu_time;
4163 }
4164
4165 /*
4166  *      task_energy
4167  *
4168  *      Returns the total energy used by the all the threads of the task
4169  *  (both dead and alive)
4170  */
4171 uint64_t
4172 task_energy(
4173         task_t  task)
4174 {
4175         uint64_t energy = 0;
4176         thread_t thread;
4177
4178         task_lock(task);
4179         energy += task->task_energy;
4180
4181         queue_iterate(&task->threads, thread, thread_t, task_threads) {
4182                 spl_t x;
4183                 x = splsched();
4184                 thread_lock(thread);
4185                 energy += ml_energy_stat(thread);
4186                 thread_unlock(thread);
4187                 splx(x);
4188         }
4189
4190         task_unlock(task);
4191         return energy;
4192 }
4193
4194 kern_return_t
4195 task_purgable_info(
4196         task_t                  task,
4197         task_purgable_info_t    *stats)
4198 {
4199         if (task == TASK_NULL || stats == NULL)
4200                 return KERN_INVALID_ARGUMENT;
4201         /* Take task reference */
4202         task_reference(task);
4203         vm_purgeable_stats((vm_purgeable_info_t)stats, task);
4204         /* Drop task reference */
4205         task_deallocate(task);
4206         return KERN_SUCCESS;
4207 }
4208
4209 void
4210 task_vtimer_set(
4211         task_t          task,
4212         integer_t       which)
4213 {
4214         thread_t        thread;
4215         spl_t           x;
4216
4217         task_lock(task);
4218
4219         task->vtimers |= which;
4220
4221         switch (which) {
4222
4223         case TASK_VTIMER_USER:
4224                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4225                         x = splsched();
4226                         thread_lock(thread);
4227                         if (thread->precise_user_kernel_time)
4228                                 thread->vtimer_user_save = timer_grab(&thread->user_timer);
4229                         else
4230                                 thread->vtimer_user_save = timer_grab(&thread->system_timer);
4231                         thread_unlock(thread);
4232                         splx(x);
4233                 }
4234                 break;
4235
4236         case TASK_VTIMER_PROF:
4237                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4238                         x = splsched();
4239                         thread_lock(thread);
4240                         thread->vtimer_prof_save = timer_grab(&thread->user_timer);
4241                         thread->vtimer_prof_save += timer_grab(&thread->system_timer);
4242                         thread_unlock(thread);
4243                         splx(x);
4244                 }
4245                 break;
4246
4247         case TASK_VTIMER_RLIM:
4248                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4249                         x = splsched();
4250                         thread_lock(thread);
4251                         thread->vtimer_rlim_save = timer_grab(&thread->user_timer);
4252                         thread->vtimer_rlim_save += timer_grab(&thread->system_timer);
4253                         thread_unlock(thread);
4254                         splx(x);
4255                 }
4256                 break;
4257         }
4258
4259         task_unlock(task);
4260 }
4261
4262 void
4263 task_vtimer_clear(
4264         task_t          task,
4265         integer_t       which)
4266 {
4267         assert(task == current_task());
4268
4269         task_lock(task);
4270
4271         task->vtimers &= ~which;
4272
4273         task_unlock(task);
4274 }
4275
4276 void
4277 task_vtimer_update(
4278 __unused
4279         task_t          task,
4280         integer_t       which,
4281         uint32_t        *microsecs)
4282 {
4283         thread_t        thread = current_thread();
4284         uint32_t        tdelt = 0;
4285         clock_sec_t     secs = 0;
4286         uint64_t        tsum;
4287
4288         assert(task == current_task());
4289
4290         spl_t s = splsched();
4291         thread_lock(thread);
4292
4293         if ((task->vtimers & which) != (uint32_t)which) {
4294                 thread_unlock(thread);
4295                 splx(s);
4296                 return;
4297         }
4298
4299         switch (which) {
4300
4301         case TASK_VTIMER_USER:
4302                 if (thread->precise_user_kernel_time) {
4303                         tdelt = (uint32_t)timer_delta(&thread->user_timer,
4304                                                                 &thread->vtimer_user_save);
4305                 } else {
4306                         tdelt = (uint32_t)timer_delta(&thread->system_timer,
4307                                                                 &thread->vtimer_user_save);
4308                 }
4309                 absolutetime_to_microtime(tdelt, &secs, microsecs);
4310                 break;
4311
4312         case TASK_VTIMER_PROF:
4313                 tsum = timer_grab(&thread->user_timer);
4314                 tsum += timer_grab(&thread->system_timer);
4315                 tdelt = (uint32_t)(tsum - thread->vtimer_prof_save);
4316                 absolutetime_to_microtime(tdelt, &secs, microsecs);
4317                 /* if the time delta is smaller than a usec, ignore */
4318                 if (*microsecs != 0)
4319                         thread->vtimer_prof_save = tsum;
4320                 break;
4321
4322         case TASK_VTIMER_RLIM:
4323                 tsum = timer_grab(&thread->user_timer);
4324                 tsum += timer_grab(&thread->system_timer);
4325                 tdelt = (uint32_t)(tsum - thread->vtimer_rlim_save);
4326                 thread->vtimer_rlim_save = tsum;
4327                 absolutetime_to_microtime(tdelt, &secs, microsecs);
4328                 break;
4329         }
4330
4331         thread_unlock(thread);
4332         splx(s);
4333 }
4334
4335 /*
4336  *      task_assign:
4337  *
4338  *      Change the assigned processor set for the task
4339  */
4340 kern_return_t
4341 task_assign(
4342         __unused task_t         task,
4343         __unused processor_set_t        new_pset,
4344         __unused boolean_t      assign_threads)
4345 {
4346         return(KERN_FAILURE);
4347 }
4348
4349 /*
4350  *      task_assign_default:
4351  *
4352  *      Version of task_assign to assign to default processor set.
4353  */
4354 kern_return_t
4355 task_assign_default(
4356         task_t          task,
4357         boolean_t       assign_threads)
4358 {
4359     return (task_assign(task, &pset0, assign_threads));
4360 }
4361
4362 /*
4363  *      task_get_assignment
4364  *
4365  *      Return name of processor set that task is assigned to.
4366  */
4367 kern_return_t
4368 task_get_assignment(
4369         task_t          task,
4370         processor_set_t *pset)
4371 {
4372         if (!task || !task->active)
4373                 return KERN_FAILURE;
4374
4375         *pset = &pset0;
4376
4377         return KERN_SUCCESS;
4378 }
4379
4380 uint64_t
4381 get_task_dispatchqueue_offset(
4382                 task_t          task)
4383 {
4384         return task->dispatchqueue_offset;
4385 }
4386
4387 /*
4388  *      task_policy
4389  *
4390  *      Set scheduling policy and parameters, both base and limit, for
4391  *      the given task. Policy must be a policy which is enabled for the
4392  *      processor set. Change contained threads if requested.
4393  */
4394 kern_return_t
4395 task_policy(
4396         __unused task_t                 task,
4397         __unused policy_t                       policy_id,
4398         __unused policy_base_t          base,
4399         __unused mach_msg_type_number_t count,
4400         __unused boolean_t                      set_limit,
4401         __unused boolean_t                      change)
4402 {
4403         return(KERN_FAILURE);
4404 }
4405
4406 /*
4407  *      task_set_policy
4408  *
4409  *      Set scheduling policy and parameters, both base and limit, for
4410  *      the given task. Policy can be any policy implemented by the
4411  *      processor set, whether enabled or not. Change contained threads
4412  *      if requested.
4413  */
4414 kern_return_t
4415 task_set_policy(
4416         __unused task_t                 task,
4417         __unused processor_set_t                pset,
4418         __unused policy_t                       policy_id,
4419         __unused policy_base_t          base,
4420         __unused mach_msg_type_number_t base_count,
4421         __unused policy_limit_t         limit,
4422         __unused mach_msg_type_number_t limit_count,
4423         __unused boolean_t                      change)
4424 {
4425         return(KERN_FAILURE);
4426 }
4427
4428 kern_return_t
4429 task_set_ras_pc(
4430         __unused task_t task,
4431         __unused vm_offset_t    pc,
4432         __unused vm_offset_t    endpc)
4433 {
4434         return KERN_FAILURE;
4435 }
4436
4437 void
4438 task_synchronizer_destroy_all(task_t task)
4439 {
4440         /*
4441          *  Destroy owned semaphores
4442          */
4443         semaphore_destroy_all(task);
4444 }
4445
4446 /*
4447  * Install default (machine-dependent) initial thread state
4448  * on the task.  Subsequent thread creation will have this initial
4449  * state set on the thread by machine_thread_inherit_taskwide().
4450  * Flavors and structures are exactly the same as those to thread_set_state()
4451  */
4452 kern_return_t
4453 task_set_state(
4454         task_t task,
4455         int flavor,
4456         thread_state_t state,
4457         mach_msg_type_number_t state_count)
4458 {
4459         kern_return_t ret;
4460
4461         if (task == TASK_NULL) {
4462                 return (KERN_INVALID_ARGUMENT);
4463         }
4464
4465         task_lock(task);
4466
4467         if (!task->active) {
4468                 task_unlock(task);
4469                 return (KERN_FAILURE);
4470         }
4471
4472         ret = machine_task_set_state(task, flavor, state, state_count);
4473
4474         task_unlock(task);
4475         return ret;
4476 }
4477
4478 /*
4479  * Examine the default (machine-dependent) initial thread state
4480  * on the task, as set by task_set_state().  Flavors and structures
4481  * are exactly the same as those passed to thread_get_state().
4482  */
4483 kern_return_t
4484 task_get_state(
4485         task_t  task,
4486         int     flavor,
4487         thread_state_t state,
4488         mach_msg_type_number_t *state_count)
4489 {
4490         kern_return_t ret;
4491
4492         if (task == TASK_NULL) {
4493                 return (KERN_INVALID_ARGUMENT);
4494         }
4495
4496         task_lock(task);
4497
4498         if (!task->active) {
4499                 task_unlock(task);
4500                 return (KERN_FAILURE);
4501         }
4502
4503         ret = machine_task_get_state(task, flavor, state, state_count);
4504
4505         task_unlock(task);
4506         return ret;
4507 }
4508
4509 #if CONFIG_MEMORYSTATUS
4510 #define HWM_USERCORE_MINSPACE 250 // free space (in MB) required *after* core file creation
4511
4512 void __attribute__((noinline))
4513 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, boolean_t is_fatal)
4514 {
4515         task_t                                          task            = current_task();
4516         int                                                     pid         = 0;
4517         const char                                      *procname       = "unknown";
4518         mach_exception_data_type_t      code[EXCEPTION_CODE_MAX];
4519
4520 #ifdef MACH_BSD
4521         pid = proc_selfpid();
4522
4523         if (pid == 1) {
4524                 /*
4525                  * Cannot have ReportCrash analyzing
4526                  * a suspended initproc.
4527                  */
4528                 return;
4529         }
4530
4531         if (task->bsd_info != NULL)
4532                 procname = proc_name_address(current_task()->bsd_info);
4533 #endif
4534 #if CONFIG_COREDUMP
4535         if (hwm_user_cores) {
4536                 int                             error;
4537                 uint64_t                starttime, end;
4538                 clock_sec_t             secs = 0;
4539                 uint32_t                microsecs = 0;
4540
4541                 starttime = mach_absolute_time();
4542                 /*
4543                  * Trigger a coredump of this process. Don't proceed unless we know we won't
4544                  * be filling up the disk; and ignore the core size resource limit for this
4545                  * core file.
4546                  */
4547                 if ((error = coredump(current_task()->bsd_info, HWM_USERCORE_MINSPACE, COREDUMP_IGNORE_ULIMIT)) != 0) {
4548                         printf("couldn't take coredump of %s[%d]: %d\n", procname, pid, error);
4549                 }
4550                 /*
4551                 * coredump() leaves the task suspended.
4552                 */
4553                 task_resume_internal(current_task());
4554
4555                 end = mach_absolute_time();
4556                 absolutetime_to_microtime(end - starttime, &secs, &microsecs);
4557                 printf("coredump of %s[%d] taken in %d secs %d microsecs\n",
4558                        proc_name_address(current_task()->bsd_info), pid, (int)secs, microsecs);
4559         }
4560 #endif /* CONFIG_COREDUMP */
4561
4562         if (disable_exc_resource) {
4563                 printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
4564                         "supressed by a boot-arg.\n", procname, pid, max_footprint_mb);
4565                 return;
4566         }
4567
4568         /*
4569          * A task that has triggered an EXC_RESOURCE, should not be
4570          * jetsammed when the device is under memory pressure.  Here
4571          * we set the P_MEMSTAT_TERMINATED flag so that the process
4572          * will be skipped if the memorystatus_thread wakes up.
4573          */
4574         proc_memstat_terminated(current_task()->bsd_info, TRUE);
4575
4576         printf("process %s[%d] crossed memory high watermark (%d MB); sending "
4577                 "EXC_RESOURCE.\n", procname, pid, max_footprint_mb);
4578
4579         code[0] = code[1] = 0;
4580         EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY);
4581         EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK);
4582         EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb);
4583
4584         /* Do not generate a corpse fork if the violation is a fatal one */
4585         if (is_fatal || exc_via_corpse_forking == 0) {
4586                 /* Do not send a EXC_RESOURCE is corpse_for_fatal_memkill is set */
4587                 if (corpse_for_fatal_memkill == 0) {
4588                         /*
4589                          * Use the _internal_ variant so that no user-space
4590                          * process can resume our task from under us.
4591                          */
4592                         task_suspend_internal(task);
4593                         exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
4594                         task_resume_internal(task);
4595                 }
4596         } else {
4597                 task_enqueue_exception_with_corpse(task, code, EXCEPTION_CODE_MAX);
4598         }
4599
4600         /*
4601          * After the EXC_RESOURCE has been handled, we must clear the
4602          * P_MEMSTAT_TERMINATED flag so that the process can again be
4603          * considered for jetsam if the memorystatus_thread wakes up.
4604          */
4605         proc_memstat_terminated(current_task()->bsd_info, FALSE);  /* clear the flag */
4606 }
4607
4608 /*
4609  * Callback invoked when a task exceeds its physical footprint limit.
4610  */
4611 void
4612 task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1)
4613 {
4614         ledger_amount_t max_footprint, max_footprint_mb;
4615         task_t task;
4616         boolean_t is_fatal;
4617         boolean_t trigger_exception;
4618
4619         if (warning == LEDGER_WARNING_DIPPED_BELOW) {
4620                 /*
4621                  * Task memory limits only provide a warning on the way up.
4622                  */
4623                 return;
4624         }
4625
4626         task = current_task();
4627
4628         ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &max_footprint);
4629         max_footprint_mb = max_footprint >> 20;
4630
4631         /*
4632          * Capture the trigger exception flag before turning off the exception.
4633          */
4634         trigger_exception = task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION ? TRUE : FALSE;
4635
4636         is_fatal = memorystatus_turnoff_exception_and_get_fatalness((warning == LEDGER_WARNING_ROSE_ABOVE) ? TRUE : FALSE, (int)max_footprint_mb);
4637
4638         /*
4639          * If this an actual violation (not a warning),
4640          * generate a non-fatal high watermark EXC_RESOURCE.
4641          */
4642         if ((warning == 0) && trigger_exception) {
4643                 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND((int)max_footprint_mb, is_fatal);
4644         }
4645
4646         memorystatus_on_ledger_footprint_exceeded((warning == LEDGER_WARNING_ROSE_ABOVE) ? TRUE : FALSE,
4647                 is_fatal);
4648 }
4649
4650 extern int proc_check_footprint_priv(void);
4651
4652 kern_return_t
4653 task_set_phys_footprint_limit(
4654         task_t task,
4655         int new_limit_mb,
4656         int *old_limit_mb)
4657 {
4658         kern_return_t error;
4659
4660         if ((error = proc_check_footprint_priv())) {
4661                 return (KERN_NO_ACCESS);
4662         }
4663
4664         return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, FALSE);
4665 }
4666
4667 kern_return_t
4668 task_convert_phys_footprint_limit(
4669         int limit_mb,
4670         int *converted_limit_mb)
4671 {
4672         if (limit_mb == -1) {
4673                 /*
4674                  * No limit
4675                  */
4676                 if (max_task_footprint != 0) {
4677                         *converted_limit_mb = (int)(max_task_footprint / 1024 / 1024);   /* bytes to MB */
4678                 } else {
4679                         *converted_limit_mb = (int)(LEDGER_LIMIT_INFINITY >> 20);
4680                 }
4681         } else {
4682                 /* nothing to convert */
4683                 *converted_limit_mb = limit_mb;
4684         }
4685         return (KERN_SUCCESS);
4686 }
4687
4688
4689 kern_return_t
4690 task_set_phys_footprint_limit_internal(
4691         task_t task,
4692         int new_limit_mb,
4693         int *old_limit_mb,
4694         boolean_t trigger_exception)
4695 {
4696         ledger_amount_t old;
4697
4698         ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &old);
4699
4700         if (old_limit_mb) {
4701                 /*
4702                  * Check that limit >> 20 will not give an "unexpected" 32-bit
4703                  * result. There are, however, implicit assumptions that -1 mb limit
4704                  * equates to LEDGER_LIMIT_INFINITY.
4705                  */
4706                 assert(((old & 0xFFF0000000000000LL) == 0) || (old == LEDGER_LIMIT_INFINITY));
4707                 *old_limit_mb = (int)(old >> 20);
4708         }
4709
4710         if (new_limit_mb == -1) {
4711                 /*
4712                  * Caller wishes to remove the limit.
4713                  */
4714                 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
4715                                  max_task_footprint ? max_task_footprint : LEDGER_LIMIT_INFINITY,
4716                                  max_task_footprint ? max_task_footprint_warning_level : 0);
4717                 return (KERN_SUCCESS);
4718         }
4719
4720 #ifdef CONFIG_NOMONITORS
4721         return (KERN_SUCCESS);
4722 #endif /* CONFIG_NOMONITORS */
4723
4724         task_lock(task);
4725
4726         if (trigger_exception) {
4727                 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION;
4728         } else {
4729                 task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION;
4730         }
4731
4732         ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
4733                 (ledger_amount_t)new_limit_mb << 20, PHYS_FOOTPRINT_WARNING_LEVEL);
4734
4735         if (task == current_task()) {
4736                 ledger_check_new_balance(task->ledger, task_ledgers.phys_footprint);
4737         }
4738
4739         task_unlock(task);
4740
4741         return (KERN_SUCCESS);
4742 }
4743
4744 kern_return_t
4745 task_get_phys_footprint_limit(
4746         task_t task,
4747         int *limit_mb)
4748 {
4749         ledger_amount_t limit;
4750
4751         ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &limit);
4752         /*
4753          * Check that limit >> 20 will not give an "unexpected" signed, 32-bit
4754          * result. There are, however, implicit assumptions that -1 mb limit
4755          * equates to LEDGER_LIMIT_INFINITY.
4756          */
4757         assert(((limit & 0xFFF0000000000000LL) == 0) || (limit == LEDGER_LIMIT_INFINITY));
4758         *limit_mb = (int)(limit >> 20);
4759
4760         return (KERN_SUCCESS);
4761 }
4762 #else /* CONFIG_MEMORYSTATUS */
4763 kern_return_t
4764 task_set_phys_footprint_limit(
4765         __unused task_t task,
4766         __unused int new_limit_mb,
4767         __unused int *old_limit_mb)
4768 {
4769         return (KERN_FAILURE);
4770 }
4771
4772 kern_return_t
4773 task_get_phys_footprint_limit(
4774         __unused task_t task,
4775         __unused int *limit_mb)
4776 {
4777         return (KERN_FAILURE);
4778 }
4779 #endif /* CONFIG_MEMORYSTATUS */
4780
4781 /*
4782  * We need to export some functions to other components that
4783  * are currently implemented in macros within the osfmk
4784  * component.  Just export them as functions of the same name.
4785  */
4786 boolean_t is_kerneltask(task_t t)
4787 {
4788         if (t == kernel_task)
4789                 return (TRUE);
4790
4791         return (FALSE);
4792 }
4793
4794 boolean_t is_corpsetask(task_t t)
4795 {
4796         return (task_is_a_corpse(t));
4797 }
4798
4799 #undef current_task
4800 task_t current_task(void);
4801 task_t current_task(void)
4802 {
4803         return (current_task_fast());
4804 }
4805
4806 #undef task_reference
4807 void task_reference(task_t task);
4808 void
4809 task_reference(
4810         task_t          task)
4811 {
4812         if (task != TASK_NULL)
4813                 task_reference_internal(task);
4814 }
4815
4816 /* defined in bsd/kern/kern_prot.c */
4817 extern int get_audit_token_pid(audit_token_t *audit_token);
4818
4819 int task_pid(task_t task)
4820 {
4821         if (task)
4822                 return get_audit_token_pid(&task->audit_token);
4823         return -1;
4824 }
4825
4826
4827 /*
4828  * This routine finds a thread in a task by its unique id
4829  * Returns a referenced thread or THREAD_NULL if the thread was not found
4830  *
4831  * TODO: This is super inefficient - it's an O(threads in task) list walk!
4832  *       We should make a tid hash, or transition all tid clients to thread ports
4833  *
4834  * Precondition: No locks held (will take task lock)
4835  */
4836 thread_t
4837 task_findtid(task_t task, uint64_t tid)
4838 {
4839         thread_t self           = current_thread();
4840         thread_t found_thread   = THREAD_NULL;
4841         thread_t iter_thread    = THREAD_NULL;
4842
4843         /* Short-circuit the lookup if we're looking up ourselves */
4844         if (tid == self->thread_id || tid == TID_NULL) {
4845                 assert(self->task == task);
4846
4847                 thread_reference(self);
4848
4849                 return self;
4850         }
4851
4852         task_lock(task);
4853
4854         queue_iterate(&task->threads, iter_thread, thread_t, task_threads) {
4855                 if (iter_thread->thread_id == tid) {
4856                         found_thread = iter_thread;
4857                         thread_reference(found_thread);
4858                         break;
4859                 }
4860         }
4861
4862         task_unlock(task);
4863
4864         return (found_thread);
4865 }
4866
4867
4868 /*
4869  * Control the CPU usage monitor for a task.
4870  */
4871 kern_return_t
4872 task_cpu_usage_monitor_ctl(task_t task, uint32_t *flags)
4873 {
4874         int error = KERN_SUCCESS;
4875
4876         if (*flags & CPUMON_MAKE_FATAL) {
4877                 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_CPUMON;
4878         } else {
4879                 error = KERN_INVALID_ARGUMENT;
4880         }
4881
4882         return error;
4883 }
4884
4885 /*
4886  * Control the wakeups monitor for a task.
4887  */
4888 kern_return_t
4889 task_wakeups_monitor_ctl(task_t task, uint32_t *flags, int32_t *rate_hz)
4890 {
4891         ledger_t ledger = task->ledger;
4892
4893         task_lock(task);
4894         if (*flags & WAKEMON_GET_PARAMS) {
4895                 ledger_amount_t limit;
4896                 uint64_t                period;
4897
4898                 ledger_get_limit(ledger, task_ledgers.interrupt_wakeups, &limit);
4899                 ledger_get_period(ledger, task_ledgers.interrupt_wakeups, &period);
4900
4901                 if (limit != LEDGER_LIMIT_INFINITY) {
4902                         /*
4903                          * An active limit means the wakeups monitor is enabled.
4904                          */
4905                         *rate_hz = (int32_t)(limit / (int64_t)(period / NSEC_PER_SEC));
4906                         *flags = WAKEMON_ENABLE;
4907                         if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
4908                                 *flags |= WAKEMON_MAKE_FATAL;
4909                         }
4910                 } else {
4911                         *flags = WAKEMON_DISABLE;
4912                         *rate_hz = -1;
4913                 }
4914
4915                 /*
4916                  * If WAKEMON_GET_PARAMS is present in flags, all other flags are ignored.
4917                  */
4918                 task_unlock(task);
4919                 return KERN_SUCCESS;
4920         }
4921
4922         if (*flags & WAKEMON_ENABLE) {
4923                 if (*flags & WAKEMON_SET_DEFAULTS) {
4924                         *rate_hz = task_wakeups_monitor_rate;
4925                 }
4926
4927 #ifndef CONFIG_NOMONITORS
4928                 if (*flags & WAKEMON_MAKE_FATAL) {
4929                         task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
4930                 }
4931 #endif /* CONFIG_NOMONITORS */
4932
4933                 if (*rate_hz <= 0) {
4934                         task_unlock(task);
4935                         return KERN_INVALID_ARGUMENT;
4936                 }
4937
4938 #ifndef CONFIG_NOMONITORS
4939                 ledger_set_limit(ledger, task_ledgers.interrupt_wakeups, *rate_hz * task_wakeups_monitor_interval,
4940                         task_wakeups_monitor_ustackshots_trigger_pct);
4941                 ledger_set_period(ledger, task_ledgers.interrupt_wakeups, task_wakeups_monitor_interval * NSEC_PER_SEC);
4942                 ledger_enable_callback(ledger, task_ledgers.interrupt_wakeups);
4943 #endif /* CONFIG_NOMONITORS */
4944         } else if (*flags & WAKEMON_DISABLE) {
4945                 /*
4946                  * Caller wishes to disable wakeups monitor on the task.
4947                  *
4948                  * Disable telemetry if it was triggered by the wakeups monitor, and
4949                  * remove the limit & callback on the wakeups ledger entry.
4950                  */
4951 #if CONFIG_TELEMETRY
4952                 telemetry_task_ctl_locked(task, TF_WAKEMON_WARNING, 0);
4953 #endif
4954                 ledger_disable_refill(ledger, task_ledgers.interrupt_wakeups);
4955                 ledger_disable_callback(ledger, task_ledgers.interrupt_wakeups);
4956         }
4957
4958         task_unlock(task);
4959         return KERN_SUCCESS;
4960 }
4961
4962 void
4963 task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1)
4964 {
4965         if (warning == LEDGER_WARNING_ROSE_ABOVE) {
4966 #if CONFIG_TELEMETRY
4967                 /*
4968                  * This task is in danger of violating the wakeups monitor. Enable telemetry on this task
4969                  * so there are micro-stackshots available if and when EXC_RESOURCE is triggered.
4970                  */
4971                 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 1);
4972 #endif
4973                 return;
4974         }
4975
4976 #if CONFIG_TELEMETRY
4977         /*
4978          * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
4979          * exceeded the limit, turn telemetry off for the task.
4980          */
4981         telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 0);
4982 #endif
4983
4984         if (warning == 0) {
4985                 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS();
4986         }
4987 }
4988
4989 void __attribute__((noinline))
4990 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void)
4991 {
4992         task_t                      task        = current_task();
4993         int                         pid         = 0;
4994         const char                  *procname   = "unknown";
4995         boolean_t                   fatal;
4996         kern_return_t               kr;
4997 #ifdef EXC_RESOURCE_MONITORS
4998         mach_exception_data_type_t  code[EXCEPTION_CODE_MAX];
4999 #endif /* EXC_RESOURCE_MONITORS */
5000         struct ledger_entry_info    lei;
5001
5002 #ifdef MACH_BSD
5003         pid = proc_selfpid();
5004         if (task->bsd_info != NULL)
5005                 procname = proc_name_address(current_task()->bsd_info);
5006 #endif
5007
5008         ledger_get_entry_info(task->ledger, task_ledgers.interrupt_wakeups, &lei);
5009
5010         /*
5011          * Disable the exception notification so we don't overwhelm
5012          * the listener with an endless stream of redundant exceptions.
5013          * TODO: detect whether another thread is already reporting the violation.
5014          */
5015         uint32_t flags = WAKEMON_DISABLE;
5016         task_wakeups_monitor_ctl(task, &flags, NULL);
5017
5018         fatal = task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
5019         trace_resource_violation(RMON_CPUWAKES_VIOLATED, &lei);
5020         printf("process %s[%d] caught waking the CPU %llu times "
5021                "over ~%llu seconds, averaging %llu wakes / second and "
5022                "violating a %slimit of %llu wakes over %llu seconds.\n",
5023                procname, pid,
5024                lei.lei_balance, lei.lei_last_refill / NSEC_PER_SEC,
5025                    lei.lei_last_refill == 0 ? 0 :
5026                                 (NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill),
5027                fatal ? "FATAL " : "",
5028                    lei.lei_limit, lei.lei_refill_period / NSEC_PER_SEC);
5029
5030         kr = send_resource_violation(send_cpu_wakes_violation, task, &lei,
5031                                      fatal ? kRNFatalLimitFlag : 0);
5032         if (kr) {
5033                 printf("send_resource_violation(CPU wakes, ...): error %#x\n", kr);
5034         }
5035
5036 #ifdef EXC_RESOURCE_MONITORS
5037         if (disable_exc_resource) {
5038                 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
5039                         "supressed by a boot-arg\n", procname, pid);
5040                 return;
5041         }
5042         if (audio_active) {
5043                 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
5044                        "supressed due to audio playback\n", procname, pid);
5045                 return;
5046         }
5047         if (lei.lei_last_refill == 0) {
5048                 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
5049                        "supressed due to lei.lei_last_refill = 0 \n", procname, pid);
5050         }
5051
5052         code[0] = code[1] = 0;
5053         EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_WAKEUPS);
5054         EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_WAKEUPS_MONITOR);
5055         EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_PERMITTED(code[0],
5056                             NSEC_PER_SEC * lei.lei_limit / lei.lei_refill_period);
5057         EXC_RESOURCE_CPUMONITOR_ENCODE_OBSERVATION_INTERVAL(code[0],
5058                             lei.lei_last_refill);
5059         EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_OBSERVED(code[1],
5060                             NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill);
5061         exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
5062 #endif /* EXC_RESOURCE_MONITORS */
5063
5064         if (fatal) {
5065                 task_terminate_internal(task);
5066         }
5067 }
5068
5069 static boolean_t
5070 global_update_logical_writes(int64_t io_delta)
5071 {
5072         int64_t old_count, new_count;
5073         boolean_t needs_telemetry;
5074
5075         do {
5076                 new_count = old_count = global_logical_writes_count;
5077                 new_count += io_delta;
5078                 if (new_count >= io_telemetry_limit) {
5079                         new_count = 0;
5080                         needs_telemetry = TRUE;
5081                 } else {
5082                         needs_telemetry = FALSE;
5083                 }
5084         } while(!OSCompareAndSwap64(old_count, new_count, &global_logical_writes_count));
5085         return needs_telemetry;
5086 }
5087
5088 void task_update_logical_writes(task_t task, uint32_t io_size, int flags, void *vp)
5089 {
5090         int64_t io_delta = 0;
5091         boolean_t needs_telemetry = FALSE;
5092
5093         if ((!task) || (!io_size) || (!vp))
5094                 return;
5095
5096         KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_DATA_WRITE)) | DBG_FUNC_NONE,
5097                                                         task_pid(task), io_size, flags, (uintptr_t)VM_KERNEL_ADDRPERM(vp), 0);
5098         DTRACE_IO4(logical_writes, struct task *, task, uint32_t, io_size, int, flags, vnode *, vp);
5099         switch(flags) {
5100                 case TASK_WRITE_IMMEDIATE:
5101                         OSAddAtomic64(io_size, (SInt64 *)&(task->task_immediate_writes));
5102                         ledger_credit(task->ledger, task_ledgers.logical_writes, io_size);
5103                         break;
5104                 case TASK_WRITE_DEFERRED:
5105                         OSAddAtomic64(io_size, (SInt64 *)&(task->task_deferred_writes));
5106                         ledger_credit(task->ledger, task_ledgers.logical_writes, io_size);
5107                         break;
5108                 case TASK_WRITE_INVALIDATED:
5109                         OSAddAtomic64(io_size, (SInt64 *)&(task->task_invalidated_writes));
5110                         ledger_debit(task->ledger, task_ledgers.logical_writes, io_size);
5111                         break;
5112                 case TASK_WRITE_METADATA:
5113                         OSAddAtomic64(io_size, (SInt64 *)&(task->task_metadata_writes));
5114                         ledger_credit(task->ledger, task_ledgers.logical_writes, io_size);
5115                         break;
5116         }
5117
5118         io_delta = (flags == TASK_WRITE_INVALIDATED) ? ((int64_t)io_size * -1ll) : ((int64_t)io_size);
5119         if (io_telemetry_limit != 0) {
5120                 /* If io_telemetry_limit is 0, disable global updates and I/O telemetry */
5121                 needs_telemetry = global_update_logical_writes(io_delta);
5122                 if (needs_telemetry) {
5123                         act_set_io_telemetry_ast(current_thread());
5124                 }
5125         }
5126 }
5127
5128 /*
5129  * Control the I/O monitor for a task.
5130  */
5131 kern_return_t
5132 task_io_monitor_ctl(task_t task, uint32_t *flags)
5133 {
5134         ledger_t ledger = task->ledger;
5135
5136         task_lock(task);
5137         if (*flags & IOMON_ENABLE) {
5138                 /* Configure the physical I/O ledger */
5139                 ledger_set_limit(ledger, task_ledgers.physical_writes, (task_iomon_limit_mb * 1024 * 1024), 0);
5140                 ledger_set_period(ledger, task_ledgers.physical_writes, (task_iomon_interval_secs * NSEC_PER_SEC));
5141
5142                 /* Configure the logical I/O ledger */
5143                 ledger_set_limit(ledger, task_ledgers.logical_writes, (task_iomon_limit_mb * 1024 * 1024), 0);
5144                 ledger_set_period(ledger, task_ledgers.logical_writes, (task_iomon_interval_secs * NSEC_PER_SEC));
5145
5146         } else if (*flags & IOMON_DISABLE) {
5147                 /*
5148                  * Caller wishes to disable I/O monitor on the task.
5149                  */
5150                 ledger_disable_refill(ledger, task_ledgers.physical_writes);
5151                 ledger_disable_callback(ledger, task_ledgers.physical_writes);
5152                 ledger_disable_refill(ledger, task_ledgers.logical_writes);
5153                 ledger_disable_callback(ledger, task_ledgers.logical_writes);
5154         }
5155
5156         task_unlock(task);
5157         return KERN_SUCCESS;
5158 }
5159
5160 void
5161 task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1)
5162 {
5163         if (warning == 0) {
5164                 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO((int)param0);
5165         }
5166 }
5167
5168 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor)
5169 {
5170         int                             pid = 0;
5171         task_t                          task = current_task();
5172 #ifdef EXC_RESOURCE_MONITORS
5173         mach_exception_data_type_t      code[EXCEPTION_CODE_MAX];
5174 #endif /* EXC_RESOURCE_MONITORS */
5175         struct ledger_entry_info        lei;
5176         kern_return_t                   kr;
5177
5178 #ifdef MACH_BSD
5179         pid = proc_selfpid();
5180 #endif
5181         /*
5182          * Get the ledger entry info. We need to do this before disabling the exception
5183          * to get correct values for all fields.
5184          */
5185         switch(flavor) {
5186                 case FLAVOR_IO_PHYSICAL_WRITES:
5187                         ledger_get_entry_info(task->ledger, task_ledgers.physical_writes, &lei);
5188                         break;
5189                 case FLAVOR_IO_LOGICAL_WRITES:
5190                         ledger_get_entry_info(task->ledger, task_ledgers.logical_writes, &lei);
5191                         break;
5192         }
5193
5194
5195         /*
5196          * Disable the exception notification so we don't overwhelm
5197          * the listener with an endless stream of redundant exceptions.
5198          * TODO: detect whether another thread is already reporting the violation.
5199          */
5200         uint32_t flags = IOMON_DISABLE;
5201         task_io_monitor_ctl(task, &flags);
5202
5203         if (flavor == FLAVOR_IO_LOGICAL_WRITES) {
5204                 trace_resource_violation(RMON_LOGWRITES_VIOLATED, &lei);
5205         }
5206         printf("process [%d] caught causing excessive I/O (flavor: %d). Task I/O: %lld MB. [Limit : %lld MB per %lld secs]\n",
5207                 pid, flavor, (lei.lei_balance / (1024 * 1024)), (lei.lei_limit / (1024 * 1024)), (lei.lei_refill_period / NSEC_PER_SEC));
5208
5209         kr = send_resource_violation(send_disk_writes_violation, task, &lei, kRNFlagsNone);
5210         if (kr) {
5211                 printf("send_resource_violation(disk_writes, ...): error %#x\n", kr);
5212         }
5213
5214 #ifdef EXC_RESOURCE_MONITORS
5215         code[0] = code[1] = 0;
5216         EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_IO);
5217         EXC_RESOURCE_ENCODE_FLAVOR(code[0], flavor);
5218         EXC_RESOURCE_IO_ENCODE_INTERVAL(code[0], (lei.lei_refill_period / NSEC_PER_SEC));
5219         EXC_RESOURCE_IO_ENCODE_LIMIT(code[0], (lei.lei_limit / (1024 * 1024)));
5220         EXC_RESOURCE_IO_ENCODE_OBSERVED(code[1], (lei.lei_balance / (1024 * 1024)));
5221         exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
5222 #endif /* EXC_RESOURCE_MONITORS */
5223 }
5224
5225 /* Placeholders for the task set/get voucher interfaces */
5226 kern_return_t
5227 task_get_mach_voucher(
5228         task_t                  task,
5229         mach_voucher_selector_t __unused which,
5230         ipc_voucher_t           *voucher)
5231 {
5232         if (TASK_NULL == task)
5233                 return KERN_INVALID_TASK;
5234
5235         *voucher = NULL;
5236         return KERN_SUCCESS;
5237 }
5238
5239 kern_return_t
5240 task_set_mach_voucher(
5241         task_t                  task,
5242         ipc_voucher_t           __unused voucher)
5243 {
5244         if (TASK_NULL == task)
5245                 return KERN_INVALID_TASK;
5246
5247         return KERN_SUCCESS;
5248 }
5249
5250 kern_return_t
5251 task_swap_mach_voucher(
5252         task_t                  task,
5253         ipc_voucher_t           new_voucher,
5254         ipc_voucher_t           *in_out_old_voucher)
5255 {
5256         if (TASK_NULL == task)
5257                 return KERN_INVALID_TASK;
5258
5259         *in_out_old_voucher = new_voucher;
5260         return KERN_SUCCESS;
5261 }
5262
5263 void task_set_gpu_denied(task_t task, boolean_t denied)
5264 {
5265         task_lock(task);
5266
5267         if (denied) {
5268                 task->t_flags |= TF_GPU_DENIED;
5269         } else {
5270                 task->t_flags &= ~TF_GPU_DENIED;
5271         }
5272
5273         task_unlock(task);
5274 }
5275
5276 boolean_t task_is_gpu_denied(task_t task)
5277 {
5278         /* We don't need the lock to read this flag */
5279         return (task->t_flags & TF_GPU_DENIED) ? TRUE : FALSE;
5280 }
5281
5282
5283 uint64_t get_task_memory_region_count(task_t task)
5284 {
5285         vm_map_t map;
5286         map = (task == kernel_task) ? kernel_map: task->map;
5287         return((uint64_t)get_map_nentries(map));
5288 }
5289
5290 static void
5291 kdebug_trace_dyld_internal(uint32_t base_code,
5292         struct dyld_kernel_image_info *info)
5293 {
5294         static_assert(sizeof(info->uuid) >= 16);
5295
5296 #if defined(__LP64__)
5297         uint64_t *uuid = (uint64_t *)&(info->uuid);
5298
5299         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
5300                 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code), uuid[0],
5301                 uuid[1], info->load_addr,
5302                 (uint64_t)info->fsid.val[0] | ((uint64_t)info->fsid.val[1] << 32),
5303                 0);
5304         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
5305                 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 1),
5306                 (uint64_t)info->fsobjid.fid_objno |
5307                 ((uint64_t)info->fsobjid.fid_generation << 32),
5308                 0, 0, 0, 0);
5309 #else /* defined(__LP64__) */
5310         uint32_t *uuid = (uint32_t *)&(info->uuid);
5311
5312         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
5313                 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 2), uuid[0],
5314                 uuid[1], uuid[2], uuid[3], 0);
5315         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
5316                 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 3),
5317                 (uint32_t)info->load_addr, info->fsid.val[0], info->fsid.val[1],
5318                 info->fsobjid.fid_objno, 0);
5319         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
5320                 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 4),
5321                 info->fsobjid.fid_generation, 0, 0, 0, 0);
5322 #endif /* !defined(__LP64__) */
5323 }
5324
5325 static kern_return_t
5326 kdebug_trace_dyld(task_t task, uint32_t base_code,
5327         vm_map_copy_t infos_copy, mach_msg_type_number_t infos_len)
5328 {
5329         kern_return_t kr;
5330         dyld_kernel_image_info_array_t infos;
5331         vm_map_offset_t map_data;
5332         vm_offset_t data;
5333
5334         assert(infos_copy != NULL);
5335
5336         if (task == NULL || task != current_task()) {
5337                 return KERN_INVALID_TASK;
5338         }
5339
5340         kr = vm_map_copyout(ipc_kernel_map, &map_data, (vm_map_copy_t)infos_copy);
5341         if (kr != KERN_SUCCESS) {
5342                 return kr;
5343         }
5344
5345         infos = CAST_DOWN(dyld_kernel_image_info_array_t, map_data);
5346
5347         for (mach_msg_type_number_t i = 0; i < infos_len; i++) {
5348                 kdebug_trace_dyld_internal(base_code, &(infos[i]));
5349         }
5350
5351         data = CAST_DOWN(vm_offset_t, map_data);
5352         mach_vm_deallocate(ipc_kernel_map, data, infos_len * sizeof(infos[0]));
5353         return KERN_SUCCESS;
5354 }
5355
5356 kern_return_t
5357 task_register_dyld_image_infos(task_t task,
5358                                dyld_kernel_image_info_array_t infos_copy,
5359                                mach_msg_type_number_t infos_len)
5360 {
5361         return kdebug_trace_dyld(task, DBG_DYLD_UUID_MAP_A,
5362                 (vm_map_copy_t)infos_copy, infos_len);
5363 }
5364
5365 kern_return_t
5366 task_unregister_dyld_image_infos(task_t task,
5367                                  dyld_kernel_image_info_array_t infos_copy,
5368                                  mach_msg_type_number_t infos_len)
5369 {
5370         return kdebug_trace_dyld(task, DBG_DYLD_UUID_UNMAP_A,
5371                 (vm_map_copy_t)infos_copy, infos_len);
5372 }
5373
5374 kern_return_t
5375 task_get_dyld_image_infos(__unused task_t task,
5376                           __unused dyld_kernel_image_info_array_t * dyld_images,
5377                           __unused mach_msg_type_number_t * dyld_imagesCnt)
5378 {
5379         return KERN_NOT_SUPPORTED;
5380 }
5381
5382 kern_return_t
5383 task_register_dyld_shared_cache_image_info(task_t task,
5384                                            dyld_kernel_image_info_t cache_img,
5385                                            __unused boolean_t no_cache,
5386                                            __unused boolean_t private_cache)
5387 {
5388         if (task == NULL || task != current_task()) {
5389                 return KERN_INVALID_TASK;
5390         }
5391
5392         kdebug_trace_dyld_internal(DBG_DYLD_UUID_SHARED_CACHE_A, &cache_img);
5393         return KERN_SUCCESS;
5394 }
5395
5396 kern_return_t
5397 task_register_dyld_set_dyld_state(__unused task_t task,
5398                                   __unused uint8_t dyld_state)
5399 {
5400         return KERN_NOT_SUPPORTED;
5401 }
5402
5403 kern_return_t
5404 task_register_dyld_get_process_state(__unused task_t task,
5405                                      __unused dyld_kernel_process_info_t * dyld_process_state)
5406 {
5407         return KERN_NOT_SUPPORTED;
5408 }
5409
5410 #if CONFIG_SECLUDED_MEMORY
/*
 * Number of tasks whose task_can_use_secluded_mem flag is currently set.
 * Adjusted with OSAddAtomic() in task_set_can_use_secluded_mem_locked()
 * and consulted by task_can_use_secluded_mem().
 */
int num_tasks_can_use_secluded_mem = 0;
5412
5413 void
5414 task_set_can_use_secluded_mem(
5415         task_t          task,
5416         boolean_t       can_use_secluded_mem)
5417 {
5418         if (!task->task_could_use_secluded_mem) {
5419                 return;
5420         }
5421         task_lock(task);
5422         task_set_can_use_secluded_mem_locked(task, can_use_secluded_mem);
5423         task_unlock(task);
5424 }
5425
5426 void
5427 task_set_can_use_secluded_mem_locked(
5428         task_t          task,
5429         boolean_t       can_use_secluded_mem)
5430 {
5431         assert(task->task_could_use_secluded_mem);
5432         if (can_use_secluded_mem &&
5433             secluded_for_apps && /* global boot-arg */
5434             !task->task_can_use_secluded_mem) {
5435                 assert(num_tasks_can_use_secluded_mem >= 0);
5436                 OSAddAtomic(+1,
5437                             (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
5438                 task->task_can_use_secluded_mem = TRUE;
5439         } else if (!can_use_secluded_mem &&
5440                    task->task_can_use_secluded_mem) {
5441                 assert(num_tasks_can_use_secluded_mem > 0);
5442                 OSAddAtomic(-1,
5443                             (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
5444                 task->task_can_use_secluded_mem = FALSE;
5445         }
5446 }
5447
5448 void
5449 task_set_could_use_secluded_mem(
5450         task_t          task,
5451         boolean_t       could_use_secluded_mem)
5452 {
5453         task->task_could_use_secluded_mem = could_use_secluded_mem;
5454 }
5455
5456 void
5457 task_set_could_also_use_secluded_mem(
5458         task_t          task,
5459         boolean_t       could_also_use_secluded_mem)
5460 {
5461         task->task_could_also_use_secluded_mem = could_also_use_secluded_mem;
5462 }
5463
5464 boolean_t
5465 task_can_use_secluded_mem(
5466         task_t  task)
5467 {
5468         if (task->task_can_use_secluded_mem) {
5469                 assert(task->task_could_use_secluded_mem);
5470                 assert(num_tasks_can_use_secluded_mem > 0);
5471                 return TRUE;
5472         }
5473         if (task->task_could_also_use_secluded_mem &&
5474             num_tasks_can_use_secluded_mem > 0) {
5475                 assert(num_tasks_can_use_secluded_mem > 0);
5476                 return TRUE;
5477         }
5478         return FALSE;
5479 }
5480
5481 boolean_t
5482 task_could_use_secluded_mem(
5483         task_t  task)
5484 {
5485         return task->task_could_use_secluded_mem;
5486 }
5487 #endif /* CONFIG_SECLUDED_MEMORY */
5488
5489 queue_head_t *
5490 task_io_user_clients(task_t task)
5491 {
5492     return (&task->io_user_clients);
5493 }