osfmk/kern/task.c

   1 /*
   2  * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * @OSF_FREE_COPYRIGHT@
  30  */
  31 /*
  32  * Mach Operating System
  33  * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
  34  * All Rights Reserved.
  35  *
  36  * Permission to use, copy, modify and distribute this software and its
  37  * documentation is hereby granted, provided that both the copyright
  38  * notice and this permission notice appear in all copies of the
  39  * software, derivative works or modified versions, and any portions
  40  * thereof, and that both notices appear in supporting documentation.
  41  *
  42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
  44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  45  *
  46  * Carnegie Mellon requests users of this software to return to
  47  *
  48  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  49  *  School of Computer Science
  50  *  Carnegie Mellon University
  51  *  Pittsburgh PA 15213-3890
  52  *
  53  * any improvements or extensions that they make and grant Carnegie Mellon
  54  * the rights to redistribute these changes.
  55  */
  56 /*
  57  *      File:   kern/task.c
  58  *      Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub,
  59  *              David Black
  60  *
  61  *      Task management primitives implementation.
  62  */
  63 /*
  64  * Copyright (c) 1993 The University of Utah and
  65  * the Computer Systems Laboratory (CSL).  All rights reserved.
  66  *
  67  * Permission to use, copy, modify and distribute this software and its
  68  * documentation is hereby granted, provided that both the copyright
  69  * notice and this permission notice appear in all copies of the
  70  * software, derivative works or modified versions, and any portions
  71  * thereof, and that both notices appear in supporting documentation.
  72  *
  73  * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
  74  * IS" CONDITION.  THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
  75  * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  76  *
  77  * CSL requests users of this software to return to csl-dist@cs.utah.edu any
  78  * improvements that they make and grant CSL redistribution rights.
  79  *
  80  */
  81 /*
  82  * NOTICE: This file was modified by McAfee Research in 2004 to introduce
  83  * support for mandatory and extensible security protections.  This notice
  84  * is included in support of clause 2.2 (b) of the Apple Public License,
  85  * Version 2.0.
  86  * Copyright (c) 2005 SPARTA, Inc.
  87  */
  88
  89 #include <fast_tas.h>
  90 #include <platforms.h>
  91
  92 #include <mach/mach_types.h>
  93 #include <mach/boolean.h>
  94 #include <mach/host_priv.h>
  95 #include <mach/machine/vm_types.h>
  96 #include <mach/vm_param.h>
  97 #include <mach/semaphore.h>
  98 #include <mach/task_info.h>
  99 #include <mach/task_special_ports.h>
 100
 101 #include <ipc/ipc_types.h>
 102 #include <ipc/ipc_space.h>
 103 #include <ipc/ipc_entry.h>
 104 #include <ipc/ipc_hash.h>
 105
 106 #include <kern/kern_types.h>
 107 #include <kern/mach_param.h>
 108 #include <kern/misc_protos.h>
 109 #include <kern/task.h>
 110 #include <kern/thread.h>
 111 #include <kern/zalloc.h>
 112 #include <kern/kalloc.h>
 113 #include <kern/processor.h>
 114 #include <kern/sched_prim.h>    /* for thread_wakeup */
 115 #include <kern/ipc_tt.h>
 116 #include <kern/host.h>
 117 #include <kern/clock.h>
 118 #include <kern/timer.h>
 119 #include <kern/assert.h>
 120 #include <kern/sync_lock.h>
 121 #include <kern/affinity.h>
 122 #include <kern/exc_resource.h>
 123 #if CONFIG_TELEMETRY
 124 #include <kern/telemetry.h>
 125 #endif
 126
 127 #include <vm/pmap.h>
 128 #include <vm/vm_map.h>
 129 #include <vm/vm_kern.h>         /* for kernel_map, ipc_kernel_map */
 130 #include <vm/vm_pageout.h>
 131 #include <vm/vm_protos.h>
 132 #include <vm/vm_purgeable_internal.h>
 133
 134 #include <sys/resource.h>
 135 /*
 136  * Exported interfaces
 137  */
 138
 139 #include <mach/task_server.h>
 140 #include <mach/mach_host_server.h>
 141 #include <mach/host_security_server.h>
 142 #include <mach/mach_port_server.h>
 143 #include <mach/security_server.h>
 144
 145 #include <vm/vm_shared_region.h>
 146
 147 #if CONFIG_MACF_MACH
 148 #include <security/mac_mach_internal.h>
 149 #endif
 150
 151 #if CONFIG_COUNTERS
 152 #include <pmc/pmc.h>
 153 #endif /* CONFIG_COUNTERS */
 154
 155 #include <libkern/OSDebug.h>
 156 #include <libkern/OSAtomic.h>
 157
/* The kernel's own task; created as the first task by task_init(). */
task_t                  kernel_task;
/* Zone from which struct task is allocated; set up in task_init(). */
zone_t                  task_zone;
/* Lock attributes and lock group used for task locks; initialized in task_init(). */
lck_attr_t      task_lck_attr;
lck_grp_t       task_lck_grp;
lck_grp_attr_t  task_lck_grp_attr;

/* NOTE(review): not referenced in the visible part of this file; presumably zone-usage accounting stores -- confirm. */
zinfo_usage_store_t tasks_tkm_private;
zinfo_usage_store_t tasks_tkm_shared;

/* A container to accumulate statistics for expired tasks */
expired_task_statistics_t               dead_task_statistics;
/* Spin lock initialized at the end of task_init(); presumably guards dead_task_statistics -- confirm. */
lck_spin_t              dead_task_statistics_lock;

/* Ledger template shared by all tasks; built once by init_task_ledgers(). */
static ledger_template_t task_ledger_template = NULL;
/* Per-entry ledger indices; every index stays -1 until init_task_ledgers() assigns it. */
struct _task_ledger_indices task_ledgers __attribute__((used)) = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1};
void init_task_ledgers(void);
/* Ledger callbacks registered on the task ledger template by init_task_ledgers(). */
void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1);
void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1);
void __attribute__((noinline)) THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE(void);
void __attribute__((noinline)) THIS_PROCESS_CROSSED_HIGH_WATERMARK__SENDING_EXC_RESOURCE(int max_footprint_mb);
int coredump(void *core_proc, int reserve_mb, int ignore_ulimit);

kern_return_t task_suspend_internal(task_t);
kern_return_t task_resume_internal(task_t);

/* Called from task_init() before the wakeups-monitor boot-args are parsed. */
void proc_init_cpumon_params(void);
 184
// Warn tasks when they hit 80% of their memory limit.
#define PHYS_FOOTPRINT_WARNING_LEVEL 80

/* Defaults used by task_init() when the matching boot-arg is absent. */
#define TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT              150 /* wakeups per second */
#define TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL   300 /* in seconds. */

/*
 * Level (in terms of percentage of the limit) at which the wakeups monitor triggers telemetry.
 *
 * (i.e. when the task's wakeups rate exceeds 70% of the limit, start taking user
 *  stacktraces, aka micro-stackshots)
 */
#define TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER        70

int task_wakeups_monitor_interval; /* In seconds. Time period over which wakeups rate is observed */
int task_wakeups_monitor_rate;     /* In hz. Maximum allowable wakeups per task before EXC_RESOURCE is sent */

int task_wakeups_monitor_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */

int disable_exc_resource; /* Global override to suppress EXC_RESOURCE for resource monitor violations. */

/*
 * Read in MB from the "max_task_pmem" boot-arg or the device tree by
 * task_init(), which converts it to bytes when CONFIG_JETSAM is set.
 */
int max_task_footprint = 0; /* Per-task limit on physical memory consumption */
int task_max = CONFIG_TASK_MAX; /* Max number of tasks */

int hwm_user_cores = 0; /* high watermark violations generate user core files */
 210
/*
 * Routines declared extern here; their definitions are not part of the
 * visible portion of this file.
 */
#ifdef MACH_BSD
extern void     proc_getexecutableuuid(void *, unsigned char *, unsigned long);
extern int      proc_pid(struct proc *p);
extern int      proc_selfpid(void);
extern char     *proc_name_address(struct proc *p);
#if CONFIG_JETSAM
extern void     memorystatus_on_ledger_footprint_exceeded(int warning, const int max_footprint_mb);
#endif /* CONFIG_JETSAM */
#endif /* MACH_BSD */

/* Forwards */

/* NOTE(review): the "_locked" suffix presumably means the caller holds the task lock -- confirm at the definitions. */
void            task_hold_locked(
                        task_t          task);
void            task_wait_locked(
                        task_t          task,
                        boolean_t       until_not_runnable);
void            task_release_locked(
                        task_t          task);
void            task_free(
                        task_t          task );
void            task_synchronizer_destroy_all(
                        task_t          task);

int check_for_tasksuspend(
                        task_t task);
 237
 238 void
 239 task_backing_store_privileged(
 240                         task_t task)
 241 {
 242         task_lock(task);
 243         task->priv_flags |= VM_BACKING_STORE_PRIV;
 244         task_unlock(task);
 245         return;
 246 }
 247
 248
 249 void
 250 task_set_64bit(
 251                 task_t task,
 252                 boolean_t is64bit)
 253 {
 254 #if defined(__i386__) || defined(__x86_64__)
 255         thread_t thread;
 256 #endif /* defined(__i386__) || defined(__x86_64__) */
 257
 258         task_lock(task);
 259
 260         if (is64bit) {
 261                 if (task_has_64BitAddr(task))
 262                         goto out;
 263                 task_set_64BitAddr(task);
 264         } else {
 265                 if ( !task_has_64BitAddr(task))
 266                         goto out;
 267                 task_clear_64BitAddr(task);
 268         }
 269         /* FIXME: On x86, the thread save state flavor can diverge from the
 270          * task's 64-bit feature flag due to the 32-bit/64-bit register save
 271          * state dichotomy. Since we can be pre-empted in this interval,
 272          * certain routines may observe the thread as being in an inconsistent
 273          * state with respect to its task's 64-bitness.
 274          */
 275
 276 #if defined(__i386__) || defined(__x86_64__)
 277         queue_iterate(&task->threads, thread, thread_t, task_threads) {
 278                 thread_mtx_lock(thread);
 279                 machine_thread_switch_addrmode(thread);
 280                 thread_mtx_unlock(thread);
 281         }
 282 #endif /* defined(__i386__) || defined(__x86_64__) */
 283
 284 out:
 285         task_unlock(task);
 286 }
 287
 288
 289 void
 290 task_set_dyld_info(task_t task, mach_vm_address_t addr, mach_vm_size_t size)
 291 {
 292         task_lock(task);
 293         task->all_image_info_addr = addr;
 294         task->all_image_info_size = size;
 295         task_unlock(task);
 296 }
 297
 298 #if TASK_REFERENCE_LEAK_DEBUG
 299 #include <kern/btlog.h>
 300
 301 decl_simple_lock_data(static,task_ref_lock);
 302 static btlog_t *task_ref_btlog;
 303 #define TASK_REF_OP_INCR        0x1
 304 #define TASK_REF_OP_DECR        0x2
 305
 306 #define TASK_REF_BTDEPTH        7
 307
 308 static void
 309 task_ref_lock_lock(void *context)
 310 {
 311         simple_lock((simple_lock_t)context);
 312 }
 313 static void
 314 task_ref_lock_unlock(void *context)
 315 {
 316         simple_unlock((simple_lock_t)context);
 317 }
 318
 319 void
 320 task_reference_internal(task_t task)
 321 {
 322         void *       bt[TASK_REF_BTDEPTH];
 323         int             numsaved = 0;
 324
 325         numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
 326
 327         (void)hw_atomic_add(&(task)->ref_count, 1);
 328         btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_INCR,
 329                                         bt, numsaved);
 330 }
 331
 332 uint32_t
 333 task_deallocate_internal(task_t task)
 334 {
 335         void *       bt[TASK_REF_BTDEPTH];
 336         int             numsaved = 0;
 337
 338         numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
 339
 340         btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_DECR,
 341                                         bt, numsaved);
 342         return hw_atomic_sub(&(task)->ref_count, 1);
 343 }
 344
 345 #endif /* TASK_REFERENCE_LEAK_DEBUG */
 346
 347 void
 348 task_init(void)
 349 {
 350
 351         lck_grp_attr_setdefault(&task_lck_grp_attr);
 352         lck_grp_init(&task_lck_grp, "task", &task_lck_grp_attr);
 353         lck_attr_setdefault(&task_lck_attr);
 354         lck_mtx_init(&tasks_threads_lock, &task_lck_grp, &task_lck_attr);
 355
 356         task_zone = zinit(
 357                         sizeof(struct task),
 358                         task_max * sizeof(struct task),
 359                         TASK_CHUNK * sizeof(struct task),
 360                         "tasks");
 361
 362         zone_change(task_zone, Z_NOENCRYPT, TRUE);
 363
 364         /*
 365          * Configure per-task memory limit. The boot arg takes precedence over the
 366          * device tree.
 367          */
 368         if (!PE_parse_boot_argn("max_task_pmem", &max_task_footprint,
 369                         sizeof (max_task_footprint))) {
 370                 max_task_footprint = 0;
 371         }
 372
 373         if (max_task_footprint == 0) {
 374                 /*
 375                  * No limit was found in boot-args, so go look in the device tree.
 376                  */
 377                 if (!PE_get_default("kern.max_task_pmem", &max_task_footprint,
 378                                 sizeof(max_task_footprint))) {
 379                         max_task_footprint = 0;
 380                 }
 381         }
 382
 383         if (max_task_footprint != 0) {
 384 #if CONFIG_JETSAM
 385                 if (max_task_footprint < 50) {
 386                                 printf("Warning: max_task_pmem %d below minimum.\n",
 387                                 max_task_footprint);
 388                                 max_task_footprint = 50;
 389                 }
 390                 printf("Limiting task physical memory footprint to %d MB\n",
 391                         max_task_footprint);
 392                 max_task_footprint *= 1024 * 1024; // Convert MB to bytes
 393 #else
 394                 printf("Warning: max_task_footprint specified, but jetsam not configured; ignoring.\n");
 395 #endif
 396         }
 397
 398         if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores,
 399                         sizeof (hwm_user_cores))) {
 400                 hwm_user_cores = 0;
 401         }
 402
 403         proc_init_cpumon_params();
 404
 405         if (!PE_parse_boot_argn("task_wakeups_monitor_rate", &task_wakeups_monitor_rate, sizeof (task_wakeups_monitor_rate))) {
 406                 task_wakeups_monitor_rate = TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT;
 407         }
 408
 409         if (!PE_parse_boot_argn("task_wakeups_monitor_interval", &task_wakeups_monitor_interval, sizeof (task_wakeups_monitor_interval))) {
 410                 task_wakeups_monitor_interval = TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL;
 411         }
 412
 413         if (!PE_parse_boot_argn("task_wakeups_monitor_ustackshots_trigger_pct", &task_wakeups_monitor_ustackshots_trigger_pct,
 414                 sizeof (task_wakeups_monitor_ustackshots_trigger_pct))) {
 415                 task_wakeups_monitor_ustackshots_trigger_pct = TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER;
 416         }
 417
 418         if (!PE_parse_boot_argn("disable_exc_resource", &disable_exc_resource,
 419                 sizeof (disable_exc_resource))) {
 420                 disable_exc_resource = 0;
 421         }
 422
 423         init_task_ledgers();
 424
 425 #if TASK_REFERENCE_LEAK_DEBUG
 426         simple_lock_init(&task_ref_lock, 0);
 427         task_ref_btlog = btlog_create(100000,
 428                                                                   TASK_REF_BTDEPTH,
 429                                                                   task_ref_lock_lock,
 430                                                                   task_ref_lock_unlock,
 431                                                                   &task_ref_lock);
 432         assert(task_ref_btlog);
 433 #endif
 434
 435         /*
 436          * Create the kernel task as the first task.
 437          */
 438 #ifdef __LP64__
 439         if (task_create_internal(TASK_NULL, FALSE, TRUE, &kernel_task) != KERN_SUCCESS)
 440 #else
 441         if (task_create_internal(TASK_NULL, FALSE, FALSE, &kernel_task) != KERN_SUCCESS)
 442 #endif
 443                 panic("task_init\n");
 444
 445         vm_map_deallocate(kernel_task->map);
 446         kernel_task->map = kernel_map;
 447         lck_spin_init(&dead_task_statistics_lock, &task_lck_grp, &task_lck_attr);
 448 }
 449
 450 /*
 451  * Create a task running in the kernel address space.  It may
 452  * have its own map of size mem_size and may have ipc privileges.
 453  */
 454 kern_return_t
 455 kernel_task_create(
 456         __unused task_t         parent_task,
 457         __unused vm_offset_t            map_base,
 458         __unused vm_size_t              map_size,
 459         __unused task_t         *child_task)
 460 {
 461         return (KERN_INVALID_ARGUMENT);
 462 }
 463
 464 kern_return_t
 465 task_create(
 466         task_t                          parent_task,
 467         __unused ledger_port_array_t    ledger_ports,
 468         __unused mach_msg_type_number_t num_ledger_ports,
 469         __unused boolean_t              inherit_memory,
 470         __unused task_t                 *child_task)    /* OUT */
 471 {
 472         if (parent_task == TASK_NULL)
 473                 return(KERN_INVALID_ARGUMENT);
 474
 475         /*
 476          * No longer supported: too many calls assume that a task has a valid
 477          * process attached.
 478          */
 479         return(KERN_FAILURE);
 480 }
 481
 482 kern_return_t
 483 host_security_create_task_token(
 484         host_security_t                 host_security,
 485         task_t                          parent_task,
 486         __unused security_token_t       sec_token,
 487         __unused audit_token_t          audit_token,
 488         __unused host_priv_t            host_priv,
 489         __unused ledger_port_array_t    ledger_ports,
 490         __unused mach_msg_type_number_t num_ledger_ports,
 491         __unused boolean_t              inherit_memory,
 492         __unused task_t                 *child_task)    /* OUT */
 493 {
 494         if (parent_task == TASK_NULL)
 495                 return(KERN_INVALID_ARGUMENT);
 496
 497         if (host_security == HOST_NULL)
 498                 return(KERN_INVALID_SECURITY);
 499
 500         /*
 501          * No longer supported.
 502          */
 503         return(KERN_FAILURE);
 504 }
 505
 506 /*
 507  * Task ledgers
 508  * ------------
 509  *
 510  * phys_footprint
 511  *   Physical footprint: This is the sum of:
 512  *     + phys_mem [task's resident memory]
 513  *     + phys_compressed
 514  *     + iokit_mem
 515  *
 516  * iokit_mem
 517  *   IOKit mappings: The total size of all IOKit mappings in this task [regardless of clean/dirty state].
 518  *
 519  * phys_compressed
 520  *   Physical compressed: Amount of this task's resident memory which is held by the compressor.
 521  *   Such memory is no longer actually resident for the task [i.e., resident in its pmap],
 522  *   and could be either decompressed back into memory, or paged out to storage, depending
 523  *   on our implementation.
 524  */
 525 void
 526 init_task_ledgers(void)
 527 {
 528         ledger_template_t t;
 529
 530         assert(task_ledger_template == NULL);
 531         assert(kernel_task == TASK_NULL);
 532
 533         if ((t = ledger_template_create("Per-task ledger")) == NULL)
 534                 panic("couldn't create task ledger template");
 535
 536         task_ledgers.cpu_time = ledger_entry_add(t, "cpu_time", "sched", "ns");
 537         task_ledgers.tkm_private = ledger_entry_add(t, "tkm_private",
 538             "physmem", "bytes");
 539         task_ledgers.tkm_shared = ledger_entry_add(t, "tkm_shared", "physmem",
 540             "bytes");
 541         task_ledgers.phys_mem = ledger_entry_add(t, "phys_mem", "physmem",
 542             "bytes");
 543         task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem",
 544             "bytes");
 545         task_ledgers.iokit_mem = ledger_entry_add(t, "iokit_mem", "mappings",
 546             "bytes");
 547         task_ledgers.phys_footprint = ledger_entry_add(t, "phys_footprint", "physmem",
 548             "bytes");
 549         task_ledgers.phys_compressed = ledger_entry_add(t, "phys_compressed", "physmem",
 550             "bytes");
 551         task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power",
 552             "count");
 553         task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power",
 554             "count");
 555
 556         if ((task_ledgers.cpu_time < 0) || (task_ledgers.tkm_private < 0) ||
 557             (task_ledgers.tkm_shared < 0) || (task_ledgers.phys_mem < 0) ||
 558             (task_ledgers.wired_mem < 0) || (task_ledgers.iokit_mem < 0) ||
 559             (task_ledgers.phys_footprint < 0) || (task_ledgers.phys_compressed < 0) ||
 560             (task_ledgers.platform_idle_wakeups < 0) || (task_ledgers.interrupt_wakeups < 0)) {
 561                 panic("couldn't create entries for task ledger template");
 562         }
 563
 564         ledger_track_maximum(t, task_ledgers.phys_footprint, 60);
 565
 566 #if CONFIG_JETSAM
 567         ledger_set_callback(t, task_ledgers.phys_footprint, task_footprint_exceeded, NULL, NULL);
 568 #endif
 569
 570         ledger_set_callback(t, task_ledgers.interrupt_wakeups,
 571                 task_wakeups_rate_exceeded, NULL, NULL);
 572
 573         task_ledger_template = t;
 574 }
 575
/*
 *      task_create_internal:
 *
 *      Allocate and initialize a new task.
 *
 *      The task is allocated from task_zone, given a ledger instantiated
 *      from task_ledger_template and a VM map (forked from the parent's
 *      map when inherit_memory is set, freshly created otherwise).  Its
 *      fields are then initialized, selected state is inherited from
 *      parent_task when one is supplied, and the task is linked onto the
 *      global tasks queue and IPC-enabled.
 *
 *      Parameters:
 *        parent_task     task to inherit from, or TASK_NULL
 *        inherit_memory  fork the parent's map; parent_task MUST be
 *                        non-null in that case
 *        is_64bit        passed to pmap_create() for a fresh map; without
 *                        a parent it also decides the 64-bit address flag
 *                        (under __LP64__ only)
 *        child_task      OUT: the new task, returned with two references
 *                        (one for being alive, one for the caller)
 *
 *      Returns KERN_RESOURCE_SHORTAGE if the task or its ledger cannot be
 *      allocated, KERN_SUCCESS otherwise.
 */
kern_return_t
task_create_internal(
        task_t          parent_task,
        boolean_t       inherit_memory,
        boolean_t       is_64bit,
        task_t          *child_task)            /* OUT */
{
        task_t                  new_task;
        vm_shared_region_t      shared_region;
        ledger_t                ledger = NULL;

        new_task = (task_t) zalloc(task_zone);

        if (new_task == TASK_NULL)
                return(KERN_RESOURCE_SHORTAGE);

        /* one ref for just being alive; one for our caller */
        new_task->ref_count = 2;

        /* allocate with active entries */
        assert(task_ledger_template != NULL);
        if ((ledger = ledger_instantiate(task_ledger_template,
                        LEDGER_CREATE_ACTIVE_ENTRIES)) == NULL) {
                /* nothing else has been set up yet; just give the task back */
                zfree(task_zone, new_task);
                return(KERN_RESOURCE_SHORTAGE);
        }

        new_task->ledger = ledger;

        /* if inherit_memory is true, parent_task MUST not be NULL */
        /* NOTE(review): the map returned below is not checked for failure here -- confirm the callees cannot return NULL. */
        if (inherit_memory)
                new_task->map = vm_map_fork(ledger, parent_task->map);
        else
                new_task->map = vm_map_create(pmap_create(ledger, 0, is_64bit),
                                (vm_map_offset_t)(VM_MIN_ADDRESS),
                                (vm_map_offset_t)(VM_MAX_ADDRESS), TRUE);

        /* Inherit memlock limit from parent */
        if (parent_task)
                vm_map_set_user_wire_limit(new_task->map, (vm_size_t)parent_task->map->user_wire_limit);

        /* Per-task lock, thread list and counters start out empty/zero. */
        lck_mtx_init(&new_task->lock, &task_lck_grp, &task_lck_attr);
        queue_init(&new_task->threads);
        new_task->suspend_count = 0;
        new_task->thread_count = 0;
        new_task->active_thread_count = 0;
        new_task->user_stop_count = 0;
        new_task->legacy_stop_count = 0;
        new_task->active = TRUE;
        new_task->halting = FALSE;
        new_task->user_data = NULL;
        new_task->faults = 0;
        new_task->cow_faults = 0;
        new_task->pageins = 0;
        new_task->messages_sent = 0;
        new_task->messages_received = 0;
        new_task->syscalls_mach = 0;
        new_task->priv_flags = 0;
        new_task->syscalls_unix=0;
        new_task->c_switch = new_task->p_switch = new_task->ps_switch = 0;
        new_task->t_flags = 0;
        new_task->importance = 0;

        zinfo_task_init(new_task);

#ifdef MACH_BSD
        new_task->bsd_info = NULL;
#endif /* MACH_BSD */

#if CONFIG_JETSAM
        /* Apply the system-wide footprint limit (set up in task_init()), if any. */
        if (max_task_footprint != 0) {
                ledger_set_limit(ledger, task_ledgers.phys_footprint, max_task_footprint, PHYS_FOOTPRINT_WARNING_LEVEL);
        }
#endif

        /* A wakeups monitor rate of 0 leaves the monitor off for new tasks. */
        if (task_wakeups_monitor_rate != 0) {
                uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
                int32_t  rate; // Ignored because of WAKEMON_SET_DEFAULTS
                task_wakeups_monitor_ctl(new_task, &flags, &rate);
        }

#if defined(__i386__) || defined(__x86_64__)
        new_task->i386_ldt = 0;
#endif

        new_task->task_debug = NULL;

        queue_init(&new_task->semaphore_list);
        new_task->semaphores_owned = 0;

#if CONFIG_MACF_MACH
        new_task->label = labelh_new(1);
        mac_task_label_init (&new_task->maclabel);
#endif

        ipc_task_init(new_task, parent_task);

        new_task->total_user_time = 0;
        new_task->total_system_time = 0;

        new_task->vtimers = 0;

        new_task->shared_region = NULL;

        new_task->affinity_space = NULL;

#if CONFIG_COUNTERS
        new_task->t_chud = 0U;
#endif

        new_task->pidsuspended = FALSE;
        new_task->frozen = FALSE;
        new_task->changing_freeze_state = FALSE;
        new_task->rusage_cpu_flags = 0;
        new_task->rusage_cpu_percentage = 0;
        new_task->rusage_cpu_interval = 0;
        new_task->rusage_cpu_deadline = 0;
        new_task->rusage_cpu_callt = NULL;
#if MACH_ASSERT
        new_task->suspends_outstanding = 0;
#endif


        new_task->low_mem_notified_warn = 0;
        new_task->low_mem_notified_critical = 0;
        new_task->purged_memory_warn = 0;
        new_task->purged_memory_critical = 0;
        new_task->mem_notify_reserved = 0;
#if IMPORTANCE_INHERITANCE
        new_task->imp_receiver = 0;
        new_task->imp_donor = 0;
        new_task->imp_reserved = 0;
        new_task->task_imp_assertcnt = 0;
        new_task->task_imp_externcnt = 0;
#endif /* IMPORTANCE_INHERITANCE */

#if     defined(__x86_64__)
        new_task->uexc_range_start = new_task->uexc_range_size = new_task->uexc_handler = 0;
#endif

        /* Start from the default policies; some requested fields are overridden from the parent below. */
        new_task->requested_policy = default_task_requested_policy;
        new_task->effective_policy = default_task_effective_policy;
        new_task->pended_policy    = default_task_pended_policy;

        if (parent_task != TASK_NULL) {
                /* Child of an existing task: inherit tokens, shared region, addressing mode and policy. */
                new_task->sec_token = parent_task->sec_token;
                new_task->audit_token = parent_task->audit_token;

                /* inherit the parent's shared region */
                shared_region = vm_shared_region_get(parent_task);
                vm_shared_region_set(new_task, shared_region);

                /* With a parent, the 64-bit flag follows the parent rather than is_64bit. */
                if(task_has_64BitAddr(parent_task))
                        task_set_64BitAddr(new_task);
                new_task->all_image_info_addr = parent_task->all_image_info_addr;
                new_task->all_image_info_size = parent_task->all_image_info_size;

#if defined(__i386__) || defined(__x86_64__)
                if (inherit_memory && parent_task->i386_ldt)
                        new_task->i386_ldt = user_ldt_copy(parent_task->i386_ldt);
#endif
                if (inherit_memory && parent_task->affinity_space)
                        task_affinity_create(parent_task, new_task);

                /* The chosen pset is stored back into the parent's hint as well. */
                new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task);

#if IMPORTANCE_INHERITANCE
                new_task->imp_donor = parent_task->imp_donor;
                /* Embedded doesn't want this to inherit */
                new_task->imp_receiver = parent_task->imp_receiver;
#endif /* IMPORTANCE_INHERITANCE */

                new_task->requested_policy.t_apptype     = parent_task->requested_policy.t_apptype;

                new_task->requested_policy.int_darwinbg  = parent_task->requested_policy.int_darwinbg;
                new_task->requested_policy.ext_darwinbg  = parent_task->requested_policy.ext_darwinbg;
                new_task->requested_policy.int_iotier    = parent_task->requested_policy.int_iotier;
                new_task->requested_policy.ext_iotier    = parent_task->requested_policy.ext_iotier;
                new_task->requested_policy.int_iopassive = parent_task->requested_policy.int_iopassive;
                new_task->requested_policy.ext_iopassive = parent_task->requested_policy.ext_iopassive;
                new_task->requested_policy.bg_iotier     = parent_task->requested_policy.bg_iotier;
                new_task->requested_policy.terminated    = parent_task->requested_policy.terminated;

                task_policy_create(new_task, parent_task->requested_policy.t_boosted);
        } else {
                /* No parent: kernel tokens, no image info, no pset hint. */
                new_task->sec_token = KERNEL_SECURITY_TOKEN;
                new_task->audit_token = KERNEL_AUDIT_TOKEN;
#ifdef __LP64__
                if(is_64bit)
                        task_set_64BitAddr(new_task);
#endif
                new_task->all_image_info_addr = (mach_vm_address_t)0;
                new_task->all_image_info_size = (mach_vm_size_t)0;

                new_task->pset_hint = PROCESSOR_SET_NULL;
        }

        /*
         * kernel_task is still TASK_NULL only while task_init() is creating
         * the kernel task itself, which gets kernel priorities.
         */
        if (kernel_task == TASK_NULL) {
                new_task->priority = BASEPRI_KERNEL;
                new_task->max_priority = MAXPRI_KERNEL;
        } else if (proc_get_effective_task_policy(new_task, TASK_POLICY_LOWPRI_CPU)) {
                new_task->priority = MAXPRI_THROTTLE;
                new_task->max_priority = MAXPRI_THROTTLE;
        } else {
                new_task->priority = BASEPRI_DEFAULT;
                new_task->max_priority = MAXPRI_USER;
        }

        bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics));
        new_task->task_timer_wakeups_bin_1 = new_task->task_timer_wakeups_bin_2 = 0;
        /* Link the task onto the global tasks queue under tasks_threads_lock. */
        lck_mtx_lock(&tasks_threads_lock);
        queue_enter(&tasks, new_task, task_t, tasks);
        tasks_count++;
        lck_mtx_unlock(&tasks_threads_lock);

        /* Carry over the parent's backing-store privilege bit, but only while vm_backing_store_low is set. */
        if (vm_backing_store_low && parent_task != NULL)
                new_task->priv_flags |= (parent_task->priv_flags&VM_BACKING_STORE_PRIV);

        new_task->task_volatile_objects = 0;

        ipc_task_enable(new_task);

        *child_task = new_task;
        return(KERN_SUCCESS);
}
 801
 802 /*
 803  *      task_deallocate:
 804  *
 805  *      Drop a reference on a task.
 806  */
 807 void
 808 task_deallocate(
 809         task_t          task)
 810 {
 811         ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups;
 812
 813         if (task == TASK_NULL)
 814             return;
 815
 816         if (task_deallocate_internal(task) > 0)
 817                 return;
 818
 819         lck_mtx_lock(&tasks_threads_lock);
 820         queue_remove(&terminated_tasks, task, task_t, tasks);
 821         terminated_tasks_count--;
 822         lck_mtx_unlock(&tasks_threads_lock);
 823
 824         /*
 825          *      Give the machine dependent code a chance
 826          *      to perform cleanup before ripping apart
 827          *      the task.
 828          */
 829         machine_task_terminate(task);
 830
 831         ipc_task_terminate(task);
 832
 833         if (task->affinity_space)
 834                 task_affinity_deallocate(task);
 835
 836         vm_map_deallocate(task->map);
 837         is_release(task->itk_space);
 838
 839         ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
 840                            &interrupt_wakeups, &debit);
 841         ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
 842                            &platform_idle_wakeups, &debit);
 843
 844         /* Accumulate statistics for dead tasks */
 845         lck_spin_lock(&dead_task_statistics_lock);
 846         dead_task_statistics.total_user_time += task->total_user_time;
 847         dead_task_statistics.total_system_time += task->total_system_time;
 848
 849         dead_task_statistics.task_interrupt_wakeups += interrupt_wakeups;
 850         dead_task_statistics.task_platform_idle_wakeups += platform_idle_wakeups;
 851
 852         dead_task_statistics.task_timer_wakeups_bin_1 += task->task_timer_wakeups_bin_1;
 853         dead_task_statistics.task_timer_wakeups_bin_2 += task->task_timer_wakeups_bin_2;
 854
 855         lck_spin_unlock(&dead_task_statistics_lock);
 856         lck_mtx_destroy(&task->lock, &task_lck_grp);
 857
 858 #if CONFIG_MACF_MACH
 859         labelh_release(task->label);
 860 #endif
 861
 862         if (!ledger_get_entries(task->ledger, task_ledgers.tkm_private, &credit,
 863             &debit)) {
 864                 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc);
 865                 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_private.free);
 866         }
 867         if (!ledger_get_entries(task->ledger, task_ledgers.tkm_shared, &credit,
 868             &debit)) {
 869                 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_shared.alloc);
 870                 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_shared.free);
 871         }
 872         ledger_dereference(task->ledger);
 873         zinfo_task_free(task);
 874
 875 #if TASK_REFERENCE_LEAK_DEBUG
 876         btlog_remove_entries_for_element(task_ref_btlog, task);
 877 #endif
 878
 879         if (task->task_volatile_objects) {
 880                 /*
 881                  * This task still "owns" some volatile VM objects.
 882                  * Disown them now to avoid leaving them pointing back at
 883                  * an invalid task.
 884                  */
 885                 vm_purgeable_disown(task);
 886                 assert(task->task_volatile_objects == 0);
 887         }
 888
 889         zfree(task_zone, task);
 890 }
 891
 892 /*
 893  *      task_name_deallocate:
 894  *
 895  *      Drop a reference on a task name.
 896  */
 897 void
 898 task_name_deallocate(
 899         task_name_t             task_name)
 900 {
 901         return(task_deallocate((task_t)task_name));
 902 }
 903
 904 /*
 905  *      task_suspension_token_deallocate:
 906  *
 907  *      Drop a reference on a task suspension token.
 908  */
 909 void
 910 task_suspension_token_deallocate(
 911         task_suspension_token_t         token)
 912 {
 913         return(task_deallocate((task_t)token));
 914 }
 915
 916 /*
 917  *      task_terminate:
 918  *
 919  *      Terminate the specified task.  See comments on thread_terminate
 920  *      (kern/thread.c) about problems with terminating the "current task."
 921  */
 922
 923 kern_return_t
 924 task_terminate(
 925         task_t          task)
 926 {
 927         if (task == TASK_NULL)
 928                 return (KERN_INVALID_ARGUMENT);
 929
 930         if (task->bsd_info)
 931                 return (KERN_FAILURE);
 932
 933         return (task_terminate_internal(task));
 934 }
 935
 936 kern_return_t
 937 task_terminate_internal(
 938         task_t                  task)
 939 {
 940         thread_t                        thread, self;
 941         task_t                          self_task;
 942         boolean_t                       interrupt_save;
 943
 944         assert(task != kernel_task);
 945
 946         self = current_thread();
 947         self_task = self->task;
 948
 949         /*
 950          *      Get the task locked and make sure that we are not racing
 951          *      with someone else trying to terminate us.
 952          */
 953         if (task == self_task)
 954                 task_lock(task);
 955         else
 956         if (task < self_task) {
 957                 task_lock(task);
 958                 task_lock(self_task);
 959         }
 960         else {
 961                 task_lock(self_task);
 962                 task_lock(task);
 963         }
 964
 965         if (!task->active) {
 966                 /*
 967                  *      Task is already being terminated.
 968                  *      Just return an error. If we are dying, this will
 969                  *      just get us to our AST special handler and that
 970                  *      will get us to finalize the termination of ourselves.
 971                  */
 972                 task_unlock(task);
 973                 if (self_task != task)
 974                         task_unlock(self_task);
 975
 976                 return (KERN_FAILURE);
 977         }
 978
 979 #if MACH_ASSERT
 980         if (task->suspends_outstanding != 0) {
 981                 printf("WARNING: %s (%d) exiting with %d outstanding suspensions\n",
 982                         proc_name_address(task->bsd_info), proc_pid(task->bsd_info),
 983                         task->suspends_outstanding);
 984         }
 985 #endif
 986
 987         if (self_task != task)
 988                 task_unlock(self_task);
 989
 990         /*
 991          * Make sure the current thread does not get aborted out of
 992          * the waits inside these operations.
 993          */
 994         interrupt_save = thread_interrupt_level(THREAD_UNINT);
 995
 996         /*
 997          *      Indicate that we want all the threads to stop executing
 998          *      at user space by holding the task (we would have held
 999          *      each thread independently in thread_terminate_internal -
1000          *      but this way we may be more likely to already find it
1001          *      held there).  Mark the task inactive, and prevent
1002          *      further task operations via the task port.
1003          */
1004         task_hold_locked(task);
1005         task->active = FALSE;
1006         ipc_task_disable(task);
1007
1008 #if CONFIG_TELEMETRY
1009         /*
1010          * Notify telemetry that this task is going away.
1011          */
1012         telemetry_task_ctl_locked(task, TF_TELEMETRY, 0);
1013 #endif
1014
1015         /*
1016          *      Terminate each thread in the task.
1017          */
1018         queue_iterate(&task->threads, thread, thread_t, task_threads) {
1019                         thread_terminate_internal(thread);
1020         }
1021
1022         task_unlock(task);
1023
1024
1025         /*
1026          *      Destroy all synchronizers owned by the task.
1027          */
1028         task_synchronizer_destroy_all(task);
1029
1030         /*
1031          *      Destroy the IPC space, leaving just a reference for it.
1032          */
1033         ipc_space_terminate(task->itk_space);
1034
1035         if (vm_map_has_4GB_pagezero(task->map))
1036                 vm_map_clear_4GB_pagezero(task->map);
1037
1038         /*
1039          * If the current thread is a member of the task
1040          * being terminated, then the last reference to
1041          * the task will not be dropped until the thread
1042          * is finally reaped.  To avoid incurring the
1043          * expense of removing the address space regions
1044          * at reap time, we do it explictly here.
1045          */
1046         vm_map_remove(task->map,
1047                       task->map->min_offset,
1048                       task->map->max_offset,
1049                       VM_MAP_NO_FLAGS);
1050
1051         /* release our shared region */
1052         vm_shared_region_set(task, NULL);
1053
1054         lck_mtx_lock(&tasks_threads_lock);
1055         queue_remove(&tasks, task, task_t, tasks);
1056         queue_enter(&terminated_tasks, task, task_t, tasks);
1057         tasks_count--;
1058         terminated_tasks_count++;
1059         lck_mtx_unlock(&tasks_threads_lock);
1060
1061         /*
1062          * We no longer need to guard against being aborted, so restore
1063          * the previous interruptible state.
1064          */
1065         thread_interrupt_level(interrupt_save);
1066
1067         /*
1068          * Get rid of the task active reference on itself.
1069          */
1070         task_deallocate(task);
1071
1072         return (KERN_SUCCESS);
1073 }
1074
1075 /*
1076  * task_start_halt:
1077  *
1078  *      Shut the current task down (except for the current thread) in
1079  *      preparation for dramatic changes to the task (probably exec).
1080  *      We hold the task and mark all other threads in the task for
1081  *      termination.
1082  */
1083 kern_return_t
1084 task_start_halt(
1085         task_t          task)
1086 {
1087         thread_t        thread, self;
1088
1089         assert(task != kernel_task);
1090
1091         self = current_thread();
1092
1093         if (task != self->task)
1094                 return (KERN_INVALID_ARGUMENT);
1095
1096         task_lock(task);
1097
1098         if (task->halting || !task->active || !self->active) {
1099                 /*
1100                  *      Task or current thread is already being terminated.
1101                  *      Hurry up and return out of the current kernel context
1102                  *      so that we run our AST special handler to terminate
1103                  *      ourselves.
1104                  */
1105                 task_unlock(task);
1106
1107                 return (KERN_FAILURE);
1108         }
1109
1110         task->halting = TRUE;
1111
1112         if (task->thread_count > 1) {
1113
1114                 /*
1115                  * Mark all the threads to keep them from starting any more
1116                  * user-level execution.  The thread_terminate_internal code
1117                  * would do this on a thread by thread basis anyway, but this
1118                  * gives us a better chance of not having to wait there.
1119                  */
1120                 task_hold_locked(task);
1121
1122                 /*
1123                  *      Terminate all the other threads in the task.
1124                  */
1125                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1126                         if (thread != self)
1127                                 thread_terminate_internal(thread);
1128                 }
1129
1130                 task_release_locked(task);
1131         }
1132         task_unlock(task);
1133         return KERN_SUCCESS;
1134 }
1135
1136
1137 /*
1138  * task_complete_halt:
1139  *
1140  *      Complete task halt by waiting for threads to terminate, then clean
1141  *      up task resources (VM, port namespace, etc...) and then let the
1142  *      current thread go in the (practically empty) task context.
1143  */
1144 void
1145 task_complete_halt(task_t task)
1146 {
1147         task_lock(task);
1148         assert(task->halting);
1149         assert(task == current_task());
1150
1151         /*
1152          *      Wait for the other threads to get shut down.
1153          *      When the last other thread is reaped, we'll be
1154          *      woken up.
1155          */
1156         if (task->thread_count > 1) {
1157                 assert_wait((event_t)&task->halting, THREAD_UNINT);
1158                 task_unlock(task);
1159                 thread_block(THREAD_CONTINUE_NULL);
1160         } else {
1161                 task_unlock(task);
1162         }
1163
1164         /*
1165          *      Give the machine dependent code a chance
1166          *      to perform cleanup of task-level resources
1167          *      associated with the current thread before
1168          *      ripping apart the task.
1169          */
1170         machine_task_terminate(task);
1171
1172         /*
1173          *      Destroy all synchronizers owned by the task.
1174          */
1175         task_synchronizer_destroy_all(task);
1176
1177         /*
1178          *      Destroy the contents of the IPC space, leaving just
1179          *      a reference for it.
1180          */
1181         ipc_space_clean(task->itk_space);
1182
1183         /*
1184          * Clean out the address space, as we are going to be
1185          * getting a new one.
1186          */
1187         vm_map_remove(task->map, task->map->min_offset,
1188                       task->map->max_offset, VM_MAP_NO_FLAGS);
1189
1190         task->halting = FALSE;
1191 }
1192
1193 /*
1194  *      task_hold_locked:
1195  *
1196  *      Suspend execution of the specified task.
1197  *      This is a recursive-style suspension of the task, a count of
1198  *      suspends is maintained.
1199  *
1200  *      CONDITIONS: the task is locked and active.
1201  */
1202 void
1203 task_hold_locked(
1204         register task_t         task)
1205 {
1206         register thread_t       thread;
1207
1208         assert(task->active);
1209
1210         if (task->suspend_count++ > 0)
1211                 return;
1212
1213         /*
1214          *      Iterate through all the threads and hold them.
1215          */
1216         queue_iterate(&task->threads, thread, thread_t, task_threads) {
1217                 thread_mtx_lock(thread);
1218                 thread_hold(thread);
1219                 thread_mtx_unlock(thread);
1220         }
1221 }
1222
1223 /*
1224  *      task_hold:
1225  *
1226  *      Same as the internal routine above, except that is must lock
1227  *      and verify that the task is active.  This differs from task_suspend
1228  *      in that it places a kernel hold on the task rather than just a
1229  *      user-level hold.  This keeps users from over resuming and setting
1230  *      it running out from under the kernel.
1231  *
1232  *      CONDITIONS: the caller holds a reference on the task
1233  */
1234 kern_return_t
1235 task_hold(
1236         register task_t         task)
1237 {
1238         if (task == TASK_NULL)
1239                 return (KERN_INVALID_ARGUMENT);
1240
1241         task_lock(task);
1242
1243         if (!task->active) {
1244                 task_unlock(task);
1245
1246                 return (KERN_FAILURE);
1247         }
1248
1249         task_hold_locked(task);
1250         task_unlock(task);
1251
1252         return (KERN_SUCCESS);
1253 }
1254
1255 kern_return_t
1256 task_wait(
1257                 task_t          task,
1258                 boolean_t       until_not_runnable)
1259 {
1260         if (task == TASK_NULL)
1261                 return (KERN_INVALID_ARGUMENT);
1262
1263         task_lock(task);
1264
1265         if (!task->active) {
1266                 task_unlock(task);
1267
1268                 return (KERN_FAILURE);
1269         }
1270
1271         task_wait_locked(task, until_not_runnable);
1272         task_unlock(task);
1273
1274         return (KERN_SUCCESS);
1275 }
1276
1277 /*
1278  *      task_wait_locked:
1279  *
1280  *      Wait for all threads in task to stop.
1281  *
1282  * Conditions:
1283  *      Called with task locked, active, and held.
1284  */
1285 void
1286 task_wait_locked(
1287         register task_t         task,
1288         boolean_t               until_not_runnable)
1289 {
1290         register thread_t       thread, self;
1291
1292         assert(task->active);
1293         assert(task->suspend_count > 0);
1294
1295         self = current_thread();
1296
1297         /*
1298          *      Iterate through all the threads and wait for them to
1299          *      stop.  Do not wait for the current thread if it is within
1300          *      the task.
1301          */
1302         queue_iterate(&task->threads, thread, thread_t, task_threads) {
1303                 if (thread != self)
1304                         thread_wait(thread, until_not_runnable);
1305         }
1306 }
1307
1308 /*
1309  *      task_release_locked:
1310  *
1311  *      Release a kernel hold on a task.
1312  *
1313  *      CONDITIONS: the task is locked and active
1314  */
1315 void
1316 task_release_locked(
1317         register task_t         task)
1318 {
1319         register thread_t       thread;
1320
1321         assert(task->active);
1322         assert(task->suspend_count > 0);
1323
1324         if (--task->suspend_count > 0)
1325                 return;
1326
1327         queue_iterate(&task->threads, thread, thread_t, task_threads) {
1328                 thread_mtx_lock(thread);
1329                 thread_release(thread);
1330                 thread_mtx_unlock(thread);
1331         }
1332 }
1333
1334 /*
1335  *      task_release:
1336  *
1337  *      Same as the internal routine above, except that it must lock
1338  *      and verify that the task is active.
1339  *
1340  *      CONDITIONS: The caller holds a reference to the task
1341  */
1342 kern_return_t
1343 task_release(
1344         task_t          task)
1345 {
1346         if (task == TASK_NULL)
1347                 return (KERN_INVALID_ARGUMENT);
1348
1349         task_lock(task);
1350
1351         if (!task->active) {
1352                 task_unlock(task);
1353
1354                 return (KERN_FAILURE);
1355         }
1356
1357         task_release_locked(task);
1358         task_unlock(task);
1359
1360         return (KERN_SUCCESS);
1361 }
1362
1363 kern_return_t
1364 task_threads(
1365         task_t                                  task,
1366         thread_act_array_t              *threads_out,
1367         mach_msg_type_number_t  *count)
1368 {
1369         mach_msg_type_number_t  actual;
1370         thread_t                                *thread_list;
1371         thread_t                                thread;
1372         vm_size_t                               size, size_needed;
1373         void                                    *addr;
1374         unsigned int                    i, j;
1375
1376         if (task == TASK_NULL)
1377                 return (KERN_INVALID_ARGUMENT);
1378
1379         size = 0; addr = NULL;
1380
1381         for (;;) {
1382                 task_lock(task);
1383                 if (!task->active) {
1384                         task_unlock(task);
1385
1386                         if (size != 0)
1387                                 kfree(addr, size);
1388
1389                         return (KERN_FAILURE);
1390                 }
1391
1392                 actual = task->thread_count;
1393
1394                 /* do we have the memory we need? */
1395                 size_needed = actual * sizeof (mach_port_t);
1396                 if (size_needed <= size)
1397                         break;
1398
1399                 /* unlock the task and allocate more memory */
1400                 task_unlock(task);
1401
1402                 if (size != 0)
1403                         kfree(addr, size);
1404
1405                 assert(size_needed > 0);
1406                 size = size_needed;
1407
1408                 addr = kalloc(size);
1409                 if (addr == 0)
1410                         return (KERN_RESOURCE_SHORTAGE);
1411         }
1412
1413         /* OK, have memory and the task is locked & active */
1414         thread_list = (thread_t *)addr;
1415
1416         i = j = 0;
1417
1418         for (thread = (thread_t)queue_first(&task->threads); i < actual;
1419                                 ++i, thread = (thread_t)queue_next(&thread->task_threads)) {
1420                 thread_reference_internal(thread);
1421                 thread_list[j++] = thread;
1422         }
1423
1424         assert(queue_end(&task->threads, (queue_entry_t)thread));
1425
1426         actual = j;
1427         size_needed = actual * sizeof (mach_port_t);
1428
1429         /* can unlock task now that we've got the thread refs */
1430         task_unlock(task);
1431
1432         if (actual == 0) {
1433                 /* no threads, so return null pointer and deallocate memory */
1434
1435                 *threads_out = NULL;
1436                 *count = 0;
1437
1438                 if (size != 0)
1439                         kfree(addr, size);
1440         }
1441         else {
1442                 /* if we allocated too much, must copy */
1443
1444                 if (size_needed < size) {
1445                         void *newaddr;
1446
1447                         newaddr = kalloc(size_needed);
1448                         if (newaddr == 0) {
1449                                 for (i = 0; i < actual; ++i)
1450                                         thread_deallocate(thread_list[i]);
1451                                 kfree(addr, size);
1452                                 return (KERN_RESOURCE_SHORTAGE);
1453                         }
1454
1455                         bcopy(addr, newaddr, size_needed);
1456                         kfree(addr, size);
1457                         thread_list = (thread_t *)newaddr;
1458                 }
1459
1460                 *threads_out = thread_list;
1461                 *count = actual;
1462
1463                 /* do the conversion that Mig should handle */
1464
1465                 for (i = 0; i < actual; ++i)
1466                         ((ipc_port_t *) thread_list)[i] = convert_thread_to_port(thread_list[i]);
1467         }
1468
1469         return (KERN_SUCCESS);
1470 }
1471
1472 #define TASK_HOLD_NORMAL        0
1473 #define TASK_HOLD_PIDSUSPEND    1
1474 #define TASK_HOLD_LEGACY        2
1475 #define TASK_HOLD_LEGACY_ALL    3
1476
1477 static kern_return_t
1478 place_task_hold    (
1479         register task_t task,
1480         int mode)
1481 {
1482         if (!task->active) {
1483                 return (KERN_FAILURE);
1484         }
1485
1486         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1487             MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_SUSPEND) | DBG_FUNC_NONE,
1488             proc_pid(task->bsd_info), ((thread_t)queue_first(&task->threads))->thread_id,
1489             task->user_stop_count, task->user_stop_count + 1, 0);
1490
1491 #if MACH_ASSERT
1492         current_task()->suspends_outstanding++;
1493 #endif
1494
1495         if (mode == TASK_HOLD_LEGACY)
1496                 task->legacy_stop_count++;
1497
1498         if (task->user_stop_count++ > 0) {
1499                 /*
1500                  *      If the stop count was positive, the task is
1501                  *      already stopped and we can exit.
1502                  */
1503                 return (KERN_SUCCESS);
1504         }
1505
1506         /*
1507          * Put a kernel-level hold on the threads in the task (all
1508          * user-level task suspensions added together represent a
1509          * single kernel-level hold).  We then wait for the threads
1510          * to stop executing user code.
1511          */
1512         task_hold_locked(task);
1513         task_wait_locked(task, FALSE);
1514
1515         return (KERN_SUCCESS);
1516 }
1517
1518 static kern_return_t
1519 release_task_hold    (
1520         register task_t         task,
1521         int                     mode)
1522 {
1523         register boolean_t release = FALSE;
1524
1525         if (!task->active) {
1526                 return (KERN_FAILURE);
1527         }
1528
1529         if (mode == TASK_HOLD_PIDSUSPEND) {
1530             if (task->pidsuspended == FALSE) {
1531                     return (KERN_FAILURE);
1532             }
1533             task->pidsuspended = FALSE;
1534         }
1535
1536         if (task->user_stop_count > (task->pidsuspended ? 1 : 0)) {
1537
1538                 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1539                     MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_RESUME) | DBG_FUNC_NONE,
1540                     proc_pid(task->bsd_info), ((thread_t)queue_first(&task->threads))->thread_id,
1541                     task->user_stop_count, mode, task->legacy_stop_count);
1542
1543 #if MACH_ASSERT
1544                 /*
1545                  * This is obviously not robust; if we suspend one task and then resume a different one,
1546                  * we'll fly under the radar. This is only meant to catch the common case of a crashed
1547                  * or buggy suspender.
1548                  */
1549                 current_task()->suspends_outstanding--;
1550 #endif
1551
1552                 if (mode == TASK_HOLD_LEGACY_ALL) {
1553                         if (task->legacy_stop_count >= task->user_stop_count) {
1554                                 task->user_stop_count = 0;
1555                                 release = TRUE;
1556                         } else {
1557                                 task->user_stop_count -= task->legacy_stop_count;
1558                         }
1559                         task->legacy_stop_count = 0;
1560                 } else {
1561                         if (mode == TASK_HOLD_LEGACY && task->legacy_stop_count > 0)
1562                                 task->legacy_stop_count--;
1563                         if (--task->user_stop_count == 0)
1564                                 release = TRUE;
1565                 }
1566         }
1567         else {
1568                 return (KERN_FAILURE);
1569         }
1570
1571         /*
1572          *      Release the task if necessary.
1573          */
1574         if (release)
1575                 task_release_locked(task);
1576
1577     return (KERN_SUCCESS);
1578 }
1579
1580
1581 /*
1582  *      task_suspend:
1583  *
1584  *      Implement an (old-fashioned) user-level suspension on a task.
1585  *
1586  *      Because the user isn't expecting to have to manage a suspension
1587  *      token, we'll track it for him in the kernel in the form of a naked
1588  *      send right to the task's resume port.  All such send rights
1589  *      account for a single suspension against the task (unlike task_suspend2()
1590  *      where each caller gets a unique suspension count represented by a
1591  *      unique send-once right).
1592  *
1593  * Conditions:
1594  *      The caller holds a reference to the task
1595  */
1596 kern_return_t
1597 task_suspend(
1598         register task_t         task)
1599 {
1600         kern_return_t                   kr;
1601         mach_port_t                     port, send, old_notify;
1602         mach_port_name_t                name;
1603
1604         if (task == TASK_NULL || task == kernel_task)
1605                 return (KERN_INVALID_ARGUMENT);
1606
1607         task_lock(task);
1608
1609         /*
1610          * Claim a send right on the task resume port, and request a no-senders
1611          * notification on that port (if none outstanding).
1612          */
1613         if (task->itk_resume == IP_NULL) {
1614                 task->itk_resume = ipc_port_alloc_kernel();
1615                 if (!IP_VALID(task->itk_resume))
1616                         panic("failed to create resume port");
1617                 ipc_kobject_set(task->itk_resume, (ipc_kobject_t)task, IKOT_TASK_RESUME);
1618         }
1619
1620         port = task->itk_resume;
1621         ip_lock(port);
1622         assert(ip_active(port));
1623
1624         send = ipc_port_make_send_locked(port);
1625         assert(IP_VALID(send));
1626
1627         if (port->ip_nsrequest == IP_NULL) {
1628                 ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
1629                 assert(old_notify == IP_NULL);
1630                 /* port unlocked */
1631         } else {
1632                 ip_unlock(port);
1633         }
1634
1635         /*
1636          * place a legacy hold on the task.
1637          */
1638         kr = place_task_hold(task, TASK_HOLD_LEGACY);
1639         if (kr != KERN_SUCCESS) {
1640                 task_unlock(task);
1641                 ipc_port_release_send(send);
1642                 return kr;
1643         }
1644
1645         task_unlock(task);
1646
1647         /*
1648          * Copyout the send right into the calling task's IPC space.  It won't know it is there,
1649          * but we'll look it up when calling a traditional resume.  Any IPC operations that
1650          * deallocate the send right will auto-release the suspension.
1651          */
1652         if ((kr = ipc_kmsg_copyout_object(current_task()->itk_space, (ipc_object_t)send,
1653                 MACH_MSG_TYPE_MOVE_SEND, &name)) != KERN_SUCCESS) {
1654                 printf("warning: %s(%d) failed to copyout suspension token for task %s(%d) with error: %d\n",
1655                         proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
1656                         proc_name_address(task->bsd_info), proc_pid(task->bsd_info), kr);
1657                 return (kr);
1658         }
1659
1660         return (kr);
1661 }
1662
1663 /*
1664  *      task_resume:
1665  *              Release a user hold on a task.
      *
      *      Drops one TASK_HOLD_LEGACY hold on the task, then searches the
      *      calling task's IPC space for an entry naming task->itk_resume
      *      (the suspension token).  When such an entry exists it is
      *      deallocated if the release succeeded, or destroyed if it failed.
      *      When no entry exists and the release succeeded, a warning is
      *      printed about the out-of-band resume.
      *
      * Returns:
      *              KERN_INVALID_ARGUMENT if task is TASK_NULL or kernel_task;
      *              otherwise the value returned by release_task_hold().
1666  *
1667  * Conditions:
1668  *              The caller holds a reference to the task
1669  */
1670 kern_return_t
1671 task_resume(
1672         register task_t task)
1673 {
1674         kern_return_t    kr;
1675         mach_port_name_t resume_port_name;
1676         ipc_entry_t              resume_port_entry;
1677         ipc_space_t              space = current_task()->itk_space;
1678
1679         if (task == TASK_NULL || task == kernel_task )
1680                 return (KERN_INVALID_ARGUMENT);
1681
1682         /* release a legacy task hold */
1683         task_lock(task);
1684         kr = release_task_hold(task, TASK_HOLD_LEGACY);
1685         task_unlock(task);
1686
             /*
              * The token lookup is done against the caller's space (not the
              * target's), with that space write-locked.
              */
1687         is_write_lock(space);
1688         if (is_active(space) && IP_VALID(task->itk_resume) &&
1689             ipc_hash_lookup(space, (ipc_object_t)task->itk_resume, &resume_port_name, &resume_port_entry) == TRUE) {
1690                 /*
1691                  * We found a suspension token in the caller's IPC space. Release a send right to indicate that
1692                  * we are holding one less legacy hold on the task from this caller.  If the release failed,
1693                  * go ahead and drop all the rights, as someone either already released our holds or the task
1694                  * is gone.
1695                  */
1696                 if (kr == KERN_SUCCESS)
1697                         ipc_right_dealloc(space, resume_port_name, resume_port_entry);
1698                 else
1699                         ipc_right_destroy(space, resume_port_name, resume_port_entry, FALSE, 0);
1700                 /* space unlocked */
1701         } else {
                     /* no token found: this path must drop the space lock itself */
1702                 is_write_unlock(space);
1703                 if (kr == KERN_SUCCESS)
1704                         printf("warning: %s(%d) performed out-of-band resume on %s(%d)\n",
1705                                proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
1706                                proc_name_address(task->bsd_info), proc_pid(task->bsd_info));
1707         }
1708
             /* kr is still the release_task_hold() result; the token handling does not alter it */
1709         return kr;
1710 }
1711
1712 /*
1713  * Suspend the target task.
1714  * Making/holding a token/reference/port is the caller's responsibility.
      *
      * Places one TASK_HOLD_NORMAL hold on the task while holding the task
      * lock.  Returns KERN_INVALID_ARGUMENT for TASK_NULL or kernel_task;
      * otherwise returns the result of place_task_hold().
1715  */
1716 kern_return_t
1717 task_suspend_internal(task_t task)
1718 {
1719         kern_return_t    kr;
1720
1721         if (task == TASK_NULL || task == kernel_task)
1722                 return (KERN_INVALID_ARGUMENT);
1723
1724         task_lock(task);
1725         kr = place_task_hold(task, TASK_HOLD_NORMAL);
1726         task_unlock(task);
1727         return (kr);
1728 }
1729
1730 /*
1731  * Suspend the target task, and return a suspension token. The token
1732  * represents a reference on the suspended task.
      *
      * On success *suspend_token is set to the task itself (with an extra
      * reference taken) and KERN_SUCCESS is returned.  On failure
      * *suspend_token is set to TASK_NULL and the error from
      * task_suspend_internal() is returned.  suspend_token is written
      * unconditionally, so it must be a valid pointer.
1733  */
1734 kern_return_t
1735 task_suspend2(
1736         register task_t                 task,
1737         task_suspension_token_t *suspend_token)
1738 {
1739         kern_return_t    kr;
1740
1741         kr = task_suspend_internal(task);
1742         if (kr != KERN_SUCCESS) {
1743                 *suspend_token = TASK_NULL;
1744                 return (kr);
1745         }
1746
1747         /*
1748          * Take a reference on the target task and return that to the caller
1749          * as a "suspension token," which can be converted into an SO right to
1750          * the now-suspended task's resume port.
              * (NOTE(review): "SO" presumably means send-once -- confirm.)
1751          */
1752         task_reference_internal(task);
1753         *suspend_token = task;
1754
1755         return (KERN_SUCCESS);
1756 }
1757
1758 /*
1759  * Resume the task
1760  * (reference/token/port management is caller's responsibility).
      *
      * Releases one TASK_HOLD_NORMAL hold on the task while holding the task
      * lock.  Returns KERN_INVALID_ARGUMENT for TASK_NULL or kernel_task;
      * otherwise returns the result of release_task_hold().
1761  */
1762 kern_return_t
1763 task_resume_internal(
1764         register task_suspension_token_t                task)
1765 {
1766         kern_return_t kr;
1767
1768         if (task == TASK_NULL || task == kernel_task)
1769                 return (KERN_INVALID_ARGUMENT);
1770
1771         task_lock(task);
1772         kr = release_task_hold(task, TASK_HOLD_NORMAL);
1773         task_unlock(task);
1774         return (kr);
1775 }
1776
1777 /*
1778  * Resume the task using a suspension token. Consumes the token's ref.
      *
      * Returns the result of task_resume_internal().  The token is passed to
      * task_suspension_token_deallocate() whether or not the resume succeeded.
1779  */
1780 kern_return_t
1781 task_resume2(
1782         register task_suspension_token_t                task)
1783 {
1784         kern_return_t kr;
1785
1786         kr = task_resume_internal(task);
             /* unconditional: runs even when task_resume_internal() reported an error */
1787         task_suspension_token_deallocate(task);
1788
1789         return (kr);
1790 }
1791
1792 boolean_t
1793 task_suspension_notify(mach_msg_header_t *request_header)
1794 {
             /*
              * Handle a notification message whose remote port converts to a
              * task suspension token.  Dispatches on msgh_id:
              *   MACH_NOTIFY_SEND_ONCE  - release one TASK_HOLD_NORMAL hold.
              *   MACH_NOTIFY_NO_SENDERS - release all legacy holds if the port's
              *                            make-send count matches the count in
              *                            the notification; otherwise re-arm
              *                            the no-senders request if none is
              *                            registered.
              *   anything else          - no action.
              * Always returns TRUE.
              */
1795         ipc_port_t port = (ipc_port_t) request_header->msgh_remote_port;
1796         task_t task = convert_port_to_task_suspension_token(port);
1797         mach_msg_type_number_t not_count;
1798
             /*
              * NOTE(review): this early return skips the
              * task_suspension_token_deallocate() done at the bottom.  If the
              * conversion above takes a reference even for kernel_task, that
              * reference is not dropped here -- confirm.
              */
1799         if (task == TASK_NULL || task == kernel_task)
1800                 return TRUE;  /* nothing to do */
1801
1802         switch (request_header->msgh_id) {
1803
1804         case MACH_NOTIFY_SEND_ONCE:
1805                 /* release the hold held by this specific send-once right */
1806                 task_lock(task);
1807                 release_task_hold(task, TASK_HOLD_NORMAL);
1808                 task_unlock(task);
1809                 break;
1810
1811         case MACH_NOTIFY_NO_SENDERS:
                     /* the header is reinterpreted as a no-senders notification to read its count */
1812                 not_count = ((mach_no_senders_notification_t *)request_header)->not_count;
1813
                     /* lock order here: task lock first, then port lock */
1814                 task_lock(task);
1815                 ip_lock(port);
1816                 if (port->ip_mscount == not_count) {
1817
1818                         /* release all the [remaining] outstanding legacy holds */
1819                         assert(port->ip_nsrequest == IP_NULL);
1820                         ip_unlock(port);
1821                         release_task_hold(task, TASK_HOLD_LEGACY_ALL);
1822                         task_unlock(task);
1823
1824                 } else if (port->ip_nsrequest == IP_NULL) {
1825                         ipc_port_t old_notify;
1826
                             /* only the task lock is dropped here; the port stays locked for the calls below */
1827                         task_unlock(task);
1828                         /* new send rights, re-arm notification at current make-send count */
1829                         ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
1830                         assert(old_notify == IP_NULL);
1831                         /* port unlocked */
1832                 } else {
                             /* counts differ but a no-senders request is already registered: nothing to do */
1833                         ip_unlock(port);
1834                         task_unlock(task);
1835                 }
1836                 break;
1837
1838         default:
1839                 break;
1840         }
1841
1842         task_suspension_token_deallocate(task); /* drop token reference */
1843         return TRUE;
1844 }
1845
1846 kern_return_t
1847 task_pidsuspend_locked(task_t task)
1848 {
             /*
              * Mark the task pidsuspended and place a TASK_HOLD_PIDSUSPEND hold.
              * Returns KERN_FAILURE if the task is already pidsuspended;
              * otherwise returns the result of place_task_hold().
              * No TASK_NULL check and no locking is done here; task_pidsuspend()
              * calls this with the task lock held.
              */
1849         kern_return_t kr;
1850
1851         if (task->pidsuspended) {
1852                 kr = KERN_FAILURE;
1853                 goto out;
1854         }
1855
             /* flag is set before the hold is placed and rolled back below if the hold fails */
1856         task->pidsuspended = TRUE;
1857
1858         kr = place_task_hold(task, TASK_HOLD_PIDSUSPEND);
1859         if (kr != KERN_SUCCESS) {
1860                 task->pidsuspended = FALSE;
1861         }
1862 out:
1863         return(kr);
1864 }
1865
1866
1867 /*
1868  *      task_pidsuspend:
1869  *
1870  *      Suspends a task by placing a hold on its threads.
      *
      *      Takes the task lock and delegates to task_pidsuspend_locked().
      *
      * Returns:
      *      KERN_INVALID_ARGUMENT if task is TASK_NULL or kernel_task;
      *      otherwise the result of task_pidsuspend_locked().
1871  *
1872  * Conditions:
1873  *      The caller holds a reference to the task
1874  */
1875 kern_return_t
1876 task_pidsuspend(
1877         register task_t         task)
1878 {
1879         kern_return_t    kr;
1880
1881         if (task == TASK_NULL || task == kernel_task)
1882                 return (KERN_INVALID_ARGUMENT);
1883
1884         task_lock(task);
1885
1886         kr = task_pidsuspend_locked(task);
1887
1888         task_unlock(task);
1889
1890         return (kr);
1891 }
1892
1893 /* If enabled, we bring all the frozen pages back in prior to resumption; otherwise, they're faulted back in on demand */
1894 #define THAW_ON_RESUME 1
1895
1896 /*
1897  *      task_pidresume:
1898  *              Resumes a previously suspended task.
      *
      *      Releases the TASK_HOLD_PIDSUSPEND hold.  When built with
      *      CONFIG_FREEZE and THAW_ON_RESUME, it also serializes against
      *      other freeze-state changes via task->changing_freeze_state and,
      *      if the release succeeded and the task is frozen, thaws the task's
      *      map unless a compressed pager is active.
      *
      * Returns:
      *              KERN_INVALID_ARGUMENT if task is TASK_NULL or kernel_task.
      *              Otherwise the result of release_task_hold(); in the
      *              freeze/thaw build, that result is replaced by the result
      *              of vm_map_thaw() when a thaw is attempted.
1899  *
1900  * Conditions:
1901  *              The caller holds a reference to the task
1902  */
1903 kern_return_t
1904 task_pidresume(
1905         register task_t task)
1906 {
1907         kern_return_t    kr;
1908
1909         if (task == TASK_NULL || task == kernel_task)
1910                 return (KERN_INVALID_ARGUMENT);
1911
1912         task_lock(task);
1913
1914 #if (CONFIG_FREEZE && THAW_ON_RESUME)
1915
             /*
              * Wait until no other thread is changing the freeze state, then
              * claim the flag.  The task lock is dropped while blocked and
              * retaken before the flag is re-checked.
              */
1916         while (task->changing_freeze_state) {
1917
1918                 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
1919                 task_unlock(task);
1920                 thread_block(THREAD_CONTINUE_NULL);
1921
1922                 task_lock(task);
1923         }
1924         task->changing_freeze_state = TRUE;
1925 #endif
1926
1927         kr = release_task_hold(task, TASK_HOLD_PIDSUSPEND);
1928
1929         task_unlock(task);
1930
1931 #if (CONFIG_FREEZE && THAW_ON_RESUME)
             /* task->frozen is read without the task lock here; changing_freeze_state is held by this thread */
1932         if ((kr == KERN_SUCCESS) && (task->frozen == TRUE)) {
1933
1934                 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
1935
1936                         kr = KERN_SUCCESS;
1937                 } else {
1938
                             /* from here on kr reports the thaw result, not the hold release */
1939                         kr = vm_map_thaw(task->map);
1940                 }
1941         }
1942         task_lock(task);
1943
             /* frozen is left untouched when either the release or the thaw failed */
1944         if (kr == KERN_SUCCESS)
1945                 task->frozen = FALSE;
             /* release the flag and wake any thread waiting in a changing_freeze_state loop */
1946         task->changing_freeze_state = FALSE;
1947         thread_wakeup(&task->changing_freeze_state);
1948
1949         task_unlock(task);
1950 #endif
1951
1952         return (kr);
1953 }
1954
1955 #if CONFIG_FREEZE
1956
1957 /*
1958  *      task_freeze:
1959  *
1960  *      Freeze a task.
      *
      *      Serializes against other freeze-state changes through
      *      task->changing_freeze_state, then calls vm_map_freeze_walk()
      *      (walk_only TRUE) or vm_map_freeze() (walk_only FALSE) on the
      *      task's map without holding the task lock.  task->frozen is set
      *      only when walk_only is FALSE and the freeze succeeded.
      *
      *      purgeable_count, wired_count, clean_count, dirty_count,
      *      dirty_budget and shared are passed through unchanged to the
      *      vm_map routine; their meaning is defined there.
      *
      * Returns:
      *      KERN_INVALID_ARGUMENT if task is TASK_NULL or kernel_task;
      *      KERN_FAILURE if the task is already frozen;
      *      otherwise the result of the vm_map routine that was called.
1961  *
1962  * Conditions:
1963  *      The caller holds a reference to the task
1964  */
1965 kern_return_t
1966 task_freeze(
1967         register task_t    task,
1968         uint32_t           *purgeable_count,
1969         uint32_t           *wired_count,
1970         uint32_t           *clean_count,
1971         uint32_t           *dirty_count,
1972         uint32_t           dirty_budget,
1973         boolean_t          *shared,
1974         boolean_t          walk_only)
1975 {
1976         kern_return_t kr;
1977
1978         if (task == TASK_NULL || task == kernel_task)
1979                 return (KERN_INVALID_ARGUMENT);
1980
1981         task_lock(task);
1982
             /* wait (task lock dropped while blocked) until no other freeze-state change is in flight */
1983         while (task->changing_freeze_state) {
1984
1985                 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
1986                 task_unlock(task);
1987                 thread_block(THREAD_CONTINUE_NULL);
1988
1989                 task_lock(task);
1990         }
             /* the already-frozen check applies to walk_only requests as well */
1991         if (task->frozen) {
1992                 task_unlock(task);
1993                 return (KERN_FAILURE);
1994         }
1995         task->changing_freeze_state = TRUE;
1996
1997         task_unlock(task);
1998
1999         if (walk_only) {
2000                 kr = vm_map_freeze_walk(task->map, purgeable_count, wired_count, clean_count, dirty_count, dirty_budget, shared);
2001         } else {
2002                 kr = vm_map_freeze(task->map, purgeable_count, wired_count, clean_count, dirty_count, dirty_budget, shared);
2003         }
2004
2005         task_lock(task);
2006
2007         if (walk_only == FALSE && kr == KERN_SUCCESS)
2008                 task->frozen = TRUE;
             /* release the flag and wake any waiters, regardless of the outcome */
2009         task->changing_freeze_state = FALSE;
2010         thread_wakeup(&task->changing_freeze_state);
2011
2012         task_unlock(task);
2013
2014         return (kr);
2015 }
2016
2017 /*
2018  *      task_thaw:
2019  *
2020  *      Thaw a currently frozen task.
      *
      *      Serializes against other freeze-state changes through
      *      task->changing_freeze_state.  When DEFAULT_PAGER_IS_ACTIVE or
      *      DEFAULT_FREEZER_IS_ACTIVE holds, the map is thawed with
      *      vm_map_thaw() (task lock dropped for the call) and task->frozen
      *      is cleared only on success; otherwise task->frozen is simply
      *      cleared and the call succeeds.
      *
      * Returns:
      *      KERN_INVALID_ARGUMENT if task is TASK_NULL or kernel_task;
      *      KERN_FAILURE if the task is not frozen;
      *      otherwise the result of vm_map_thaw() or KERN_SUCCESS.
2021  *
2022  * Conditions:
2023  *      The caller holds a reference to the task
2024  */
2025 extern void
2026 vm_consider_waking_compactor_swapper(void);
2027
2028 kern_return_t
2029 task_thaw(
2030         register task_t         task)
2031 {
2032         kern_return_t kr;
2033
2034         if (task == TASK_NULL || task == kernel_task)
2035                 return (KERN_INVALID_ARGUMENT);
2036
2037         task_lock(task);
2038
             /* wait (task lock dropped while blocked) until no other freeze-state change is in flight */
2039         while (task->changing_freeze_state) {
2040
2041                 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
2042                 task_unlock(task);
2043                 thread_block(THREAD_CONTINUE_NULL);
2044
2045                 task_lock(task);
2046         }
2047         if (!task->frozen) {
2048                 task_unlock(task);
2049                 return (KERN_FAILURE);
2050         }
2051         task->changing_freeze_state = TRUE;
2052
2053         if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE) {
2054                 task_unlock(task);
2055
2056                 kr = vm_map_thaw(task->map);
2057
2058                 task_lock(task);
2059
2060                 if (kr == KERN_SUCCESS)
2061                         task->frozen = FALSE;
2062         } else {
                     /* no vm_map_thaw() call in this configuration; just clear the flag */
2063                 task->frozen = FALSE;
2064                 kr = KERN_SUCCESS;
2065         }
2066
             /* release the flag and wake any waiters, regardless of the outcome */
2067         task->changing_freeze_state = FALSE;
2068         thread_wakeup(&task->changing_freeze_state);
2069
2070         task_unlock(task);
2071
             /* done after the task lock is dropped, and independent of kr */
2072         if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
2073                 vm_consider_waking_compactor_swapper();
2074         }
2075
2076         return (kr);
2077 }
2078
2079 #endif /* CONFIG_FREEZE */
2080
2081 kern_return_t
2082 host_security_set_task_token(
2083         host_security_t  host_security,
2084         task_t           task,
2085         security_token_t sec_token,
2086         audit_token_t    audit_token,
2087         host_priv_t      host_priv)
2088 {
             /*
              * Store sec_token and audit_token in the task, then set the task's
              * TASK_HOST_PORT special port: the host-priv port when host_priv is
              * non-null, otherwise the plain host port.
              *
              * Returns KERN_INVALID_ARGUMENT if task is TASK_NULL,
              * KERN_INVALID_SECURITY if host_security is HOST_NULL, otherwise
              * the result of task_set_special_port().
              */
2089         ipc_port_t       host_port;
2090         kern_return_t    kr;
2091
2092         if (task == TASK_NULL)
2093                 return(KERN_INVALID_ARGUMENT);
2094
2095         if (host_security == HOST_NULL)
2096                 return(KERN_INVALID_SECURITY);
2097
             /* both tokens are written under a single hold of the task lock */
2098         task_lock(task);
2099         task->sec_token = sec_token;
2100         task->audit_token = audit_token;
2101
2102         task_unlock(task);
2103
2104         if (host_priv != HOST_PRIV_NULL) {
2105                 kr = host_get_host_priv_port(host_priv, &host_port);
2106         } else {
2107                 kr = host_get_host_port(host_priv_self(), &host_port);
2108         }
             /*
              * NOTE(review): the lookup result is checked only by this assert;
              * if asserts are compiled out, a failure here would go unnoticed
              * and host_port would be used as-is below.
              */
2109         assert(kr == KERN_SUCCESS);
2110         kr = task_set_special_port(task, TASK_HOST_PORT, host_port);
2111         return(kr);
2112 }
2113
2114 /*
2115  * This routine was added, pretty much exclusively, for registering the
2116  * RPC glue vector for in-kernel short circuited tasks.  Rather than
2117  * removing it completely, I have only disabled that feature (which was
2118  * the only feature at the time).  It just appears that we are going to
2119  * want to add some user data to tasks in the future (i.e. bsd info,
2120  * task names, etc...), so I left it in the formal task interface.
      *
      * As written, no flavor is supported: the routine returns
      * KERN_INVALID_ARGUMENT for every call, whether or not task is
      * TASK_NULL.  task_info_in and task_info_count are unused.
2121  */
2122 kern_return_t
2123 task_set_info(
2124         task_t          task,
2125         task_flavor_t   flavor,
2126         __unused task_info_t    task_info_in,           /* pointer to IN array */
2127         __unused mach_msg_type_number_t task_info_count)
2128 {
2129         if (task == TASK_NULL)
2130                 return(KERN_INVALID_ARGUMENT);
2131
             /* only a default case exists, so every flavor is rejected */
2132         switch (flavor) {
2133             default:
2134                 return (KERN_INVALID_ARGUMENT);
2135         }
             /* not reached: the switch above always returns */
2136         return (KERN_SUCCESS);
2137 }
2138
2139 kern_return_t
2140 task_info(
2141         task_t                  task,
2142         task_flavor_t           flavor,
2143         task_info_t             task_info_out,
2144         mach_msg_type_number_t  *task_info_count)
2145 {
2146         kern_return_t error = KERN_SUCCESS;
2147
2148         if (task == TASK_NULL)
2149                 return (KERN_INVALID_ARGUMENT);
2150
2151         task_lock(task);
2152
2153         if ((task != current_task()) && (!task->active)) {
2154                 task_unlock(task);
2155                 return (KERN_INVALID_ARGUMENT);
2156         }
2157
2158         switch (flavor) {
2159
2160         case TASK_BASIC_INFO_32:
2161         case TASK_BASIC2_INFO_32:
2162         {
2163                 task_basic_info_32_t    basic_info;
2164                 vm_map_t                                map;
2165                 clock_sec_t                             secs;
2166                 clock_usec_t                    usecs;
2167
2168                 if (*task_info_count < TASK_BASIC_INFO_32_COUNT) {
2169                     error = KERN_INVALID_ARGUMENT;
2170                     break;
2171                 }
2172
2173                 basic_info = (task_basic_info_32_t)task_info_out;
2174
2175                 map = (task == kernel_task)? kernel_map: task->map;
2176                 basic_info->virtual_size = (typeof(basic_info->virtual_size))map->size;
2177                 if (flavor == TASK_BASIC2_INFO_32) {
2178                         /*
2179                          * The "BASIC2" flavor gets the maximum resident
2180                          * size instead of the current resident size...
2181                          */
2182                         basic_info->resident_size = pmap_resident_max(map->pmap);
2183                 } else {
2184                         basic_info->resident_size = pmap_resident_count(map->pmap);
2185                 }
2186                 basic_info->resident_size *= PAGE_SIZE;
2187
2188                 basic_info->policy = ((task != kernel_task)?
2189                                                                                   POLICY_TIMESHARE: POLICY_RR);
2190                 basic_info->suspend_count = task->user_stop_count;
2191
2192                 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
2193                 basic_info->user_time.seconds =
2194                         (typeof(basic_info->user_time.seconds))secs;
2195                 basic_info->user_time.microseconds = usecs;
2196
2197                 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
2198                 basic_info->system_time.seconds =
2199                         (typeof(basic_info->system_time.seconds))secs;
2200                 basic_info->system_time.microseconds = usecs;
2201
2202                 *task_info_count = TASK_BASIC_INFO_32_COUNT;
2203                 break;
2204         }
2205
2206         case TASK_BASIC_INFO_64:
2207         {
2208                 task_basic_info_64_t    basic_info;
2209                 vm_map_t                                map;
2210                 clock_sec_t                             secs;
2211                 clock_usec_t                    usecs;
2212
2213                 if (*task_info_count < TASK_BASIC_INFO_64_COUNT) {
2214                     error = KERN_INVALID_ARGUMENT;
2215                     break;
2216                 }
2217
2218                 basic_info = (task_basic_info_64_t)task_info_out;
2219
2220                 map = (task == kernel_task)? kernel_map: task->map;
2221                 basic_info->virtual_size  = map->size;
2222                 basic_info->resident_size =
2223                         (mach_vm_size_t)(pmap_resident_count(map->pmap))
2224                         * PAGE_SIZE_64;
2225
2226                 basic_info->policy = ((task != kernel_task)?
2227                                                                                   POLICY_TIMESHARE: POLICY_RR);
2228                 basic_info->suspend_count = task->user_stop_count;
2229
2230                 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
2231                 basic_info->user_time.seconds =
2232                         (typeof(basic_info->user_time.seconds))secs;
2233                 basic_info->user_time.microseconds = usecs;
2234
2235                 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
2236                 basic_info->system_time.seconds =
2237                         (typeof(basic_info->system_time.seconds))secs;
2238                 basic_info->system_time.microseconds = usecs;
2239
2240                 *task_info_count = TASK_BASIC_INFO_64_COUNT;
2241                 break;
2242         }
2243
2244         case MACH_TASK_BASIC_INFO:
2245         {
2246                 mach_task_basic_info_t  basic_info;
2247                 vm_map_t                map;
2248                 clock_sec_t             secs;
2249                 clock_usec_t            usecs;
2250
2251                 if (*task_info_count < MACH_TASK_BASIC_INFO_COUNT) {
2252                     error = KERN_INVALID_ARGUMENT;
2253                     break;
2254                 }
2255
2256                 basic_info = (mach_task_basic_info_t)task_info_out;
2257
2258                 map = (task == kernel_task) ? kernel_map : task->map;
2259
2260                 basic_info->virtual_size  = map->size;
2261
2262                 basic_info->resident_size =
2263                     (mach_vm_size_t)(pmap_resident_count(map->pmap));
2264                 basic_info->resident_size *= PAGE_SIZE_64;
2265
2266                 basic_info->resident_size_max =
2267                     (mach_vm_size_t)(pmap_resident_max(map->pmap));
2268                 basic_info->resident_size_max *= PAGE_SIZE_64;
2269
2270                 basic_info->policy = ((task != kernel_task) ?
2271                                       POLICY_TIMESHARE : POLICY_RR);
2272
2273                 basic_info->suspend_count = task->user_stop_count;
2274
2275                 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
2276                 basic_info->user_time.seconds =
2277                     (typeof(basic_info->user_time.seconds))secs;
2278                 basic_info->user_time.microseconds = usecs;
2279
2280                 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
2281                 basic_info->system_time.seconds =
2282                     (typeof(basic_info->system_time.seconds))secs;
2283                 basic_info->system_time.microseconds = usecs;
2284
2285                 *task_info_count = MACH_TASK_BASIC_INFO_COUNT;
2286                 break;
2287         }
2288
2289         case TASK_THREAD_TIMES_INFO:
2290         {
2291                 register task_thread_times_info_t       times_info;
2292                 register thread_t                                       thread;
2293
2294                 if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) {
2295                     error = KERN_INVALID_ARGUMENT;
2296                     break;
2297                 }
2298
2299                 times_info = (task_thread_times_info_t) task_info_out;
2300                 times_info->user_time.seconds = 0;
2301                 times_info->user_time.microseconds = 0;
2302                 times_info->system_time.seconds = 0;
2303                 times_info->system_time.microseconds = 0;
2304
2305
2306                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2307                         time_value_t    user_time, system_time;
2308
2309                         if (thread->options & TH_OPT_IDLE_THREAD)
2310                                 continue;
2311
2312                         thread_read_times(thread, &user_time, &system_time);
2313
2314                         time_value_add(&times_info->user_time, &user_time);
2315                         time_value_add(&times_info->system_time, &system_time);
2316                 }
2317
2318                 *task_info_count = TASK_THREAD_TIMES_INFO_COUNT;
2319                 break;
2320         }
2321
2322         case TASK_ABSOLUTETIME_INFO:
2323         {
2324                 task_absolutetime_info_t        info;
2325                 register thread_t                       thread;
2326
2327                 if (*task_info_count < TASK_ABSOLUTETIME_INFO_COUNT) {
2328                         error = KERN_INVALID_ARGUMENT;
2329                         break;
2330                 }
2331
2332                 info = (task_absolutetime_info_t)task_info_out;
2333                 info->threads_user = info->threads_system = 0;
2334
2335
2336                 info->total_user = task->total_user_time;
2337                 info->total_system = task->total_system_time;
2338
2339                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2340                         uint64_t        tval;
2341                         spl_t           x;
2342
2343                         if (thread->options & TH_OPT_IDLE_THREAD)
2344                                 continue;
2345
2346                         x = splsched();
2347                         thread_lock(thread);
2348
2349                         tval = timer_grab(&thread->user_timer);
2350                         info->threads_user += tval;
2351                         info->total_user += tval;
2352
2353                         tval = timer_grab(&thread->system_timer);
2354                         if (thread->precise_user_kernel_time) {
2355                                 info->threads_system += tval;
2356                                 info->total_system += tval;
2357                         } else {
2358                                 /* system_timer may represent either sys or user */
2359                                 info->threads_user += tval;
2360                                 info->total_user += tval;
2361                         }
2362
2363                         thread_unlock(thread);
2364                         splx(x);
2365                 }
2366
2367
2368                 *task_info_count = TASK_ABSOLUTETIME_INFO_COUNT;
2369                 break;
2370         }
2371
2372         case TASK_DYLD_INFO:
2373         {
2374                 task_dyld_info_t info;
2375
2376                 /*
2377                  * We added the format field to TASK_DYLD_INFO output.  For
2378                  * temporary backward compatibility, accept the fact that
2379                  * clients may ask for the old version - distinguished by the
2380                  * size of the expected result structure.
2381                  */
2382 #define TASK_LEGACY_DYLD_INFO_COUNT \
2383                 offsetof(struct task_dyld_info, all_image_info_format)/sizeof(natural_t)
2384
2385                 if (*task_info_count < TASK_LEGACY_DYLD_INFO_COUNT) {
2386                         error = KERN_INVALID_ARGUMENT;
2387                         break;
2388                 }
2389
2390                 info = (task_dyld_info_t)task_info_out;
2391                 info->all_image_info_addr = task->all_image_info_addr;
2392                 info->all_image_info_size = task->all_image_info_size;
2393
2394                 /* only set format on output for those expecting it */
2395                 if (*task_info_count >= TASK_DYLD_INFO_COUNT) {
2396                         info->all_image_info_format = task_has_64BitAddr(task) ?
2397                                                  TASK_DYLD_ALL_IMAGE_INFO_64 :
2398                                                  TASK_DYLD_ALL_IMAGE_INFO_32 ;
2399                         *task_info_count = TASK_DYLD_INFO_COUNT;
2400                 } else {
2401                         *task_info_count = TASK_LEGACY_DYLD_INFO_COUNT;
2402                 }
2403                 break;
2404         }
2405
2406         case TASK_EXTMOD_INFO:
2407         {
2408                 task_extmod_info_t info;
2409                 void *p;
2410
2411                 if (*task_info_count < TASK_EXTMOD_INFO_COUNT) {
2412                         error = KERN_INVALID_ARGUMENT;
2413                         break;
2414                 }
2415
2416                 info = (task_extmod_info_t)task_info_out;
2417
2418                 p = get_bsdtask_info(task);
2419                 if (p) {
2420                         proc_getexecutableuuid(p, info->task_uuid, sizeof(info->task_uuid));
2421                 } else {
2422                         bzero(info->task_uuid, sizeof(info->task_uuid));
2423                 }
2424                 info->extmod_statistics = task->extmod_statistics;
2425                 *task_info_count = TASK_EXTMOD_INFO_COUNT;
2426
2427                 break;
2428         }
2429
2430         case TASK_KERNELMEMORY_INFO:
2431         {
2432                 task_kernelmemory_info_t        tkm_info;
2433                 ledger_amount_t                 credit, debit;
2434
2435                 if (*task_info_count < TASK_KERNELMEMORY_INFO_COUNT) {
2436                    error = KERN_INVALID_ARGUMENT;
2437                    break;
2438                 }
2439
2440                 tkm_info = (task_kernelmemory_info_t) task_info_out;
2441                 tkm_info->total_palloc = 0;
2442                 tkm_info->total_pfree = 0;
2443                 tkm_info->total_salloc = 0;
2444                 tkm_info->total_sfree = 0;
2445
2446                 if (task == kernel_task) {
2447                         /*
2448                          * All shared allocs/frees from other tasks count against
2449                          * the kernel private memory usage.  If we are looking up
2450                          * info for the kernel task, gather from everywhere.
2451                          */
2452                         task_unlock(task);
2453
2454                         /* start by accounting for all the terminated tasks against the kernel */
2455                         tkm_info->total_palloc = tasks_tkm_private.alloc + tasks_tkm_shared.alloc;
2456                         tkm_info->total_pfree = tasks_tkm_private.free + tasks_tkm_shared.free;
2457
2458                         /* count all other task/thread shared alloc/free against the kernel */
2459                         lck_mtx_lock(&tasks_threads_lock);
2460
2461                         /* XXX this really shouldn't be using the function parameter 'task' as a local var! */
2462                         queue_iterate(&tasks, task, task_t, tasks) {
2463                                 if (task == kernel_task) {
2464                                         if (ledger_get_entries(task->ledger,
2465                                             task_ledgers.tkm_private, &credit,
2466                                             &debit) == KERN_SUCCESS) {
2467                                                 tkm_info->total_palloc += credit;
2468                                                 tkm_info->total_pfree += debit;
2469                                         }
2470                                 }
2471                                 if (!ledger_get_entries(task->ledger,
2472                                     task_ledgers.tkm_shared, &credit, &debit)) {
2473                                         tkm_info->total_palloc += credit;
2474                                         tkm_info->total_pfree += debit;
2475                                 }
2476                         }
2477                         lck_mtx_unlock(&tasks_threads_lock);
2478                 } else {
2479                         if (!ledger_get_entries(task->ledger,
2480                             task_ledgers.tkm_private, &credit, &debit)) {
2481                                 tkm_info->total_palloc = credit;
2482                                 tkm_info->total_pfree = debit;
2483                         }
2484                         if (!ledger_get_entries(task->ledger,
2485                             task_ledgers.tkm_shared, &credit, &debit)) {
2486                                 tkm_info->total_salloc = credit;
2487                                 tkm_info->total_sfree = debit;
2488                         }
2489                         task_unlock(task);
2490                 }
2491
2492                 *task_info_count = TASK_KERNELMEMORY_INFO_COUNT;
2493                 return KERN_SUCCESS;
2494         }
2495
2496         /* OBSOLETE */
2497         case TASK_SCHED_FIFO_INFO:
2498         {
2499
2500                 if (*task_info_count < POLICY_FIFO_BASE_COUNT) {
2501                         error = KERN_INVALID_ARGUMENT;
2502                         break;
2503                 }
2504
2505                 error = KERN_INVALID_POLICY;
2506                 break;
2507         }
2508
2509         /* OBSOLETE */
2510         case TASK_SCHED_RR_INFO:
2511         {
2512                 register policy_rr_base_t       rr_base;
2513                 uint32_t quantum_time;
2514                 uint64_t quantum_ns;
2515
2516                 if (*task_info_count < POLICY_RR_BASE_COUNT) {
2517                         error = KERN_INVALID_ARGUMENT;
2518                         break;
2519                 }
2520
2521                 rr_base = (policy_rr_base_t) task_info_out;
2522
2523                 if (task != kernel_task) {
2524                         error = KERN_INVALID_POLICY;
2525                         break;
2526                 }
2527
2528                 rr_base->base_priority = task->priority;
2529
2530                 quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
2531                 absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
2532
2533                 rr_base->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
2534
2535                 *task_info_count = POLICY_RR_BASE_COUNT;
2536                 break;
2537         }
2538
2539         /* OBSOLETE */
2540         case TASK_SCHED_TIMESHARE_INFO:
2541         {
2542                 register policy_timeshare_base_t        ts_base;
2543
2544                 if (*task_info_count < POLICY_TIMESHARE_BASE_COUNT) {
2545                         error = KERN_INVALID_ARGUMENT;
2546                         break;
2547                 }
2548
2549                 ts_base = (policy_timeshare_base_t) task_info_out;
2550
2551                 if (task == kernel_task) {
2552                         error = KERN_INVALID_POLICY;
2553                         break;
2554                 }
2555
2556                 ts_base->base_priority = task->priority;
2557
2558                 *task_info_count = POLICY_TIMESHARE_BASE_COUNT;
2559                 break;
2560         }
2561
2562         case TASK_SECURITY_TOKEN:
2563         {
2564                 register security_token_t       *sec_token_p;
2565
2566                 if (*task_info_count < TASK_SECURITY_TOKEN_COUNT) {
2567                     error = KERN_INVALID_ARGUMENT;
2568                     break;
2569                 }
2570
2571                 sec_token_p = (security_token_t *) task_info_out;
2572
2573                 *sec_token_p = task->sec_token;
2574
2575                 *task_info_count = TASK_SECURITY_TOKEN_COUNT;
2576                 break;
2577         }
2578
2579         case TASK_AUDIT_TOKEN:
2580         {
2581                 register audit_token_t  *audit_token_p;
2582
2583                 if (*task_info_count < TASK_AUDIT_TOKEN_COUNT) {
2584                     error = KERN_INVALID_ARGUMENT;
2585                     break;
2586                 }
2587
2588                 audit_token_p = (audit_token_t *) task_info_out;
2589
2590                 *audit_token_p = task->audit_token;
2591
2592                 *task_info_count = TASK_AUDIT_TOKEN_COUNT;
2593                 break;
2594         }
2595
2596         case TASK_SCHED_INFO:
2597                 error = KERN_INVALID_ARGUMENT;
2598                 break;
2599
2600         case TASK_EVENTS_INFO:
2601         {
2602                 register task_events_info_t     events_info;
2603                 register thread_t                       thread;
2604
2605                 if (*task_info_count < TASK_EVENTS_INFO_COUNT) {
2606                    error = KERN_INVALID_ARGUMENT;
2607                    break;
2608                 }
2609
2610                 events_info = (task_events_info_t) task_info_out;
2611
2612
2613                 events_info->faults = task->faults;
2614                 events_info->pageins = task->pageins;
2615                 events_info->cow_faults = task->cow_faults;
2616                 events_info->messages_sent = task->messages_sent;
2617                 events_info->messages_received = task->messages_received;
2618                 events_info->syscalls_mach = task->syscalls_mach;
2619                 events_info->syscalls_unix = task->syscalls_unix;
2620
2621                 events_info->csw = task->c_switch;
2622
2623                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2624                         events_info->csw           += thread->c_switch;
2625                         events_info->syscalls_mach += thread->syscalls_mach;
2626                         events_info->syscalls_unix += thread->syscalls_unix;
2627                 }
2628
2629
2630                 *task_info_count = TASK_EVENTS_INFO_COUNT;
2631                 break;
2632         }
2633         case TASK_AFFINITY_TAG_INFO:
2634         {
2635                 if (*task_info_count < TASK_AFFINITY_TAG_INFO_COUNT) {
2636                     error = KERN_INVALID_ARGUMENT;
2637                     break;
2638                 }
2639
2640                 error = task_affinity_info(task, task_info_out, task_info_count);
2641                 break;
2642         }
2643         case TASK_POWER_INFO:
2644         {
2645                 if (*task_info_count < TASK_POWER_INFO_COUNT) {
2646                         error = KERN_INVALID_ARGUMENT;
2647                         break;
2648                 }
2649
2650                 task_power_info_locked(task, (task_power_info_t)task_info_out);
2651                 break;
2652         }
2653
2654         case TASK_VM_INFO:
2655         case TASK_VM_INFO_PURGEABLE:
2656         {
2657                 task_vm_info_t          vm_info;
2658                 vm_map_t                map;
2659
2660                 if (*task_info_count < TASK_VM_INFO_COUNT) {
2661                     error = KERN_INVALID_ARGUMENT;
2662                     break;
2663                 }
2664
2665                 vm_info = (task_vm_info_t)task_info_out;
2666
2667                 if (task == kernel_task) {
2668                         map = kernel_map;
2669                         /* no lock */
2670                 } else {
2671                         map = task->map;
2672                         vm_map_lock_read(map);
2673                 }
2674
2675                 vm_info->virtual_size = (typeof(vm_info->virtual_size))map->size;
2676                 vm_info->region_count = map->hdr.nentries;
2677                 vm_info->page_size = vm_map_page_size(map);
2678
2679                 vm_info->resident_size = pmap_resident_count(map->pmap);
2680                 vm_info->resident_size *= PAGE_SIZE;
2681                 vm_info->resident_size_peak = pmap_resident_max(map->pmap);
2682                 vm_info->resident_size_peak *= PAGE_SIZE;
2683
2684 #define _VM_INFO(_name) \
2685         vm_info->_name = ((mach_vm_size_t) map->pmap->stats._name) * PAGE_SIZE
2686
2687                 _VM_INFO(device);
2688                 _VM_INFO(device_peak);
2689                 _VM_INFO(external);
2690                 _VM_INFO(external_peak);
2691                 _VM_INFO(internal);
2692                 _VM_INFO(internal_peak);
2693                 _VM_INFO(reusable);
2694                 _VM_INFO(reusable_peak);
2695                 _VM_INFO(compressed);
2696                 _VM_INFO(compressed_peak);
2697                 _VM_INFO(compressed_lifetime);
2698
2699                 vm_info->purgeable_volatile_pmap = 0;
2700                 vm_info->purgeable_volatile_resident = 0;
2701                 vm_info->purgeable_volatile_virtual = 0;
2702                 if (task == kernel_task) {
2703                         /*
2704                          * We do not maintain the detailed stats for the
2705                          * kernel_pmap, so just count everything as
2706                          * "internal"...
2707                          */
2708                         vm_info->internal = vm_info->resident_size;
2709                         /*
2710                          * ... but since the memory held by the VM compressor
2711                          * in the kernel address space ought to be attributed
2712                          * to user-space tasks, we subtract it from "internal"
2713                          * to give memory reporting tools a more accurate idea
2714                          * of what the kernel itself is actually using, instead
2715                          * of making it look like the kernel is leaking memory
2716                          * when the system is under memory pressure.
2717                          */
2718                         vm_info->internal -= (VM_PAGE_COMPRESSOR_COUNT *
2719                                               PAGE_SIZE);
2720                 } else {
2721                         mach_vm_size_t  volatile_virtual_size;
2722                         mach_vm_size_t  volatile_resident_size;
2723                         mach_vm_size_t  volatile_pmap_size;
2724                         kern_return_t   kr;
2725
2726                         if (flavor == TASK_VM_INFO_PURGEABLE) {
2727                                 kr = vm_map_query_volatile(
2728                                         map,
2729                                         &volatile_virtual_size,
2730                                         &volatile_resident_size,
2731                                         &volatile_pmap_size);
2732                                 if (kr == KERN_SUCCESS) {
2733                                         vm_info->purgeable_volatile_pmap =
2734                                                 volatile_pmap_size;
2735                                         vm_info->purgeable_volatile_resident =
2736                                                 volatile_resident_size;
2737                                         vm_info->purgeable_volatile_virtual =
2738                                                 volatile_virtual_size;
2739                                 }
2740                         }
2741                         vm_map_unlock_read(map);
2742                 }
2743
2744                 *task_info_count = TASK_VM_INFO_COUNT;
2745                 break;
2746         }
2747
2748         default:
2749                 error = KERN_INVALID_ARGUMENT;
2750         }
2751
2752         task_unlock(task);
2753         return (error);
2754 }
2755
2756 /*
2757  *      task_power_info
2758  *
2759  *      Returns power stats for the task.
2760  *      Note: Called with task locked.
2761  */
2762 void
2763 task_power_info_locked(
2764         task_t                  task,
2765         task_power_info_t       info)
2766 {
2767         thread_t                thread;
2768         ledger_amount_t         tmp;
2769
2770         task_lock_assert_owned(task);
2771
2772         ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
2773                 (ledger_amount_t *)&info->task_interrupt_wakeups, &tmp);
2774         ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
2775                 (ledger_amount_t *)&info->task_platform_idle_wakeups, &tmp);
2776
2777         info->task_timer_wakeups_bin_1 = task->task_timer_wakeups_bin_1;
2778         info->task_timer_wakeups_bin_2 = task->task_timer_wakeups_bin_2;
2779
2780         info->total_user = task->total_user_time;
2781         info->total_system = task->total_system_time;
2782
2783         queue_iterate(&task->threads, thread, thread_t, task_threads) {
2784                 uint64_t        tval;
2785                 spl_t           x;
2786
2787                 if (thread->options & TH_OPT_IDLE_THREAD)
2788                         continue;
2789
2790                 x = splsched();
2791                 thread_lock(thread);
2792
2793                 info->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
2794                 info->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
2795
2796                 tval = timer_grab(&thread->user_timer);
2797                 info->total_user += tval;
2798
2799                 tval = timer_grab(&thread->system_timer);
2800                 if (thread->precise_user_kernel_time) {
2801                         info->total_system += tval;
2802                 } else {
2803                         /* system_timer may represent either sys or user */
2804                         info->total_user += tval;
2805                 }
2806
2807                 thread_unlock(thread);
2808                 splx(x);
2809         }
2810 }
2811
2812 kern_return_t
2813 task_purgable_info(
2814         task_t                  task,
2815         task_purgable_info_t    *stats)
2816 {
2817         if (task == TASK_NULL || stats == NULL)
2818                 return KERN_INVALID_ARGUMENT;
2819         /* Take task reference */
2820         task_reference(task);
2821         vm_purgeable_stats((vm_purgeable_info_t)stats, task);
2822         /* Drop task reference */
2823         task_deallocate(task);
2824         return KERN_SUCCESS;
2825 }
2826
2827 void
2828 task_vtimer_set(
2829         task_t          task,
2830         integer_t       which)
2831 {
2832         thread_t        thread;
2833         spl_t           x;
2834
2835         /* assert(task == current_task()); */ /* bogus assert 4803227 4807483 */
2836
2837         task_lock(task);
2838
2839         task->vtimers |= which;
2840
2841         switch (which) {
2842
2843         case TASK_VTIMER_USER:
2844                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2845                         x = splsched();
2846                         thread_lock(thread);
2847                         if (thread->precise_user_kernel_time)
2848                                 thread->vtimer_user_save = timer_grab(&thread->user_timer);
2849                         else
2850                                 thread->vtimer_user_save = timer_grab(&thread->system_timer);
2851                         thread_unlock(thread);
2852                         splx(x);
2853                 }
2854                 break;
2855
2856         case TASK_VTIMER_PROF:
2857                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2858                         x = splsched();
2859                         thread_lock(thread);
2860                         thread->vtimer_prof_save = timer_grab(&thread->user_timer);
2861                         thread->vtimer_prof_save += timer_grab(&thread->system_timer);
2862                         thread_unlock(thread);
2863                         splx(x);
2864                 }
2865                 break;
2866
2867         case TASK_VTIMER_RLIM:
2868                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2869                         x = splsched();
2870                         thread_lock(thread);
2871                         thread->vtimer_rlim_save = timer_grab(&thread->user_timer);
2872                         thread->vtimer_rlim_save += timer_grab(&thread->system_timer);
2873                         thread_unlock(thread);
2874                         splx(x);
2875                 }
2876                 break;
2877         }
2878
2879         task_unlock(task);
2880 }
2881
2882 void
2883 task_vtimer_clear(
2884         task_t          task,
2885         integer_t       which)
2886 {
2887         assert(task == current_task());
2888
2889         task_lock(task);
2890
2891         task->vtimers &= ~which;
2892
2893         task_unlock(task);
2894 }
2895
2896 void
2897 task_vtimer_update(
2898 __unused
2899         task_t          task,
2900         integer_t       which,
2901         uint32_t        *microsecs)
2902 {
2903         thread_t        thread = current_thread();
2904         uint32_t        tdelt;
2905         clock_sec_t     secs;
2906         uint64_t        tsum;
2907
2908         assert(task == current_task());
2909
2910         assert(task->vtimers & which);
2911
2912         secs = tdelt = 0;
2913
2914         switch (which) {
2915
2916         case TASK_VTIMER_USER:
2917                 if (thread->precise_user_kernel_time) {
2918                         tdelt = (uint32_t)timer_delta(&thread->user_timer,
2919                                                                 &thread->vtimer_user_save);
2920                 } else {
2921                         tdelt = (uint32_t)timer_delta(&thread->system_timer,
2922                                                                 &thread->vtimer_user_save);
2923                 }
2924                 absolutetime_to_microtime(tdelt, &secs, microsecs);
2925                 break;
2926
2927         case TASK_VTIMER_PROF:
2928                 tsum = timer_grab(&thread->user_timer);
2929                 tsum += timer_grab(&thread->system_timer);
2930                 tdelt = (uint32_t)(tsum - thread->vtimer_prof_save);
2931                 absolutetime_to_microtime(tdelt, &secs, microsecs);
2932                 /* if the time delta is smaller than a usec, ignore */
2933                 if (*microsecs != 0)
2934                         thread->vtimer_prof_save = tsum;
2935                 break;
2936
2937         case TASK_VTIMER_RLIM:
2938                 tsum = timer_grab(&thread->user_timer);
2939                 tsum += timer_grab(&thread->system_timer);
2940                 tdelt = (uint32_t)(tsum - thread->vtimer_rlim_save);
2941                 thread->vtimer_rlim_save = tsum;
2942                 absolutetime_to_microtime(tdelt, &secs, microsecs);
2943                 break;
2944         }
2945
2946 }
2947
2948 /*
2949  *      task_assign:
2950  *
2951  *      Change the assigned processor set for the task
2952  */
2953 kern_return_t
2954 task_assign(
2955         __unused task_t         task,
2956         __unused processor_set_t        new_pset,
2957         __unused boolean_t      assign_threads)
2958 {
2959         return(KERN_FAILURE);
2960 }
2961
2962 /*
2963  *      task_assign_default:
2964  *
2965  *      Version of task_assign to assign to default processor set.
2966  */
2967 kern_return_t
2968 task_assign_default(
2969         task_t          task,
2970         boolean_t       assign_threads)
2971 {
2972     return (task_assign(task, &pset0, assign_threads));
2973 }
2974
2975 /*
2976  *      task_get_assignment
2977  *
2978  *      Return name of processor set that task is assigned to.
2979  */
2980 kern_return_t
2981 task_get_assignment(
2982         task_t          task,
2983         processor_set_t *pset)
2984 {
2985         if (!task->active)
2986                 return(KERN_FAILURE);
2987
2988         *pset = &pset0;
2989
2990         return (KERN_SUCCESS);
2991 }
2992
2993
2994 /*
2995  *      task_policy
2996  *
2997  *      Set scheduling policy and parameters, both base and limit, for
2998  *      the given task. Policy must be a policy which is enabled for the
2999  *      processor set. Change contained threads if requested.
3000  */
3001 kern_return_t
3002 task_policy(
3003         __unused task_t                 task,
3004         __unused policy_t                       policy_id,
3005         __unused policy_base_t          base,
3006         __unused mach_msg_type_number_t count,
3007         __unused boolean_t                      set_limit,
3008         __unused boolean_t                      change)
3009 {
3010         return(KERN_FAILURE);
3011 }
3012
3013 /*
3014  *      task_set_policy
3015  *
3016  *      Set scheduling policy and parameters, both base and limit, for
3017  *      the given task. Policy can be any policy implemented by the
3018  *      processor set, whether enabled or not. Change contained threads
3019  *      if requested.
3020  */
3021 kern_return_t
3022 task_set_policy(
3023         __unused task_t                 task,
3024         __unused processor_set_t                pset,
3025         __unused policy_t                       policy_id,
3026         __unused policy_base_t          base,
3027         __unused mach_msg_type_number_t base_count,
3028         __unused policy_limit_t         limit,
3029         __unused mach_msg_type_number_t limit_count,
3030         __unused boolean_t                      change)
3031 {
3032         return(KERN_FAILURE);
3033 }
3034
3035 #if     FAST_TAS
3036 kern_return_t
3037 task_set_ras_pc(
3038         task_t          task,
3039         vm_offset_t     pc,
3040         vm_offset_t     endpc)
3041 {
3042         extern int fast_tas_debug;
3043
3044         if (fast_tas_debug) {
3045                 printf("task 0x%x: setting fast_tas to [0x%x, 0x%x]\n",
3046                        task, pc, endpc);
3047         }
3048         task_lock(task);
3049         task->fast_tas_base = pc;
3050         task->fast_tas_end =  endpc;
3051         task_unlock(task);
3052         return KERN_SUCCESS;
3053 }
3054 #else   /* FAST_TAS */
3055 kern_return_t
3056 task_set_ras_pc(
3057         __unused task_t task,
3058         __unused vm_offset_t    pc,
3059         __unused vm_offset_t    endpc)
3060 {
3061         return KERN_FAILURE;
3062 }
3063 #endif  /* FAST_TAS */
3064
3065 void
3066 task_synchronizer_destroy_all(task_t task)
3067 {
3068         semaphore_t     semaphore;
3069
3070         /*
3071          *  Destroy owned semaphores
3072          */
3073
3074         while (!queue_empty(&task->semaphore_list)) {
3075                 semaphore = (semaphore_t) queue_first(&task->semaphore_list);
3076                 (void) semaphore_destroy(task, semaphore);
3077         }
3078 }
3079
3080 /*
3081  * Install default (machine-dependent) initial thread state
3082  * on the task.  Subsequent thread creation will have this initial
3083  * state set on the thread by machine_thread_inherit_taskwide().
3084  * Flavors and structures are exactly the same as those to thread_set_state()
3085  */
3086 kern_return_t
3087 task_set_state(
3088         task_t task,
3089         int flavor,
3090         thread_state_t state,
3091         mach_msg_type_number_t state_count)
3092 {
3093         kern_return_t ret;
3094
3095         if (task == TASK_NULL) {
3096                 return (KERN_INVALID_ARGUMENT);
3097         }
3098
3099         task_lock(task);
3100
3101         if (!task->active) {
3102                 task_unlock(task);
3103                 return (KERN_FAILURE);
3104         }
3105
3106         ret = machine_task_set_state(task, flavor, state, state_count);
3107
3108         task_unlock(task);
3109         return ret;
3110 }
3111
3112 /*
3113  * Examine the default (machine-dependent) initial thread state
3114  * on the task, as set by task_set_state().  Flavors and structures
3115  * are exactly the same as those passed to thread_get_state().
3116  */
3117 kern_return_t
3118 task_get_state(
3119         task_t  task,
3120         int     flavor,
3121         thread_state_t state,
3122         mach_msg_type_number_t *state_count)
3123 {
3124         kern_return_t ret;
3125
3126         if (task == TASK_NULL) {
3127                 return (KERN_INVALID_ARGUMENT);
3128         }
3129
3130         task_lock(task);
3131
3132         if (!task->active) {
3133                 task_unlock(task);
3134                 return (KERN_FAILURE);
3135         }
3136
3137         ret = machine_task_get_state(task, flavor, state, state_count);
3138
3139         task_unlock(task);
3140         return ret;
3141 }
3142
3143 #if CONFIG_JETSAM
3144 #define HWM_USERCORE_MINSPACE 250 // free space (in MB) required *after* core file creation
3145
3146 void __attribute__((noinline))
3147 THIS_PROCESS_CROSSED_HIGH_WATERMARK__SENDING_EXC_RESOURCE(int max_footprint_mb)
3148 {
3149         task_t                                          task            = current_task();
3150         int                                                     pid         = 0;
3151         char                                    *procname       = (char *) "unknown";
3152         mach_exception_data_type_t      code[EXCEPTION_CODE_MAX];
3153
3154 #ifdef MACH_BSD
3155         pid = proc_selfpid();
3156         if (task->bsd_info != NULL)
3157                 procname = proc_name_address(current_task()->bsd_info);
3158 #endif
3159
3160         if (hwm_user_cores) {
3161                 int                             error;
3162                 uint64_t                starttime, end;
3163                 clock_sec_t             secs = 0;
3164                 uint32_t                microsecs = 0;
3165
3166                 starttime = mach_absolute_time();
3167                 /*
3168                  * Trigger a coredump of this process. Don't proceed unless we know we won't
3169                  * be filling up the disk; and ignore the core size resource limit for this
3170                  * core file.
3171                  */
3172                 if ((error = coredump(current_task()->bsd_info, HWM_USERCORE_MINSPACE, 1)) != 0) {
3173                         printf("couldn't take coredump of %s[%d]: %d\n", procname, pid, error);
3174                 }
3175                 /*
3176                 * coredump() leaves the task suspended.
3177                 */
3178                 task_resume_internal(current_task());
3179
3180                 end = mach_absolute_time();
3181                 absolutetime_to_microtime(end - starttime, &secs, &microsecs);
3182                 printf("coredump of %s[%d] taken in %d secs %d microsecs\n",
3183                        proc_name_address(current_task()->bsd_info), pid, (int)secs, microsecs);
3184         }
3185
3186         if (disable_exc_resource) {
3187                 printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
3188                         "supressed by a boot-arg.\n", procname, pid, max_footprint_mb);
3189                 return;
3190         }
3191
3192         printf("process %s[%d] crossed memory high watermark (%d MB); sending "
3193                 "EXC_RESOURCE.\n", procname, pid, max_footprint_mb);
3194
3195         code[0] = code[1] = 0;
3196         EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY);
3197         EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK);
3198         EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb);
3199         exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
3200 }
3201
3202 /*
3203  * Callback invoked when a task exceeds its physical footprint limit.
3204  */
3205 void
3206 task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1)
3207 {
3208         ledger_amount_t max_footprint_mb;
3209
3210         if (warning == LEDGER_WARNING_DIPPED_BELOW) {
3211                 /*
3212                  * Task memory limits only provide a warning on the way up.
3213                  */
3214                 return;
3215         }
3216
3217         ledger_get_limit(current_task()->ledger, task_ledgers.phys_footprint, &max_footprint_mb);
3218         max_footprint_mb >>= 20;
3219
3220         /*
3221          * If this an actual violation (not a warning),
3222          * generate a non-fatal high watermark EXC_RESOURCE.
3223          */
3224         if ((warning == 0) && (current_task()->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION)) {
3225                 THIS_PROCESS_CROSSED_HIGH_WATERMARK__SENDING_EXC_RESOURCE((int)max_footprint_mb);
3226         }
3227
3228         memorystatus_on_ledger_footprint_exceeded((warning == LEDGER_WARNING_ROSE_ABOVE) ? TRUE : FALSE,
3229                 (int)max_footprint_mb);
3230 }
3231
3232 extern int proc_check_footprint_priv(void);
3233
3234 kern_return_t
3235 task_set_phys_footprint_limit(
3236         task_t task,
3237         int new_limit_mb,
3238         int *old_limit_mb)
3239 {
3240         kern_return_t error;
3241
3242         if ((error = proc_check_footprint_priv())) {
3243                 return (KERN_NO_ACCESS);
3244         }
3245
3246         return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, FALSE);
3247 }
3248
3249 kern_return_t
3250 task_set_phys_footprint_limit_internal(
3251         task_t task,
3252         int new_limit_mb,
3253         int *old_limit_mb,
3254         boolean_t trigger_exception)
3255 {
3256         ledger_amount_t old;
3257
3258         ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &old);
3259
3260         if (old_limit_mb) {
3261                 *old_limit_mb = old >> 20;
3262         }
3263
3264         if (new_limit_mb == -1) {
3265                 /*
3266                  * Caller wishes to remove the limit.
3267                  */
3268                 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
3269                                  max_task_footprint ? max_task_footprint : LEDGER_LIMIT_INFINITY,
3270                                  max_task_footprint ? PHYS_FOOTPRINT_WARNING_LEVEL : 0);
3271                 return (KERN_SUCCESS);
3272         }
3273
3274 #ifdef CONFIG_NOMONITORS
3275         return (KERN_SUCCESS);
3276 #endif /* CONFIG_NOMONITORS */
3277
3278         task_lock(task);
3279
3280         if (trigger_exception) {
3281                 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION;
3282         } else {
3283                 task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION;
3284         }
3285
3286         ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
3287                 (ledger_amount_t)new_limit_mb << 20, PHYS_FOOTPRINT_WARNING_LEVEL);
3288
3289         task_unlock(task);
3290
3291         return (KERN_SUCCESS);
3292 }
3293
3294 kern_return_t
3295 task_get_phys_footprint_limit(
3296         task_t task,
3297         int *limit_mb)
3298 {
3299         ledger_amount_t limit;
3300
3301         ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &limit);
3302         *limit_mb = limit >> 20;
3303
3304         return (KERN_SUCCESS);
3305 }
3306 #else /* CONFIG_JETSAM */
3307 kern_return_t
3308 task_set_phys_footprint_limit(
3309         __unused task_t task,
3310         __unused int new_limit_mb,
3311         __unused int *old_limit_mb)
3312 {
3313         return (KERN_FAILURE);
3314 }
3315
3316 kern_return_t
3317 task_get_phys_footprint_limit(
3318         __unused task_t task,
3319         __unused int *limit_mb)
3320 {
3321         return (KERN_FAILURE);
3322 }
3323 #endif /* CONFIG_JETSAM */
3324
3325 /*
3326  * We need to export some functions to other components that
3327  * are currently implemented in macros within the osfmk
3328  * component.  Just export them as functions of the same name.
3329  */
3330 boolean_t is_kerneltask(task_t t)
3331 {
3332         if (t == kernel_task)
3333                 return (TRUE);
3334
3335         return (FALSE);
3336 }
3337
3338 int
3339 check_for_tasksuspend(task_t task)
3340 {
3341
3342         if (task == TASK_NULL)
3343                 return (0);
3344
3345         return (task->suspend_count > 0);
3346 }
3347
3348 #undef current_task
3349 task_t current_task(void);
3350 task_t current_task(void)
3351 {
3352         return (current_task_fast());
3353 }
3354
3355 #undef task_reference
3356 void task_reference(task_t task);
3357 void
3358 task_reference(
3359         task_t          task)
3360 {
3361         if (task != TASK_NULL)
3362                 task_reference_internal(task);
3363 }
3364
3365 /*
3366  * This routine is called always with task lock held.
3367  * And it returns a thread handle without reference as the caller
3368  * operates on it under the task lock held.
3369  */
3370 thread_t
3371 task_findtid(task_t task, uint64_t tid)
3372 {
3373         thread_t thread= THREAD_NULL;
3374
3375         queue_iterate(&task->threads, thread, thread_t, task_threads) {
3376                         if (thread->thread_id == tid)
3377                                 return(thread);
3378         }
3379         return(THREAD_NULL);
3380 }
3381
3382
3383 #if CONFIG_MACF_MACH
3384 /*
3385  * Protect 2 task labels against modification by adding a reference on
3386  * both label handles. The locks do not actually have to be held while
3387  * using the labels as only labels with one reference can be modified
3388  * in place.
3389  */
3390
3391 void
3392 tasklabel_lock2(
3393         task_t a,
3394         task_t b)
3395 {
3396         labelh_reference(a->label);
3397         labelh_reference(b->label);
3398 }
3399
3400 void
3401 tasklabel_unlock2(
3402         task_t a,
3403         task_t b)
3404 {
3405         labelh_release(a->label);
3406         labelh_release(b->label);
3407 }
3408
3409 void
3410 mac_task_label_update_internal(
3411         struct label    *pl,
3412         struct task     *task)
3413 {
3414
3415         tasklabel_lock(task);
3416         task->label = labelh_modify(task->label);
3417         mac_task_label_update(pl, &task->maclabel);
3418         tasklabel_unlock(task);
3419         ip_lock(task->itk_self);
3420         mac_port_label_update_cred(pl, &task->itk_self->ip_label);
3421         ip_unlock(task->itk_self);
3422 }
3423
3424 void
3425 mac_task_label_modify(
3426         struct task     *task,
3427         void            *arg,
3428         void (*f)       (struct label *l, void *arg))
3429 {
3430
3431         tasklabel_lock(task);
3432         task->label = labelh_modify(task->label);
3433         (*f)(&task->maclabel, arg);
3434         tasklabel_unlock(task);
3435 }
3436
3437 struct label *
3438 mac_task_get_label(struct task *task)
3439 {
3440         return (&task->maclabel);
3441 }
3442 #endif
3443
3444 /*
3445  * Control the CPU usage monitor for a task.
3446  */
3447 kern_return_t
3448 task_cpu_usage_monitor_ctl(task_t task, uint32_t *flags)
3449 {
3450         int error = KERN_SUCCESS;
3451
3452         if (*flags & CPUMON_MAKE_FATAL) {
3453                 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_CPUMON;
3454         } else {
3455                 error = KERN_INVALID_ARGUMENT;
3456         }
3457
3458         return error;
3459 }
3460
3461 /*
3462  * Control the wakeups monitor for a task.
3463  */
3464 kern_return_t
3465 task_wakeups_monitor_ctl(task_t task, uint32_t *flags, int32_t *rate_hz)
3466 {
3467         ledger_t ledger = task->ledger;
3468
3469         task_lock(task);
3470         if (*flags & WAKEMON_GET_PARAMS) {
3471                 ledger_amount_t limit;
3472                 uint64_t                period;
3473
3474                 ledger_get_limit(ledger, task_ledgers.interrupt_wakeups, &limit);
3475                 ledger_get_period(ledger, task_ledgers.interrupt_wakeups, &period);
3476
3477                 if (limit != LEDGER_LIMIT_INFINITY) {
3478                         /*
3479                          * An active limit means the wakeups monitor is enabled.
3480                          */
3481                         *rate_hz = (int32_t)(limit / (int64_t)(period / NSEC_PER_SEC));
3482                         *flags = WAKEMON_ENABLE;
3483                         if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
3484                                 *flags |= WAKEMON_MAKE_FATAL;
3485                         }
3486                 } else {
3487                         *flags = WAKEMON_DISABLE;
3488                         *rate_hz = -1;
3489                 }
3490
3491                 /*
3492                  * If WAKEMON_GET_PARAMS is present in flags, all other flags are ignored.
3493                  */
3494                 task_unlock(task);
3495                 return KERN_SUCCESS;
3496         }
3497
3498         if (*flags & WAKEMON_ENABLE) {
3499                 if (*flags & WAKEMON_SET_DEFAULTS) {
3500                         *rate_hz = task_wakeups_monitor_rate;
3501                 }
3502
3503 #ifndef CONFIG_NOMONITORS
3504                 if (*flags & WAKEMON_MAKE_FATAL) {
3505                         task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
3506                 }
3507 #endif /* CONFIG_NOMONITORS */
3508
3509                 if (*rate_hz < 0) {
3510                         task_unlock(task);
3511                         return KERN_INVALID_ARGUMENT;
3512                 }
3513
3514 #ifndef CONFIG_NOMONITORS
3515                 ledger_set_limit(ledger, task_ledgers.interrupt_wakeups, *rate_hz * task_wakeups_monitor_interval,
3516                         task_wakeups_monitor_ustackshots_trigger_pct);
3517                 ledger_set_period(ledger, task_ledgers.interrupt_wakeups, task_wakeups_monitor_interval * NSEC_PER_SEC);
3518                 ledger_enable_callback(ledger, task_ledgers.interrupt_wakeups);
3519 #endif /* CONFIG_NOMONITORS */
3520         } else if (*flags & WAKEMON_DISABLE) {
3521                 /*
3522                  * Caller wishes to disable wakeups monitor on the task.
3523                  *
3524                  * Disable telemetry if it was triggered by the wakeups monitor, and
3525                  * remove the limit & callback on the wakeups ledger entry.
3526                  */
3527 #if CONFIG_TELEMETRY
3528                 telemetry_task_ctl_locked(current_task(), TF_WAKEMON_WARNING, 0);
3529 #endif
3530                 ledger_disable_refill(ledger, task_ledgers.interrupt_wakeups);
3531                 ledger_disable_callback(ledger, task_ledgers.interrupt_wakeups);
3532         }
3533
3534         task_unlock(task);
3535         return KERN_SUCCESS;
3536 }
3537
3538 void
3539 task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1)
3540 {
3541         if (warning == LEDGER_WARNING_ROSE_ABOVE) {
3542 #if CONFIG_TELEMETRY
3543                 /*
3544                  * This task is in danger of violating the wakeups monitor. Enable telemetry on this task
3545                  * so there are micro-stackshots available if and when EXC_RESOURCE is triggered.
3546                  */
3547                 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 1);
3548 #endif
3549                 return;
3550         }
3551
3552 #if CONFIG_TELEMETRY
3553         /*
3554          * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
3555          * exceeded the limit, turn telemetry off for the task.
3556          */
3557         telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 0);
3558 #endif
3559
3560         if (warning == 0) {
3561                 THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE();
3562         }
3563 }
3564
3565 void __attribute__((noinline))
3566 THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE(void)
3567 {
3568         task_t                                          task            = current_task();
3569         int                                                     pid         = 0;
3570         char                                    *procname       = (char *) "unknown";
3571         uint64_t                                        observed_wakeups_rate;
3572         uint64_t                                        permitted_wakeups_rate;
3573         uint64_t                                        observation_interval;
3574         mach_exception_data_type_t      code[EXCEPTION_CODE_MAX];
3575         struct ledger_entry_info        lei;
3576
3577 #ifdef MACH_BSD
3578         pid = proc_selfpid();
3579         if (task->bsd_info != NULL)
3580                 procname = proc_name_address(current_task()->bsd_info);
3581 #endif
3582
3583         ledger_get_entry_info(task->ledger, task_ledgers.interrupt_wakeups, &lei);
3584
3585         /*
3586          * Disable the exception notification so we don't overwhelm
3587          * the listener with an endless stream of redundant exceptions.
3588          */
3589         uint32_t flags = WAKEMON_DISABLE;
3590         task_wakeups_monitor_ctl(task, &flags, NULL);
3591
3592         observed_wakeups_rate = (lei.lei_balance * (int64_t)NSEC_PER_SEC) / lei.lei_last_refill;
3593         permitted_wakeups_rate = lei.lei_limit / task_wakeups_monitor_interval;
3594         observation_interval = lei.lei_refill_period / NSEC_PER_SEC;
3595
3596         if (disable_exc_resource) {
3597                 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
3598                         "supressed by a boot-arg\n", procname, pid);
3599                 return;
3600         }
3601         printf("process %s[%d] caught causing excessive wakeups. Observed wakeups rate "
3602                 "(per sec): %lld; Maximum permitted wakeups rate (per sec): %lld; Observation "
3603                 "period: %lld seconds; Task lifetime number of wakeups: %lld\n",
3604                 procname, pid, observed_wakeups_rate, permitted_wakeups_rate,
3605                 observation_interval, lei.lei_credit);
3606
3607         code[0] = code[1] = 0;
3608         EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_WAKEUPS);
3609         EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_WAKEUPS_MONITOR);
3610         EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_PERMITTED(code[0], task_wakeups_monitor_rate);
3611         EXC_RESOURCE_CPUMONITOR_ENCODE_OBSERVATION_INTERVAL(code[0], observation_interval);
3612         EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_OBSERVED(code[1], lei.lei_balance * (int64_t)NSEC_PER_SEC / lei.lei_last_refill);
3613         exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
3614
3615         if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
3616                 task_terminate_internal(task);
3617         }
3618 }