osfmk/kern/task.c

   1 /*
   2  * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * @OSF_FREE_COPYRIGHT@
  30  */
  31 /*
  32  * Mach Operating System
  33  * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
  34  * All Rights Reserved.
  35  *
  36  * Permission to use, copy, modify and distribute this software and its
  37  * documentation is hereby granted, provided that both the copyright
  38  * notice and this permission notice appear in all copies of the
  39  * software, derivative works or modified versions, and any portions
  40  * thereof, and that both notices appear in supporting documentation.
  41  *
  42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
  44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  45  *
  46  * Carnegie Mellon requests users of this software to return to
  47  *
  48  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  49  *  School of Computer Science
  50  *  Carnegie Mellon University
  51  *  Pittsburgh PA 15213-3890
  52  *
  53  * any improvements or extensions that they make and grant Carnegie Mellon
  54  * the rights to redistribute these changes.
  55  */
  56 /*
  57  *      File:   kern/task.c
  58  *      Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub,
  59  *              David Black
  60  *
  61  *      Task management primitives implementation.
  62  */
  63 /*
  64  * Copyright (c) 1993 The University of Utah and
  65  * the Computer Systems Laboratory (CSL).  All rights reserved.
  66  *
  67  * Permission to use, copy, modify and distribute this software and its
  68  * documentation is hereby granted, provided that both the copyright
  69  * notice and this permission notice appear in all copies of the
  70  * software, derivative works or modified versions, and any portions
  71  * thereof, and that both notices appear in supporting documentation.
  72  *
  73  * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
  74  * IS" CONDITION.  THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
  75  * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  76  *
  77  * CSL requests users of this software to return to csl-dist@cs.utah.edu any
  78  * improvements that they make and grant CSL redistribution rights.
  79  *
  80  */
  81 /*
  82  * NOTICE: This file was modified by McAfee Research in 2004 to introduce
  83  * support for mandatory and extensible security protections.  This notice
  84  * is included in support of clause 2.2 (b) of the Apple Public License,
  85  * Version 2.0.
  86  * Copyright (c) 2005 SPARTA, Inc.
  87  */
  88
  89 #include <fast_tas.h>
  90 #include <platforms.h>
  91
  92 #include <mach/mach_types.h>
  93 #include <mach/boolean.h>
  94 #include <mach/host_priv.h>
  95 #include <mach/machine/vm_types.h>
  96 #include <mach/vm_param.h>
  97 #include <mach/semaphore.h>
  98 #include <mach/task_info.h>
  99 #include <mach/task_special_ports.h>
 100
 101 #include <ipc/ipc_types.h>
 102 #include <ipc/ipc_space.h>
 103 #include <ipc/ipc_entry.h>
 104 #include <ipc/ipc_hash.h>
 105
 106 #include <kern/kern_types.h>
 107 #include <kern/mach_param.h>
 108 #include <kern/misc_protos.h>
 109 #include <kern/task.h>
 110 #include <kern/thread.h>
 111 #include <kern/zalloc.h>
 112 #include <kern/kalloc.h>
 113 #include <kern/processor.h>
 114 #include <kern/sched_prim.h>    /* for thread_wakeup */
 115 #include <kern/ipc_tt.h>
 116 #include <kern/host.h>
 117 #include <kern/clock.h>
 118 #include <kern/timer.h>
 119 #include <kern/assert.h>
 120 #include <kern/sync_lock.h>
 121 #include <kern/affinity.h>
 122 #include <kern/exc_resource.h>
 123 #if CONFIG_TELEMETRY
 124 #include <kern/telemetry.h>
 125 #endif
 126
 127 #include <vm/pmap.h>
 128 #include <vm/vm_map.h>
 129 #include <vm/vm_kern.h>         /* for kernel_map, ipc_kernel_map */
 130 #include <vm/vm_pageout.h>
 131 #include <vm/vm_protos.h>
 132 #include <vm/vm_purgeable_internal.h>
 133
 134 #include <sys/resource.h>
 135 /*
 136  * Exported interfaces
 137  */
 138
 139 #include <mach/task_server.h>
 140 #include <mach/mach_host_server.h>
 141 #include <mach/host_security_server.h>
 142 #include <mach/mach_port_server.h>
 143 #include <mach/security_server.h>
 144
 145 #include <vm/vm_shared_region.h>
 146
 147 #if CONFIG_MACF_MACH
 148 #include <security/mac_mach_internal.h>
 149 #endif
 150
 151 #if CONFIG_COUNTERS
 152 #include <pmc/pmc.h>
 153 #endif /* CONFIG_COUNTERS */
 154
 155 #include <libkern/OSDebug.h>
 156 #include <libkern/OSAtomic.h>
 157
/* The kernel's own task; task_init() creates it as the first task. */
task_t                  kernel_task;
/* Zone from which struct task is allocated (set up in task_init()). */
zone_t                  task_zone;
/* Lock attribute/group objects used for the task-related locks below. */
lck_attr_t      task_lck_attr;
lck_grp_t       task_lck_grp;
lck_grp_attr_t  task_lck_grp_attr;

/* Flag set by core audio when audio is playing. Used to stifle EXC_RESOURCE generation when active. */
int audio_active = 0;

/* NOTE(review): zone-info usage stores; their users are not visible in this part of the file. */
zinfo_usage_store_t tasks_tkm_private;
zinfo_usage_store_t tasks_tkm_shared;

/* A container to accumulate statistics for expired tasks */
expired_task_statistics_t               dead_task_statistics;
/* Spin lock for dead_task_statistics; initialized at the end of task_init(). */
lck_spin_t              dead_task_statistics_lock;

/* Template every task ledger is instantiated from; built once by init_task_ledgers(). */
static ledger_template_t task_ledger_template = NULL;
/* Ledger entry indices; all start at -1 until init_task_ledgers() assigns them. */
struct _task_ledger_indices task_ledgers __attribute__((used)) = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1};
void init_task_ledgers(void);
/* Ledger callbacks registered by init_task_ledgers(). */
void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1);
void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1);
void __attribute__((noinline)) THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE(void);
void __attribute__((noinline)) THIS_PROCESS_CROSSED_HIGH_WATERMARK__SENDING_EXC_RESOURCE(int max_footprint_mb);
int coredump(void *core_proc, int reserve_mb, int ignore_ulimit);

kern_return_t task_suspend_internal(task_t);
kern_return_t task_resume_internal(task_t);

/* Called from task_init() before the wakeups-monitor boot-args are parsed. */
void proc_init_cpumon_params(void);
 187
// Warn tasks when they hit 80% of their memory limit.
#define PHYS_FOOTPRINT_WARNING_LEVEL 80

/* Defaults used by task_init() when the matching boot-args are absent. */
#define TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT              150 /* wakeups per second */
#define TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL   300 /* in seconds. */

/*
 * Level (in terms of percentage of the limit) at which the wakeups monitor triggers telemetry.
 *
 * (i.e. when the task's wakeups rate exceeds 70% of the limit, start taking user
 *  stacktraces, aka micro-stackshots)
 */
#define TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER        70

int task_wakeups_monitor_interval; /* In seconds. Time period over which wakeups rate is observed */
int task_wakeups_monitor_rate;     /* In hz. Maximum allowable wakeups per task before EXC_RESOURCE is sent */

int task_wakeups_monitor_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */

int disable_exc_resource; /* Global override to suppress EXC_RESOURCE for resource monitor violations. */

/*
 * Per-task limit on physical memory consumption. task_init() reads it in MB
 * (boot-arg "max_task_pmem", else device tree "kern.max_task_pmem") and, when
 * CONFIG_JETSAM is set, converts it to bytes. Zero means no limit.
 */
int max_task_footprint = 0;
int task_max = CONFIG_TASK_MAX; /* Max number of tasks */

int hwm_user_cores = 0; /* high watermark violations generate user core files */
 213
/*
 * Externally defined routines, declared here only when MACH_BSD is
 * configured (and, for the footprint hook, when CONFIG_JETSAM is set).
 */
#ifdef MACH_BSD
extern void     proc_getexecutableuuid(void *, unsigned char *, unsigned long);
extern int      proc_pid(struct proc *p);
extern int      proc_selfpid(void);
extern char     *proc_name_address(struct proc *p);
#if CONFIG_JETSAM
extern void     memorystatus_on_ledger_footprint_exceeded(int warning, const int max_footprint_mb);
#endif
#endif

/* Forwards */

/*
 * Prototypes for task routines used in this file ahead of their
 * definitions.
 */
void            task_hold_locked(
                        task_t          task);
void            task_wait_locked(
                        task_t          task,
                        boolean_t       until_not_runnable);
void            task_release_locked(
                        task_t          task);
void            task_free(
                        task_t          task );
void            task_synchronizer_destroy_all(
                        task_t          task);

int check_for_tasksuspend(
                        task_t task);
 240
 241 void
 242 task_backing_store_privileged(
 243                         task_t task)
 244 {
 245         task_lock(task);
 246         task->priv_flags |= VM_BACKING_STORE_PRIV;
 247         task_unlock(task);
 248         return;
 249 }
 250
 251
 252 void
 253 task_set_64bit(
 254                 task_t task,
 255                 boolean_t is64bit)
 256 {
 257 #if defined(__i386__) || defined(__x86_64__)
 258         thread_t thread;
 259 #endif /* defined(__i386__) || defined(__x86_64__) */
 260
 261         task_lock(task);
 262
 263         if (is64bit) {
 264                 if (task_has_64BitAddr(task))
 265                         goto out;
 266                 task_set_64BitAddr(task);
 267         } else {
 268                 if ( !task_has_64BitAddr(task))
 269                         goto out;
 270                 task_clear_64BitAddr(task);
 271         }
 272         /* FIXME: On x86, the thread save state flavor can diverge from the
 273          * task's 64-bit feature flag due to the 32-bit/64-bit register save
 274          * state dichotomy. Since we can be pre-empted in this interval,
 275          * certain routines may observe the thread as being in an inconsistent
 276          * state with respect to its task's 64-bitness.
 277          */
 278
 279 #if defined(__i386__) || defined(__x86_64__)
 280         queue_iterate(&task->threads, thread, thread_t, task_threads) {
 281                 thread_mtx_lock(thread);
 282                 machine_thread_switch_addrmode(thread);
 283                 thread_mtx_unlock(thread);
 284         }
 285 #endif /* defined(__i386__) || defined(__x86_64__) */
 286
 287 out:
 288         task_unlock(task);
 289 }
 290
 291
 292 void
 293 task_set_dyld_info(task_t task, mach_vm_address_t addr, mach_vm_size_t size)
 294 {
 295         task_lock(task);
 296         task->all_image_info_addr = addr;
 297         task->all_image_info_size = size;
 298         task_unlock(task);
 299 }
 300
 301 #if TASK_REFERENCE_LEAK_DEBUG
 302 #include <kern/btlog.h>
 303
/* Simple lock handed to the backtrace log as its lock context (see task_init()). */
decl_simple_lock_data(static,task_ref_lock);
/* Backtrace log of task reference operations; created in task_init(). */
static btlog_t *task_ref_btlog;
/* Operation codes recorded with each log entry. */
#define TASK_REF_OP_INCR        0x1
#define TASK_REF_OP_DECR        0x2

/* Number of backtrace frames captured per reference operation. */
#define TASK_REF_BTDEPTH        7
 310
 311 static void
 312 task_ref_lock_lock(void *context)
 313 {
 314         simple_lock((simple_lock_t)context);
 315 }
 316 static void
 317 task_ref_lock_unlock(void *context)
 318 {
 319         simple_unlock((simple_lock_t)context);
 320 }
 321
 322 void
 323 task_reference_internal(task_t task)
 324 {
 325         void *       bt[TASK_REF_BTDEPTH];
 326         int             numsaved = 0;
 327
 328         numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
 329
 330         (void)hw_atomic_add(&(task)->ref_count, 1);
 331         btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_INCR,
 332                                         bt, numsaved);
 333 }
 334
 335 uint32_t
 336 task_deallocate_internal(task_t task)
 337 {
 338         void *       bt[TASK_REF_BTDEPTH];
 339         int             numsaved = 0;
 340
 341         numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
 342
 343         btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_DECR,
 344                                         bt, numsaved);
 345         return hw_atomic_sub(&(task)->ref_count, 1);
 346 }
 347
 348 #endif /* TASK_REFERENCE_LEAK_DEBUG */
 349
 350 void
 351 task_init(void)
 352 {
 353
 354         lck_grp_attr_setdefault(&task_lck_grp_attr);
 355         lck_grp_init(&task_lck_grp, "task", &task_lck_grp_attr);
 356         lck_attr_setdefault(&task_lck_attr);
 357         lck_mtx_init(&tasks_threads_lock, &task_lck_grp, &task_lck_attr);
 358
 359         task_zone = zinit(
 360                         sizeof(struct task),
 361                         task_max * sizeof(struct task),
 362                         TASK_CHUNK * sizeof(struct task),
 363                         "tasks");
 364
 365         zone_change(task_zone, Z_NOENCRYPT, TRUE);
 366
 367         /*
 368          * Configure per-task memory limit. The boot arg takes precedence over the
 369          * device tree.
 370          */
 371         if (!PE_parse_boot_argn("max_task_pmem", &max_task_footprint,
 372                         sizeof (max_task_footprint))) {
 373                 max_task_footprint = 0;
 374         }
 375
 376         if (max_task_footprint == 0) {
 377                 /*
 378                  * No limit was found in boot-args, so go look in the device tree.
 379                  */
 380                 if (!PE_get_default("kern.max_task_pmem", &max_task_footprint,
 381                                 sizeof(max_task_footprint))) {
 382                         max_task_footprint = 0;
 383                 }
 384         }
 385
 386         if (max_task_footprint != 0) {
 387 #if CONFIG_JETSAM
 388                 if (max_task_footprint < 50) {
 389                                 printf("Warning: max_task_pmem %d below minimum.\n",
 390                                 max_task_footprint);
 391                                 max_task_footprint = 50;
 392                 }
 393                 printf("Limiting task physical memory footprint to %d MB\n",
 394                         max_task_footprint);
 395                 max_task_footprint *= 1024 * 1024; // Convert MB to bytes
 396 #else
 397                 printf("Warning: max_task_footprint specified, but jetsam not configured; ignoring.\n");
 398 #endif
 399         }
 400
 401         if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores,
 402                         sizeof (hwm_user_cores))) {
 403                 hwm_user_cores = 0;
 404         }
 405
 406         proc_init_cpumon_params();
 407
 408         if (!PE_parse_boot_argn("task_wakeups_monitor_rate", &task_wakeups_monitor_rate, sizeof (task_wakeups_monitor_rate))) {
 409                 task_wakeups_monitor_rate = TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT;
 410         }
 411
 412         if (!PE_parse_boot_argn("task_wakeups_monitor_interval", &task_wakeups_monitor_interval, sizeof (task_wakeups_monitor_interval))) {
 413                 task_wakeups_monitor_interval = TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL;
 414         }
 415
 416         if (!PE_parse_boot_argn("task_wakeups_monitor_ustackshots_trigger_pct", &task_wakeups_monitor_ustackshots_trigger_pct,
 417                 sizeof (task_wakeups_monitor_ustackshots_trigger_pct))) {
 418                 task_wakeups_monitor_ustackshots_trigger_pct = TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER;
 419         }
 420
 421         if (!PE_parse_boot_argn("disable_exc_resource", &disable_exc_resource,
 422                 sizeof (disable_exc_resource))) {
 423                 disable_exc_resource = 0;
 424         }
 425
 426         init_task_ledgers();
 427
 428 #if TASK_REFERENCE_LEAK_DEBUG
 429         simple_lock_init(&task_ref_lock, 0);
 430         task_ref_btlog = btlog_create(100000,
 431                                                                   TASK_REF_BTDEPTH,
 432                                                                   task_ref_lock_lock,
 433                                                                   task_ref_lock_unlock,
 434                                                                   &task_ref_lock);
 435         assert(task_ref_btlog);
 436 #endif
 437
 438         /*
 439          * Create the kernel task as the first task.
 440          */
 441 #ifdef __LP64__
 442         if (task_create_internal(TASK_NULL, FALSE, TRUE, &kernel_task) != KERN_SUCCESS)
 443 #else
 444         if (task_create_internal(TASK_NULL, FALSE, FALSE, &kernel_task) != KERN_SUCCESS)
 445 #endif
 446                 panic("task_init\n");
 447
 448         vm_map_deallocate(kernel_task->map);
 449         kernel_task->map = kernel_map;
 450         lck_spin_init(&dead_task_statistics_lock, &task_lck_grp, &task_lck_attr);
 451 }
 452
 453 /*
 454  * Create a task running in the kernel address space.  It may
 455  * have its own map of size mem_size and may have ipc privileges.
 456  */
 457 kern_return_t
 458 kernel_task_create(
 459         __unused task_t         parent_task,
 460         __unused vm_offset_t            map_base,
 461         __unused vm_size_t              map_size,
 462         __unused task_t         *child_task)
 463 {
 464         return (KERN_INVALID_ARGUMENT);
 465 }
 466
 467 kern_return_t
 468 task_create(
 469         task_t                          parent_task,
 470         __unused ledger_port_array_t    ledger_ports,
 471         __unused mach_msg_type_number_t num_ledger_ports,
 472         __unused boolean_t              inherit_memory,
 473         __unused task_t                 *child_task)    /* OUT */
 474 {
 475         if (parent_task == TASK_NULL)
 476                 return(KERN_INVALID_ARGUMENT);
 477
 478         /*
 479          * No longer supported: too many calls assume that a task has a valid
 480          * process attached.
 481          */
 482         return(KERN_FAILURE);
 483 }
 484
 485 kern_return_t
 486 host_security_create_task_token(
 487         host_security_t                 host_security,
 488         task_t                          parent_task,
 489         __unused security_token_t       sec_token,
 490         __unused audit_token_t          audit_token,
 491         __unused host_priv_t            host_priv,
 492         __unused ledger_port_array_t    ledger_ports,
 493         __unused mach_msg_type_number_t num_ledger_ports,
 494         __unused boolean_t              inherit_memory,
 495         __unused task_t                 *child_task)    /* OUT */
 496 {
 497         if (parent_task == TASK_NULL)
 498                 return(KERN_INVALID_ARGUMENT);
 499
 500         if (host_security == HOST_NULL)
 501                 return(KERN_INVALID_SECURITY);
 502
 503         /*
 504          * No longer supported.
 505          */
 506         return(KERN_FAILURE);
 507 }
 508
 509 /*
 510  * Task ledgers
 511  * ------------
 512  *
 513  * phys_footprint
 514  *   Physical footprint: This is the sum of:
 515  *     + phys_mem [task's resident memory]
 516  *     + phys_compressed
 517  *     + iokit_mem
 518  *
 519  * iokit_mem
 520  *   IOKit mappings: The total size of all IOKit mappings in this task [regardless of clean/dirty state].
 521  *
 522  * phys_compressed
 523  *   Physical compressed: Amount of this task's resident memory which is held by the compressor.
 524  *   Such memory is no longer actually resident for the task [i.e., resident in its pmap],
 525  *   and could be either decompressed back into memory, or paged out to storage, depending
 526  *   on our implementation.
 527  */
 528 void
 529 init_task_ledgers(void)
 530 {
 531         ledger_template_t t;
 532
 533         assert(task_ledger_template == NULL);
 534         assert(kernel_task == TASK_NULL);
 535
 536         if ((t = ledger_template_create("Per-task ledger")) == NULL)
 537                 panic("couldn't create task ledger template");
 538
 539         task_ledgers.cpu_time = ledger_entry_add(t, "cpu_time", "sched", "ns");
 540         task_ledgers.tkm_private = ledger_entry_add(t, "tkm_private",
 541             "physmem", "bytes");
 542         task_ledgers.tkm_shared = ledger_entry_add(t, "tkm_shared", "physmem",
 543             "bytes");
 544         task_ledgers.phys_mem = ledger_entry_add(t, "phys_mem", "physmem",
 545             "bytes");
 546         task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem",
 547             "bytes");
 548         task_ledgers.iokit_mem = ledger_entry_add(t, "iokit_mem", "mappings",
 549             "bytes");
 550         task_ledgers.phys_footprint = ledger_entry_add(t, "phys_footprint", "physmem",
 551             "bytes");
 552         task_ledgers.phys_compressed = ledger_entry_add(t, "phys_compressed", "physmem",
 553             "bytes");
 554         task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power",
 555             "count");
 556         task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power",
 557             "count");
 558
 559         if ((task_ledgers.cpu_time < 0) || (task_ledgers.tkm_private < 0) ||
 560             (task_ledgers.tkm_shared < 0) || (task_ledgers.phys_mem < 0) ||
 561             (task_ledgers.wired_mem < 0) || (task_ledgers.iokit_mem < 0) ||
 562             (task_ledgers.phys_footprint < 0) || (task_ledgers.phys_compressed < 0) ||
 563             (task_ledgers.platform_idle_wakeups < 0) || (task_ledgers.interrupt_wakeups < 0)) {
 564                 panic("couldn't create entries for task ledger template");
 565         }
 566
 567         ledger_track_maximum(t, task_ledgers.phys_footprint, 60);
 568
 569 #if CONFIG_JETSAM
 570         ledger_set_callback(t, task_ledgers.phys_footprint, task_footprint_exceeded, NULL, NULL);
 571 #endif
 572
 573         ledger_set_callback(t, task_ledgers.interrupt_wakeups,
 574                 task_wakeups_rate_exceeded, NULL, NULL);
 575
 576         task_ledger_template = t;
 577 }
 578
/*
 *      task_create_internal:
 *
 *      Allocate and initialize a new task.
 *
 *      parent_task     Task to inherit attributes from, or TASK_NULL (as
 *                      passed by task_init() for the kernel task).
 *      inherit_memory  If TRUE, the new map comes from vm_map_fork() of the
 *                      parent's map (parent_task must then be non-NULL);
 *                      otherwise a fresh pmap and map are created.
 *      is_64bit        Passed to pmap_create() for a fresh map; with no
 *                      parent it also selects the 64-bit address flag on
 *                      LP64 builds.
 *      child_task      OUT: receives the new task, which holds two
 *                      references (one for being alive, one for the caller).
 *
 *      Returns KERN_RESOURCE_SHORTAGE if the task structure or its ledger
 *      cannot be allocated, KERN_SUCCESS otherwise.
 */
kern_return_t
task_create_internal(
        task_t          parent_task,
        boolean_t       inherit_memory,
        boolean_t       is_64bit,
        task_t          *child_task)            /* OUT */
{
        task_t                  new_task;
        vm_shared_region_t      shared_region;
        ledger_t                ledger = NULL;

        new_task = (task_t) zalloc(task_zone);

        if (new_task == TASK_NULL)
                return(KERN_RESOURCE_SHORTAGE);

        /* one ref for just being alive; one for our caller */
        new_task->ref_count = 2;

        /* allocate with active entries */
        assert(task_ledger_template != NULL);
        if ((ledger = ledger_instantiate(task_ledger_template,
                        LEDGER_CREATE_ACTIVE_ENTRIES)) == NULL) {
                /* Nothing else has been set up yet; just give the task back. */
                zfree(task_zone, new_task);
                return(KERN_RESOURCE_SHORTAGE);
        }

        new_task->ledger = ledger;

        /* if inherit_memory is true, parent_task MUST not be NULL */
        if (inherit_memory)
                new_task->map = vm_map_fork(ledger, parent_task->map);
        else
                new_task->map = vm_map_create(pmap_create(ledger, 0, is_64bit),
                                (vm_map_offset_t)(VM_MIN_ADDRESS),
                                (vm_map_offset_t)(VM_MAX_ADDRESS), TRUE);

        /* Inherit memlock limit from parent */
        if (parent_task)
                vm_map_set_user_wire_limit(new_task->map, (vm_size_t)parent_task->map->user_wire_limit);

        /* Basic bookkeeping: lock, thread queue, counters and statistics. */
        lck_mtx_init(&new_task->lock, &task_lck_grp, &task_lck_attr);
        queue_init(&new_task->threads);
        new_task->suspend_count = 0;
        new_task->thread_count = 0;
        new_task->active_thread_count = 0;
        new_task->user_stop_count = 0;
        new_task->legacy_stop_count = 0;
        new_task->active = TRUE;
        new_task->halting = FALSE;
        new_task->user_data = NULL;
        new_task->faults = 0;
        new_task->cow_faults = 0;
        new_task->pageins = 0;
        new_task->messages_sent = 0;
        new_task->messages_received = 0;
        new_task->syscalls_mach = 0;
        new_task->priv_flags = 0;
        new_task->syscalls_unix=0;
        new_task->c_switch = new_task->p_switch = new_task->ps_switch = 0;
        new_task->t_flags = 0;
        new_task->importance = 0;

        zinfo_task_init(new_task);

#ifdef MACH_BSD
        new_task->bsd_info = NULL;
#endif /* MACH_BSD */

#if CONFIG_JETSAM
        /* Apply the global footprint limit (bytes, see task_init()) to this task's ledger. */
        if (max_task_footprint != 0) {
                ledger_set_limit(ledger, task_ledgers.phys_footprint, max_task_footprint, PHYS_FOOTPRINT_WARNING_LEVEL);
        }
#endif

        /* Enable the wakeups monitor with default parameters unless the rate tunable is zero. */
        if (task_wakeups_monitor_rate != 0) {
                uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
                int32_t  rate; // Ignored because of WAKEMON_SET_DEFAULTS
                task_wakeups_monitor_ctl(new_task, &flags, &rate);
        }

#if defined(__i386__) || defined(__x86_64__)
        new_task->i386_ldt = 0;
#endif

        new_task->task_debug = NULL;

        queue_init(&new_task->semaphore_list);
        new_task->semaphores_owned = 0;

#if CONFIG_MACF_MACH
        new_task->label = labelh_new(1);
        mac_task_label_init (&new_task->maclabel);
#endif

        ipc_task_init(new_task, parent_task);

        new_task->total_user_time = 0;
        new_task->total_system_time = 0;

        new_task->vtimers = 0;

        new_task->shared_region = NULL;

        new_task->affinity_space = NULL;

#if CONFIG_COUNTERS
        new_task->t_chud = 0U;
#endif

        new_task->pidsuspended = FALSE;
        new_task->frozen = FALSE;
        new_task->changing_freeze_state = FALSE;
        new_task->rusage_cpu_flags = 0;
        new_task->rusage_cpu_percentage = 0;
        new_task->rusage_cpu_interval = 0;
        new_task->rusage_cpu_deadline = 0;
        new_task->rusage_cpu_callt = NULL;
#if MACH_ASSERT
        new_task->suspends_outstanding = 0;
#endif


        new_task->low_mem_notified_warn = 0;
        new_task->low_mem_notified_critical = 0;
        new_task->purged_memory_warn = 0;
        new_task->purged_memory_critical = 0;
        new_task->mem_notify_reserved = 0;
#if IMPORTANCE_INHERITANCE
        new_task->imp_receiver = 0;
        new_task->imp_donor = 0;
        new_task->imp_reserved = 0;
        new_task->task_imp_assertcnt = 0;
        new_task->task_imp_externcnt = 0;
#endif /* IMPORTANCE_INHERITANCE */

#if     defined(__x86_64__)
        new_task->uexc_range_start = new_task->uexc_range_size = new_task->uexc_handler = 0;
#endif

        /* Start from the default policies; a parent may override parts below. */
        new_task->requested_policy = default_task_requested_policy;
        new_task->effective_policy = default_task_effective_policy;
        new_task->pended_policy    = default_task_pended_policy;

        if (parent_task != TASK_NULL) {
                /* Child of an existing task: copy security tokens and inheritable state. */
                new_task->sec_token = parent_task->sec_token;
                new_task->audit_token = parent_task->audit_token;

                /* inherit the parent's shared region */
                shared_region = vm_shared_region_get(parent_task);
                vm_shared_region_set(new_task, shared_region);

                if(task_has_64BitAddr(parent_task))
                        task_set_64BitAddr(new_task);
                new_task->all_image_info_addr = parent_task->all_image_info_addr;
                new_task->all_image_info_size = parent_task->all_image_info_size;

#if defined(__i386__) || defined(__x86_64__)
                if (inherit_memory && parent_task->i386_ldt)
                        new_task->i386_ldt = user_ldt_copy(parent_task->i386_ldt);
#endif
                if (inherit_memory && parent_task->affinity_space)
                        task_affinity_create(parent_task, new_task);

                /* Note: this also writes the chosen pset back into the parent's hint. */
                new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task);

#if IMPORTANCE_INHERITANCE
                new_task->imp_donor = parent_task->imp_donor;
                /* Embedded doesn't want this to inherit */
                new_task->imp_receiver = parent_task->imp_receiver;
#endif /* IMPORTANCE_INHERITANCE */

                new_task->requested_policy.t_apptype     = parent_task->requested_policy.t_apptype;

                new_task->requested_policy.int_darwinbg  = parent_task->requested_policy.int_darwinbg;
                new_task->requested_policy.ext_darwinbg  = parent_task->requested_policy.ext_darwinbg;
                new_task->requested_policy.int_iotier    = parent_task->requested_policy.int_iotier;
                new_task->requested_policy.ext_iotier    = parent_task->requested_policy.ext_iotier;
                new_task->requested_policy.int_iopassive = parent_task->requested_policy.int_iopassive;
                new_task->requested_policy.ext_iopassive = parent_task->requested_policy.ext_iopassive;
                new_task->requested_policy.bg_iotier     = parent_task->requested_policy.bg_iotier;
                new_task->requested_policy.terminated    = parent_task->requested_policy.terminated;

                task_policy_create(new_task, parent_task->requested_policy.t_boosted);
        } else {
                /* No parent: use the kernel's tokens and empty image info. */
                new_task->sec_token = KERNEL_SECURITY_TOKEN;
                new_task->audit_token = KERNEL_AUDIT_TOKEN;
#ifdef __LP64__
                if(is_64bit)
                        task_set_64BitAddr(new_task);
#endif
                new_task->all_image_info_addr = (mach_vm_address_t)0;
                new_task->all_image_info_size = (mach_vm_size_t)0;

                new_task->pset_hint = PROCESSOR_SET_NULL;
        }

        /*
         * kernel_task is still TASK_NULL only while the kernel task itself is
         * being created (see task_init()); that task gets kernel priorities.
         */
        if (kernel_task == TASK_NULL) {
                new_task->priority = BASEPRI_KERNEL;
                new_task->max_priority = MAXPRI_KERNEL;
        } else if (proc_get_effective_task_policy(new_task, TASK_POLICY_LOWPRI_CPU)) {
                new_task->priority = MAXPRI_THROTTLE;
                new_task->max_priority = MAXPRI_THROTTLE;
        } else {
                new_task->priority = BASEPRI_DEFAULT;
                new_task->max_priority = MAXPRI_USER;
        }

        bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics));
        new_task->task_timer_wakeups_bin_1 = new_task->task_timer_wakeups_bin_2 = 0;
        /* Add the task to the global task list and count it. */
        lck_mtx_lock(&tasks_threads_lock);
        queue_enter(&tasks, new_task, task_t, tasks);
        tasks_count++;
        lck_mtx_unlock(&tasks_threads_lock);

        /* Carry over the parent's backing-store privilege bit when vm_backing_store_low is set. */
        if (vm_backing_store_low && parent_task != NULL)
                new_task->priv_flags |= (parent_task->priv_flags&VM_BACKING_STORE_PRIV);

        new_task->task_volatile_objects = 0;

        ipc_task_enable(new_task);

        *child_task = new_task;
        return(KERN_SUCCESS);
}
 804
 805 /*
 806  *      task_deallocate:
 807  *
 808  *      Drop a reference on a task.
 809  */
 810 void
 811 task_deallocate(
 812         task_t          task)
 813 {
 814         ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups;
 815
 816         if (task == TASK_NULL)
 817             return;
 818
 819         if (task_deallocate_internal(task) > 0)
 820                 return;
 821
 822         lck_mtx_lock(&tasks_threads_lock);
 823         queue_remove(&terminated_tasks, task, task_t, tasks);
 824         terminated_tasks_count--;
 825         lck_mtx_unlock(&tasks_threads_lock);
 826
 827         /*
 828          *      Give the machine dependent code a chance
 829          *      to perform cleanup before ripping apart
 830          *      the task.
 831          */
 832         machine_task_terminate(task);
 833
 834         ipc_task_terminate(task);
 835
 836         if (task->affinity_space)
 837                 task_affinity_deallocate(task);
 838
 839         vm_map_deallocate(task->map);
 840         is_release(task->itk_space);
 841
 842         ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
 843                            &interrupt_wakeups, &debit);
 844         ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
 845                            &platform_idle_wakeups, &debit);
 846
 847         /* Accumulate statistics for dead tasks */
 848         lck_spin_lock(&dead_task_statistics_lock);
 849         dead_task_statistics.total_user_time += task->total_user_time;
 850         dead_task_statistics.total_system_time += task->total_system_time;
 851
 852         dead_task_statistics.task_interrupt_wakeups += interrupt_wakeups;
 853         dead_task_statistics.task_platform_idle_wakeups += platform_idle_wakeups;
 854
 855         dead_task_statistics.task_timer_wakeups_bin_1 += task->task_timer_wakeups_bin_1;
 856         dead_task_statistics.task_timer_wakeups_bin_2 += task->task_timer_wakeups_bin_2;
 857
 858         lck_spin_unlock(&dead_task_statistics_lock);
 859         lck_mtx_destroy(&task->lock, &task_lck_grp);
 860
 861 #if CONFIG_MACF_MACH
 862         labelh_release(task->label);
 863 #endif
 864
 865         if (!ledger_get_entries(task->ledger, task_ledgers.tkm_private, &credit,
 866             &debit)) {
 867                 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc);
 868                 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_private.free);
 869         }
 870         if (!ledger_get_entries(task->ledger, task_ledgers.tkm_shared, &credit,
 871             &debit)) {
 872                 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_shared.alloc);
 873                 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_shared.free);
 874         }
 875         ledger_dereference(task->ledger);
 876         zinfo_task_free(task);
 877
 878 #if TASK_REFERENCE_LEAK_DEBUG
 879         btlog_remove_entries_for_element(task_ref_btlog, task);
 880 #endif
 881
 882         if (task->task_volatile_objects) {
 883                 /*
 884                  * This task still "owns" some volatile VM objects.
 885                  * Disown them now to avoid leaving them pointing back at
 886                  * an invalid task.
 887                  */
 888                 vm_purgeable_disown(task);
 889                 assert(task->task_volatile_objects == 0);
 890         }
 891
 892         zfree(task_zone, task);
 893 }
 894
 895 /*
 896  *      task_name_deallocate:
 897  *
 898  *      Drop a reference on a task name.
 899  */
 900 void
 901 task_name_deallocate(
 902         task_name_t             task_name)
 903 {
 904         return(task_deallocate((task_t)task_name));
 905 }
 906
 907 /*
 908  *      task_suspension_token_deallocate:
 909  *
 910  *      Drop a reference on a task suspension token.
 911  */
 912 void
 913 task_suspension_token_deallocate(
 914         task_suspension_token_t         token)
 915 {
 916         return(task_deallocate((task_t)token));
 917 }
 918
 919 /*
 920  *      task_terminate:
 921  *
 922  *      Terminate the specified task.  See comments on thread_terminate
 923  *      (kern/thread.c) about problems with terminating the "current task."
 924  */
 925
 926 kern_return_t
 927 task_terminate(
 928         task_t          task)
 929 {
 930         if (task == TASK_NULL)
 931                 return (KERN_INVALID_ARGUMENT);
 932
 933         if (task->bsd_info)
 934                 return (KERN_FAILURE);
 935
 936         return (task_terminate_internal(task));
 937 }
 938
 939 kern_return_t
 940 task_terminate_internal(
 941         task_t                  task)
 942 {
 943         thread_t                        thread, self;
 944         task_t                          self_task;
 945         boolean_t                       interrupt_save;
 946
 947         assert(task != kernel_task);
 948
 949         self = current_thread();
 950         self_task = self->task;
 951
 952         /*
 953          *      Get the task locked and make sure that we are not racing
 954          *      with someone else trying to terminate us.
 955          */
 956         if (task == self_task)
 957                 task_lock(task);
 958         else
 959         if (task < self_task) {
 960                 task_lock(task);
 961                 task_lock(self_task);
 962         }
 963         else {
 964                 task_lock(self_task);
 965                 task_lock(task);
 966         }
 967
 968         if (!task->active) {
 969                 /*
 970                  *      Task is already being terminated.
 971                  *      Just return an error. If we are dying, this will
 972                  *      just get us to our AST special handler and that
 973                  *      will get us to finalize the termination of ourselves.
 974                  */
 975                 task_unlock(task);
 976                 if (self_task != task)
 977                         task_unlock(self_task);
 978
 979                 return (KERN_FAILURE);
 980         }
 981
 982 #if MACH_ASSERT
 983         if (task->suspends_outstanding != 0) {
 984                 printf("WARNING: %s (%d) exiting with %d outstanding suspensions\n",
 985                         proc_name_address(task->bsd_info), proc_pid(task->bsd_info),
 986                         task->suspends_outstanding);
 987         }
 988 #endif
 989
 990         if (self_task != task)
 991                 task_unlock(self_task);
 992
 993         /*
 994          * Make sure the current thread does not get aborted out of
 995          * the waits inside these operations.
 996          */
 997         interrupt_save = thread_interrupt_level(THREAD_UNINT);
 998
 999         /*
1000          *      Indicate that we want all the threads to stop executing
1001          *      at user space by holding the task (we would have held
1002          *      each thread independently in thread_terminate_internal -
1003          *      but this way we may be more likely to already find it
1004          *      held there).  Mark the task inactive, and prevent
1005          *      further task operations via the task port.
1006          */
1007         task_hold_locked(task);
1008         task->active = FALSE;
1009         ipc_task_disable(task);
1010
1011 #if CONFIG_TELEMETRY
1012         /*
1013          * Notify telemetry that this task is going away.
1014          */
1015         telemetry_task_ctl_locked(task, TF_TELEMETRY, 0);
1016 #endif
1017
1018         /*
1019          *      Terminate each thread in the task.
1020          */
1021         queue_iterate(&task->threads, thread, thread_t, task_threads) {
1022                         thread_terminate_internal(thread);
1023         }
1024
1025         task_unlock(task);
1026
1027
1028         /*
1029          *      Destroy all synchronizers owned by the task.
1030          */
1031         task_synchronizer_destroy_all(task);
1032
1033         /*
1034          *      Destroy the IPC space, leaving just a reference for it.
1035          */
1036         ipc_space_terminate(task->itk_space);
1037
1038         if (vm_map_has_4GB_pagezero(task->map))
1039                 vm_map_clear_4GB_pagezero(task->map);
1040
1041         /*
1042          * If the current thread is a member of the task
1043          * being terminated, then the last reference to
1044          * the task will not be dropped until the thread
1045          * is finally reaped.  To avoid incurring the
1046          * expense of removing the address space regions
1047          * at reap time, we do it explictly here.
1048          */
1049         vm_map_remove(task->map,
1050                       task->map->min_offset,
1051                       task->map->max_offset,
1052                       VM_MAP_NO_FLAGS);
1053
1054         /* release our shared region */
1055         vm_shared_region_set(task, NULL);
1056
1057         lck_mtx_lock(&tasks_threads_lock);
1058         queue_remove(&tasks, task, task_t, tasks);
1059         queue_enter(&terminated_tasks, task, task_t, tasks);
1060         tasks_count--;
1061         terminated_tasks_count++;
1062         lck_mtx_unlock(&tasks_threads_lock);
1063
1064         /*
1065          * We no longer need to guard against being aborted, so restore
1066          * the previous interruptible state.
1067          */
1068         thread_interrupt_level(interrupt_save);
1069
1070         /*
1071          * Get rid of the task active reference on itself.
1072          */
1073         task_deallocate(task);
1074
1075         return (KERN_SUCCESS);
1076 }
1077
1078 /*
1079  * task_start_halt:
1080  *
1081  *      Shut the current task down (except for the current thread) in
1082  *      preparation for dramatic changes to the task (probably exec).
1083  *      We hold the task and mark all other threads in the task for
1084  *      termination.
1085  */
1086 kern_return_t
1087 task_start_halt(
1088         task_t          task)
1089 {
1090         thread_t        thread, self;
1091
1092         assert(task != kernel_task);
1093
1094         self = current_thread();
1095
1096         if (task != self->task)
1097                 return (KERN_INVALID_ARGUMENT);
1098
1099         task_lock(task);
1100
1101         if (task->halting || !task->active || !self->active) {
1102                 /*
1103                  *      Task or current thread is already being terminated.
1104                  *      Hurry up and return out of the current kernel context
1105                  *      so that we run our AST special handler to terminate
1106                  *      ourselves.
1107                  */
1108                 task_unlock(task);
1109
1110                 return (KERN_FAILURE);
1111         }
1112
1113         task->halting = TRUE;
1114
1115         if (task->thread_count > 1) {
1116
1117                 /*
1118                  * Mark all the threads to keep them from starting any more
1119                  * user-level execution.  The thread_terminate_internal code
1120                  * would do this on a thread by thread basis anyway, but this
1121                  * gives us a better chance of not having to wait there.
1122                  */
1123                 task_hold_locked(task);
1124
1125                 /*
1126                  *      Terminate all the other threads in the task.
1127                  */
1128                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1129                         if (thread != self)
1130                                 thread_terminate_internal(thread);
1131                 }
1132
1133                 task_release_locked(task);
1134         }
1135         task_unlock(task);
1136         return KERN_SUCCESS;
1137 }
1138
1139
1140 /*
1141  * task_complete_halt:
1142  *
1143  *      Complete task halt by waiting for threads to terminate, then clean
1144  *      up task resources (VM, port namespace, etc...) and then let the
1145  *      current thread go in the (practically empty) task context.
1146  */
1147 void
1148 task_complete_halt(task_t task)
1149 {
1150         task_lock(task);
1151         assert(task->halting);
1152         assert(task == current_task());
1153
1154         /*
1155          *      Wait for the other threads to get shut down.
1156          *      When the last other thread is reaped, we'll be
1157          *      woken up.
1158          */
1159         if (task->thread_count > 1) {
1160                 assert_wait((event_t)&task->halting, THREAD_UNINT);
1161                 task_unlock(task);
1162                 thread_block(THREAD_CONTINUE_NULL);
1163         } else {
1164                 task_unlock(task);
1165         }
1166
1167         /*
1168          *      Give the machine dependent code a chance
1169          *      to perform cleanup of task-level resources
1170          *      associated with the current thread before
1171          *      ripping apart the task.
1172          */
1173         machine_task_terminate(task);
1174
1175         /*
1176          *      Destroy all synchronizers owned by the task.
1177          */
1178         task_synchronizer_destroy_all(task);
1179
1180         /*
1181          *      Destroy the contents of the IPC space, leaving just
1182          *      a reference for it.
1183          */
1184         ipc_space_clean(task->itk_space);
1185
1186         /*
1187          * Clean out the address space, as we are going to be
1188          * getting a new one.
1189          */
1190         vm_map_remove(task->map, task->map->min_offset,
1191                       task->map->max_offset, VM_MAP_NO_FLAGS);
1192
1193         task->halting = FALSE;
1194 }
1195
1196 /*
1197  *      task_hold_locked:
1198  *
1199  *      Suspend execution of the specified task.
1200  *      This is a recursive-style suspension of the task, a count of
1201  *      suspends is maintained.
1202  *
1203  *      CONDITIONS: the task is locked and active.
1204  */
1205 void
1206 task_hold_locked(
1207         register task_t         task)
1208 {
1209         register thread_t       thread;
1210
1211         assert(task->active);
1212
1213         if (task->suspend_count++ > 0)
1214                 return;
1215
1216         /*
1217          *      Iterate through all the threads and hold them.
1218          */
1219         queue_iterate(&task->threads, thread, thread_t, task_threads) {
1220                 thread_mtx_lock(thread);
1221                 thread_hold(thread);
1222                 thread_mtx_unlock(thread);
1223         }
1224 }
1225
1226 /*
1227  *      task_hold:
1228  *
1229  *      Same as the internal routine above, except that is must lock
1230  *      and verify that the task is active.  This differs from task_suspend
1231  *      in that it places a kernel hold on the task rather than just a
1232  *      user-level hold.  This keeps users from over resuming and setting
1233  *      it running out from under the kernel.
1234  *
1235  *      CONDITIONS: the caller holds a reference on the task
1236  */
1237 kern_return_t
1238 task_hold(
1239         register task_t         task)
1240 {
1241         if (task == TASK_NULL)
1242                 return (KERN_INVALID_ARGUMENT);
1243
1244         task_lock(task);
1245
1246         if (!task->active) {
1247                 task_unlock(task);
1248
1249                 return (KERN_FAILURE);
1250         }
1251
1252         task_hold_locked(task);
1253         task_unlock(task);
1254
1255         return (KERN_SUCCESS);
1256 }
1257
1258 kern_return_t
1259 task_wait(
1260                 task_t          task,
1261                 boolean_t       until_not_runnable)
1262 {
1263         if (task == TASK_NULL)
1264                 return (KERN_INVALID_ARGUMENT);
1265
1266         task_lock(task);
1267
1268         if (!task->active) {
1269                 task_unlock(task);
1270
1271                 return (KERN_FAILURE);
1272         }
1273
1274         task_wait_locked(task, until_not_runnable);
1275         task_unlock(task);
1276
1277         return (KERN_SUCCESS);
1278 }
1279
1280 /*
1281  *      task_wait_locked:
1282  *
1283  *      Wait for all threads in task to stop.
1284  *
1285  * Conditions:
1286  *      Called with task locked, active, and held.
1287  */
1288 void
1289 task_wait_locked(
1290         register task_t         task,
1291         boolean_t               until_not_runnable)
1292 {
1293         register thread_t       thread, self;
1294
1295         assert(task->active);
1296         assert(task->suspend_count > 0);
1297
1298         self = current_thread();
1299
1300         /*
1301          *      Iterate through all the threads and wait for them to
1302          *      stop.  Do not wait for the current thread if it is within
1303          *      the task.
1304          */
1305         queue_iterate(&task->threads, thread, thread_t, task_threads) {
1306                 if (thread != self)
1307                         thread_wait(thread, until_not_runnable);
1308         }
1309 }
1310
1311 /*
1312  *      task_release_locked:
1313  *
1314  *      Release a kernel hold on a task.
1315  *
1316  *      CONDITIONS: the task is locked and active
1317  */
1318 void
1319 task_release_locked(
1320         register task_t         task)
1321 {
1322         register thread_t       thread;
1323
1324         assert(task->active);
1325         assert(task->suspend_count > 0);
1326
1327         if (--task->suspend_count > 0)
1328                 return;
1329
1330         queue_iterate(&task->threads, thread, thread_t, task_threads) {
1331                 thread_mtx_lock(thread);
1332                 thread_release(thread);
1333                 thread_mtx_unlock(thread);
1334         }
1335 }
1336
1337 /*
1338  *      task_release:
1339  *
1340  *      Same as the internal routine above, except that it must lock
1341  *      and verify that the task is active.
1342  *
1343  *      CONDITIONS: The caller holds a reference to the task
1344  */
1345 kern_return_t
1346 task_release(
1347         task_t          task)
1348 {
1349         if (task == TASK_NULL)
1350                 return (KERN_INVALID_ARGUMENT);
1351
1352         task_lock(task);
1353
1354         if (!task->active) {
1355                 task_unlock(task);
1356
1357                 return (KERN_FAILURE);
1358         }
1359
1360         task_release_locked(task);
1361         task_unlock(task);
1362
1363         return (KERN_SUCCESS);
1364 }
1365
1366 kern_return_t
1367 task_threads(
1368         task_t                                  task,
1369         thread_act_array_t              *threads_out,
1370         mach_msg_type_number_t  *count)
1371 {
1372         mach_msg_type_number_t  actual;
1373         thread_t                                *thread_list;
1374         thread_t                                thread;
1375         vm_size_t                               size, size_needed;
1376         void                                    *addr;
1377         unsigned int                    i, j;
1378
1379         if (task == TASK_NULL)
1380                 return (KERN_INVALID_ARGUMENT);
1381
1382         size = 0; addr = NULL;
1383
1384         for (;;) {
1385                 task_lock(task);
1386                 if (!task->active) {
1387                         task_unlock(task);
1388
1389                         if (size != 0)
1390                                 kfree(addr, size);
1391
1392                         return (KERN_FAILURE);
1393                 }
1394
1395                 actual = task->thread_count;
1396
1397                 /* do we have the memory we need? */
1398                 size_needed = actual * sizeof (mach_port_t);
1399                 if (size_needed <= size)
1400                         break;
1401
1402                 /* unlock the task and allocate more memory */
1403                 task_unlock(task);
1404
1405                 if (size != 0)
1406                         kfree(addr, size);
1407
1408                 assert(size_needed > 0);
1409                 size = size_needed;
1410
1411                 addr = kalloc(size);
1412                 if (addr == 0)
1413                         return (KERN_RESOURCE_SHORTAGE);
1414         }
1415
1416         /* OK, have memory and the task is locked & active */
1417         thread_list = (thread_t *)addr;
1418
1419         i = j = 0;
1420
1421         for (thread = (thread_t)queue_first(&task->threads); i < actual;
1422                                 ++i, thread = (thread_t)queue_next(&thread->task_threads)) {
1423                 thread_reference_internal(thread);
1424                 thread_list[j++] = thread;
1425         }
1426
1427         assert(queue_end(&task->threads, (queue_entry_t)thread));
1428
1429         actual = j;
1430         size_needed = actual * sizeof (mach_port_t);
1431
1432         /* can unlock task now that we've got the thread refs */
1433         task_unlock(task);
1434
1435         if (actual == 0) {
1436                 /* no threads, so return null pointer and deallocate memory */
1437
1438                 *threads_out = NULL;
1439                 *count = 0;
1440
1441                 if (size != 0)
1442                         kfree(addr, size);
1443         }
1444         else {
1445                 /* if we allocated too much, must copy */
1446
1447                 if (size_needed < size) {
1448                         void *newaddr;
1449
1450                         newaddr = kalloc(size_needed);
1451                         if (newaddr == 0) {
1452                                 for (i = 0; i < actual; ++i)
1453                                         thread_deallocate(thread_list[i]);
1454                                 kfree(addr, size);
1455                                 return (KERN_RESOURCE_SHORTAGE);
1456                         }
1457
1458                         bcopy(addr, newaddr, size_needed);
1459                         kfree(addr, size);
1460                         thread_list = (thread_t *)newaddr;
1461                 }
1462
1463                 *threads_out = thread_list;
1464                 *count = actual;
1465
1466                 /* do the conversion that Mig should handle */
1467
1468                 for (i = 0; i < actual; ++i)
1469                         ((ipc_port_t *) thread_list)[i] = convert_thread_to_port(thread_list[i]);
1470         }
1471
1472         return (KERN_SUCCESS);
1473 }
1474
/*
 * Hold modes passed to place_task_hold() and release_task_hold().
 */
/* Plain hold; no extra bookkeeping beyond user_stop_count. */
#define TASK_HOLD_NORMAL		0
/* On release: requires task->pidsuspended to be set, and clears it. */
#define TASK_HOLD_PIDSUSPEND		1
/* Legacy hold; additionally tracked in task->legacy_stop_count. */
#define TASK_HOLD_LEGACY		2
/* On release: drops every outstanding legacy hold in one call. */
#define TASK_HOLD_LEGACY_ALL		3
1479
1480 static kern_return_t
1481 place_task_hold    (
1482         register task_t task,
1483         int mode)
1484 {
1485         if (!task->active) {
1486                 return (KERN_FAILURE);
1487         }
1488
1489         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1490             MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_SUSPEND) | DBG_FUNC_NONE,
1491             proc_pid(task->bsd_info), ((thread_t)queue_first(&task->threads))->thread_id,
1492             task->user_stop_count, task->user_stop_count + 1, 0);
1493
1494 #if MACH_ASSERT
1495         current_task()->suspends_outstanding++;
1496 #endif
1497
1498         if (mode == TASK_HOLD_LEGACY)
1499                 task->legacy_stop_count++;
1500
1501         if (task->user_stop_count++ > 0) {
1502                 /*
1503                  *      If the stop count was positive, the task is
1504                  *      already stopped and we can exit.
1505                  */
1506                 return (KERN_SUCCESS);
1507         }
1508
1509         /*
1510          * Put a kernel-level hold on the threads in the task (all
1511          * user-level task suspensions added together represent a
1512          * single kernel-level hold).  We then wait for the threads
1513          * to stop executing user code.
1514          */
1515         task_hold_locked(task);
1516         task_wait_locked(task, FALSE);
1517
1518         return (KERN_SUCCESS);
1519 }
1520
1521 static kern_return_t
1522 release_task_hold    (
1523         register task_t         task,
1524         int                     mode)
1525 {
1526         register boolean_t release = FALSE;
1527
1528         if (!task->active) {
1529                 return (KERN_FAILURE);
1530         }
1531
1532         if (mode == TASK_HOLD_PIDSUSPEND) {
1533             if (task->pidsuspended == FALSE) {
1534                     return (KERN_FAILURE);
1535             }
1536             task->pidsuspended = FALSE;
1537         }
1538
1539         if (task->user_stop_count > (task->pidsuspended ? 1 : 0)) {
1540
1541                 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1542                     MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_RESUME) | DBG_FUNC_NONE,
1543                     proc_pid(task->bsd_info), ((thread_t)queue_first(&task->threads))->thread_id,
1544                     task->user_stop_count, mode, task->legacy_stop_count);
1545
1546 #if MACH_ASSERT
1547                 /*
1548                  * This is obviously not robust; if we suspend one task and then resume a different one,
1549                  * we'll fly under the radar. This is only meant to catch the common case of a crashed
1550                  * or buggy suspender.
1551                  */
1552                 current_task()->suspends_outstanding--;
1553 #endif
1554
1555                 if (mode == TASK_HOLD_LEGACY_ALL) {
1556                         if (task->legacy_stop_count >= task->user_stop_count) {
1557                                 task->user_stop_count = 0;
1558                                 release = TRUE;
1559                         } else {
1560                                 task->user_stop_count -= task->legacy_stop_count;
1561                         }
1562                         task->legacy_stop_count = 0;
1563                 } else {
1564                         if (mode == TASK_HOLD_LEGACY && task->legacy_stop_count > 0)
1565                                 task->legacy_stop_count--;
1566                         if (--task->user_stop_count == 0)
1567                                 release = TRUE;
1568                 }
1569         }
1570         else {
1571                 return (KERN_FAILURE);
1572         }
1573
1574         /*
1575          *      Release the task if necessary.
1576          */
1577         if (release)
1578                 task_release_locked(task);
1579
1580     return (KERN_SUCCESS);
1581 }
1582
1583
1584 /*
1585  *      task_suspend:
1586  *
1587  *      Implement an (old-fashioned) user-level suspension on a task.
1588  *
1589  *      Because the user isn't expecting to have to manage a suspension
1590  *      token, we'll track it for him in the kernel in the form of a naked
1591  *      send right to the task's resume port.  All such send rights
1592  *      account for a single suspension against the task (unlike task_suspend2()
1593  *      where each caller gets a unique suspension count represented by a
1594  *      unique send-once right).
1595  *
1596  * Conditions:
1597  *      The caller holds a reference to the task
1598  */
1599 kern_return_t
1600 task_suspend(
1601         register task_t         task)
1602 {
1603         kern_return_t                   kr;
1604         mach_port_t                     port, send, old_notify;
1605         mach_port_name_t                name;
1606
1607         if (task == TASK_NULL || task == kernel_task)
1608                 return (KERN_INVALID_ARGUMENT);
1609
1610         task_lock(task);
1611
1612         /*
1613          * Claim a send right on the task resume port, and request a no-senders
1614          * notification on that port (if none outstanding).
1615          */
1616         if (task->itk_resume == IP_NULL) {
1617                 task->itk_resume = ipc_port_alloc_kernel();
1618                 if (!IP_VALID(task->itk_resume))
1619                         panic("failed to create resume port");
1620                 ipc_kobject_set(task->itk_resume, (ipc_kobject_t)task, IKOT_TASK_RESUME);
1621         }
1622
1623         port = task->itk_resume;
1624         ip_lock(port);
1625         assert(ip_active(port));
1626
1627         send = ipc_port_make_send_locked(port);
1628         assert(IP_VALID(send));
1629
1630         if (port->ip_nsrequest == IP_NULL) {
1631                 ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
1632                 assert(old_notify == IP_NULL);
1633                 /* port unlocked */
1634         } else {
1635                 ip_unlock(port);
1636         }
1637
1638         /*
1639          * place a legacy hold on the task.
1640          */
1641         kr = place_task_hold(task, TASK_HOLD_LEGACY);
1642         if (kr != KERN_SUCCESS) {
1643                 task_unlock(task);
1644                 ipc_port_release_send(send);
1645                 return kr;
1646         }
1647
1648         task_unlock(task);
1649
1650         /*
1651          * Copyout the send right into the calling task's IPC space.  It won't know it is there,
1652          * but we'll look it up when calling a traditional resume.  Any IPC operations that
1653          * deallocate the send right will auto-release the suspension.
1654          */
1655         if ((kr = ipc_kmsg_copyout_object(current_task()->itk_space, (ipc_object_t)send,
1656                 MACH_MSG_TYPE_MOVE_SEND, &name)) != KERN_SUCCESS) {
1657                 printf("warning: %s(%d) failed to copyout suspension token for task %s(%d) with error: %d\n",
1658                         proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
1659                         proc_name_address(task->bsd_info), proc_pid(task->bsd_info), kr);
1660                 return (kr);
1661         }
1662
1663         return (kr);
1664 }
1665
1666 /*
1667  *      task_resume:
1668  *              Release a user hold on a task.
1669  *
1670  * Conditions:
1671  *              The caller holds a reference to the task
1672  */
1673 kern_return_t
1674 task_resume(
1675         register task_t task)
1676 {
1677         kern_return_t    kr;
1678         mach_port_name_t resume_port_name;
1679         ipc_entry_t              resume_port_entry;
1680         ipc_space_t              space = current_task()->itk_space;
1681
1682         if (task == TASK_NULL || task == kernel_task )
1683                 return (KERN_INVALID_ARGUMENT);
1684
1685         /* release a legacy task hold */
1686         task_lock(task);
1687         kr = release_task_hold(task, TASK_HOLD_LEGACY);
1688         task_unlock(task);
1689
1690         is_write_lock(space);
1691         if (is_active(space) && IP_VALID(task->itk_resume) &&
1692             ipc_hash_lookup(space, (ipc_object_t)task->itk_resume, &resume_port_name, &resume_port_entry) == TRUE) {
1693                 /*
1694                  * We found a suspension token in the caller's IPC space. Release a send right to indicate that
1695                  * we are holding one less legacy hold on the task from this caller.  If the release failed,
1696                  * go ahead and drop all the rights, as someone either already released our holds or the task
1697                  * is gone.
1698                  */
1699                 if (kr == KERN_SUCCESS)
1700                         ipc_right_dealloc(space, resume_port_name, resume_port_entry);
1701                 else
1702                         ipc_right_destroy(space, resume_port_name, resume_port_entry, FALSE, 0);
1703                 /* space unlocked */
1704         } else {
1705                 is_write_unlock(space);
1706                 if (kr == KERN_SUCCESS)
1707                         printf("warning: %s(%d) performed out-of-band resume on %s(%d)\n",
1708                                proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
1709                                proc_name_address(task->bsd_info), proc_pid(task->bsd_info));
1710         }
1711
1712         return kr;
1713 }
1714
1715 /*
1716  * Suspend the target task.
1717  * Making/holding a token/reference/port is the callers responsibility.
1718  */
1719 kern_return_t
1720 task_suspend_internal(task_t task)
1721 {
1722         kern_return_t    kr;
1723
1724         if (task == TASK_NULL || task == kernel_task)
1725                 return (KERN_INVALID_ARGUMENT);
1726
1727         task_lock(task);
1728         kr = place_task_hold(task, TASK_HOLD_NORMAL);
1729         task_unlock(task);
1730         return (kr);
1731 }
1732
1733 /*
1734  * Suspend the target task, and return a suspension token. The token
1735  * represents a reference on the suspended task.
1736  */
1737 kern_return_t
1738 task_suspend2(
1739         register task_t                 task,
1740         task_suspension_token_t *suspend_token)
1741 {
1742         kern_return_t    kr;
1743
1744         kr = task_suspend_internal(task);
1745         if (kr != KERN_SUCCESS) {
1746                 *suspend_token = TASK_NULL;
1747                 return (kr);
1748         }
1749
1750         /*
1751          * Take a reference on the target task and return that to the caller
1752          * as a "suspension token," which can be converted into an SO right to
1753          * the now-suspended task's resume port.
1754          */
1755         task_reference_internal(task);
1756         *suspend_token = task;
1757
1758         return (KERN_SUCCESS);
1759 }
1760
1761 /*
1762  * Resume the task
1763  * (reference/token/port management is caller's responsibility).
1764  */
1765 kern_return_t
1766 task_resume_internal(
1767         register task_suspension_token_t                task)
1768 {
1769         kern_return_t kr;
1770
1771         if (task == TASK_NULL || task == kernel_task)
1772                 return (KERN_INVALID_ARGUMENT);
1773
1774         task_lock(task);
1775         kr = release_task_hold(task, TASK_HOLD_NORMAL);
1776         task_unlock(task);
1777         return (kr);
1778 }
1779
1780 /*
1781  * Resume the task using a suspension token. Consumes the token's ref.
1782  */
1783 kern_return_t
1784 task_resume2(
1785         register task_suspension_token_t                task)
1786 {
1787         kern_return_t kr;
1788
1789         kr = task_resume_internal(task);
1790         task_suspension_token_deallocate(task);
1791
1792         return (kr);
1793 }
1794
1795 boolean_t
1796 task_suspension_notify(mach_msg_header_t *request_header)
1797 {
1798         ipc_port_t port = (ipc_port_t) request_header->msgh_remote_port;
1799         task_t task = convert_port_to_task_suspension_token(port);
1800         mach_msg_type_number_t not_count;
1801
1802         if (task == TASK_NULL || task == kernel_task)
1803                 return TRUE;  /* nothing to do */
1804
1805         switch (request_header->msgh_id) {
1806
1807         case MACH_NOTIFY_SEND_ONCE:
1808                 /* release the hold held by this specific send-once right */
1809                 task_lock(task);
1810                 release_task_hold(task, TASK_HOLD_NORMAL);
1811                 task_unlock(task);
1812                 break;
1813
1814         case MACH_NOTIFY_NO_SENDERS:
1815                 not_count = ((mach_no_senders_notification_t *)request_header)->not_count;
1816
1817                 task_lock(task);
1818                 ip_lock(port);
1819                 if (port->ip_mscount == not_count) {
1820
1821                         /* release all the [remaining] outstanding legacy holds */
1822                         assert(port->ip_nsrequest == IP_NULL);
1823                         ip_unlock(port);
1824                         release_task_hold(task, TASK_HOLD_LEGACY_ALL);
1825                         task_unlock(task);
1826
1827                 } else if (port->ip_nsrequest == IP_NULL) {
1828                         ipc_port_t old_notify;
1829
1830                         task_unlock(task);
1831                         /* new send rights, re-arm notification at current make-send count */
1832                         ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
1833                         assert(old_notify == IP_NULL);
1834                         /* port unlocked */
1835                 } else {
1836                         ip_unlock(port);
1837                         task_unlock(task);
1838                 }
1839                 break;
1840
1841         default:
1842                 break;
1843         }
1844
1845         task_suspension_token_deallocate(task); /* drop token reference */
1846         return TRUE;
1847 }
1848
1849 kern_return_t
1850 task_pidsuspend_locked(task_t task)
1851 {
1852         kern_return_t kr;
1853
1854         if (task->pidsuspended) {
1855                 kr = KERN_FAILURE;
1856                 goto out;
1857         }
1858
1859         task->pidsuspended = TRUE;
1860
1861         kr = place_task_hold(task, TASK_HOLD_PIDSUSPEND);
1862         if (kr != KERN_SUCCESS) {
1863                 task->pidsuspended = FALSE;
1864         }
1865 out:
1866         return(kr);
1867 }
1868
1869
1870 /*
1871  *      task_pidsuspend:
1872  *
1873  *      Suspends a task by placing a hold on its threads.
1874  *
1875  * Conditions:
1876  *      The caller holds a reference to the task
1877  */
1878 kern_return_t
1879 task_pidsuspend(
1880         register task_t         task)
1881 {
1882         kern_return_t    kr;
1883
1884         if (task == TASK_NULL || task == kernel_task)
1885                 return (KERN_INVALID_ARGUMENT);
1886
1887         task_lock(task);
1888
1889         kr = task_pidsuspend_locked(task);
1890
1891         task_unlock(task);
1892
1893         return (kr);
1894 }
1895
1896 /* If enabled, we bring all the frozen pages back in prior to resumption; otherwise, they're faulted back in on demand */
1897 #define THAW_ON_RESUME 1
1898
1899 /*
1900  *      task_pidresume:
1901  *              Resumes a previously suspended task.
1902  *
1903  * Conditions:
1904  *              The caller holds a reference to the task
1905  */
1906 kern_return_t
1907 task_pidresume(
1908         register task_t task)
1909 {
1910         kern_return_t    kr;
1911
1912         if (task == TASK_NULL || task == kernel_task)
1913                 return (KERN_INVALID_ARGUMENT);
1914
1915         task_lock(task);
1916
1917 #if (CONFIG_FREEZE && THAW_ON_RESUME)
1918
1919         while (task->changing_freeze_state) {
1920
1921                 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
1922                 task_unlock(task);
1923                 thread_block(THREAD_CONTINUE_NULL);
1924
1925                 task_lock(task);
1926         }
1927         task->changing_freeze_state = TRUE;
1928 #endif
1929
1930         kr = release_task_hold(task, TASK_HOLD_PIDSUSPEND);
1931
1932         task_unlock(task);
1933
1934 #if (CONFIG_FREEZE && THAW_ON_RESUME)
1935         if ((kr == KERN_SUCCESS) && (task->frozen == TRUE)) {
1936
1937                 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
1938
1939                         kr = KERN_SUCCESS;
1940                 } else {
1941
1942                         kr = vm_map_thaw(task->map);
1943                 }
1944         }
1945         task_lock(task);
1946
1947         if (kr == KERN_SUCCESS)
1948                 task->frozen = FALSE;
1949         task->changing_freeze_state = FALSE;
1950         thread_wakeup(&task->changing_freeze_state);
1951
1952         task_unlock(task);
1953 #endif
1954
1955         return (kr);
1956 }
1957
1958 #if CONFIG_FREEZE
1959
1960 /*
1961  *      task_freeze:
1962  *
1963  *      Freeze a task.
1964  *
1965  * Conditions:
1966  *      The caller holds a reference to the task
1967  */
1968 kern_return_t
1969 task_freeze(
1970         register task_t    task,
1971         uint32_t           *purgeable_count,
1972         uint32_t           *wired_count,
1973         uint32_t           *clean_count,
1974         uint32_t           *dirty_count,
1975         uint32_t           dirty_budget,
1976         boolean_t          *shared,
1977         boolean_t          walk_only)
1978 {
1979         kern_return_t kr;
1980
1981         if (task == TASK_NULL || task == kernel_task)
1982                 return (KERN_INVALID_ARGUMENT);
1983
1984         task_lock(task);
1985
1986         while (task->changing_freeze_state) {
1987
1988                 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
1989                 task_unlock(task);
1990                 thread_block(THREAD_CONTINUE_NULL);
1991
1992                 task_lock(task);
1993         }
1994         if (task->frozen) {
1995                 task_unlock(task);
1996                 return (KERN_FAILURE);
1997         }
1998         task->changing_freeze_state = TRUE;
1999
2000         task_unlock(task);
2001
2002         if (walk_only) {
2003                 kr = vm_map_freeze_walk(task->map, purgeable_count, wired_count, clean_count, dirty_count, dirty_budget, shared);
2004         } else {
2005                 kr = vm_map_freeze(task->map, purgeable_count, wired_count, clean_count, dirty_count, dirty_budget, shared);
2006         }
2007
2008         task_lock(task);
2009
2010         if (walk_only == FALSE && kr == KERN_SUCCESS)
2011                 task->frozen = TRUE;
2012         task->changing_freeze_state = FALSE;
2013         thread_wakeup(&task->changing_freeze_state);
2014
2015         task_unlock(task);
2016
2017         return (kr);
2018 }
2019
2020 /*
2021  *      task_thaw:
2022  *
2023  *      Thaw a currently frozen task.
2024  *
2025  * Conditions:
2026  *      The caller holds a reference to the task
2027  */
2028 extern void
2029 vm_consider_waking_compactor_swapper(void);
2030
2031 kern_return_t
2032 task_thaw(
2033         register task_t         task)
2034 {
2035         kern_return_t kr;
2036
2037         if (task == TASK_NULL || task == kernel_task)
2038                 return (KERN_INVALID_ARGUMENT);
2039
2040         task_lock(task);
2041
2042         while (task->changing_freeze_state) {
2043
2044                 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
2045                 task_unlock(task);
2046                 thread_block(THREAD_CONTINUE_NULL);
2047
2048                 task_lock(task);
2049         }
2050         if (!task->frozen) {
2051                 task_unlock(task);
2052                 return (KERN_FAILURE);
2053         }
2054         task->changing_freeze_state = TRUE;
2055
2056         if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE) {
2057                 task_unlock(task);
2058
2059                 kr = vm_map_thaw(task->map);
2060
2061                 task_lock(task);
2062
2063                 if (kr == KERN_SUCCESS)
2064                         task->frozen = FALSE;
2065         } else {
2066                 task->frozen = FALSE;
2067                 kr = KERN_SUCCESS;
2068         }
2069
2070         task->changing_freeze_state = FALSE;
2071         thread_wakeup(&task->changing_freeze_state);
2072
2073         task_unlock(task);
2074
2075         if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
2076                 vm_consider_waking_compactor_swapper();
2077         }
2078
2079         return (kr);
2080 }
2081
2082 #endif /* CONFIG_FREEZE */
2083
2084 kern_return_t
2085 host_security_set_task_token(
2086         host_security_t  host_security,
2087         task_t           task,
2088         security_token_t sec_token,
2089         audit_token_t    audit_token,
2090         host_priv_t      host_priv)
2091 {
2092         ipc_port_t       host_port;
2093         kern_return_t    kr;
2094
2095         if (task == TASK_NULL)
2096                 return(KERN_INVALID_ARGUMENT);
2097
2098         if (host_security == HOST_NULL)
2099                 return(KERN_INVALID_SECURITY);
2100
2101         task_lock(task);
2102         task->sec_token = sec_token;
2103         task->audit_token = audit_token;
2104
2105         task_unlock(task);
2106
2107         if (host_priv != HOST_PRIV_NULL) {
2108                 kr = host_get_host_priv_port(host_priv, &host_port);
2109         } else {
2110                 kr = host_get_host_port(host_priv_self(), &host_port);
2111         }
2112         assert(kr == KERN_SUCCESS);
2113         kr = task_set_special_port(task, TASK_HOST_PORT, host_port);
2114         return(kr);
2115 }
2116
2117 /*
2118  * This routine was added, pretty much exclusively, for registering the
2119  * RPC glue vector for in-kernel short circuited tasks.  Rather than
2120  * removing it completely, I have only disabled that feature (which was
2121  * the only feature at the time).  It just appears that we are going to
2122  * want to add some user data to tasks in the future (i.e. bsd info,
2123  * task names, etc...), so I left it in the formal task interface.
2124  */
2125 kern_return_t
2126 task_set_info(
2127         task_t          task,
2128         task_flavor_t   flavor,
2129         __unused task_info_t    task_info_in,           /* pointer to IN array */
2130         __unused mach_msg_type_number_t task_info_count)
2131 {
2132         if (task == TASK_NULL)
2133                 return(KERN_INVALID_ARGUMENT);
2134
2135         switch (flavor) {
2136             default:
2137                 return (KERN_INVALID_ARGUMENT);
2138         }
2139         return (KERN_SUCCESS);
2140 }
2141
2142 kern_return_t
2143 task_info(
2144         task_t                  task,
2145         task_flavor_t           flavor,
2146         task_info_t             task_info_out,
2147         mach_msg_type_number_t  *task_info_count)
2148 {
2149         kern_return_t error = KERN_SUCCESS;
2150
2151         if (task == TASK_NULL)
2152                 return (KERN_INVALID_ARGUMENT);
2153
2154         task_lock(task);
2155
2156         if ((task != current_task()) && (!task->active)) {
2157                 task_unlock(task);
2158                 return (KERN_INVALID_ARGUMENT);
2159         }
2160
2161         switch (flavor) {
2162
2163         case TASK_BASIC_INFO_32:
2164         case TASK_BASIC2_INFO_32:
2165         {
2166                 task_basic_info_32_t    basic_info;
2167                 vm_map_t                                map;
2168                 clock_sec_t                             secs;
2169                 clock_usec_t                    usecs;
2170
2171                 if (*task_info_count < TASK_BASIC_INFO_32_COUNT) {
2172                     error = KERN_INVALID_ARGUMENT;
2173                     break;
2174                 }
2175
2176                 basic_info = (task_basic_info_32_t)task_info_out;
2177
2178                 map = (task == kernel_task)? kernel_map: task->map;
2179                 basic_info->virtual_size = (typeof(basic_info->virtual_size))map->size;
2180                 if (flavor == TASK_BASIC2_INFO_32) {
2181                         /*
2182                          * The "BASIC2" flavor gets the maximum resident
2183                          * size instead of the current resident size...
2184                          */
2185                         basic_info->resident_size = pmap_resident_max(map->pmap);
2186                 } else {
2187                         basic_info->resident_size = pmap_resident_count(map->pmap);
2188                 }
2189                 basic_info->resident_size *= PAGE_SIZE;
2190
2191                 basic_info->policy = ((task != kernel_task)?
2192                                                                                   POLICY_TIMESHARE: POLICY_RR);
2193                 basic_info->suspend_count = task->user_stop_count;
2194
2195                 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
2196                 basic_info->user_time.seconds =
2197                         (typeof(basic_info->user_time.seconds))secs;
2198                 basic_info->user_time.microseconds = usecs;
2199
2200                 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
2201                 basic_info->system_time.seconds =
2202                         (typeof(basic_info->system_time.seconds))secs;
2203                 basic_info->system_time.microseconds = usecs;
2204
2205                 *task_info_count = TASK_BASIC_INFO_32_COUNT;
2206                 break;
2207         }
2208
2209         case TASK_BASIC_INFO_64:
2210         {
2211                 task_basic_info_64_t    basic_info;
2212                 vm_map_t                                map;
2213                 clock_sec_t                             secs;
2214                 clock_usec_t                    usecs;
2215
2216                 if (*task_info_count < TASK_BASIC_INFO_64_COUNT) {
2217                     error = KERN_INVALID_ARGUMENT;
2218                     break;
2219                 }
2220
2221                 basic_info = (task_basic_info_64_t)task_info_out;
2222
2223                 map = (task == kernel_task)? kernel_map: task->map;
2224                 basic_info->virtual_size  = map->size;
2225                 basic_info->resident_size =
2226                         (mach_vm_size_t)(pmap_resident_count(map->pmap))
2227                         * PAGE_SIZE_64;
2228
2229                 basic_info->policy = ((task != kernel_task)?
2230                                                                                   POLICY_TIMESHARE: POLICY_RR);
2231                 basic_info->suspend_count = task->user_stop_count;
2232
2233                 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
2234                 basic_info->user_time.seconds =
2235                         (typeof(basic_info->user_time.seconds))secs;
2236                 basic_info->user_time.microseconds = usecs;
2237
2238                 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
2239                 basic_info->system_time.seconds =
2240                         (typeof(basic_info->system_time.seconds))secs;
2241                 basic_info->system_time.microseconds = usecs;
2242
2243                 *task_info_count = TASK_BASIC_INFO_64_COUNT;
2244                 break;
2245         }
2246
2247         case MACH_TASK_BASIC_INFO:
2248         {
2249                 mach_task_basic_info_t  basic_info;
2250                 vm_map_t                map;
2251                 clock_sec_t             secs;
2252                 clock_usec_t            usecs;
2253
2254                 if (*task_info_count < MACH_TASK_BASIC_INFO_COUNT) {
2255                     error = KERN_INVALID_ARGUMENT;
2256                     break;
2257                 }
2258
2259                 basic_info = (mach_task_basic_info_t)task_info_out;
2260
2261                 map = (task == kernel_task) ? kernel_map : task->map;
2262
2263                 basic_info->virtual_size  = map->size;
2264
2265                 basic_info->resident_size =
2266                     (mach_vm_size_t)(pmap_resident_count(map->pmap));
2267                 basic_info->resident_size *= PAGE_SIZE_64;
2268
2269                 basic_info->resident_size_max =
2270                     (mach_vm_size_t)(pmap_resident_max(map->pmap));
2271                 basic_info->resident_size_max *= PAGE_SIZE_64;
2272
2273                 basic_info->policy = ((task != kernel_task) ?
2274                                       POLICY_TIMESHARE : POLICY_RR);
2275
2276                 basic_info->suspend_count = task->user_stop_count;
2277
2278                 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
2279                 basic_info->user_time.seconds =
2280                     (typeof(basic_info->user_time.seconds))secs;
2281                 basic_info->user_time.microseconds = usecs;
2282
2283                 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
2284                 basic_info->system_time.seconds =
2285                     (typeof(basic_info->system_time.seconds))secs;
2286                 basic_info->system_time.microseconds = usecs;
2287
2288                 *task_info_count = MACH_TASK_BASIC_INFO_COUNT;
2289                 break;
2290         }
2291
2292         case TASK_THREAD_TIMES_INFO:
2293         {
2294                 register task_thread_times_info_t       times_info;
2295                 register thread_t                                       thread;
2296
2297                 if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) {
2298                     error = KERN_INVALID_ARGUMENT;
2299                     break;
2300                 }
2301
2302                 times_info = (task_thread_times_info_t) task_info_out;
2303                 times_info->user_time.seconds = 0;
2304                 times_info->user_time.microseconds = 0;
2305                 times_info->system_time.seconds = 0;
2306                 times_info->system_time.microseconds = 0;
2307
2308
2309                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2310                         time_value_t    user_time, system_time;
2311
2312                         if (thread->options & TH_OPT_IDLE_THREAD)
2313                                 continue;
2314
2315                         thread_read_times(thread, &user_time, &system_time);
2316
2317                         time_value_add(&times_info->user_time, &user_time);
2318                         time_value_add(&times_info->system_time, &system_time);
2319                 }
2320
2321                 *task_info_count = TASK_THREAD_TIMES_INFO_COUNT;
2322                 break;
2323         }
2324
2325         case TASK_ABSOLUTETIME_INFO:
2326         {
2327                 task_absolutetime_info_t        info;
2328                 register thread_t                       thread;
2329
2330                 if (*task_info_count < TASK_ABSOLUTETIME_INFO_COUNT) {
2331                         error = KERN_INVALID_ARGUMENT;
2332                         break;
2333                 }
2334
2335                 info = (task_absolutetime_info_t)task_info_out;
2336                 info->threads_user = info->threads_system = 0;
2337
2338
2339                 info->total_user = task->total_user_time;
2340                 info->total_system = task->total_system_time;
2341
2342                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2343                         uint64_t        tval;
2344                         spl_t           x;
2345
2346                         if (thread->options & TH_OPT_IDLE_THREAD)
2347                                 continue;
2348
2349                         x = splsched();
2350                         thread_lock(thread);
2351
2352                         tval = timer_grab(&thread->user_timer);
2353                         info->threads_user += tval;
2354                         info->total_user += tval;
2355
2356                         tval = timer_grab(&thread->system_timer);
2357                         if (thread->precise_user_kernel_time) {
2358                                 info->threads_system += tval;
2359                                 info->total_system += tval;
2360                         } else {
2361                                 /* system_timer may represent either sys or user */
2362                                 info->threads_user += tval;
2363                                 info->total_user += tval;
2364                         }
2365
2366                         thread_unlock(thread);
2367                         splx(x);
2368                 }
2369
2370
2371                 *task_info_count = TASK_ABSOLUTETIME_INFO_COUNT;
2372                 break;
2373         }
2374
2375         case TASK_DYLD_INFO:
2376         {
2377                 task_dyld_info_t info;
2378
2379                 /*
2380                  * We added the format field to TASK_DYLD_INFO output.  For
2381                  * temporary backward compatibility, accept the fact that
2382                  * clients may ask for the old version - distinguished by the
2383                  * size of the expected result structure.
2384                  */
2385 #define TASK_LEGACY_DYLD_INFO_COUNT \
2386                 offsetof(struct task_dyld_info, all_image_info_format)/sizeof(natural_t)
2387
2388                 if (*task_info_count < TASK_LEGACY_DYLD_INFO_COUNT) {
2389                         error = KERN_INVALID_ARGUMENT;
2390                         break;
2391                 }
2392
2393                 info = (task_dyld_info_t)task_info_out;
2394                 info->all_image_info_addr = task->all_image_info_addr;
2395                 info->all_image_info_size = task->all_image_info_size;
2396
2397                 /* only set format on output for those expecting it */
2398                 if (*task_info_count >= TASK_DYLD_INFO_COUNT) {
2399                         info->all_image_info_format = task_has_64BitAddr(task) ?
2400                                                  TASK_DYLD_ALL_IMAGE_INFO_64 :
2401                                                  TASK_DYLD_ALL_IMAGE_INFO_32 ;
2402                         *task_info_count = TASK_DYLD_INFO_COUNT;
2403                 } else {
2404                         *task_info_count = TASK_LEGACY_DYLD_INFO_COUNT;
2405                 }
2406                 break;
2407         }
2408
2409         case TASK_EXTMOD_INFO:
2410         {
2411                 task_extmod_info_t info;
2412                 void *p;
2413
2414                 if (*task_info_count < TASK_EXTMOD_INFO_COUNT) {
2415                         error = KERN_INVALID_ARGUMENT;
2416                         break;
2417                 }
2418
2419                 info = (task_extmod_info_t)task_info_out;
2420
2421                 p = get_bsdtask_info(task);
2422                 if (p) {
2423                         proc_getexecutableuuid(p, info->task_uuid, sizeof(info->task_uuid));
2424                 } else {
2425                         bzero(info->task_uuid, sizeof(info->task_uuid));
2426                 }
2427                 info->extmod_statistics = task->extmod_statistics;
2428                 *task_info_count = TASK_EXTMOD_INFO_COUNT;
2429
2430                 break;
2431         }
2432
2433         case TASK_KERNELMEMORY_INFO:
2434         {
2435                 task_kernelmemory_info_t        tkm_info;
2436                 ledger_amount_t                 credit, debit;
2437
2438                 if (*task_info_count < TASK_KERNELMEMORY_INFO_COUNT) {
2439                    error = KERN_INVALID_ARGUMENT;
2440                    break;
2441                 }
2442
2443                 tkm_info = (task_kernelmemory_info_t) task_info_out;
2444                 tkm_info->total_palloc = 0;
2445                 tkm_info->total_pfree = 0;
2446                 tkm_info->total_salloc = 0;
2447                 tkm_info->total_sfree = 0;
2448
2449                 if (task == kernel_task) {
2450                         /*
2451                          * All shared allocs/frees from other tasks count against
2452                          * the kernel private memory usage.  If we are looking up
2453                          * info for the kernel task, gather from everywhere.
2454                          */
2455                         task_unlock(task);
2456
2457                         /* start by accounting for all the terminated tasks against the kernel */
2458                         tkm_info->total_palloc = tasks_tkm_private.alloc + tasks_tkm_shared.alloc;
2459                         tkm_info->total_pfree = tasks_tkm_private.free + tasks_tkm_shared.free;
2460
2461                         /* count all other task/thread shared alloc/free against the kernel */
2462                         lck_mtx_lock(&tasks_threads_lock);
2463
2464                         /* XXX this really shouldn't be using the function parameter 'task' as a local var! */
2465                         queue_iterate(&tasks, task, task_t, tasks) {
2466                                 if (task == kernel_task) {
2467                                         if (ledger_get_entries(task->ledger,
2468                                             task_ledgers.tkm_private, &credit,
2469                                             &debit) == KERN_SUCCESS) {
2470                                                 tkm_info->total_palloc += credit;
2471                                                 tkm_info->total_pfree += debit;
2472                                         }
2473                                 }
2474                                 if (!ledger_get_entries(task->ledger,
2475                                     task_ledgers.tkm_shared, &credit, &debit)) {
2476                                         tkm_info->total_palloc += credit;
2477                                         tkm_info->total_pfree += debit;
2478                                 }
2479                         }
2480                         lck_mtx_unlock(&tasks_threads_lock);
2481                 } else {
2482                         if (!ledger_get_entries(task->ledger,
2483                             task_ledgers.tkm_private, &credit, &debit)) {
2484                                 tkm_info->total_palloc = credit;
2485                                 tkm_info->total_pfree = debit;
2486                         }
2487                         if (!ledger_get_entries(task->ledger,
2488                             task_ledgers.tkm_shared, &credit, &debit)) {
2489                                 tkm_info->total_salloc = credit;
2490                                 tkm_info->total_sfree = debit;
2491                         }
2492                         task_unlock(task);
2493                 }
2494
2495                 *task_info_count = TASK_KERNELMEMORY_INFO_COUNT;
2496                 return KERN_SUCCESS;
2497         }
2498
2499         /* OBSOLETE */
2500         case TASK_SCHED_FIFO_INFO:
2501         {
2502
2503                 if (*task_info_count < POLICY_FIFO_BASE_COUNT) {
2504                         error = KERN_INVALID_ARGUMENT;
2505                         break;
2506                 }
2507
2508                 error = KERN_INVALID_POLICY;
2509                 break;
2510         }
2511
2512         /* OBSOLETE */
2513         case TASK_SCHED_RR_INFO:
2514         {
2515                 register policy_rr_base_t       rr_base;
2516                 uint32_t quantum_time;
2517                 uint64_t quantum_ns;
2518
2519                 if (*task_info_count < POLICY_RR_BASE_COUNT) {
2520                         error = KERN_INVALID_ARGUMENT;
2521                         break;
2522                 }
2523
2524                 rr_base = (policy_rr_base_t) task_info_out;
2525
2526                 if (task != kernel_task) {
2527                         error = KERN_INVALID_POLICY;
2528                         break;
2529                 }
2530
2531                 rr_base->base_priority = task->priority;
2532
2533                 quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
2534                 absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
2535
2536                 rr_base->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
2537
2538                 *task_info_count = POLICY_RR_BASE_COUNT;
2539                 break;
2540         }
2541
2542         /* OBSOLETE */
2543         case TASK_SCHED_TIMESHARE_INFO:
2544         {
2545                 register policy_timeshare_base_t        ts_base;
2546
2547                 if (*task_info_count < POLICY_TIMESHARE_BASE_COUNT) {
2548                         error = KERN_INVALID_ARGUMENT;
2549                         break;
2550                 }
2551
2552                 ts_base = (policy_timeshare_base_t) task_info_out;
2553
2554                 if (task == kernel_task) {
2555                         error = KERN_INVALID_POLICY;
2556                         break;
2557                 }
2558
2559                 ts_base->base_priority = task->priority;
2560
2561                 *task_info_count = POLICY_TIMESHARE_BASE_COUNT;
2562                 break;
2563         }
2564
2565         case TASK_SECURITY_TOKEN:
2566         {
2567                 register security_token_t       *sec_token_p;
2568
2569                 if (*task_info_count < TASK_SECURITY_TOKEN_COUNT) {
2570                     error = KERN_INVALID_ARGUMENT;
2571                     break;
2572                 }
2573
2574                 sec_token_p = (security_token_t *) task_info_out;
2575
2576                 *sec_token_p = task->sec_token;
2577
2578                 *task_info_count = TASK_SECURITY_TOKEN_COUNT;
2579                 break;
2580         }
2581
2582         case TASK_AUDIT_TOKEN:
2583         {
2584                 register audit_token_t  *audit_token_p;
2585
2586                 if (*task_info_count < TASK_AUDIT_TOKEN_COUNT) {
2587                     error = KERN_INVALID_ARGUMENT;
2588                     break;
2589                 }
2590
2591                 audit_token_p = (audit_token_t *) task_info_out;
2592
2593                 *audit_token_p = task->audit_token;
2594
2595                 *task_info_count = TASK_AUDIT_TOKEN_COUNT;
2596                 break;
2597         }
2598
2599         case TASK_SCHED_INFO:
2600                 error = KERN_INVALID_ARGUMENT;
2601                 break;
2602
2603         case TASK_EVENTS_INFO:
2604         {
2605                 register task_events_info_t     events_info;
2606                 register thread_t                       thread;
2607
2608                 if (*task_info_count < TASK_EVENTS_INFO_COUNT) {
2609                    error = KERN_INVALID_ARGUMENT;
2610                    break;
2611                 }
2612
2613                 events_info = (task_events_info_t) task_info_out;
2614
2615
2616                 events_info->faults = task->faults;
2617                 events_info->pageins = task->pageins;
2618                 events_info->cow_faults = task->cow_faults;
2619                 events_info->messages_sent = task->messages_sent;
2620                 events_info->messages_received = task->messages_received;
2621                 events_info->syscalls_mach = task->syscalls_mach;
2622                 events_info->syscalls_unix = task->syscalls_unix;
2623
2624                 events_info->csw = task->c_switch;
2625
2626                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2627                         events_info->csw           += thread->c_switch;
2628                         events_info->syscalls_mach += thread->syscalls_mach;
2629                         events_info->syscalls_unix += thread->syscalls_unix;
2630                 }
2631
2632
2633                 *task_info_count = TASK_EVENTS_INFO_COUNT;
2634                 break;
2635         }
2636         case TASK_AFFINITY_TAG_INFO:
2637         {
2638                 if (*task_info_count < TASK_AFFINITY_TAG_INFO_COUNT) {
2639                     error = KERN_INVALID_ARGUMENT;
2640                     break;
2641                 }
2642
2643                 error = task_affinity_info(task, task_info_out, task_info_count);
2644                 break;
2645         }
2646         case TASK_POWER_INFO:
2647         {
2648                 if (*task_info_count < TASK_POWER_INFO_COUNT) {
2649                         error = KERN_INVALID_ARGUMENT;
2650                         break;
2651                 }
2652
2653                 task_power_info_locked(task, (task_power_info_t)task_info_out);
2654                 break;
2655         }
2656
2657         case TASK_VM_INFO:
2658         case TASK_VM_INFO_PURGEABLE:
2659         {
2660                 task_vm_info_t          vm_info;
2661                 vm_map_t                map;
2662
2663                 if (*task_info_count < TASK_VM_INFO_COUNT) {
2664                     error = KERN_INVALID_ARGUMENT;
2665                     break;
2666                 }
2667
2668                 vm_info = (task_vm_info_t)task_info_out;
2669
2670                 if (task == kernel_task) {
2671                         map = kernel_map;
2672                         /* no lock */
2673                 } else {
2674                         map = task->map;
2675                         vm_map_lock_read(map);
2676                 }
2677
2678                 vm_info->virtual_size = (typeof(vm_info->virtual_size))map->size;
2679                 vm_info->region_count = map->hdr.nentries;
2680                 vm_info->page_size = vm_map_page_size(map);
2681
2682                 vm_info->resident_size = pmap_resident_count(map->pmap);
2683                 vm_info->resident_size *= PAGE_SIZE;
2684                 vm_info->resident_size_peak = pmap_resident_max(map->pmap);
2685                 vm_info->resident_size_peak *= PAGE_SIZE;
2686
2687 #define _VM_INFO(_name) \
2688         vm_info->_name = ((mach_vm_size_t) map->pmap->stats._name) * PAGE_SIZE
2689
2690                 _VM_INFO(device);
2691                 _VM_INFO(device_peak);
2692                 _VM_INFO(external);
2693                 _VM_INFO(external_peak);
2694                 _VM_INFO(internal);
2695                 _VM_INFO(internal_peak);
2696                 _VM_INFO(reusable);
2697                 _VM_INFO(reusable_peak);
2698                 _VM_INFO(compressed);
2699                 _VM_INFO(compressed_peak);
2700                 _VM_INFO(compressed_lifetime);
2701
2702                 vm_info->purgeable_volatile_pmap = 0;
2703                 vm_info->purgeable_volatile_resident = 0;
2704                 vm_info->purgeable_volatile_virtual = 0;
2705                 if (task == kernel_task) {
2706                         /*
2707                          * We do not maintain the detailed stats for the
2708                          * kernel_pmap, so just count everything as
2709                          * "internal"...
2710                          */
2711                         vm_info->internal = vm_info->resident_size;
2712                         /*
2713                          * ... but since the memory held by the VM compressor
2714                          * in the kernel address space ought to be attributed
2715                          * to user-space tasks, we subtract it from "internal"
2716                          * to give memory reporting tools a more accurate idea
2717                          * of what the kernel itself is actually using, instead
2718                          * of making it look like the kernel is leaking memory
2719                          * when the system is under memory pressure.
2720                          */
2721                         vm_info->internal -= (VM_PAGE_COMPRESSOR_COUNT *
2722                                               PAGE_SIZE);
2723                 } else {
2724                         mach_vm_size_t  volatile_virtual_size;
2725                         mach_vm_size_t  volatile_resident_size;
2726                         mach_vm_size_t  volatile_pmap_size;
2727                         kern_return_t   kr;
2728
2729                         if (flavor == TASK_VM_INFO_PURGEABLE) {
2730                                 kr = vm_map_query_volatile(
2731                                         map,
2732                                         &volatile_virtual_size,
2733                                         &volatile_resident_size,
2734                                         &volatile_pmap_size);
2735                                 if (kr == KERN_SUCCESS) {
2736                                         vm_info->purgeable_volatile_pmap =
2737                                                 volatile_pmap_size;
2738                                         vm_info->purgeable_volatile_resident =
2739                                                 volatile_resident_size;
2740                                         vm_info->purgeable_volatile_virtual =
2741                                                 volatile_virtual_size;
2742                                 }
2743                         }
2744                         vm_map_unlock_read(map);
2745                 }
2746
2747                 *task_info_count = TASK_VM_INFO_COUNT;
2748                 break;
2749         }
2750
2751         default:
2752                 error = KERN_INVALID_ARGUMENT;
2753         }
2754
2755         task_unlock(task);
2756         return (error);
2757 }
2758
2759 /*
2760  *      task_power_info
2761  *
2762  *      Returns power stats for the task.
2763  *      Note: Called with task locked.
2764  */
2765 void
2766 task_power_info_locked(
2767         task_t                  task,
2768         task_power_info_t       info)
2769 {
2770         thread_t                thread;
2771         ledger_amount_t         tmp;
2772
2773         task_lock_assert_owned(task);
2774
2775         ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
2776                 (ledger_amount_t *)&info->task_interrupt_wakeups, &tmp);
2777         ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
2778                 (ledger_amount_t *)&info->task_platform_idle_wakeups, &tmp);
2779
2780         info->task_timer_wakeups_bin_1 = task->task_timer_wakeups_bin_1;
2781         info->task_timer_wakeups_bin_2 = task->task_timer_wakeups_bin_2;
2782
2783         info->total_user = task->total_user_time;
2784         info->total_system = task->total_system_time;
2785
2786         queue_iterate(&task->threads, thread, thread_t, task_threads) {
2787                 uint64_t        tval;
2788                 spl_t           x;
2789
2790                 if (thread->options & TH_OPT_IDLE_THREAD)
2791                         continue;
2792
2793                 x = splsched();
2794                 thread_lock(thread);
2795
2796                 info->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
2797                 info->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
2798
2799                 tval = timer_grab(&thread->user_timer);
2800                 info->total_user += tval;
2801
2802                 tval = timer_grab(&thread->system_timer);
2803                 if (thread->precise_user_kernel_time) {
2804                         info->total_system += tval;
2805                 } else {
2806                         /* system_timer may represent either sys or user */
2807                         info->total_user += tval;
2808                 }
2809
2810                 thread_unlock(thread);
2811                 splx(x);
2812         }
2813 }
2814
2815 kern_return_t
2816 task_purgable_info(
2817         task_t                  task,
2818         task_purgable_info_t    *stats)
2819 {
2820         if (task == TASK_NULL || stats == NULL)
2821                 return KERN_INVALID_ARGUMENT;
2822         /* Take task reference */
2823         task_reference(task);
2824         vm_purgeable_stats((vm_purgeable_info_t)stats, task);
2825         /* Drop task reference */
2826         task_deallocate(task);
2827         return KERN_SUCCESS;
2828 }
2829
2830 void
2831 task_vtimer_set(
2832         task_t          task,
2833         integer_t       which)
2834 {
2835         thread_t        thread;
2836         spl_t           x;
2837
2838         /* assert(task == current_task()); */ /* bogus assert 4803227 4807483 */
2839
2840         task_lock(task);
2841
2842         task->vtimers |= which;
2843
2844         switch (which) {
2845
2846         case TASK_VTIMER_USER:
2847                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2848                         x = splsched();
2849                         thread_lock(thread);
2850                         if (thread->precise_user_kernel_time)
2851                                 thread->vtimer_user_save = timer_grab(&thread->user_timer);
2852                         else
2853                                 thread->vtimer_user_save = timer_grab(&thread->system_timer);
2854                         thread_unlock(thread);
2855                         splx(x);
2856                 }
2857                 break;
2858
2859         case TASK_VTIMER_PROF:
2860                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2861                         x = splsched();
2862                         thread_lock(thread);
2863                         thread->vtimer_prof_save = timer_grab(&thread->user_timer);
2864                         thread->vtimer_prof_save += timer_grab(&thread->system_timer);
2865                         thread_unlock(thread);
2866                         splx(x);
2867                 }
2868                 break;
2869
2870         case TASK_VTIMER_RLIM:
2871                 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2872                         x = splsched();
2873                         thread_lock(thread);
2874                         thread->vtimer_rlim_save = timer_grab(&thread->user_timer);
2875                         thread->vtimer_rlim_save += timer_grab(&thread->system_timer);
2876                         thread_unlock(thread);
2877                         splx(x);
2878                 }
2879                 break;
2880         }
2881
2882         task_unlock(task);
2883 }
2884
2885 void
2886 task_vtimer_clear(
2887         task_t          task,
2888         integer_t       which)
2889 {
2890         assert(task == current_task());
2891
2892         task_lock(task);
2893
2894         task->vtimers &= ~which;
2895
2896         task_unlock(task);
2897 }
2898
2899 void
2900 task_vtimer_update(
2901 __unused
2902         task_t          task,
2903         integer_t       which,
2904         uint32_t        *microsecs)
2905 {
2906         thread_t        thread = current_thread();
2907         uint32_t        tdelt;
2908         clock_sec_t     secs;
2909         uint64_t        tsum;
2910
2911         assert(task == current_task());
2912
2913         assert(task->vtimers & which);
2914
2915         secs = tdelt = 0;
2916
2917         switch (which) {
2918
2919         case TASK_VTIMER_USER:
2920                 if (thread->precise_user_kernel_time) {
2921                         tdelt = (uint32_t)timer_delta(&thread->user_timer,
2922                                                                 &thread->vtimer_user_save);
2923                 } else {
2924                         tdelt = (uint32_t)timer_delta(&thread->system_timer,
2925                                                                 &thread->vtimer_user_save);
2926                 }
2927                 absolutetime_to_microtime(tdelt, &secs, microsecs);
2928                 break;
2929
2930         case TASK_VTIMER_PROF:
2931                 tsum = timer_grab(&thread->user_timer);
2932                 tsum += timer_grab(&thread->system_timer);
2933                 tdelt = (uint32_t)(tsum - thread->vtimer_prof_save);
2934                 absolutetime_to_microtime(tdelt, &secs, microsecs);
2935                 /* if the time delta is smaller than a usec, ignore */
2936                 if (*microsecs != 0)
2937                         thread->vtimer_prof_save = tsum;
2938                 break;
2939
2940         case TASK_VTIMER_RLIM:
2941                 tsum = timer_grab(&thread->user_timer);
2942                 tsum += timer_grab(&thread->system_timer);
2943                 tdelt = (uint32_t)(tsum - thread->vtimer_rlim_save);
2944                 thread->vtimer_rlim_save = tsum;
2945                 absolutetime_to_microtime(tdelt, &secs, microsecs);
2946                 break;
2947         }
2948
2949 }
2950
2951 /*
2952  *      task_assign:
2953  *
2954  *      Change the assigned processor set for the task
2955  */
2956 kern_return_t
2957 task_assign(
2958         __unused task_t         task,
2959         __unused processor_set_t        new_pset,
2960         __unused boolean_t      assign_threads)
2961 {
2962         return(KERN_FAILURE);
2963 }
2964
2965 /*
2966  *      task_assign_default:
2967  *
2968  *      Version of task_assign to assign to default processor set.
2969  */
2970 kern_return_t
2971 task_assign_default(
2972         task_t          task,
2973         boolean_t       assign_threads)
2974 {
2975     return (task_assign(task, &pset0, assign_threads));
2976 }
2977
2978 /*
2979  *      task_get_assignment
2980  *
2981  *      Return name of processor set that task is assigned to.
2982  */
2983 kern_return_t
2984 task_get_assignment(
2985         task_t          task,
2986         processor_set_t *pset)
2987 {
2988         if (!task->active)
2989                 return(KERN_FAILURE);
2990
2991         *pset = &pset0;
2992
2993         return (KERN_SUCCESS);
2994 }
2995
2996
2997 /*
2998  *      task_policy
2999  *
3000  *      Set scheduling policy and parameters, both base and limit, for
3001  *      the given task. Policy must be a policy which is enabled for the
3002  *      processor set. Change contained threads if requested.
3003  */
3004 kern_return_t
3005 task_policy(
3006         __unused task_t                 task,
3007         __unused policy_t                       policy_id,
3008         __unused policy_base_t          base,
3009         __unused mach_msg_type_number_t count,
3010         __unused boolean_t                      set_limit,
3011         __unused boolean_t                      change)
3012 {
3013         return(KERN_FAILURE);
3014 }
3015
3016 /*
3017  *      task_set_policy
3018  *
3019  *      Set scheduling policy and parameters, both base and limit, for
3020  *      the given task. Policy can be any policy implemented by the
3021  *      processor set, whether enabled or not. Change contained threads
3022  *      if requested.
3023  */
3024 kern_return_t
3025 task_set_policy(
3026         __unused task_t                 task,
3027         __unused processor_set_t                pset,
3028         __unused policy_t                       policy_id,
3029         __unused policy_base_t          base,
3030         __unused mach_msg_type_number_t base_count,
3031         __unused policy_limit_t         limit,
3032         __unused mach_msg_type_number_t limit_count,
3033         __unused boolean_t                      change)
3034 {
3035         return(KERN_FAILURE);
3036 }
3037
3038 #if     FAST_TAS
3039 kern_return_t
3040 task_set_ras_pc(
3041         task_t          task,
3042         vm_offset_t     pc,
3043         vm_offset_t     endpc)
3044 {
3045         extern int fast_tas_debug;
3046
3047         if (fast_tas_debug) {
3048                 printf("task 0x%x: setting fast_tas to [0x%x, 0x%x]\n",
3049                        task, pc, endpc);
3050         }
3051         task_lock(task);
3052         task->fast_tas_base = pc;
3053         task->fast_tas_end =  endpc;
3054         task_unlock(task);
3055         return KERN_SUCCESS;
3056 }
3057 #else   /* FAST_TAS */
3058 kern_return_t
3059 task_set_ras_pc(
3060         __unused task_t task,
3061         __unused vm_offset_t    pc,
3062         __unused vm_offset_t    endpc)
3063 {
3064         return KERN_FAILURE;
3065 }
3066 #endif  /* FAST_TAS */
3067
3068 void
3069 task_synchronizer_destroy_all(task_t task)
3070 {
3071         semaphore_t     semaphore;
3072
3073         /*
3074          *  Destroy owned semaphores
3075          */
3076
3077         while (!queue_empty(&task->semaphore_list)) {
3078                 semaphore = (semaphore_t) queue_first(&task->semaphore_list);
3079                 (void) semaphore_destroy(task, semaphore);
3080         }
3081 }
3082
3083 /*
3084  * Install default (machine-dependent) initial thread state
3085  * on the task.  Subsequent thread creation will have this initial
3086  * state set on the thread by machine_thread_inherit_taskwide().
3087  * Flavors and structures are exactly the same as those to thread_set_state()
3088  */
3089 kern_return_t
3090 task_set_state(
3091         task_t task,
3092         int flavor,
3093         thread_state_t state,
3094         mach_msg_type_number_t state_count)
3095 {
3096         kern_return_t ret;
3097
3098         if (task == TASK_NULL) {
3099                 return (KERN_INVALID_ARGUMENT);
3100         }
3101
3102         task_lock(task);
3103
3104         if (!task->active) {
3105                 task_unlock(task);
3106                 return (KERN_FAILURE);
3107         }
3108
3109         ret = machine_task_set_state(task, flavor, state, state_count);
3110
3111         task_unlock(task);
3112         return ret;
3113 }
3114
3115 /*
3116  * Examine the default (machine-dependent) initial thread state
3117  * on the task, as set by task_set_state().  Flavors and structures
3118  * are exactly the same as those passed to thread_get_state().
3119  */
3120 kern_return_t
3121 task_get_state(
3122         task_t  task,
3123         int     flavor,
3124         thread_state_t state,
3125         mach_msg_type_number_t *state_count)
3126 {
3127         kern_return_t ret;
3128
3129         if (task == TASK_NULL) {
3130                 return (KERN_INVALID_ARGUMENT);
3131         }
3132
3133         task_lock(task);
3134
3135         if (!task->active) {
3136                 task_unlock(task);
3137                 return (KERN_FAILURE);
3138         }
3139
3140         ret = machine_task_get_state(task, flavor, state, state_count);
3141
3142         task_unlock(task);
3143         return ret;
3144 }
3145
3146 #if CONFIG_JETSAM
3147 #define HWM_USERCORE_MINSPACE 250 // free space (in MB) required *after* core file creation
3148
3149 void __attribute__((noinline))
3150 THIS_PROCESS_CROSSED_HIGH_WATERMARK__SENDING_EXC_RESOURCE(int max_footprint_mb)
3151 {
3152         task_t                                          task            = current_task();
3153         int                                                     pid         = 0;
3154         char                                    *procname       = (char *) "unknown";
3155         mach_exception_data_type_t      code[EXCEPTION_CODE_MAX];
3156
3157 #ifdef MACH_BSD
3158         pid = proc_selfpid();
3159         if (task->bsd_info != NULL)
3160                 procname = proc_name_address(current_task()->bsd_info);
3161 #endif
3162
3163         if (hwm_user_cores) {
3164                 int                             error;
3165                 uint64_t                starttime, end;
3166                 clock_sec_t             secs = 0;
3167                 uint32_t                microsecs = 0;
3168
3169                 starttime = mach_absolute_time();
3170                 /*
3171                  * Trigger a coredump of this process. Don't proceed unless we know we won't
3172                  * be filling up the disk; and ignore the core size resource limit for this
3173                  * core file.
3174                  */
3175                 if ((error = coredump(current_task()->bsd_info, HWM_USERCORE_MINSPACE, 1)) != 0) {
3176                         printf("couldn't take coredump of %s[%d]: %d\n", procname, pid, error);
3177                 }
3178                 /*
3179                 * coredump() leaves the task suspended.
3180                 */
3181                 task_resume_internal(current_task());
3182
3183                 end = mach_absolute_time();
3184                 absolutetime_to_microtime(end - starttime, &secs, &microsecs);
3185                 printf("coredump of %s[%d] taken in %d secs %d microsecs\n",
3186                        proc_name_address(current_task()->bsd_info), pid, (int)secs, microsecs);
3187         }
3188
3189         if (disable_exc_resource) {
3190                 printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
3191                         "supressed by a boot-arg.\n", procname, pid, max_footprint_mb);
3192                 return;
3193         }
3194
3195         printf("process %s[%d] crossed memory high watermark (%d MB); sending "
3196                 "EXC_RESOURCE.\n", procname, pid, max_footprint_mb);
3197
3198         code[0] = code[1] = 0;
3199         EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY);
3200         EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK);
3201         EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb);
3202         exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
3203 }
3204
3205 /*
3206  * Callback invoked when a task exceeds its physical footprint limit.
3207  */
3208 void
3209 task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1)
3210 {
3211         ledger_amount_t max_footprint_mb;
3212
3213         if (warning == LEDGER_WARNING_DIPPED_BELOW) {
3214                 /*
3215                  * Task memory limits only provide a warning on the way up.
3216                  */
3217                 return;
3218         }
3219
3220         ledger_get_limit(current_task()->ledger, task_ledgers.phys_footprint, &max_footprint_mb);
3221         max_footprint_mb >>= 20;
3222
3223         /*
3224          * If this an actual violation (not a warning),
3225          * generate a non-fatal high watermark EXC_RESOURCE.
3226          */
3227         if ((warning == 0) && (current_task()->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION)) {
3228                 THIS_PROCESS_CROSSED_HIGH_WATERMARK__SENDING_EXC_RESOURCE((int)max_footprint_mb);
3229         }
3230
3231         memorystatus_on_ledger_footprint_exceeded((warning == LEDGER_WARNING_ROSE_ABOVE) ? TRUE : FALSE,
3232                 (int)max_footprint_mb);
3233 }
3234
3235 extern int proc_check_footprint_priv(void);
3236
3237 kern_return_t
3238 task_set_phys_footprint_limit(
3239         task_t task,
3240         int new_limit_mb,
3241         int *old_limit_mb)
3242 {
3243         kern_return_t error;
3244
3245         if ((error = proc_check_footprint_priv())) {
3246                 return (KERN_NO_ACCESS);
3247         }
3248
3249         return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, FALSE);
3250 }
3251
3252 kern_return_t
3253 task_set_phys_footprint_limit_internal(
3254         task_t task,
3255         int new_limit_mb,
3256         int *old_limit_mb,
3257         boolean_t trigger_exception)
3258 {
3259         ledger_amount_t old;
3260
3261         ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &old);
3262
3263         if (old_limit_mb) {
3264                 *old_limit_mb = old >> 20;
3265         }
3266
3267         if (new_limit_mb == -1) {
3268                 /*
3269                  * Caller wishes to remove the limit.
3270                  */
3271                 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
3272                                  max_task_footprint ? max_task_footprint : LEDGER_LIMIT_INFINITY,
3273                                  max_task_footprint ? PHYS_FOOTPRINT_WARNING_LEVEL : 0);
3274                 return (KERN_SUCCESS);
3275         }
3276
3277 #ifdef CONFIG_NOMONITORS
3278         return (KERN_SUCCESS);
3279 #endif /* CONFIG_NOMONITORS */
3280
3281         task_lock(task);
3282
3283         if (trigger_exception) {
3284                 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION;
3285         } else {
3286                 task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION;
3287         }
3288
3289         ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
3290                 (ledger_amount_t)new_limit_mb << 20, PHYS_FOOTPRINT_WARNING_LEVEL);
3291
3292         task_unlock(task);
3293
3294         return (KERN_SUCCESS);
3295 }
3296
3297 kern_return_t
3298 task_get_phys_footprint_limit(
3299         task_t task,
3300         int *limit_mb)
3301 {
3302         ledger_amount_t limit;
3303
3304         ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &limit);
3305         *limit_mb = limit >> 20;
3306
3307         return (KERN_SUCCESS);
3308 }
3309 #else /* CONFIG_JETSAM */
3310 kern_return_t
3311 task_set_phys_footprint_limit(
3312         __unused task_t task,
3313         __unused int new_limit_mb,
3314         __unused int *old_limit_mb)
3315 {
3316         return (KERN_FAILURE);
3317 }
3318
3319 kern_return_t
3320 task_get_phys_footprint_limit(
3321         __unused task_t task,
3322         __unused int *limit_mb)
3323 {
3324         return (KERN_FAILURE);
3325 }
3326 #endif /* CONFIG_JETSAM */
3327
3328 /*
3329  * We need to export some functions to other components that
3330  * are currently implemented in macros within the osfmk
3331  * component.  Just export them as functions of the same name.
3332  */
3333 boolean_t is_kerneltask(task_t t)
3334 {
3335         if (t == kernel_task)
3336                 return (TRUE);
3337
3338         return (FALSE);
3339 }
3340
3341 int
3342 check_for_tasksuspend(task_t task)
3343 {
3344
3345         if (task == TASK_NULL)
3346                 return (0);
3347
3348         return (task->suspend_count > 0);
3349 }
3350
3351 #undef current_task
3352 task_t current_task(void);
3353 task_t current_task(void)
3354 {
3355         return (current_task_fast());
3356 }
3357
3358 #undef task_reference
3359 void task_reference(task_t task);
3360 void
3361 task_reference(
3362         task_t          task)
3363 {
3364         if (task != TASK_NULL)
3365                 task_reference_internal(task);
3366 }
3367
3368 /*
3369  * This routine is called always with task lock held.
3370  * And it returns a thread handle without reference as the caller
3371  * operates on it under the task lock held.
3372  */
3373 thread_t
3374 task_findtid(task_t task, uint64_t tid)
3375 {
3376         thread_t thread= THREAD_NULL;
3377
3378         queue_iterate(&task->threads, thread, thread_t, task_threads) {
3379                         if (thread->thread_id == tid)
3380                                 return(thread);
3381         }
3382         return(THREAD_NULL);
3383 }
3384
3385
3386 #if CONFIG_MACF_MACH
3387 /*
3388  * Protect 2 task labels against modification by adding a reference on
3389  * both label handles. The locks do not actually have to be held while
3390  * using the labels as only labels with one reference can be modified
3391  * in place.
3392  */
3393
3394 void
3395 tasklabel_lock2(
3396         task_t a,
3397         task_t b)
3398 {
3399         labelh_reference(a->label);
3400         labelh_reference(b->label);
3401 }
3402
3403 void
3404 tasklabel_unlock2(
3405         task_t a,
3406         task_t b)
3407 {
3408         labelh_release(a->label);
3409         labelh_release(b->label);
3410 }
3411
3412 void
3413 mac_task_label_update_internal(
3414         struct label    *pl,
3415         struct task     *task)
3416 {
3417
3418         tasklabel_lock(task);
3419         task->label = labelh_modify(task->label);
3420         mac_task_label_update(pl, &task->maclabel);
3421         tasklabel_unlock(task);
3422         ip_lock(task->itk_self);
3423         mac_port_label_update_cred(pl, &task->itk_self->ip_label);
3424         ip_unlock(task->itk_self);
3425 }
3426
3427 void
3428 mac_task_label_modify(
3429         struct task     *task,
3430         void            *arg,
3431         void (*f)       (struct label *l, void *arg))
3432 {
3433
3434         tasklabel_lock(task);
3435         task->label = labelh_modify(task->label);
3436         (*f)(&task->maclabel, arg);
3437         tasklabel_unlock(task);
3438 }
3439
3440 struct label *
3441 mac_task_get_label(struct task *task)
3442 {
3443         return (&task->maclabel);
3444 }
3445 #endif
3446
3447 /*
3448  * Control the CPU usage monitor for a task.
3449  */
3450 kern_return_t
3451 task_cpu_usage_monitor_ctl(task_t task, uint32_t *flags)
3452 {
3453         int error = KERN_SUCCESS;
3454
3455         if (*flags & CPUMON_MAKE_FATAL) {
3456                 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_CPUMON;
3457         } else {
3458                 error = KERN_INVALID_ARGUMENT;
3459         }
3460
3461         return error;
3462 }
3463
3464 /*
3465  * Control the wakeups monitor for a task.
3466  */
3467 kern_return_t
3468 task_wakeups_monitor_ctl(task_t task, uint32_t *flags, int32_t *rate_hz)
3469 {
3470         ledger_t ledger = task->ledger;
3471
3472         task_lock(task);
3473         if (*flags & WAKEMON_GET_PARAMS) {
3474                 ledger_amount_t limit;
3475                 uint64_t                period;
3476
3477                 ledger_get_limit(ledger, task_ledgers.interrupt_wakeups, &limit);
3478                 ledger_get_period(ledger, task_ledgers.interrupt_wakeups, &period);
3479
3480                 if (limit != LEDGER_LIMIT_INFINITY) {
3481                         /*
3482                          * An active limit means the wakeups monitor is enabled.
3483                          */
3484                         *rate_hz = (int32_t)(limit / (int64_t)(period / NSEC_PER_SEC));
3485                         *flags = WAKEMON_ENABLE;
3486                         if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
3487                                 *flags |= WAKEMON_MAKE_FATAL;
3488                         }
3489                 } else {
3490                         *flags = WAKEMON_DISABLE;
3491                         *rate_hz = -1;
3492                 }
3493
3494                 /*
3495                  * If WAKEMON_GET_PARAMS is present in flags, all other flags are ignored.
3496                  */
3497                 task_unlock(task);
3498                 return KERN_SUCCESS;
3499         }
3500
3501         if (*flags & WAKEMON_ENABLE) {
3502                 if (*flags & WAKEMON_SET_DEFAULTS) {
3503                         *rate_hz = task_wakeups_monitor_rate;
3504                 }
3505
3506 #ifndef CONFIG_NOMONITORS
3507                 if (*flags & WAKEMON_MAKE_FATAL) {
3508                         task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
3509                 }
3510 #endif /* CONFIG_NOMONITORS */
3511
3512                 if (*rate_hz < 0) {
3513                         task_unlock(task);
3514                         return KERN_INVALID_ARGUMENT;
3515                 }
3516
3517 #ifndef CONFIG_NOMONITORS
3518                 ledger_set_limit(ledger, task_ledgers.interrupt_wakeups, *rate_hz * task_wakeups_monitor_interval,
3519                         task_wakeups_monitor_ustackshots_trigger_pct);
3520                 ledger_set_period(ledger, task_ledgers.interrupt_wakeups, task_wakeups_monitor_interval * NSEC_PER_SEC);
3521                 ledger_enable_callback(ledger, task_ledgers.interrupt_wakeups);
3522 #endif /* CONFIG_NOMONITORS */
3523         } else if (*flags & WAKEMON_DISABLE) {
3524                 /*
3525                  * Caller wishes to disable wakeups monitor on the task.
3526                  *
3527                  * Disable telemetry if it was triggered by the wakeups monitor, and
3528                  * remove the limit & callback on the wakeups ledger entry.
3529                  */
3530 #if CONFIG_TELEMETRY
3531                 telemetry_task_ctl_locked(current_task(), TF_WAKEMON_WARNING, 0);
3532 #endif
3533                 ledger_disable_refill(ledger, task_ledgers.interrupt_wakeups);
3534                 ledger_disable_callback(ledger, task_ledgers.interrupt_wakeups);
3535         }
3536
3537         task_unlock(task);
3538         return KERN_SUCCESS;
3539 }
3540
3541 void
3542 task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1)
3543 {
3544         if (warning == LEDGER_WARNING_ROSE_ABOVE) {
3545 #if CONFIG_TELEMETRY
3546                 /*
3547                  * This task is in danger of violating the wakeups monitor. Enable telemetry on this task
3548                  * so there are micro-stackshots available if and when EXC_RESOURCE is triggered.
3549                  */
3550                 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 1);
3551 #endif
3552                 return;
3553         }
3554
3555 #if CONFIG_TELEMETRY
3556         /*
3557          * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
3558          * exceeded the limit, turn telemetry off for the task.
3559          */
3560         telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 0);
3561 #endif
3562
3563         if (warning == 0) {
3564                 THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE();
3565         }
3566 }
3567
3568 void __attribute__((noinline))
3569 THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE(void)
3570 {
3571         task_t                                          task            = current_task();
3572         int                                                     pid         = 0;
3573         char                                    *procname       = (char *) "unknown";
3574         uint64_t                                        observed_wakeups_rate;
3575         uint64_t                                        permitted_wakeups_rate;
3576         uint64_t                                        observation_interval;
3577         mach_exception_data_type_t      code[EXCEPTION_CODE_MAX];
3578         struct ledger_entry_info        lei;
3579
3580 #ifdef MACH_BSD
3581         pid = proc_selfpid();
3582         if (task->bsd_info != NULL)
3583                 procname = proc_name_address(current_task()->bsd_info);
3584 #endif
3585
3586         ledger_get_entry_info(task->ledger, task_ledgers.interrupt_wakeups, &lei);
3587
3588         /*
3589          * Disable the exception notification so we don't overwhelm
3590          * the listener with an endless stream of redundant exceptions.
3591          */
3592         uint32_t flags = WAKEMON_DISABLE;
3593         task_wakeups_monitor_ctl(task, &flags, NULL);
3594
3595         observed_wakeups_rate = (lei.lei_balance * (int64_t)NSEC_PER_SEC) / lei.lei_last_refill;
3596         permitted_wakeups_rate = lei.lei_limit / task_wakeups_monitor_interval;
3597         observation_interval = lei.lei_refill_period / NSEC_PER_SEC;
3598
3599         if (disable_exc_resource) {
3600                 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
3601                         "supressed by a boot-arg\n", procname, pid);
3602                 return;
3603         }
3604         if (audio_active) {
3605                 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
3606                        "supressed due to audio playback\n", procname, pid);
3607                 return;
3608         }
3609         printf("process %s[%d] caught causing excessive wakeups. Observed wakeups rate "
3610                 "(per sec): %lld; Maximum permitted wakeups rate (per sec): %lld; Observation "
3611                 "period: %lld seconds; Task lifetime number of wakeups: %lld\n",
3612                 procname, pid, observed_wakeups_rate, permitted_wakeups_rate,
3613                 observation_interval, lei.lei_credit);
3614
3615         code[0] = code[1] = 0;
3616         EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_WAKEUPS);
3617         EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_WAKEUPS_MONITOR);
3618         EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_PERMITTED(code[0], task_wakeups_monitor_rate);
3619         EXC_RESOURCE_CPUMONITOR_ENCODE_OBSERVATION_INTERVAL(code[0], observation_interval);
3620         EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_OBSERVED(code[1], lei.lei_balance * (int64_t)NSEC_PER_SEC / lei.lei_last_refill);
3621         exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
3622
3623         if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
3624                 task_terminate_internal(task);
3625         }
3626 }