1 /*
2 * Copyright (c) 2000-2010, 2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_FREE_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 * File: kern/task.c
58 * Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub,
59 * David Black
60 *
61 * Task management primitives implementation.
62 */
63 /*
64 * Copyright (c) 1993 The University of Utah and
65 * the Computer Systems Laboratory (CSL). All rights reserved.
66 *
67 * Permission to use, copy, modify and distribute this software and its
68 * documentation is hereby granted, provided that both the copyright
69 * notice and this permission notice appear in all copies of the
70 * software, derivative works or modified versions, and any portions
71 * thereof, and that both notices appear in supporting documentation.
72 *
73 * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
74 * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
75 * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
76 *
77 * CSL requests users of this software to return to csl-dist@cs.utah.edu any
78 * improvements that they make and grant CSL redistribution rights.
79 *
80 */
81 /*
82 * NOTICE: This file was modified by McAfee Research in 2004 to introduce
83 * support for mandatory and extensible security protections. This notice
84 * is included in support of clause 2.2 (b) of the Apple Public License,
85 * Version 2.0.
86 * Copyright (c) 2005 SPARTA, Inc.
87 */
88
89 #include <mach/mach_types.h>
90 #include <mach/boolean.h>
91 #include <mach/host_priv.h>
92 #include <mach/machine/vm_types.h>
93 #include <mach/vm_param.h>
94 #include <mach/mach_vm.h>
95 #include <mach/semaphore.h>
96 #include <mach/task_info.h>
97 #include <mach/task_special_ports.h>
98
99 #include <ipc/ipc_importance.h>
100 #include <ipc/ipc_types.h>
101 #include <ipc/ipc_space.h>
102 #include <ipc/ipc_entry.h>
103 #include <ipc/ipc_hash.h>
104
105 #include <kern/kern_types.h>
106 #include <kern/mach_param.h>
107 #include <kern/misc_protos.h>
108 #include <kern/task.h>
109 #include <kern/thread.h>
110 #include <kern/coalition.h>
111 #include <kern/zalloc.h>
112 #include <kern/kalloc.h>
113 #include <kern/kern_cdata.h>
114 #include <kern/processor.h>
115 #include <kern/sched_prim.h> /* for thread_wakeup */
116 #include <kern/ipc_tt.h>
117 #include <kern/host.h>
118 #include <kern/clock.h>
119 #include <kern/timer.h>
120 #include <kern/assert.h>
121 #include <kern/sync_lock.h>
122 #include <kern/affinity.h>
123 #include <kern/exc_resource.h>
124 #include <kern/machine.h>
125 #include <corpses/task_corpse.h>
126 #if CONFIG_TELEMETRY
127 #include <kern/telemetry.h>
128 #endif
129
130 #include <vm/pmap.h>
131 #include <vm/vm_map.h>
132 #include <vm/vm_kern.h> /* for kernel_map, ipc_kernel_map */
133 #include <vm/vm_pageout.h>
134 #include <vm/vm_protos.h>
135 #include <vm/vm_purgeable_internal.h>
136
137 #include <sys/resource.h>
138 #include <sys/signalvar.h> /* for coredump */
139
140 /*
141 * Exported interfaces
142 */
143
144 #include <mach/task_server.h>
145 #include <mach/mach_host_server.h>
146 #include <mach/host_security_server.h>
147 #include <mach/mach_port_server.h>
148
149 #include <vm/vm_shared_region.h>
150
151 #include <libkern/OSDebug.h>
152 #include <libkern/OSAtomic.h>
153
154 #if CONFIG_ATM
155 #include <atm/atm_internal.h>
156 #endif
157
158 #include <kern/sfi.h>
159
160 #if KPERF
161 extern int kpc_force_all_ctrs(task_t, int);
162 #endif
163
164 uint32_t qos_override_mode;
165
166 task_t kernel_task;
167 zone_t task_zone;
168 lck_attr_t task_lck_attr;
169 lck_grp_t task_lck_grp;
170 lck_grp_attr_t task_lck_grp_attr;
171
172 /* Flag set by core audio when audio is playing. Used to stifle EXC_RESOURCE generation when active. */
173 int audio_active = 0;
174
175 zinfo_usage_store_t tasks_tkm_private;
176 zinfo_usage_store_t tasks_tkm_shared;
177
178 /* A container to accumulate statistics for expired tasks */
179 expired_task_statistics_t dead_task_statistics;
180 lck_spin_t dead_task_statistics_lock;
181
182 ledger_template_t task_ledger_template = NULL;
183
184 struct _task_ledger_indices task_ledgers __attribute__((used)) =
185 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
186 { 0 /* initialized at runtime */},
187 #ifdef CONFIG_BANK
188 -1, -1,
189 #endif
190 };
191
192 /* System sleep state */
193 boolean_t tasks_suspend_state;
194
195
196 void init_task_ledgers(void);
197 void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1);
198 void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1);
199 void __attribute__((noinline)) THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE(void);
200 void __attribute__((noinline)) PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb);
201
202 kern_return_t task_suspend_internal(task_t);
203 kern_return_t task_resume_internal(task_t);
204 static kern_return_t task_start_halt_locked(task_t task, boolean_t should_mark_corpse);
205
206
207 void proc_init_cpumon_params(void);
208 extern kern_return_t exception_deliver(thread_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t, struct exception_action *, lck_mtx_t *);
209
210 // Warn tasks when they hit 80% of their memory limit.
211 #define PHYS_FOOTPRINT_WARNING_LEVEL 80
212
213 #define TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT 150 /* wakeups per second */
214 #define TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL 300 /* in seconds. */
215
216 /*
217 * Level (in terms of percentage of the limit) at which the wakeups monitor triggers telemetry.
218 *
219  * (i.e., when the task's wakeups rate exceeds 70% of the limit, start taking user
220 * stacktraces, aka micro-stackshots)
221 */
222 #define TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER 70
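/*
 * Editorial note, not part of the original source: taken together, the
 * defaults above mean that with a limit of TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT
 * (150 wakeups/sec) observed over TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL
 * (300 seconds), micro-stackshot telemetry starts once a task sustains about
 * 70% of the limit, i.e. roughly 105 wakeups/sec, while EXC_RESOURCE is only
 * considered once the full 150 wakeups/sec average is exceeded.
 */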
223
224 int task_wakeups_monitor_interval; /* In seconds. Time period over which wakeups rate is observed */
225 int task_wakeups_monitor_rate; /* In hz. Maximum allowable wakeups per task before EXC_RESOURCE is sent */
226
227 int task_wakeups_monitor_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
228
229 int disable_exc_resource; /* Global override to suppress EXC_RESOURCE for resource monitor violations. */
230
231 ledger_amount_t max_task_footprint = 0; /* Per-task limit on physical memory consumption in bytes */
232 int max_task_footprint_mb = 0; /* Per-task limit on physical memory consumption in megabytes */
233
234 #if MACH_ASSERT
235 int pmap_ledgers_panic = 1;
236 #endif /* MACH_ASSERT */
237
238 int task_max = CONFIG_TASK_MAX; /* Max number of tasks */
239
240 int hwm_user_cores = 0; /* high watermark violations generate user core files */
241
242 #ifdef MACH_BSD
243 extern void proc_getexecutableuuid(void *, unsigned char *, unsigned long);
244 extern int proc_pid(struct proc *p);
245 extern int proc_selfpid(void);
246 extern char *proc_name_address(struct proc *p);
247 extern uint64_t get_dispatchqueue_offset_from_proc(void *);
248 #if CONFIG_JETSAM
249 extern void proc_memstat_terminated(struct proc* p, boolean_t set);
250 extern void memorystatus_on_ledger_footprint_exceeded(int warning, const int max_footprint_mb);
251 #endif
252 #endif
253 #if MACH_ASSERT
254 extern int pmap_ledgers_panic;
255 #endif /* MACH_ASSERT */
256
257 /* Forwards */
258
259 void task_hold_locked(
260 task_t task);
261 void task_wait_locked(
262 task_t task,
263 boolean_t until_not_runnable);
264 void task_release_locked(
265 task_t task);
266 void task_free(
267 task_t task );
268 void task_synchronizer_destroy_all(
269 task_t task);
270
271 int check_for_tasksuspend(
272 task_t task);
273
274 void
275 task_backing_store_privileged(
276 task_t task)
277 {
278 task_lock(task);
279 task->priv_flags |= VM_BACKING_STORE_PRIV;
280 task_unlock(task);
281 return;
282 }
283
284
285 void
286 task_set_64bit(
287 task_t task,
288 boolean_t is64bit)
289 {
290 #if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
291 thread_t thread;
292 #endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */
293
294 task_lock(task);
295
296 if (is64bit) {
297 if (task_has_64BitAddr(task))
298 goto out;
299 task_set_64BitAddr(task);
300 } else {
301 if ( !task_has_64BitAddr(task))
302 goto out;
303 task_clear_64BitAddr(task);
304 }
305 /* FIXME: On x86, the thread save state flavor can diverge from the
306 * task's 64-bit feature flag due to the 32-bit/64-bit register save
307 * state dichotomy. Since we can be pre-empted in this interval,
308 * certain routines may observe the thread as being in an inconsistent
309 * state with respect to its task's 64-bitness.
310 */
311
312 #if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
313 queue_iterate(&task->threads, thread, thread_t, task_threads) {
314 thread_mtx_lock(thread);
315 machine_thread_switch_addrmode(thread);
316 thread_mtx_unlock(thread);
317 }
318 #endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */
319
320 out:
321 task_unlock(task);
322 }
323
324
325 void
326 task_set_dyld_info(task_t task, mach_vm_address_t addr, mach_vm_size_t size)
327 {
328 task_lock(task);
329 task->all_image_info_addr = addr;
330 task->all_image_info_size = size;
331 task_unlock(task);
332 }
333
334 void
335 task_atm_reset(__unused task_t task) {
336
337 #if CONFIG_ATM
338 if (task->atm_context != NULL) {
339 atm_task_descriptor_destroy(task->atm_context);
340 task->atm_context = NULL;
341 }
342 #endif
343
344 }
345
346 #if TASK_REFERENCE_LEAK_DEBUG
347 #include <kern/btlog.h>
348
349 decl_simple_lock_data(static,task_ref_lock);
350 static btlog_t *task_ref_btlog;
351 #define TASK_REF_OP_INCR 0x1
352 #define TASK_REF_OP_DECR 0x2
353
354 #define TASK_REF_BTDEPTH 7
355
356 static void
357 task_ref_lock_lock(void *context)
358 {
359 simple_lock((simple_lock_t)context);
360 }
361 static void
362 task_ref_lock_unlock(void *context)
363 {
364 simple_unlock((simple_lock_t)context);
365 }
366
367 void
368 task_reference_internal(task_t task)
369 {
370 void * bt[TASK_REF_BTDEPTH];
371 int numsaved = 0;
372
373 numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
374
375 (void)hw_atomic_add(&(task)->ref_count, 1);
376 btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_INCR,
377 bt, numsaved);
378 }
379
380 uint32_t
381 task_deallocate_internal(task_t task)
382 {
383 void * bt[TASK_REF_BTDEPTH];
384 int numsaved = 0;
385
386 numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
387
388 btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_DECR,
389 bt, numsaved);
390 return hw_atomic_sub(&(task)->ref_count, 1);
391 }
392
393 #endif /* TASK_REFERENCE_LEAK_DEBUG */
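/*
 * Editorial note, not part of the original source: the backtraces recorded by
 * task_reference_internal()/task_deallocate_internal() above are purged in
 * task_deallocate() via btlog_remove_entries_for_element(task_ref_btlog, task),
 * so entries that remain in the btlog correspond to tasks whose references
 * were never fully dropped (i.e. suspected reference leaks).
 */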
394
395 void
396 task_init(void)
397 {
398
399 lck_grp_attr_setdefault(&task_lck_grp_attr);
400 lck_grp_init(&task_lck_grp, "task", &task_lck_grp_attr);
401 lck_attr_setdefault(&task_lck_attr);
402 lck_mtx_init(&tasks_threads_lock, &task_lck_grp, &task_lck_attr);
403
404 task_zone = zinit(
405 sizeof(struct task),
406 task_max * sizeof(struct task),
407 TASK_CHUNK * sizeof(struct task),
408 "tasks");
409
410 zone_change(task_zone, Z_NOENCRYPT, TRUE);
411
412 /*
413 * Configure per-task memory limit.
414 * The boot-arg is interpreted as Megabytes,
415 * and takes precedence over the device tree.
416 * Setting the boot-arg to 0 disables task limits.
417 */
418 if (!PE_parse_boot_argn("max_task_pmem", &max_task_footprint_mb,
419 sizeof (max_task_footprint_mb))) {
420 /*
421 * No limit was found in boot-args, so go look in the device tree.
422 */
423 if (!PE_get_default("kern.max_task_pmem", &max_task_footprint_mb,
424 sizeof(max_task_footprint_mb))) {
425 /*
426 * No limit was found in device tree.
427 */
428 max_task_footprint_mb = 0;
429 }
430 }
431
432 if (max_task_footprint_mb != 0) {
433 #if CONFIG_JETSAM
434 if (max_task_footprint_mb < 50) {
435 printf("Warning: max_task_pmem %d below minimum.\n",
436 max_task_footprint_mb);
437 max_task_footprint_mb = 50;
438 }
439 printf("Limiting task physical memory footprint to %d MB\n",
440 max_task_footprint_mb);
441
442 max_task_footprint = (ledger_amount_t)max_task_footprint_mb * 1024 * 1024; // Convert MB to bytes
443 #else
444 printf("Warning: max_task_footprint specified, but jetsam not configured; ignoring.\n");
445 #endif
446 }
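/*
 * Editorial example, not part of the original source: booting with
 * "max_task_pmem=500" would apply a 500 MB per-task physical footprint limit
 * on a CONFIG_JETSAM kernel (values below 50 are clamped up to 50 MB), while
 * "max_task_pmem=0" leaves per-task limits disabled. The value 500 here is
 * purely illustrative.
 */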
447
448 #if MACH_ASSERT
449 PE_parse_boot_argn("pmap_ledgers_panic", &pmap_ledgers_panic,
450 sizeof (pmap_ledgers_panic));
451 #endif /* MACH_ASSERT */
452
453 if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores,
454 sizeof (hwm_user_cores))) {
455 hwm_user_cores = 0;
456 }
457
458 if (PE_parse_boot_argn("qos_override_mode", &qos_override_mode, sizeof(qos_override_mode))) {
459 printf("QOS override mode: 0x%08x\n", qos_override_mode);
460 } else {
461 qos_override_mode = QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE;
462 }
463
464 proc_init_cpumon_params();
465
466 if (!PE_parse_boot_argn("task_wakeups_monitor_rate", &task_wakeups_monitor_rate, sizeof (task_wakeups_monitor_rate))) {
467 task_wakeups_monitor_rate = TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT;
468 }
469
470 if (!PE_parse_boot_argn("task_wakeups_monitor_interval", &task_wakeups_monitor_interval, sizeof (task_wakeups_monitor_interval))) {
471 task_wakeups_monitor_interval = TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL;
472 }
473
474 if (!PE_parse_boot_argn("task_wakeups_monitor_ustackshots_trigger_pct", &task_wakeups_monitor_ustackshots_trigger_pct,
475 sizeof (task_wakeups_monitor_ustackshots_trigger_pct))) {
476 task_wakeups_monitor_ustackshots_trigger_pct = TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER;
477 }
478
479 if (!PE_parse_boot_argn("disable_exc_resource", &disable_exc_resource,
480 sizeof (disable_exc_resource))) {
481 disable_exc_resource = 0;
482 }
483
484 /*
485 * If we have coalitions, coalition_init() will call init_task_ledgers() as it
486 * sets up the ledgers for the default coalition. If we don't have coalitions,
487 * then we have to call it now.
488 */
489 #if CONFIG_COALITIONS
490 assert(task_ledger_template);
491 #else /* CONFIG_COALITIONS */
492 init_task_ledgers();
493 #endif /* CONFIG_COALITIONS */
494
495 #if TASK_REFERENCE_LEAK_DEBUG
496 simple_lock_init(&task_ref_lock, 0);
497 task_ref_btlog = btlog_create(100000,
498 TASK_REF_BTDEPTH,
499 task_ref_lock_lock,
500 task_ref_lock_unlock,
501 &task_ref_lock);
502 assert(task_ref_btlog);
503 #endif
504
505 /*
506 * Create the kernel task as the first task.
507 */
508 #ifdef __LP64__
509 if (task_create_internal(TASK_NULL, NULL, FALSE, TRUE, &kernel_task) != KERN_SUCCESS)
510 #else
511 if (task_create_internal(TASK_NULL, NULL, FALSE, FALSE, &kernel_task) != KERN_SUCCESS)
512 #endif
513 panic("task_init\n");
514
515 vm_map_deallocate(kernel_task->map);
516 kernel_task->map = kernel_map;
517 lck_spin_init(&dead_task_statistics_lock, &task_lck_grp, &task_lck_attr);
518
519 }
520
521 /*
522 * Create a task running in the kernel address space. It may
523 * have its own map of size mem_size and may have ipc privileges.
524 */
525 kern_return_t
526 kernel_task_create(
527 __unused task_t parent_task,
528 __unused vm_offset_t map_base,
529 __unused vm_size_t map_size,
530 __unused task_t *child_task)
531 {
532 return (KERN_INVALID_ARGUMENT);
533 }
534
535 kern_return_t
536 task_create(
537 task_t parent_task,
538 __unused ledger_port_array_t ledger_ports,
539 __unused mach_msg_type_number_t num_ledger_ports,
540 __unused boolean_t inherit_memory,
541 __unused task_t *child_task) /* OUT */
542 {
543 if (parent_task == TASK_NULL)
544 return(KERN_INVALID_ARGUMENT);
545
546 /*
547 * No longer supported: too many calls assume that a task has a valid
548 * process attached.
549 */
550 return(KERN_FAILURE);
551 }
552
553 kern_return_t
554 host_security_create_task_token(
555 host_security_t host_security,
556 task_t parent_task,
557 __unused security_token_t sec_token,
558 __unused audit_token_t audit_token,
559 __unused host_priv_t host_priv,
560 __unused ledger_port_array_t ledger_ports,
561 __unused mach_msg_type_number_t num_ledger_ports,
562 __unused boolean_t inherit_memory,
563 __unused task_t *child_task) /* OUT */
564 {
565 if (parent_task == TASK_NULL)
566 return(KERN_INVALID_ARGUMENT);
567
568 if (host_security == HOST_NULL)
569 return(KERN_INVALID_SECURITY);
570
571 /*
572 * No longer supported.
573 */
574 return(KERN_FAILURE);
575 }
576
577 /*
578 * Task ledgers
579 * ------------
580 *
581 * phys_footprint
582 * Physical footprint: This is the sum of:
583 * + (internal - alternate_accounting)
584 * + (internal_compressed - alternate_accounting_compressed)
585 * + iokit_mapped
586 * + purgeable_nonvolatile
587 * + purgeable_nonvolatile_compressed
588 *
589 * internal
590 * The task's anonymous memory, which on iOS is always resident.
591 *
592 * internal_compressed
593 * Amount of this task's internal memory which is held by the compressor.
594 * Such memory is no longer actually resident for the task [i.e., resident in its pmap],
595 * and could be either decompressed back into memory, or paged out to storage, depending
596 * on our implementation.
597 *
598 * iokit_mapped
599  *	IOKit mappings: The total size of all IOKit mappings in this task [regardless of
600  *	clean/dirty or internal/external state].
601 *
602 * alternate_accounting
603 * The number of internal dirty pages which are part of IOKit mappings. By definition, these pages
604 * are counted in both internal *and* iokit_mapped, so we must subtract them from the total to avoid
605 * double counting.
606 */
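/*
 * Editorial sketch, not part of the original source: given the accounting
 * described above, code holding a task reference could sample the footprint
 * entry the same way task_deallocate() samples its wakeup entries below:
 *
 *	ledger_amount_t credit, debit;
 *	ledger_get_entries(task->ledger, task_ledgers.phys_footprint,
 *	    &credit, &debit);
 *	// credit accumulates bytes charged, debit bytes released; their
 *	// difference is the task's current physical footprint in bytes.
 */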
607 void
608 init_task_ledgers(void)
609 {
610 ledger_template_t t;
611
612 assert(task_ledger_template == NULL);
613 assert(kernel_task == TASK_NULL);
614
615 if ((t = ledger_template_create("Per-task ledger")) == NULL)
616 panic("couldn't create task ledger template");
617
618 task_ledgers.cpu_time = ledger_entry_add(t, "cpu_time", "sched", "ns");
619 task_ledgers.tkm_private = ledger_entry_add(t, "tkm_private",
620 "physmem", "bytes");
621 task_ledgers.tkm_shared = ledger_entry_add(t, "tkm_shared", "physmem",
622 "bytes");
623 task_ledgers.phys_mem = ledger_entry_add(t, "phys_mem", "physmem",
624 "bytes");
625 task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem",
626 "bytes");
627 task_ledgers.internal = ledger_entry_add(t, "internal", "physmem",
628 "bytes");
629 task_ledgers.iokit_mapped = ledger_entry_add(t, "iokit_mapped", "mappings",
630 "bytes");
631 task_ledgers.alternate_accounting = ledger_entry_add(t, "alternate_accounting", "physmem",
632 "bytes");
633 task_ledgers.alternate_accounting_compressed = ledger_entry_add(t, "alternate_accounting_compressed", "physmem",
634 "bytes");
635 task_ledgers.phys_footprint = ledger_entry_add(t, "phys_footprint", "physmem",
636 "bytes");
637 task_ledgers.internal_compressed = ledger_entry_add(t, "internal_compressed", "physmem",
638 "bytes");
639 task_ledgers.purgeable_volatile = ledger_entry_add(t, "purgeable_volatile", "physmem", "bytes");
640 task_ledgers.purgeable_nonvolatile = ledger_entry_add(t, "purgeable_nonvolatile", "physmem", "bytes");
641 task_ledgers.purgeable_volatile_compressed = ledger_entry_add(t, "purgeable_volatile_compress", "physmem", "bytes");
642 task_ledgers.purgeable_nonvolatile_compressed = ledger_entry_add(t, "purgeable_nonvolatile_compress", "physmem", "bytes");
643 task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power",
644 "count");
645 task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power",
646 "count");
647
648 #if CONFIG_SCHED_SFI
649 sfi_class_id_t class_id, ledger_alias;
650 for (class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
651 task_ledgers.sfi_wait_times[class_id] = -1;
652 }
653
654 /* don't account for UNSPECIFIED */
655 for (class_id = SFI_CLASS_UNSPECIFIED + 1; class_id < MAX_SFI_CLASS_ID; class_id++) {
656 ledger_alias = sfi_get_ledger_alias_for_class(class_id);
657 if (ledger_alias != SFI_CLASS_UNSPECIFIED) {
658 /* Check to see if alias has been registered yet */
659 if (task_ledgers.sfi_wait_times[ledger_alias] != -1) {
660 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias];
661 } else {
662 /* Otherwise, initialize it first */
663 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias] = sfi_ledger_entry_add(t, ledger_alias);
664 }
665 } else {
666 task_ledgers.sfi_wait_times[class_id] = sfi_ledger_entry_add(t, class_id);
667 }
668
669 if (task_ledgers.sfi_wait_times[class_id] < 0) {
670 panic("couldn't create entries for task ledger template for SFI class 0x%x", class_id);
671 }
672 }
673
674 assert(task_ledgers.sfi_wait_times[MAX_SFI_CLASS_ID -1] != -1);
675 #endif /* CONFIG_SCHED_SFI */
676
677 #ifdef CONFIG_BANK
678 task_ledgers.cpu_time_billed_to_me = ledger_entry_add(t, "cpu_time_billed_to_me", "sched", "ns");
679 task_ledgers.cpu_time_billed_to_others = ledger_entry_add(t, "cpu_time_billed_to_others", "sched", "ns");
680 #endif
681 if ((task_ledgers.cpu_time < 0) ||
682 (task_ledgers.tkm_private < 0) ||
683 (task_ledgers.tkm_shared < 0) ||
684 (task_ledgers.phys_mem < 0) ||
685 (task_ledgers.wired_mem < 0) ||
686 (task_ledgers.internal < 0) ||
687 (task_ledgers.iokit_mapped < 0) ||
688 (task_ledgers.alternate_accounting < 0) ||
689 (task_ledgers.alternate_accounting_compressed < 0) ||
690 (task_ledgers.phys_footprint < 0) ||
691 (task_ledgers.internal_compressed < 0) ||
692 (task_ledgers.purgeable_volatile < 0) ||
693 (task_ledgers.purgeable_nonvolatile < 0) ||
694 (task_ledgers.purgeable_volatile_compressed < 0) ||
695 (task_ledgers.purgeable_nonvolatile_compressed < 0) ||
696 (task_ledgers.platform_idle_wakeups < 0) ||
697 (task_ledgers.interrupt_wakeups < 0)
698 #ifdef CONFIG_BANK
699 || (task_ledgers.cpu_time_billed_to_me < 0) || (task_ledgers.cpu_time_billed_to_others < 0)
700 #endif
701 ) {
702 panic("couldn't create entries for task ledger template");
703 }
704
705 ledger_track_maximum(t, task_ledgers.phys_footprint, 60);
706 #if MACH_ASSERT
707 if (pmap_ledgers_panic) {
708 ledger_panic_on_negative(t, task_ledgers.phys_footprint);
709 ledger_panic_on_negative(t, task_ledgers.internal);
710 ledger_panic_on_negative(t, task_ledgers.internal_compressed);
711 ledger_panic_on_negative(t, task_ledgers.iokit_mapped);
712 ledger_panic_on_negative(t, task_ledgers.alternate_accounting);
713 ledger_panic_on_negative(t, task_ledgers.alternate_accounting_compressed);
714 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile);
715 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile);
716 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile_compressed);
717 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile_compressed);
718 }
719 #endif /* MACH_ASSERT */
720
721 #if CONFIG_JETSAM
722 ledger_set_callback(t, task_ledgers.phys_footprint, task_footprint_exceeded, NULL, NULL);
723 #endif
724
725 ledger_set_callback(t, task_ledgers.interrupt_wakeups,
726 task_wakeups_rate_exceeded, NULL, NULL);
727
728 task_ledger_template = t;
729 }
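/*
 * Editorial note, not part of the original source: the template assembled
 * above is instantiated once per task in task_create_internal() via
 * ledger_instantiate(task_ledger_template, LEDGER_CREATE_ACTIVE_ENTRIES),
 * so every entry registered here is present in every task's ledger, and the
 * footprint/wakeups callbacks installed here fire on a per-task basis.
 */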
730
731 kern_return_t
732 task_create_internal(
733 task_t parent_task,
734 coalition_t *parent_coalitions __unused,
735 boolean_t inherit_memory,
736 boolean_t is_64bit,
737 task_t *child_task) /* OUT */
738 {
739 task_t new_task;
740 vm_shared_region_t shared_region;
741 ledger_t ledger = NULL;
742
743 new_task = (task_t) zalloc(task_zone);
744
745 if (new_task == TASK_NULL)
746 return(KERN_RESOURCE_SHORTAGE);
747
748 /* one ref for just being alive; one for our caller */
749 new_task->ref_count = 2;
750
751 /* allocate with active entries */
752 assert(task_ledger_template != NULL);
753 if ((ledger = ledger_instantiate(task_ledger_template,
754 LEDGER_CREATE_ACTIVE_ENTRIES)) == NULL) {
755 zfree(task_zone, new_task);
756 return(KERN_RESOURCE_SHORTAGE);
757 }
758
759 new_task->ledger = ledger;
760
761 #if defined(CONFIG_SCHED_MULTIQ)
762 new_task->sched_group = sched_group_create();
763 #endif
764
765 /* if inherit_memory is true, parent_task MUST not be NULL */
766 if (inherit_memory)
767 new_task->map = vm_map_fork(ledger, parent_task->map);
768 else
769 new_task->map = vm_map_create(pmap_create(ledger, 0, is_64bit),
770 (vm_map_offset_t)(VM_MIN_ADDRESS),
771 (vm_map_offset_t)(VM_MAX_ADDRESS), TRUE);
772
773 /* Inherit memlock limit from parent */
774 if (parent_task)
775 vm_map_set_user_wire_limit(new_task->map, (vm_size_t)parent_task->map->user_wire_limit);
776
777 lck_mtx_init(&new_task->lock, &task_lck_grp, &task_lck_attr);
778 queue_init(&new_task->threads);
779 new_task->suspend_count = 0;
780 new_task->thread_count = 0;
781 new_task->active_thread_count = 0;
782 new_task->user_stop_count = 0;
783 new_task->legacy_stop_count = 0;
784 new_task->active = TRUE;
785 new_task->halting = FALSE;
786 new_task->user_data = NULL;
787 new_task->faults = 0;
788 new_task->cow_faults = 0;
789 new_task->pageins = 0;
790 new_task->messages_sent = 0;
791 new_task->messages_received = 0;
792 new_task->syscalls_mach = 0;
793 new_task->priv_flags = 0;
794 new_task->syscalls_unix=0;
795 new_task->c_switch = new_task->p_switch = new_task->ps_switch = 0;
796 new_task->t_flags = 0;
797 new_task->importance = 0;
798
799 #if CONFIG_ATM
800 new_task->atm_context = NULL;
801 #endif
802 #if CONFIG_BANK
803 new_task->bank_context = NULL;
804 #endif
805
806 zinfo_task_init(new_task);
807
808 #ifdef MACH_BSD
809 new_task->bsd_info = NULL;
810 new_task->corpse_info = NULL;
811 #endif /* MACH_BSD */
812
813 #if CONFIG_JETSAM
814 if (max_task_footprint != 0) {
815 ledger_set_limit(ledger, task_ledgers.phys_footprint, max_task_footprint, PHYS_FOOTPRINT_WARNING_LEVEL);
816 }
817 #endif
818
819 if (task_wakeups_monitor_rate != 0) {
820 uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
821 int32_t rate; // Ignored because of WAKEMON_SET_DEFAULTS
822 task_wakeups_monitor_ctl(new_task, &flags, &rate);
823 }
824
825 #if defined(__i386__) || defined(__x86_64__)
826 new_task->i386_ldt = 0;
827 #endif
828
829 new_task->task_debug = NULL;
830
831 queue_init(&new_task->semaphore_list);
832 new_task->semaphores_owned = 0;
833
834 ipc_task_init(new_task, parent_task);
835
836 new_task->total_user_time = 0;
837 new_task->total_system_time = 0;
838
839 new_task->vtimers = 0;
840
841 new_task->shared_region = NULL;
842
843 new_task->affinity_space = NULL;
844
845 new_task->pidsuspended = FALSE;
846 new_task->frozen = FALSE;
847 new_task->changing_freeze_state = FALSE;
848 new_task->rusage_cpu_flags = 0;
849 new_task->rusage_cpu_percentage = 0;
850 new_task->rusage_cpu_interval = 0;
851 new_task->rusage_cpu_deadline = 0;
852 new_task->rusage_cpu_callt = NULL;
853 #if MACH_ASSERT
854 new_task->suspends_outstanding = 0;
855 #endif
856
857 #if HYPERVISOR
858 new_task->hv_task_target = NULL;
859 #endif /* HYPERVISOR */
860
861
862 new_task->low_mem_notified_warn = 0;
863 new_task->low_mem_notified_critical = 0;
864 new_task->low_mem_privileged_listener = 0;
865 new_task->purged_memory_warn = 0;
866 new_task->purged_memory_critical = 0;
867 new_task->mem_notify_reserved = 0;
868 #if IMPORTANCE_INHERITANCE
869 new_task->task_imp_base = NULL;
870 #endif /* IMPORTANCE_INHERITANCE */
871
872 #if defined(__x86_64__)
873 new_task->uexc_range_start = new_task->uexc_range_size = new_task->uexc_handler = 0;
874 #endif
875
876 new_task->requested_policy = default_task_requested_policy;
877 new_task->effective_policy = default_task_effective_policy;
878 new_task->pended_policy = default_task_pended_policy;
879
880 if (parent_task != TASK_NULL) {
881 new_task->sec_token = parent_task->sec_token;
882 new_task->audit_token = parent_task->audit_token;
883
884 /* inherit the parent's shared region */
885 shared_region = vm_shared_region_get(parent_task);
886 vm_shared_region_set(new_task, shared_region);
887
888 if(task_has_64BitAddr(parent_task))
889 task_set_64BitAddr(new_task);
890 new_task->all_image_info_addr = parent_task->all_image_info_addr;
891 new_task->all_image_info_size = parent_task->all_image_info_size;
892
893 #if defined(__i386__) || defined(__x86_64__)
894 if (inherit_memory && parent_task->i386_ldt)
895 new_task->i386_ldt = user_ldt_copy(parent_task->i386_ldt);
896 #endif
897 if (inherit_memory && parent_task->affinity_space)
898 task_affinity_create(parent_task, new_task);
899
900 new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task);
901
902 #if IMPORTANCE_INHERITANCE
903 ipc_importance_task_t new_task_imp = IIT_NULL;
904
905 if (task_is_marked_importance_donor(parent_task)) {
906 new_task_imp = ipc_importance_for_task(new_task, FALSE);
907 assert(IIT_NULL != new_task_imp);
908 ipc_importance_task_mark_donor(new_task_imp, TRUE);
909 }
910 /* Embedded doesn't want this to inherit */
911 if (task_is_marked_importance_receiver(parent_task)) {
912 if (IIT_NULL == new_task_imp)
913 new_task_imp = ipc_importance_for_task(new_task, FALSE);
914 assert(IIT_NULL != new_task_imp);
915 ipc_importance_task_mark_receiver(new_task_imp, TRUE);
916 }
917 if (task_is_marked_importance_denap_receiver(parent_task)) {
918 if (IIT_NULL == new_task_imp)
919 new_task_imp = ipc_importance_for_task(new_task, FALSE);
920 assert(IIT_NULL != new_task_imp);
921 ipc_importance_task_mark_denap_receiver(new_task_imp, TRUE);
922 }
923
924 if (IIT_NULL != new_task_imp) {
925 assert(new_task->task_imp_base == new_task_imp);
926 ipc_importance_task_release(new_task_imp);
927 }
928 #endif /* IMPORTANCE_INHERITANCE */
929
930 new_task->priority = BASEPRI_DEFAULT;
931 new_task->max_priority = MAXPRI_USER;
932
933 new_task->requested_policy.t_apptype = parent_task->requested_policy.t_apptype;
934
935 new_task->requested_policy.int_darwinbg = parent_task->requested_policy.int_darwinbg;
936 new_task->requested_policy.ext_darwinbg = parent_task->requested_policy.ext_darwinbg;
937 new_task->requested_policy.int_iotier = parent_task->requested_policy.int_iotier;
938 new_task->requested_policy.ext_iotier = parent_task->requested_policy.ext_iotier;
939 new_task->requested_policy.int_iopassive = parent_task->requested_policy.int_iopassive;
940 new_task->requested_policy.ext_iopassive = parent_task->requested_policy.ext_iopassive;
941 new_task->requested_policy.bg_iotier = parent_task->requested_policy.bg_iotier;
942 new_task->requested_policy.terminated = parent_task->requested_policy.terminated;
943 new_task->requested_policy.t_qos_clamp = parent_task->requested_policy.t_qos_clamp;
944
945 task_policy_create(new_task, parent_task->requested_policy.t_boosted);
946 } else {
947 new_task->sec_token = KERNEL_SECURITY_TOKEN;
948 new_task->audit_token = KERNEL_AUDIT_TOKEN;
949 #ifdef __LP64__
950 if(is_64bit)
951 task_set_64BitAddr(new_task);
952 #endif
953 new_task->all_image_info_addr = (mach_vm_address_t)0;
954 new_task->all_image_info_size = (mach_vm_size_t)0;
955
956 new_task->pset_hint = PROCESSOR_SET_NULL;
957
958 if (kernel_task == TASK_NULL) {
959 new_task->priority = BASEPRI_KERNEL;
960 new_task->max_priority = MAXPRI_KERNEL;
961 } else {
962 new_task->priority = BASEPRI_DEFAULT;
963 new_task->max_priority = MAXPRI_USER;
964 }
965 }
966
967 bzero(new_task->coalition, sizeof(new_task->coalition));
968 for (int i = 0; i < COALITION_NUM_TYPES; i++)
969 queue_chain_init(new_task->task_coalition[i]);
970
971 /* Allocate I/O Statistics */
972 new_task->task_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info));
973 assert(new_task->task_io_stats != NULL);
974 bzero(new_task->task_io_stats, sizeof(struct io_stat_info));
975 new_task->task_immediate_writes = 0;
976 new_task->task_deferred_writes = 0;
977 new_task->task_invalidated_writes = 0;
978 new_task->task_metadata_writes = 0;
979
980 bzero(&(new_task->cpu_time_qos_stats), sizeof(struct _cpu_time_qos_stats));
981
982 bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics));
983 new_task->task_timer_wakeups_bin_1 = new_task->task_timer_wakeups_bin_2 = 0;
984 new_task->task_gpu_ns = 0;
985
986 #if CONFIG_COALITIONS
987
988 /* TODO: there is no graceful failure path here... */
989 if (parent_coalitions && parent_coalitions[COALITION_TYPE_RESOURCE]) {
990 coalitions_adopt_task(parent_coalitions, new_task);
991 } else if (parent_task && parent_task->coalition[COALITION_TYPE_RESOURCE]) {
992 /*
993 * all tasks at least have a resource coalition, so
994 * if the parent has one then inherit all coalitions
995 * the parent is a part of
996 */
997 coalitions_adopt_task(parent_task->coalition, new_task);
998 } else {
999 /* TODO: assert that new_task will be PID 1 (launchd) */
1000 coalitions_adopt_init_task(new_task);
1001 }
1002
1003 if (new_task->coalition[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
1004 panic("created task is not a member of a resource coalition");
1005 }
1006 #endif /* CONFIG_COALITIONS */
1007
1008 new_task->dispatchqueue_offset = 0;
1009 if (parent_task != NULL) {
1010 new_task->dispatchqueue_offset = parent_task->dispatchqueue_offset;
1011 }
1012
1013 if (vm_backing_store_low && parent_task != NULL)
1014 new_task->priv_flags |= (parent_task->priv_flags&VM_BACKING_STORE_PRIV);
1015
1016 new_task->task_volatile_objects = 0;
1017 new_task->task_nonvolatile_objects = 0;
1018 new_task->task_purgeable_disowning = FALSE;
1019 new_task->task_purgeable_disowned = FALSE;
1020
1021 ipc_task_enable(new_task);
1022
1023 lck_mtx_lock(&tasks_threads_lock);
1024 queue_enter(&tasks, new_task, task_t, tasks);
1025 tasks_count++;
1026 if (tasks_suspend_state) {
1027 task_suspend_internal(new_task);
1028 }
1029 lck_mtx_unlock(&tasks_threads_lock);
1030
1031 *child_task = new_task;
1032 return(KERN_SUCCESS);
1033 }
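/*
 * Editorial note, not part of the original source: the two references taken
 * at creation ("one for just being alive; one for our caller") are paired
 * with the task_deallocate() performed at the end of task_terminate_internal()
 * and with the eventual task_deallocate() by whoever received *child_task.
 */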
1034
1035 int task_dropped_imp_count = 0;
1036
1037 /*
1038 * task_deallocate:
1039 *
1040 * Drop a reference on a task.
1041 */
1042 void
1043 task_deallocate(
1044 task_t task)
1045 {
1046 ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups;
1047 uint32_t refs;
1048
1049 if (task == TASK_NULL)
1050 return;
1051
1052 refs = task_deallocate_internal(task);
1053
1054 #if IMPORTANCE_INHERITANCE
1055 if (refs > 1)
1056 return;
1057
1058 if (refs == 1) {
1059 /*
1060 * If last ref potentially comes from the task's importance,
1061 * disconnect it. But more task refs may be added before
1062 * that completes, so wait for the reference to go to zero
1063 * naturually (it may happen on a recursive task_deallocate()
1064 * from the ipc_importance_disconnect_task() call).
1065 */
1066 if (IIT_NULL != task->task_imp_base)
1067 ipc_importance_disconnect_task(task);
1068 return;
1069 }
1070 #else
1071 if (refs > 0)
1072 return;
1073 #endif /* IMPORTANCE_INHERITANCE */
1074
1075 lck_mtx_lock(&tasks_threads_lock);
1076 queue_remove(&terminated_tasks, task, task_t, tasks);
1077 terminated_tasks_count--;
1078 lck_mtx_unlock(&tasks_threads_lock);
1079
1080 /*
1081 * remove the reference on atm descriptor
1082 */
1083 task_atm_reset(task);
1084
1085 #if CONFIG_BANK
1086 /*
1087 * remove the reference on bank context
1088 */
1089 if (task->bank_context != NULL) {
1090 bank_task_destroy(task->bank_context);
1091 task->bank_context = NULL;
1092 }
1093 #endif
1094
1095 if (task->task_io_stats)
1096 kfree(task->task_io_stats, sizeof(struct io_stat_info));
1097
1098 /*
1099 * Give the machine dependent code a chance
1100 * to perform cleanup before ripping apart
1101 * the task.
1102 */
1103 machine_task_terminate(task);
1104
1105 ipc_task_terminate(task);
1106
1107 if (task->affinity_space)
1108 task_affinity_deallocate(task);
1109
1110 #if MACH_ASSERT
1111 if (task->ledger != NULL &&
1112 task->map != NULL &&
1113 task->map->pmap != NULL &&
1114 task->map->pmap->ledger != NULL) {
1115 assert(task->ledger == task->map->pmap->ledger);
1116 }
1117 #endif /* MACH_ASSERT */
1118
1119 vm_purgeable_disown(task);
1120 assert(task->task_purgeable_disowned);
1121 if (task->task_volatile_objects != 0 ||
1122 task->task_nonvolatile_objects != 0) {
1123 panic("task_deallocate(%p): "
1124 "volatile_objects=%d nonvolatile_objects=%d\n",
1125 task,
1126 task->task_volatile_objects,
1127 task->task_nonvolatile_objects);
1128 }
1129
1130 vm_map_deallocate(task->map);
1131 is_release(task->itk_space);
1132
1133 ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
1134 &interrupt_wakeups, &debit);
1135 ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
1136 &platform_idle_wakeups, &debit);
1137
1138 #if defined(CONFIG_SCHED_MULTIQ)
1139 sched_group_destroy(task->sched_group);
1140 #endif
1141
1142 /* Accumulate statistics for dead tasks */
1143 lck_spin_lock(&dead_task_statistics_lock);
1144 dead_task_statistics.total_user_time += task->total_user_time;
1145 dead_task_statistics.total_system_time += task->total_system_time;
1146
1147 dead_task_statistics.task_interrupt_wakeups += interrupt_wakeups;
1148 dead_task_statistics.task_platform_idle_wakeups += platform_idle_wakeups;
1149
1150 dead_task_statistics.task_timer_wakeups_bin_1 += task->task_timer_wakeups_bin_1;
1151 dead_task_statistics.task_timer_wakeups_bin_2 += task->task_timer_wakeups_bin_2;
1152
1153 lck_spin_unlock(&dead_task_statistics_lock);
1154 lck_mtx_destroy(&task->lock, &task_lck_grp);
1155
1156 if (!ledger_get_entries(task->ledger, task_ledgers.tkm_private, &credit,
1157 &debit)) {
1158 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc);
1159 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_private.free);
1160 }
1161 if (!ledger_get_entries(task->ledger, task_ledgers.tkm_shared, &credit,
1162 &debit)) {
1163 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_shared.alloc);
1164 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_shared.free);
1165 }
1166 ledger_dereference(task->ledger);
1167 zinfo_task_free(task);
1168
1169 #if TASK_REFERENCE_LEAK_DEBUG
1170 btlog_remove_entries_for_element(task_ref_btlog, task);
1171 #endif
1172
1173 #if CONFIG_COALITIONS
1174 if (!task->coalition[COALITION_TYPE_RESOURCE])
1175 panic("deallocating task was not a member of a resource coalition");
1176 task_release_coalitions(task);
1177 #endif /* CONFIG_COALITIONS */
1178
1179 bzero(task->coalition, sizeof(task->coalition));
1180
1181 #if MACH_BSD
1182 /* clean up collected information since last reference to task is gone */
1183 if (task->corpse_info) {
1184 task_crashinfo_destroy(task->corpse_info);
1185 task->corpse_info = NULL;
1186 }
1187 #endif
1188
1189 zfree(task_zone, task);
1190 }
1191
1192 /*
1193 * task_name_deallocate:
1194 *
1195 * Drop a reference on a task name.
1196 */
1197 void
1198 task_name_deallocate(
1199 task_name_t task_name)
1200 {
1201 return(task_deallocate((task_t)task_name));
1202 }
1203
1204 /*
1205 * task_suspension_token_deallocate:
1206 *
1207 * Drop a reference on a task suspension token.
1208 */
1209 void
1210 task_suspension_token_deallocate(
1211 task_suspension_token_t token)
1212 {
1213 return(task_deallocate((task_t)token));
1214 }
1215
1216
1217 /*
1218 * task_collect_crash_info:
1219 *
1220  *	Collect crash info from BSD and Mach-based data
1221 */
1222 kern_return_t
1223 task_collect_crash_info(task_t task)
1224 {
1225 kern_return_t kr = KERN_SUCCESS;
1226
1227 kcdata_descriptor_t crash_data = NULL;
1228 kcdata_descriptor_t crash_data_release = NULL;
1229 mach_msg_type_number_t size = CORPSEINFO_ALLOCATION_SIZE;
1230 mach_vm_offset_t crash_data_user_ptr = 0;
1231
1232 if (!corpses_enabled()) {
1233 return KERN_NOT_SUPPORTED;
1234 }
1235
1236 task_lock(task);
1237 assert(task->bsd_info != NULL);
1238 if (task->corpse_info == NULL && task->bsd_info != NULL) {
1239 task_unlock(task);
1240 /* map crash data memory in task's vm map */
1241 kr = mach_vm_allocate(task->map, &crash_data_user_ptr, size, (VM_MAKE_TAG(VM_MEMORY_CORPSEINFO) | VM_FLAGS_ANYWHERE));
1242
1243 if (kr != KERN_SUCCESS)
1244 goto out_no_lock;
1245
1246 crash_data = task_crashinfo_alloc_init((mach_vm_address_t)crash_data_user_ptr, size);
1247 if (crash_data) {
1248 task_lock(task);
1249 crash_data_release = task->corpse_info;
1250 task->corpse_info = crash_data;
1251 task_unlock(task);
1252 kr = KERN_SUCCESS;
1253 } else {
1254 /* if failed to create corpse info, free the mapping */
1255 if (KERN_SUCCESS != mach_vm_deallocate(task->map, crash_data_user_ptr, size)) {
1256 printf("mach_vm_deallocate failed to clear corpse_data for pid %d.\n", task_pid(task));
1257 }
1258 kr = KERN_FAILURE;
1259 }
1260
1261 if (crash_data_release != NULL) {
1262 task_crashinfo_destroy(crash_data_release);
1263 }
1264 } else {
1265 task_unlock(task);
1266 }
1267
1268 out_no_lock:
1269 return kr;
1270 }
1271
1272 /*
1273 * task_deliver_crash_notification:
1274 *
1275 * Makes outcall to registered host port for a corpse.
1276 */
1277 kern_return_t
1278 task_deliver_crash_notification(task_t task)
1279 {
1280 kcdata_descriptor_t crash_info = task->corpse_info;
1281 thread_t th_iter = NULL;
1282 kern_return_t kr = KERN_SUCCESS;
1283 wait_interrupt_t wsave;
1284 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
1285
1286 if (crash_info == NULL)
1287 return KERN_FAILURE;
1288
1289 code[0] = crash_info->kcd_addr_begin;
1290 code[1] = crash_info->kcd_length;
1291
1292 task_lock(task);
1293 queue_iterate(&task->threads, th_iter, thread_t, task_threads)
1294 {
1295 ipc_thread_reset(th_iter);
1296 }
1297 task_unlock(task);
1298
1299 wsave = thread_interrupt_level(THREAD_UNINT);
1300 kr = exception_triage(EXC_CORPSE_NOTIFY, code, EXCEPTION_CODE_MAX);
1301 if (kr != KERN_SUCCESS) {
1302 printf("Failed to send exception EXC_CORPSE_NOTIFY. error code: %d for pid %d\n", kr, task_pid(task));
1303 }
1304
1305 /*
1306 * crash reporting is done. Now release threads
1307 * for reaping by thread_terminate_daemon
1308 */
1309 task_lock(task);
1310 assert(task->active_thread_count == 0);
1311 queue_iterate(&task->threads, th_iter, thread_t, task_threads)
1312 {
1313 thread_mtx_lock(th_iter);
1314 assert(th_iter->inspection == TRUE);
1315 th_iter->inspection = FALSE;
1316 /* now that the corpse has been autopsied, dispose of the thread name */
1317 uthread_cleanup_name(th_iter->uthread);
1318 thread_mtx_unlock(th_iter);
1319 }
1320
1321 thread_terminate_crashed_threads();
1322 /* remove the pending corpse report flag */
1323 task_clear_corpse_pending_report(task);
1324
1325 task_unlock(task);
1326
1327 (void)thread_interrupt_level(wsave);
1328 task_terminate_internal(task);
1329
1330 return kr;
1331 }
1332
1333 /*
1334 * task_terminate:
1335 *
1336 * Terminate the specified task. See comments on thread_terminate
1337 * (kern/thread.c) about problems with terminating the "current task."
1338 */
1339
1340 kern_return_t
1341 task_terminate(
1342 task_t task)
1343 {
1344 if (task == TASK_NULL)
1345 return (KERN_INVALID_ARGUMENT);
1346
1347 if (task->bsd_info)
1348 return (KERN_FAILURE);
1349
1350 return (task_terminate_internal(task));
1351 }
1352
1353 #if MACH_ASSERT
1354 extern int proc_pid(struct proc *);
1355 extern void proc_name_kdp(task_t t, char *buf, int size);
1356 #endif /* MACH_ASSERT */
1357
1358 #define VM_MAP_PARTIAL_REAP 0x54 /* 0x150 */
1359 static void
1360 __unused task_partial_reap(task_t task, __unused int pid)
1361 {
1362 unsigned int reclaimed_resident = 0;
1363 unsigned int reclaimed_compressed = 0;
1364 uint64_t task_page_count;
1365
1366 task_page_count = (get_task_phys_footprint(task) / PAGE_SIZE_64);
1367
1368 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_START),
1369 pid, task_page_count, 0, 0, 0);
1370
1371 vm_map_partial_reap(task->map, &reclaimed_resident, &reclaimed_compressed);
1372
1373 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_END),
1374 pid, reclaimed_resident, reclaimed_compressed, 0, 0);
1375 }
1376
1377 kern_return_t
1378 task_mark_corpse(task_t task)
1379 {
1380 kern_return_t kr = KERN_SUCCESS;
1381 thread_t self_thread;
1382 (void) self_thread;
1383 wait_interrupt_t wsave;
1384
1385 assert(task != kernel_task);
1386 assert(task == current_task());
1387 assert(!task_is_a_corpse(task));
1388
1389 kr = task_collect_crash_info(task);
1390 if (kr != KERN_SUCCESS) {
1391 return kr;
1392 }
1393
1394 self_thread = current_thread();
1395
1396 wsave = thread_interrupt_level(THREAD_UNINT);
1397 task_lock(task);
1398
1399 task_set_corpse_pending_report(task);
1400 task_set_corpse(task);
1401
1402 kr = task_start_halt_locked(task, TRUE);
1403 assert(kr == KERN_SUCCESS);
1404 ipc_task_reset(task);
1405 ipc_task_enable(task);
1406
1407 task_unlock(task);
1408 /* terminate the ipc space */
1409 ipc_space_terminate(task->itk_space);
1410
1411 task_start_halt(task);
1412 thread_terminate_internal(self_thread);
1413 (void) thread_interrupt_level(wsave);
1414 assert(task->halting == TRUE);
1415 return kr;
1416 }
1417
1418 kern_return_t
1419 task_terminate_internal(
1420 task_t task)
1421 {
1422 thread_t thread, self;
1423 task_t self_task;
1424 boolean_t interrupt_save;
1425 int pid = 0;
1426
1427 assert(task != kernel_task);
1428
1429 self = current_thread();
1430 self_task = self->task;
1431
1432 /*
1433 * Get the task locked and make sure that we are not racing
1434 * with someone else trying to terminate us.
1435 */
1436 if (task == self_task)
1437 task_lock(task);
1438 else
1439 if (task < self_task) {
1440 task_lock(task);
1441 task_lock(self_task);
1442 }
1443 else {
1444 task_lock(self_task);
1445 task_lock(task);
1446 }
1447
1448 if (!task->active) {
1449 /*
1450 * Task is already being terminated.
1451 * Just return an error. If we are dying, this will
1452 * just get us to our AST special handler and that
1453 * will get us to finalize the termination of ourselves.
1454 */
1455 task_unlock(task);
1456 if (self_task != task)
1457 task_unlock(self_task);
1458
1459 return (KERN_FAILURE);
1460 }
1461
1462 if (task_corpse_pending_report(task)) {
1463 /*
1464 * Task is marked for reporting as corpse.
1465 * Just return an error. This will
1466 * just get us to our AST special handler and that
1467 * will get us to finish the path to death
1468 */
1469 task_unlock(task);
1470 if (self_task != task)
1471 task_unlock(self_task);
1472
1473 return (KERN_FAILURE);
1474 }
1475
1476 if (self_task != task)
1477 task_unlock(self_task);
1478
1479 /*
1480 * Make sure the current thread does not get aborted out of
1481 * the waits inside these operations.
1482 */
1483 interrupt_save = thread_interrupt_level(THREAD_UNINT);
1484
1485 /*
1486 * Indicate that we want all the threads to stop executing
1487 * at user space by holding the task (we would have held
1488 * each thread independently in thread_terminate_internal -
1489 * but this way we may be more likely to already find it
1490 * held there). Mark the task inactive, and prevent
1491 * further task operations via the task port.
1492 */
1493 task_hold_locked(task);
1494 task->active = FALSE;
1495 ipc_task_disable(task);
1496
1497 #if CONFIG_TELEMETRY
1498 /*
1499 * Notify telemetry that this task is going away.
1500 */
1501 telemetry_task_ctl_locked(task, TF_TELEMETRY, 0);
1502 #endif
1503
1504 /*
1505 * Terminate each thread in the task.
1506 */
1507 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1508 thread_terminate_internal(thread);
1509 }
1510
1511 #ifdef MACH_BSD
1512 if (task->bsd_info != NULL) {
1513 pid = proc_pid(task->bsd_info);
1514 }
1515 #endif /* MACH_BSD */
1516
1517 task_unlock(task);
1518
1519 proc_set_task_policy(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE,
1520 TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);
1521
1522 /* Early object reap phase */
1523
1524 // PR-17045188: Revisit implementation
1525 // task_partial_reap(task, pid);
1526
1527
1528 /*
1529 * Destroy all synchronizers owned by the task.
1530 */
1531 task_synchronizer_destroy_all(task);
1532
1533 /*
1534 * Destroy the IPC space, leaving just a reference for it.
1535 */
1536 ipc_space_terminate(task->itk_space);
1537
1538 #if 00
1539 /* if some ledgers go negative on tear-down again... */
1540 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1541 task_ledgers.phys_footprint);
1542 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1543 task_ledgers.internal);
1544 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1545 task_ledgers.internal_compressed);
1546 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1547 task_ledgers.iokit_mapped);
1548 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1549 task_ledgers.alternate_accounting);
1550 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1551 task_ledgers.alternate_accounting_compressed);
1552 #endif
1553
1554 /*
1555 * If the current thread is a member of the task
1556 * being terminated, then the last reference to
1557 * the task will not be dropped until the thread
1558 * is finally reaped. To avoid incurring the
1559 * expense of removing the address space regions
1560 	 * at reap time, we do it explicitly here.
1561 */
1562
1563 vm_map_lock(task->map);
1564 vm_map_disable_hole_optimization(task->map);
1565 vm_map_unlock(task->map);
1566
1567 vm_map_remove(task->map,
1568 task->map->min_offset,
1569 task->map->max_offset,
1570 /* no unnesting on final cleanup: */
1571 VM_MAP_REMOVE_NO_UNNESTING);
1572
1573 /* release our shared region */
1574 vm_shared_region_set(task, NULL);
1575
1576
1577 #if MACH_ASSERT
1578 /*
1579 * Identify the pmap's process, in case the pmap ledgers drift
1580 * and we have to report it.
1581 */
1582 char procname[17];
1583 if (task->bsd_info) {
1584 pid = proc_pid(task->bsd_info);
1585 proc_name_kdp(task, procname, sizeof (procname));
1586 } else {
1587 pid = 0;
1588 strlcpy(procname, "<unknown>", sizeof (procname));
1589 }
1590 pmap_set_process(task->map->pmap, pid, procname);
1591 #endif /* MACH_ASSERT */
1592
1593 lck_mtx_lock(&tasks_threads_lock);
1594 queue_remove(&tasks, task, task_t, tasks);
1595 queue_enter(&terminated_tasks, task, task_t, tasks);
1596 tasks_count--;
1597 terminated_tasks_count++;
1598 lck_mtx_unlock(&tasks_threads_lock);
1599
1600 /*
1601 * We no longer need to guard against being aborted, so restore
1602 * the previous interruptible state.
1603 */
1604 thread_interrupt_level(interrupt_save);
1605
1606 #if KPERF
1607 /* force the task to release all ctrs */
1608 if (task->t_chud & TASK_KPC_FORCED_ALL_CTRS)
1609 kpc_force_all_ctrs(task, 0);
1610 #endif
1611
1612 #if CONFIG_COALITIONS
1613 /*
1614 * Leave our coalitions. (drop activation but not reference)
1615 */
1616 coalitions_remove_task(task);
1617 #endif
1618
1619 /*
1620 * Get rid of the task active reference on itself.
1621 */
1622 task_deallocate(task);
1623
1624 return (KERN_SUCCESS);
1625 }
1626
1627 void
1628 tasks_system_suspend(boolean_t suspend)
1629 {
1630 task_t task;
1631
1632 lck_mtx_lock(&tasks_threads_lock);
1633 assert(tasks_suspend_state != suspend);
1634 tasks_suspend_state = suspend;
1635 queue_iterate(&tasks, task, task_t, tasks) {
1636 if (task == kernel_task) {
1637 continue;
1638 }
1639 suspend ? task_suspend_internal(task) : task_resume_internal(task);
1640 }
1641 lck_mtx_unlock(&tasks_threads_lock);
1642 }
1643
1644 /*
1645 * task_start_halt:
1646 *
1647 * Shut the current task down (except for the current thread) in
1648 * preparation for dramatic changes to the task (probably exec).
1649 * We hold the task and mark all other threads in the task for
1650 * termination.
1651 */
1652 kern_return_t
1653 task_start_halt(task_t task)
1654 {
1655 kern_return_t kr = KERN_SUCCESS;
1656 task_lock(task);
1657 kr = task_start_halt_locked(task, FALSE);
1658 task_unlock(task);
1659 return kr;
1660 }
1661
1662 static kern_return_t
1663 task_start_halt_locked(task_t task, boolean_t should_mark_corpse)
1664 {
1665 thread_t thread, self;
1666 uint64_t dispatchqueue_offset;
1667
1668 assert(task != kernel_task);
1669
1670 self = current_thread();
1671
1672 if (task != self->task)
1673 return (KERN_INVALID_ARGUMENT);
1674
1675 if (task->halting || !task->active || !self->active) {
1676 /*
1677 * Task or current thread is already being terminated.
1678 * Hurry up and return out of the current kernel context
1679 * so that we run our AST special handler to terminate
1680 * ourselves.
1681 */
1682 return (KERN_FAILURE);
1683 }
1684
1685 task->halting = TRUE;
1686
1687 /*
1688 * Mark all the threads to keep them from starting any more
1689 * user-level execution. The thread_terminate_internal code
1690 * would do this on a thread by thread basis anyway, but this
1691 * gives us a better chance of not having to wait there.
1692 */
1693 task_hold_locked(task);
1694 dispatchqueue_offset = get_dispatchqueue_offset_from_proc(task->bsd_info);
1695
1696 /*
1697 * Terminate all the other threads in the task.
1698 */
1699 queue_iterate(&task->threads, thread, thread_t, task_threads)
1700 {
1701 if (should_mark_corpse) {
1702 thread_mtx_lock(thread);
1703 thread->inspection = TRUE;
1704 thread_mtx_unlock(thread);
1705 }
1706 if (thread != self)
1707 thread_terminate_internal(thread);
1708 }
1709 task->dispatchqueue_offset = dispatchqueue_offset;
1710
1711 task_release_locked(task);
1712
1713 return KERN_SUCCESS;
1714 }
1715
1716
1717 /*
1718 * task_complete_halt:
1719 *
1720 * Complete task halt by waiting for threads to terminate, then clean
1721 * up task resources (VM, port namespace, etc...) and then let the
1722 * current thread go in the (practically empty) task context.
1723 */
1724 void
1725 task_complete_halt(task_t task)
1726 {
1727 task_lock(task);
1728 assert(task->halting);
1729 assert(task == current_task());
1730
1731 /*
1732 * Wait for the other threads to get shut down.
1733 * When the last other thread is reaped, we'll be
1734 * woken up.
1735 */
1736 if (task->thread_count > 1) {
1737 assert_wait((event_t)&task->halting, THREAD_UNINT);
1738 task_unlock(task);
1739 thread_block(THREAD_CONTINUE_NULL);
1740 } else {
1741 task_unlock(task);
1742 }
1743
1744 /*
1745 * Give the machine dependent code a chance
1746 * to perform cleanup of task-level resources
1747 * associated with the current thread before
1748 * ripping apart the task.
1749 */
1750 machine_task_terminate(task);
1751
1752 /*
1753 * Destroy all synchronizers owned by the task.
1754 */
1755 task_synchronizer_destroy_all(task);
1756
1757 /*
1758 * Destroy the contents of the IPC space, leaving just
1759 * a reference for it.
1760 */
1761 ipc_space_clean(task->itk_space);
1762
1763 /*
1764 * Clean out the address space, as we are going to be
1765 * getting a new one.
1766 */
1767 vm_map_remove(task->map, task->map->min_offset,
1768 task->map->max_offset,
1769 /* no unnesting on final cleanup: */
1770 VM_MAP_REMOVE_NO_UNNESTING);
1771
1772 task->halting = FALSE;
1773 }
1774
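/*
 * Illustrative sketch (editorial addition, not part of the original source):
 * the expected pairing of the two halt routines on an exec-like path, assuming
 * the caller runs as a thread of the task being torn down and error handling
 * is elided:
 *
 *	kern_return_t kr;
 *
 *	kr = task_start_halt(current_task());    // mark sibling threads for termination
 *	if (kr == KERN_SUCCESS) {
 *		task_complete_halt(current_task());  // wait for them, then strip VM/IPC state
 *		// ... build the new image in the now nearly-empty task ...
 *	}
 */
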
1775 /*
1776 * task_hold_locked:
1777 *
1778 * Suspend execution of the specified task.
1779 * This is a recursive-style suspension of the task, a count of
1780 * suspends is maintained.
1781 *
1782 * CONDITIONS: the task is locked and active.
1783 */
1784 void
1785 task_hold_locked(
1786 register task_t task)
1787 {
1788 register thread_t thread;
1789
1790 assert(task->active);
1791
1792 if (task->suspend_count++ > 0)
1793 return;
1794
1795 /*
1796 * Iterate through all the threads and hold them.
1797 */
1798 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1799 thread_mtx_lock(thread);
1800 thread_hold(thread);
1801 thread_mtx_unlock(thread);
1802 }
1803 }
1804
1805 /*
1806 * task_hold:
1807 *
1808 * Same as the internal routine above, except that it must lock
1809 * and verify that the task is active. This differs from task_suspend
1810 * in that it places a kernel hold on the task rather than just a
1811 * user-level hold. This keeps users from over-resuming and setting
1812 * it running out from under the kernel.
1813 *
1814 * CONDITIONS: the caller holds a reference on the task
1815 */
1816 kern_return_t
1817 task_hold(
1818 register task_t task)
1819 {
1820 if (task == TASK_NULL)
1821 return (KERN_INVALID_ARGUMENT);
1822
1823 task_lock(task);
1824
1825 if (!task->active) {
1826 task_unlock(task);
1827
1828 return (KERN_FAILURE);
1829 }
1830
1831 task_hold_locked(task);
1832 task_unlock(task);
1833
1834 return (KERN_SUCCESS);
1835 }
1836
1837 kern_return_t
1838 task_wait(
1839 task_t task,
1840 boolean_t until_not_runnable)
1841 {
1842 if (task == TASK_NULL)
1843 return (KERN_INVALID_ARGUMENT);
1844
1845 task_lock(task);
1846
1847 if (!task->active) {
1848 task_unlock(task);
1849
1850 return (KERN_FAILURE);
1851 }
1852
1853 task_wait_locked(task, until_not_runnable);
1854 task_unlock(task);
1855
1856 return (KERN_SUCCESS);
1857 }
1858
1859 /*
1860 * task_wait_locked:
1861 *
1862 * Wait for all threads in task to stop.
1863 *
1864 * Conditions:
1865 * Called with task locked, active, and held.
1866 */
1867 void
1868 task_wait_locked(
1869 register task_t task,
1870 boolean_t until_not_runnable)
1871 {
1872 register thread_t thread, self;
1873
1874 assert(task->active);
1875 assert(task->suspend_count > 0);
1876
1877 self = current_thread();
1878
1879 /*
1880 * Iterate through all the threads and wait for them to
1881 * stop. Do not wait for the current thread if it is within
1882 * the task.
1883 */
1884 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1885 if (thread != self)
1886 thread_wait(thread, until_not_runnable);
1887 }
1888 }
1889
1890 /*
1891 * task_release_locked:
1892 *
1893 * Release a kernel hold on a task.
1894 *
1895 * CONDITIONS: the task is locked and active
1896 */
1897 void
1898 task_release_locked(
1899 register task_t task)
1900 {
1901 register thread_t thread;
1902
1903 assert(task->active);
1904 assert(task->suspend_count > 0);
1905
1906 if (--task->suspend_count > 0)
1907 return;
1908
1909 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1910 thread_mtx_lock(thread);
1911 thread_release(thread);
1912 thread_mtx_unlock(thread);
1913 }
1914 }
1915
1916 /*
1917 * task_release:
1918 *
1919 * Same as the internal routine above, except that it must lock
1920 * and verify that the task is active.
1921 *
1922 * CONDITIONS: The caller holds a reference to the task
1923 */
1924 kern_return_t
1925 task_release(
1926 task_t task)
1927 {
1928 if (task == TASK_NULL)
1929 return (KERN_INVALID_ARGUMENT);
1930
1931 task_lock(task);
1932
1933 if (!task->active) {
1934 task_unlock(task);
1935
1936 return (KERN_FAILURE);
1937 }
1938
1939 task_release_locked(task);
1940 task_unlock(task);
1941
1942 return (KERN_SUCCESS);
1943 }
1944
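/*
 * Illustrative sketch (editorial addition): the kernel-internal stop/inspect/
 * resume pattern built from task_hold_locked(), task_wait_locked() and
 * task_release_locked(), essentially what place_task_hold() and
 * release_task_hold() below do on behalf of user-level suspensions.  Assumes
 * the task is active:
 *
 *	task_lock(task);
 *	task_hold_locked(task);          // one kernel-level hold on every thread
 *	task_wait_locked(task, FALSE);   // wait for threads to stop running user code
 *	// ... examine or modify the stopped task ...
 *	task_release_locked(task);
 *	task_unlock(task);
 */
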
1945 kern_return_t
1946 task_threads(
1947 task_t task,
1948 thread_act_array_t *threads_out,
1949 mach_msg_type_number_t *count)
1950 {
1951 mach_msg_type_number_t actual;
1952 thread_t *thread_list;
1953 thread_t thread;
1954 vm_size_t size, size_needed;
1955 void *addr;
1956 unsigned int i, j;
1957
1958 if (task == TASK_NULL)
1959 return (KERN_INVALID_ARGUMENT);
1960
1961 size = 0; addr = NULL;
1962
1963 for (;;) {
1964 task_lock(task);
1965 if (!task->active) {
1966 task_unlock(task);
1967
1968 if (size != 0)
1969 kfree(addr, size);
1970
1971 return (KERN_FAILURE);
1972 }
1973
1974 actual = task->thread_count;
1975
1976 /* do we have the memory we need? */
1977 size_needed = actual * sizeof (mach_port_t);
1978 if (size_needed <= size)
1979 break;
1980
1981 /* unlock the task and allocate more memory */
1982 task_unlock(task);
1983
1984 if (size != 0)
1985 kfree(addr, size);
1986
1987 assert(size_needed > 0);
1988 size = size_needed;
1989
1990 addr = kalloc(size);
1991 if (addr == 0)
1992 return (KERN_RESOURCE_SHORTAGE);
1993 }
1994
1995 /* OK, have memory and the task is locked & active */
1996 thread_list = (thread_t *)addr;
1997
1998 i = j = 0;
1999
2000 for (thread = (thread_t)queue_first(&task->threads); i < actual;
2001 ++i, thread = (thread_t)queue_next(&thread->task_threads)) {
2002 thread_reference_internal(thread);
2003 thread_list[j++] = thread;
2004 }
2005
2006 assert(queue_end(&task->threads, (queue_entry_t)thread));
2007
2008 actual = j;
2009 size_needed = actual * sizeof (mach_port_t);
2010
2011 /* can unlock task now that we've got the thread refs */
2012 task_unlock(task);
2013
2014 if (actual == 0) {
2015 /* no threads, so return null pointer and deallocate memory */
2016
2017 *threads_out = NULL;
2018 *count = 0;
2019
2020 if (size != 0)
2021 kfree(addr, size);
2022 }
2023 else {
2024 /* if we allocated too much, must copy */
2025
2026 if (size_needed < size) {
2027 void *newaddr;
2028
2029 newaddr = kalloc(size_needed);
2030 if (newaddr == 0) {
2031 for (i = 0; i < actual; ++i)
2032 thread_deallocate(thread_list[i]);
2033 kfree(addr, size);
2034 return (KERN_RESOURCE_SHORTAGE);
2035 }
2036
2037 bcopy(addr, newaddr, size_needed);
2038 kfree(addr, size);
2039 thread_list = (thread_t *)newaddr;
2040 }
2041
2042 *threads_out = thread_list;
2043 *count = actual;
2044
2045 /* do the conversion that Mig should handle */
2046
2047 for (i = 0; i < actual; ++i)
2048 ((ipc_port_t *) thread_list)[i] = convert_thread_to_port(thread_list[i]);
2049 }
2050
2051 return (KERN_SUCCESS);
2052 }
2053
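/*
 * Illustrative sketch (editorial addition): a user-space caller of
 * task_threads() receives one send right per thread plus an out-of-line array,
 * and is responsible for releasing both.  Minimal example, assuming
 * <mach/mach.h> and a task port the caller already holds:
 *
 *	thread_act_array_t threads;
 *	mach_msg_type_number_t count, i;
 *
 *	if (task_threads(mach_task_self(), &threads, &count) == KERN_SUCCESS) {
 *		for (i = 0; i < count; i++)
 *			mach_port_deallocate(mach_task_self(), threads[i]);
 *		vm_deallocate(mach_task_self(), (vm_address_t)threads,
 *		    count * sizeof(*threads));
 *	}
 */
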
2054 #define TASK_HOLD_NORMAL 0
2055 #define TASK_HOLD_PIDSUSPEND 1
2056 #define TASK_HOLD_LEGACY 2
2057 #define TASK_HOLD_LEGACY_ALL 3
2058
2059 static kern_return_t
2060 place_task_hold (
2061 register task_t task,
2062 int mode)
2063 {
2064 if (!task->active) {
2065 return (KERN_FAILURE);
2066 }
2067
2068 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2069 MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_SUSPEND) | DBG_FUNC_NONE,
2070 task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
2071 task->user_stop_count, task->user_stop_count + 1, 0);
2072
2073 #if MACH_ASSERT
2074 current_task()->suspends_outstanding++;
2075 #endif
2076
2077 if (mode == TASK_HOLD_LEGACY)
2078 task->legacy_stop_count++;
2079
2080 if (task->user_stop_count++ > 0) {
2081 /*
2082 * If the stop count was positive, the task is
2083 * already stopped and we can exit.
2084 */
2085 return (KERN_SUCCESS);
2086 }
2087
2088 /*
2089 * Put a kernel-level hold on the threads in the task (all
2090 * user-level task suspensions added together represent a
2091 * single kernel-level hold). We then wait for the threads
2092 * to stop executing user code.
2093 */
2094 task_hold_locked(task);
2095 task_wait_locked(task, FALSE);
2096
2097 return (KERN_SUCCESS);
2098 }
2099
2100 static kern_return_t
2101 release_task_hold (
2102 register task_t task,
2103 int mode)
2104 {
2105 register boolean_t release = FALSE;
2106
2107 if (!task->active) {
2108 return (KERN_FAILURE);
2109 }
2110
2111 if (mode == TASK_HOLD_PIDSUSPEND) {
2112 if (task->pidsuspended == FALSE) {
2113 return (KERN_FAILURE);
2114 }
2115 task->pidsuspended = FALSE;
2116 }
2117
2118 if (task->user_stop_count > (task->pidsuspended ? 1 : 0)) {
2119
2120 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2121 MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_RESUME) | DBG_FUNC_NONE,
2122 task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
2123 task->user_stop_count, mode, task->legacy_stop_count);
2124
2125 #if MACH_ASSERT
2126 /*
2127 * This is obviously not robust; if we suspend one task and then resume a different one,
2128 * we'll fly under the radar. This is only meant to catch the common case of a crashed
2129 * or buggy suspender.
2130 */
2131 current_task()->suspends_outstanding--;
2132 #endif
2133
2134 if (mode == TASK_HOLD_LEGACY_ALL) {
2135 if (task->legacy_stop_count >= task->user_stop_count) {
2136 task->user_stop_count = 0;
2137 release = TRUE;
2138 } else {
2139 task->user_stop_count -= task->legacy_stop_count;
2140 }
2141 task->legacy_stop_count = 0;
2142 } else {
2143 if (mode == TASK_HOLD_LEGACY && task->legacy_stop_count > 0)
2144 task->legacy_stop_count--;
2145 if (--task->user_stop_count == 0)
2146 release = TRUE;
2147 }
2148 }
2149 else {
2150 return (KERN_FAILURE);
2151 }
2152
2153 /*
2154 * Release the task if necessary.
2155 */
2156 if (release)
2157 task_release_locked(task);
2158
2159 return (KERN_SUCCESS);
2160 }
2161
2162
2163 /*
2164 * task_suspend:
2165 *
2166 * Implement an (old-fashioned) user-level suspension on a task.
2167 *
2168 * Because the user isn't expecting to have to manage a suspension
2169 * token, we'll track it for him in the kernel in the form of a naked
2170 * send right to the task's resume port. All such send rights
2171 * account for a single suspension against the task (unlike task_suspend2()
2172 * where each caller gets a unique suspension count represented by a
2173 * unique send-once right).
2174 *
2175 * Conditions:
2176 * The caller holds a reference to the task
2177 */
2178 kern_return_t
2179 task_suspend(
2180 register task_t task)
2181 {
2182 kern_return_t kr;
2183 mach_port_t port, send, old_notify;
2184 mach_port_name_t name;
2185
2186 if (task == TASK_NULL || task == kernel_task)
2187 return (KERN_INVALID_ARGUMENT);
2188
2189 task_lock(task);
2190
2191 /*
2192 * Claim a send right on the task resume port, and request a no-senders
2193 * notification on that port (if none outstanding).
2194 */
2195 if (task->itk_resume == IP_NULL) {
2196 task->itk_resume = ipc_port_alloc_kernel();
2197 if (!IP_VALID(task->itk_resume))
2198 panic("failed to create resume port");
2199 ipc_kobject_set(task->itk_resume, (ipc_kobject_t)task, IKOT_TASK_RESUME);
2200 }
2201
2202 port = task->itk_resume;
2203 ip_lock(port);
2204 assert(ip_active(port));
2205
2206 send = ipc_port_make_send_locked(port);
2207 assert(IP_VALID(send));
2208
2209 if (port->ip_nsrequest == IP_NULL) {
2210 ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
2211 assert(old_notify == IP_NULL);
2212 /* port unlocked */
2213 } else {
2214 ip_unlock(port);
2215 }
2216
2217 /*
2218 * place a legacy hold on the task.
2219 */
2220 kr = place_task_hold(task, TASK_HOLD_LEGACY);
2221 if (kr != KERN_SUCCESS) {
2222 task_unlock(task);
2223 ipc_port_release_send(send);
2224 return kr;
2225 }
2226
2227 task_unlock(task);
2228
2229 /*
2230 * Copyout the send right into the calling task's IPC space. It won't know it is there,
2231 * but we'll look it up when calling a traditional resume. Any IPC operations that
2232 * deallocate the send right will auto-release the suspension.
2233 */
2234 if ((kr = ipc_kmsg_copyout_object(current_task()->itk_space, (ipc_object_t)send,
2235 MACH_MSG_TYPE_MOVE_SEND, &name)) != KERN_SUCCESS) {
2236 printf("warning: %s(%d) failed to copyout suspension token for pid %d with error: %d\n",
2237 proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
2238 task_pid(task), kr);
2239 return (kr);
2240 }
2241
2242 return (kr);
2243 }
2244
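/*
 * Illustrative sketch (editorial addition): the legacy interface as seen from
 * user space.  Each successful task_suspend() quietly copies a send right on
 * the resume port into the caller's IPC space; a matching task_resume() (or
 * destruction of that right) drops the corresponding hold.  Assumes the caller
 * already holds a send right for the target task:
 *
 *	if (task_suspend(target) == KERN_SUCCESS) {
 *		// ... inspect the stopped task ...
 *		(void) task_resume(target);
 *	}
 */
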
2245 /*
2246 * task_resume:
2247 * Release a user hold on a task.
2248 *
2249 * Conditions:
2250 * The caller holds a reference to the task
2251 */
2252 kern_return_t
2253 task_resume(
2254 register task_t task)
2255 {
2256 kern_return_t kr;
2257 mach_port_name_t resume_port_name;
2258 ipc_entry_t resume_port_entry;
2259 ipc_space_t space = current_task()->itk_space;
2260
2261 if (task == TASK_NULL || task == kernel_task )
2262 return (KERN_INVALID_ARGUMENT);
2263
2264 /* release a legacy task hold */
2265 task_lock(task);
2266 kr = release_task_hold(task, TASK_HOLD_LEGACY);
2267 task_unlock(task);
2268
2269 is_write_lock(space);
2270 if (is_active(space) && IP_VALID(task->itk_resume) &&
2271 ipc_hash_lookup(space, (ipc_object_t)task->itk_resume, &resume_port_name, &resume_port_entry) == TRUE) {
2272 /*
2273 * We found a suspension token in the caller's IPC space. Release a send right to indicate that
2274 * we are holding one less legacy hold on the task from this caller. If the release failed,
2275 * go ahead and drop all the rights, as someone either already released our holds or the task
2276 * is gone.
2277 */
2278 if (kr == KERN_SUCCESS)
2279 ipc_right_dealloc(space, resume_port_name, resume_port_entry);
2280 else
2281 ipc_right_destroy(space, resume_port_name, resume_port_entry, FALSE, 0);
2282 /* space unlocked */
2283 } else {
2284 is_write_unlock(space);
2285 if (kr == KERN_SUCCESS)
2286 printf("warning: %s(%d) performed out-of-band resume on pid %d\n",
2287 proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
2288 task_pid(task));
2289 }
2290
2291 return kr;
2292 }
2293
2294 /*
2295 * Suspend the target task.
2296 * Making/holding a token/reference/port is the caller's responsibility.
2297 */
2298 kern_return_t
2299 task_suspend_internal(task_t task)
2300 {
2301 kern_return_t kr;
2302
2303 if (task == TASK_NULL || task == kernel_task)
2304 return (KERN_INVALID_ARGUMENT);
2305
2306 task_lock(task);
2307 kr = place_task_hold(task, TASK_HOLD_NORMAL);
2308 task_unlock(task);
2309 return (kr);
2310 }
2311
2312 /*
2313 * Suspend the target task, and return a suspension token. The token
2314 * represents a reference on the suspended task.
2315 */
2316 kern_return_t
2317 task_suspend2(
2318 register task_t task,
2319 task_suspension_token_t *suspend_token)
2320 {
2321 kern_return_t kr;
2322
2323 kr = task_suspend_internal(task);
2324 if (kr != KERN_SUCCESS) {
2325 *suspend_token = TASK_NULL;
2326 return (kr);
2327 }
2328
2329 /*
2330 * Take a reference on the target task and return that to the caller
2331 * as a "suspension token," which can be converted into an SO right to
2332 * the now-suspended task's resume port.
2333 */
2334 task_reference_internal(task);
2335 *suspend_token = task;
2336
2337 return (KERN_SUCCESS);
2338 }
2339
2340 /*
2341 * Resume the task
2342 * (reference/token/port management is caller's responsibility).
2343 */
2344 kern_return_t
2345 task_resume_internal(
2346 register task_suspension_token_t task)
2347 {
2348 kern_return_t kr;
2349
2350 if (task == TASK_NULL || task == kernel_task)
2351 return (KERN_INVALID_ARGUMENT);
2352
2353 task_lock(task);
2354 kr = release_task_hold(task, TASK_HOLD_NORMAL);
2355 task_unlock(task);
2356 return (kr);
2357 }
2358
2359 /*
2360 * Resume the task using a suspension token. Consumes the token's ref.
2361 */
2362 kern_return_t
2363 task_resume2(
2364 register task_suspension_token_t task)
2365 {
2366 kern_return_t kr;
2367
2368 kr = task_resume_internal(task);
2369 task_suspension_token_deallocate(task);
2370
2371 return (kr);
2372 }
2373
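/*
 * Illustrative sketch (editorial addition): the token-based interface.  Unlike
 * task_suspend(), every successful task_suspend2() returns its own suspension
 * token (a reference on the task), so independent callers cannot release each
 * other's holds:
 *
 *	task_suspension_token_t token;
 *
 *	if (task_suspend2(target, &token) == KERN_SUCCESS) {
 *		// ... target is stopped; 'token' carries one task reference ...
 *		(void) task_resume2(token);   // resumes and consumes the token
 *	}
 */
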
2374 boolean_t
2375 task_suspension_notify(mach_msg_header_t *request_header)
2376 {
2377 ipc_port_t port = (ipc_port_t) request_header->msgh_remote_port;
2378 task_t task = convert_port_to_task_suspension_token(port);
2379 mach_msg_type_number_t not_count;
2380
2381 if (task == TASK_NULL || task == kernel_task)
2382 return TRUE; /* nothing to do */
2383
2384 switch (request_header->msgh_id) {
2385
2386 case MACH_NOTIFY_SEND_ONCE:
2387 /* release the hold held by this specific send-once right */
2388 task_lock(task);
2389 release_task_hold(task, TASK_HOLD_NORMAL);
2390 task_unlock(task);
2391 break;
2392
2393 case MACH_NOTIFY_NO_SENDERS:
2394 not_count = ((mach_no_senders_notification_t *)request_header)->not_count;
2395
2396 task_lock(task);
2397 ip_lock(port);
2398 if (port->ip_mscount == not_count) {
2399
2400 /* release all the [remaining] outstanding legacy holds */
2401 assert(port->ip_nsrequest == IP_NULL);
2402 ip_unlock(port);
2403 release_task_hold(task, TASK_HOLD_LEGACY_ALL);
2404 task_unlock(task);
2405
2406 } else if (port->ip_nsrequest == IP_NULL) {
2407 ipc_port_t old_notify;
2408
2409 task_unlock(task);
2410 /* new send rights, re-arm notification at current make-send count */
2411 ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
2412 assert(old_notify == IP_NULL);
2413 /* port unlocked */
2414 } else {
2415 ip_unlock(port);
2416 task_unlock(task);
2417 }
2418 break;
2419
2420 default:
2421 break;
2422 }
2423
2424 task_suspension_token_deallocate(task); /* drop token reference */
2425 return TRUE;
2426 }
2427
2428 kern_return_t
2429 task_pidsuspend_locked(task_t task)
2430 {
2431 kern_return_t kr;
2432
2433 if (task->pidsuspended) {
2434 kr = KERN_FAILURE;
2435 goto out;
2436 }
2437
2438 task->pidsuspended = TRUE;
2439
2440 kr = place_task_hold(task, TASK_HOLD_PIDSUSPEND);
2441 if (kr != KERN_SUCCESS) {
2442 task->pidsuspended = FALSE;
2443 }
2444 out:
2445 return(kr);
2446 }
2447
2448
2449 /*
2450 * task_pidsuspend:
2451 *
2452 * Suspends a task by placing a hold on its threads.
2453 *
2454 * Conditions:
2455 * The caller holds a reference to the task
2456 */
2457 kern_return_t
2458 task_pidsuspend(
2459 register task_t task)
2460 {
2461 kern_return_t kr;
2462
2463 if (task == TASK_NULL || task == kernel_task)
2464 return (KERN_INVALID_ARGUMENT);
2465
2466 task_lock(task);
2467
2468 kr = task_pidsuspend_locked(task);
2469
2470 task_unlock(task);
2471
2472 return (kr);
2473 }
2474
2475 /* If enabled, we bring all the frozen pages back in prior to resumption; otherwise, they're faulted back in on demand */
2476 #define THAW_ON_RESUME 1
2477
2478 /*
2479 * task_pidresume:
2480 * Resumes a previously suspended task.
2481 *
2482 * Conditions:
2483 * The caller holds a reference to the task
2484 */
2485 kern_return_t
2486 task_pidresume(
2487 register task_t task)
2488 {
2489 kern_return_t kr;
2490
2491 if (task == TASK_NULL || task == kernel_task)
2492 return (KERN_INVALID_ARGUMENT);
2493
2494 task_lock(task);
2495
2496 #if (CONFIG_FREEZE && THAW_ON_RESUME)
2497
2498 while (task->changing_freeze_state) {
2499
2500 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
2501 task_unlock(task);
2502 thread_block(THREAD_CONTINUE_NULL);
2503
2504 task_lock(task);
2505 }
2506 task->changing_freeze_state = TRUE;
2507 #endif
2508
2509 kr = release_task_hold(task, TASK_HOLD_PIDSUSPEND);
2510
2511 task_unlock(task);
2512
2513 #if (CONFIG_FREEZE && THAW_ON_RESUME)
2514 if ((kr == KERN_SUCCESS) && (task->frozen == TRUE)) {
2515
2516 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
2517
2518 kr = KERN_SUCCESS;
2519 } else {
2520
2521 kr = vm_map_thaw(task->map);
2522 }
2523 }
2524 task_lock(task);
2525
2526 if (kr == KERN_SUCCESS)
2527 task->frozen = FALSE;
2528 task->changing_freeze_state = FALSE;
2529 thread_wakeup(&task->changing_freeze_state);
2530
2531 task_unlock(task);
2532 #endif
2533
2534 return (kr);
2535 }
2536
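/*
 * Illustrative sketch (editorial addition): task_pidsuspend() and
 * task_pidresume() implement a single, non-counting hold, assumed to be
 * reached through the pid_suspend()/pid_resume() BSD shims rather than
 * called directly:
 *
 *	kr = task_pidsuspend(task);   // fails with KERN_FAILURE if already pidsuspended
 *	...
 *	kr = task_pidresume(task);    // drops the hold; may also thaw a frozen task
 */
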
2537 #if CONFIG_FREEZE
2538
2539 /*
2540 * task_freeze:
2541 *
2542 * Freeze a task.
2543 *
2544 * Conditions:
2545 * The caller holds a reference to the task
2546 */
2547 extern void vm_wake_compactor_swapper();
2548 extern queue_head_t c_swapout_list_head;
2549
2550 kern_return_t
2551 task_freeze(
2552 register task_t task,
2553 uint32_t *purgeable_count,
2554 uint32_t *wired_count,
2555 uint32_t *clean_count,
2556 uint32_t *dirty_count,
2557 uint32_t dirty_budget,
2558 boolean_t *shared,
2559 boolean_t walk_only)
2560 {
2561 kern_return_t kr;
2562
2563 if (task == TASK_NULL || task == kernel_task)
2564 return (KERN_INVALID_ARGUMENT);
2565
2566 task_lock(task);
2567
2568 while (task->changing_freeze_state) {
2569
2570 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
2571 task_unlock(task);
2572 thread_block(THREAD_CONTINUE_NULL);
2573
2574 task_lock(task);
2575 }
2576 if (task->frozen) {
2577 task_unlock(task);
2578 return (KERN_FAILURE);
2579 }
2580 task->changing_freeze_state = TRUE;
2581
2582 task_unlock(task);
2583
2584 if (walk_only) {
2585 kr = vm_map_freeze_walk(task->map, purgeable_count, wired_count, clean_count, dirty_count, dirty_budget, shared);
2586 } else {
2587 kr = vm_map_freeze(task->map, purgeable_count, wired_count, clean_count, dirty_count, dirty_budget, shared);
2588 }
2589
2590 task_lock(task);
2591
2592 if (walk_only == FALSE && kr == KERN_SUCCESS)
2593 task->frozen = TRUE;
2594 task->changing_freeze_state = FALSE;
2595 thread_wakeup(&task->changing_freeze_state);
2596
2597 task_unlock(task);
2598
2599 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
2600 vm_wake_compactor_swapper();
2601 /*
2602 * We do an explicit wakeup of the swapout thread here
2603 * because the compact_and_swap routines don't have
2604 * knowledge about these kinds of "per-task packed c_segs"
2605 * and so will not be evaluating whether we need to do
2606 * a wakeup there.
2607 */
2608 thread_wakeup((event_t)&c_swapout_list_head);
2609 }
2610
2611 return (kr);
2612 }
2613
2614 /*
2615 * task_thaw:
2616 *
2617 * Thaw a currently frozen task.
2618 *
2619 * Conditions:
2620 * The caller holds a reference to the task
2621 */
2622 kern_return_t
2623 task_thaw(
2624 register task_t task)
2625 {
2626 kern_return_t kr;
2627
2628 if (task == TASK_NULL || task == kernel_task)
2629 return (KERN_INVALID_ARGUMENT);
2630
2631 task_lock(task);
2632
2633 while (task->changing_freeze_state) {
2634
2635 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
2636 task_unlock(task);
2637 thread_block(THREAD_CONTINUE_NULL);
2638
2639 task_lock(task);
2640 }
2641 if (!task->frozen) {
2642 task_unlock(task);
2643 return (KERN_FAILURE);
2644 }
2645 task->changing_freeze_state = TRUE;
2646
2647 if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE) {
2648 task_unlock(task);
2649
2650 kr = vm_map_thaw(task->map);
2651
2652 task_lock(task);
2653
2654 if (kr == KERN_SUCCESS)
2655 task->frozen = FALSE;
2656 } else {
2657 task->frozen = FALSE;
2658 kr = KERN_SUCCESS;
2659 }
2660
2661 task->changing_freeze_state = FALSE;
2662 thread_wakeup(&task->changing_freeze_state);
2663
2664 task_unlock(task);
2665
2666 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
2667 vm_wake_compactor_swapper();
2668 }
2669
2670 return (kr);
2671 }
2672
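/*
 * Illustrative sketch (editorial addition): the freezer entry points above are
 * assumed to be driven by the memorystatus/jetsam machinery, roughly as below;
 * 'dirty_budget' is a stand-in name for the caller's dirty-page budget:
 *
 *	uint32_t purgeable, wired, clean, dirty;
 *	boolean_t shared;
 *
 *	kr = task_freeze(task, &purgeable, &wired, &clean, &dirty,
 *	    dirty_budget, &shared, FALSE);   // FALSE: actually freeze, don't just walk
 *	...
 *	kr = task_thaw(task);
 */
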
2673 #endif /* CONFIG_FREEZE */
2674
2675 kern_return_t
2676 host_security_set_task_token(
2677 host_security_t host_security,
2678 task_t task,
2679 security_token_t sec_token,
2680 audit_token_t audit_token,
2681 host_priv_t host_priv)
2682 {
2683 ipc_port_t host_port;
2684 kern_return_t kr;
2685
2686 if (task == TASK_NULL)
2687 return(KERN_INVALID_ARGUMENT);
2688
2689 if (host_security == HOST_NULL)
2690 return(KERN_INVALID_SECURITY);
2691
2692 task_lock(task);
2693 task->sec_token = sec_token;
2694 task->audit_token = audit_token;
2695
2696 task_unlock(task);
2697
2698 if (host_priv != HOST_PRIV_NULL) {
2699 kr = host_get_host_priv_port(host_priv, &host_port);
2700 } else {
2701 kr = host_get_host_port(host_priv_self(), &host_port);
2702 }
2703 assert(kr == KERN_SUCCESS);
2704 kr = task_set_special_port(task, TASK_HOST_PORT, host_port);
2705 return(kr);
2706 }
2707
2708 kern_return_t
2709 task_send_trace_memory(
2710 task_t target_task,
2711 __unused uint32_t pid,
2712 __unused uint64_t uniqueid)
2713 {
2714 kern_return_t kr = KERN_INVALID_ARGUMENT;
2715 if (target_task == TASK_NULL)
2716 return (KERN_INVALID_ARGUMENT);
2717
2718 #if CONFIG_ATM
2719 kr = atm_send_proc_inspect_notification(target_task,
2720 pid,
2721 uniqueid);
2722
2723 #endif
2724 return (kr);
2725 }
2726 /*
2727 * This routine was added, pretty much exclusively, for registering the
2728 * RPC glue vector for in-kernel short circuited tasks. Rather than
2729 * removing it completely, I have only disabled that feature (which was
2730 * the only feature at the time). It just appears that we are going to
2731 * want to add some user data to tasks in the future (e.g. bsd info,
2732 * task names, etc...), so I left it in the formal task interface.
2733 */
2734 kern_return_t
2735 task_set_info(
2736 task_t task,
2737 task_flavor_t flavor,
2738 __unused task_info_t task_info_in, /* pointer to IN array */
2739 __unused mach_msg_type_number_t task_info_count)
2740 {
2741 if (task == TASK_NULL)
2742 return(KERN_INVALID_ARGUMENT);
2743
2744 switch (flavor) {
2745
2746 #if CONFIG_ATM
2747 case TASK_TRACE_MEMORY_INFO:
2748 {
2749 if (task_info_count != TASK_TRACE_MEMORY_INFO_COUNT)
2750 return (KERN_INVALID_ARGUMENT);
2751
2752 assert(task_info_in != NULL);
2753 task_trace_memory_info_t mem_info;
2754 mem_info = (task_trace_memory_info_t) task_info_in;
2755 kern_return_t kr = atm_register_trace_memory(task,
2756 mem_info->user_memory_address,
2757 mem_info->buffer_size);
2758 return kr;
2759 break;
2760 }
2761
2762 #endif
2763 default:
2764 return (KERN_INVALID_ARGUMENT);
2765 }
2766 return (KERN_SUCCESS);
2767 }
2768
2769 int radar_20146450 = 1;
2770 kern_return_t
2771 task_info(
2772 task_t task,
2773 task_flavor_t flavor,
2774 task_info_t task_info_out,
2775 mach_msg_type_number_t *task_info_count)
2776 {
2777 kern_return_t error = KERN_SUCCESS;
2778
2779 if (task == TASK_NULL)
2780 return (KERN_INVALID_ARGUMENT);
2781
2782 task_lock(task);
2783
2784 if ((task != current_task()) && (!task->active)) {
2785 task_unlock(task);
2786 return (KERN_INVALID_ARGUMENT);
2787 }
2788
2789 switch (flavor) {
2790
2791 case TASK_BASIC_INFO_32:
2792 case TASK_BASIC2_INFO_32:
2793 {
2794 task_basic_info_32_t basic_info;
2795 vm_map_t map;
2796 clock_sec_t secs;
2797 clock_usec_t usecs;
2798
2799 if (*task_info_count < TASK_BASIC_INFO_32_COUNT) {
2800 error = KERN_INVALID_ARGUMENT;
2801 break;
2802 }
2803
2804 basic_info = (task_basic_info_32_t)task_info_out;
2805
2806 map = (task == kernel_task)? kernel_map: task->map;
2807 basic_info->virtual_size = (typeof(basic_info->virtual_size))map->size;
2808 if (flavor == TASK_BASIC2_INFO_32) {
2809 /*
2810 * The "BASIC2" flavor gets the maximum resident
2811 * size instead of the current resident size...
2812 */
2813 basic_info->resident_size = pmap_resident_max(map->pmap);
2814 } else {
2815 basic_info->resident_size = pmap_resident_count(map->pmap);
2816 }
2817 basic_info->resident_size *= PAGE_SIZE;
2818
2819 basic_info->policy = ((task != kernel_task)?
2820 POLICY_TIMESHARE: POLICY_RR);
2821 basic_info->suspend_count = task->user_stop_count;
2822
2823 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
2824 basic_info->user_time.seconds =
2825 (typeof(basic_info->user_time.seconds))secs;
2826 basic_info->user_time.microseconds = usecs;
2827
2828 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
2829 basic_info->system_time.seconds =
2830 (typeof(basic_info->system_time.seconds))secs;
2831 basic_info->system_time.microseconds = usecs;
2832
2833 *task_info_count = TASK_BASIC_INFO_32_COUNT;
2834 break;
2835 }
2836
2837 case TASK_BASIC_INFO_64:
2838 {
2839 task_basic_info_64_t basic_info;
2840 vm_map_t map;
2841 clock_sec_t secs;
2842 clock_usec_t usecs;
2843
2844 if (*task_info_count < TASK_BASIC_INFO_64_COUNT) {
2845 error = KERN_INVALID_ARGUMENT;
2846 break;
2847 }
2848
2849 basic_info = (task_basic_info_64_t)task_info_out;
2850
2851 map = (task == kernel_task)? kernel_map: task->map;
2852 basic_info->virtual_size = map->size;
2853 basic_info->resident_size =
2854 (mach_vm_size_t)(pmap_resident_count(map->pmap))
2855 * PAGE_SIZE_64;
2856
2857 basic_info->policy = ((task != kernel_task)?
2858 POLICY_TIMESHARE: POLICY_RR);
2859 basic_info->suspend_count = task->user_stop_count;
2860
2861 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
2862 basic_info->user_time.seconds =
2863 (typeof(basic_info->user_time.seconds))secs;
2864 basic_info->user_time.microseconds = usecs;
2865
2866 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
2867 basic_info->system_time.seconds =
2868 (typeof(basic_info->system_time.seconds))secs;
2869 basic_info->system_time.microseconds = usecs;
2870
2871 *task_info_count = TASK_BASIC_INFO_64_COUNT;
2872 break;
2873 }
2874
2875 case MACH_TASK_BASIC_INFO:
2876 {
2877 mach_task_basic_info_t basic_info;
2878 vm_map_t map;
2879 clock_sec_t secs;
2880 clock_usec_t usecs;
2881
2882 if (*task_info_count < MACH_TASK_BASIC_INFO_COUNT) {
2883 error = KERN_INVALID_ARGUMENT;
2884 break;
2885 }
2886
2887 basic_info = (mach_task_basic_info_t)task_info_out;
2888
2889 map = (task == kernel_task) ? kernel_map : task->map;
2890
2891 basic_info->virtual_size = map->size;
2892
2893 basic_info->resident_size =
2894 (mach_vm_size_t)(pmap_resident_count(map->pmap));
2895 basic_info->resident_size *= PAGE_SIZE_64;
2896
2897 basic_info->resident_size_max =
2898 (mach_vm_size_t)(pmap_resident_max(map->pmap));
2899 basic_info->resident_size_max *= PAGE_SIZE_64;
2900
2901 basic_info->policy = ((task != kernel_task) ?
2902 POLICY_TIMESHARE : POLICY_RR);
2903
2904 basic_info->suspend_count = task->user_stop_count;
2905
2906 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
2907 basic_info->user_time.seconds =
2908 (typeof(basic_info->user_time.seconds))secs;
2909 basic_info->user_time.microseconds = usecs;
2910
2911 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
2912 basic_info->system_time.seconds =
2913 (typeof(basic_info->system_time.seconds))secs;
2914 basic_info->system_time.microseconds = usecs;
2915
2916 *task_info_count = MACH_TASK_BASIC_INFO_COUNT;
2917 break;
2918 }
2919
2920 case TASK_THREAD_TIMES_INFO:
2921 {
2922 register task_thread_times_info_t times_info;
2923 register thread_t thread;
2924
2925 if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) {
2926 error = KERN_INVALID_ARGUMENT;
2927 break;
2928 }
2929
2930 times_info = (task_thread_times_info_t) task_info_out;
2931 times_info->user_time.seconds = 0;
2932 times_info->user_time.microseconds = 0;
2933 times_info->system_time.seconds = 0;
2934 times_info->system_time.microseconds = 0;
2935
2936
2937 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2938 time_value_t user_time, system_time;
2939
2940 if (thread->options & TH_OPT_IDLE_THREAD)
2941 continue;
2942
2943 thread_read_times(thread, &user_time, &system_time);
2944
2945 time_value_add(&times_info->user_time, &user_time);
2946 time_value_add(&times_info->system_time, &system_time);
2947 }
2948
2949 *task_info_count = TASK_THREAD_TIMES_INFO_COUNT;
2950 break;
2951 }
2952
2953 case TASK_ABSOLUTETIME_INFO:
2954 {
2955 task_absolutetime_info_t info;
2956 register thread_t thread;
2957
2958 if (*task_info_count < TASK_ABSOLUTETIME_INFO_COUNT) {
2959 error = KERN_INVALID_ARGUMENT;
2960 break;
2961 }
2962
2963 info = (task_absolutetime_info_t)task_info_out;
2964 info->threads_user = info->threads_system = 0;
2965
2966
2967 info->total_user = task->total_user_time;
2968 info->total_system = task->total_system_time;
2969
2970 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2971 uint64_t tval;
2972 spl_t x;
2973
2974 if (thread->options & TH_OPT_IDLE_THREAD)
2975 continue;
2976
2977 x = splsched();
2978 thread_lock(thread);
2979
2980 tval = timer_grab(&thread->user_timer);
2981 info->threads_user += tval;
2982 info->total_user += tval;
2983
2984 tval = timer_grab(&thread->system_timer);
2985 if (thread->precise_user_kernel_time) {
2986 info->threads_system += tval;
2987 info->total_system += tval;
2988 } else {
2989 /* system_timer may represent either sys or user */
2990 info->threads_user += tval;
2991 info->total_user += tval;
2992 }
2993
2994 thread_unlock(thread);
2995 splx(x);
2996 }
2997
2998
2999 *task_info_count = TASK_ABSOLUTETIME_INFO_COUNT;
3000 break;
3001 }
3002
3003 case TASK_DYLD_INFO:
3004 {
3005 task_dyld_info_t info;
3006
3007 /*
3008 * We added the format field to TASK_DYLD_INFO output. For
3009 * temporary backward compatibility, accept the fact that
3010 * clients may ask for the old version - distinguished by the
3011 * size of the expected result structure.
3012 */
3013 #define TASK_LEGACY_DYLD_INFO_COUNT \
3014 offsetof(struct task_dyld_info, all_image_info_format)/sizeof(natural_t)
3015
3016 if (*task_info_count < TASK_LEGACY_DYLD_INFO_COUNT) {
3017 error = KERN_INVALID_ARGUMENT;
3018 break;
3019 }
3020
3021 info = (task_dyld_info_t)task_info_out;
3022 info->all_image_info_addr = task->all_image_info_addr;
3023 info->all_image_info_size = task->all_image_info_size;
3024
3025 /* only set format on output for those expecting it */
3026 if (*task_info_count >= TASK_DYLD_INFO_COUNT) {
3027 info->all_image_info_format = task_has_64BitAddr(task) ?
3028 TASK_DYLD_ALL_IMAGE_INFO_64 :
3029 TASK_DYLD_ALL_IMAGE_INFO_32 ;
3030 *task_info_count = TASK_DYLD_INFO_COUNT;
3031 } else {
3032 *task_info_count = TASK_LEGACY_DYLD_INFO_COUNT;
3033 }
3034 break;
3035 }
3036
3037 case TASK_EXTMOD_INFO:
3038 {
3039 task_extmod_info_t info;
3040 void *p;
3041
3042 if (*task_info_count < TASK_EXTMOD_INFO_COUNT) {
3043 error = KERN_INVALID_ARGUMENT;
3044 break;
3045 }
3046
3047 info = (task_extmod_info_t)task_info_out;
3048
3049 p = get_bsdtask_info(task);
3050 if (p) {
3051 proc_getexecutableuuid(p, info->task_uuid, sizeof(info->task_uuid));
3052 } else {
3053 bzero(info->task_uuid, sizeof(info->task_uuid));
3054 }
3055 info->extmod_statistics = task->extmod_statistics;
3056 *task_info_count = TASK_EXTMOD_INFO_COUNT;
3057
3058 break;
3059 }
3060
3061 case TASK_KERNELMEMORY_INFO:
3062 {
3063 task_kernelmemory_info_t tkm_info;
3064 ledger_amount_t credit, debit;
3065
3066 if (*task_info_count < TASK_KERNELMEMORY_INFO_COUNT) {
3067 error = KERN_INVALID_ARGUMENT;
3068 break;
3069 }
3070
3071 tkm_info = (task_kernelmemory_info_t) task_info_out;
3072 tkm_info->total_palloc = 0;
3073 tkm_info->total_pfree = 0;
3074 tkm_info->total_salloc = 0;
3075 tkm_info->total_sfree = 0;
3076
3077 if (task == kernel_task) {
3078 /*
3079 * All shared allocs/frees from other tasks count against
3080 * the kernel private memory usage. If we are looking up
3081 * info for the kernel task, gather from everywhere.
3082 */
3083 task_unlock(task);
3084
3085 /* start by accounting for all the terminated tasks against the kernel */
3086 tkm_info->total_palloc = tasks_tkm_private.alloc + tasks_tkm_shared.alloc;
3087 tkm_info->total_pfree = tasks_tkm_private.free + tasks_tkm_shared.free;
3088
3089 /* count all other task/thread shared alloc/free against the kernel */
3090 lck_mtx_lock(&tasks_threads_lock);
3091
3092 /* XXX this really shouldn't be using the function parameter 'task' as a local var! */
3093 queue_iterate(&tasks, task, task_t, tasks) {
3094 if (task == kernel_task) {
3095 if (ledger_get_entries(task->ledger,
3096 task_ledgers.tkm_private, &credit,
3097 &debit) == KERN_SUCCESS) {
3098 tkm_info->total_palloc += credit;
3099 tkm_info->total_pfree += debit;
3100 }
3101 }
3102 if (!ledger_get_entries(task->ledger,
3103 task_ledgers.tkm_shared, &credit, &debit)) {
3104 tkm_info->total_palloc += credit;
3105 tkm_info->total_pfree += debit;
3106 }
3107 }
3108 lck_mtx_unlock(&tasks_threads_lock);
3109 } else {
3110 if (!ledger_get_entries(task->ledger,
3111 task_ledgers.tkm_private, &credit, &debit)) {
3112 tkm_info->total_palloc = credit;
3113 tkm_info->total_pfree = debit;
3114 }
3115 if (!ledger_get_entries(task->ledger,
3116 task_ledgers.tkm_shared, &credit, &debit)) {
3117 tkm_info->total_salloc = credit;
3118 tkm_info->total_sfree = debit;
3119 }
3120 task_unlock(task);
3121 }
3122
3123 *task_info_count = TASK_KERNELMEMORY_INFO_COUNT;
3124 return KERN_SUCCESS;
3125 }
3126
3127 /* OBSOLETE */
3128 case TASK_SCHED_FIFO_INFO:
3129 {
3130
3131 if (*task_info_count < POLICY_FIFO_BASE_COUNT) {
3132 error = KERN_INVALID_ARGUMENT;
3133 break;
3134 }
3135
3136 error = KERN_INVALID_POLICY;
3137 break;
3138 }
3139
3140 /* OBSOLETE */
3141 case TASK_SCHED_RR_INFO:
3142 {
3143 register policy_rr_base_t rr_base;
3144 uint32_t quantum_time;
3145 uint64_t quantum_ns;
3146
3147 if (*task_info_count < POLICY_RR_BASE_COUNT) {
3148 error = KERN_INVALID_ARGUMENT;
3149 break;
3150 }
3151
3152 rr_base = (policy_rr_base_t) task_info_out;
3153
3154 if (task != kernel_task) {
3155 error = KERN_INVALID_POLICY;
3156 break;
3157 }
3158
3159 rr_base->base_priority = task->priority;
3160
3161 quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
3162 absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
3163
3164 rr_base->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
3165
3166 *task_info_count = POLICY_RR_BASE_COUNT;
3167 break;
3168 }
3169
3170 /* OBSOLETE */
3171 case TASK_SCHED_TIMESHARE_INFO:
3172 {
3173 register policy_timeshare_base_t ts_base;
3174
3175 if (*task_info_count < POLICY_TIMESHARE_BASE_COUNT) {
3176 error = KERN_INVALID_ARGUMENT;
3177 break;
3178 }
3179
3180 ts_base = (policy_timeshare_base_t) task_info_out;
3181
3182 if (task == kernel_task) {
3183 error = KERN_INVALID_POLICY;
3184 break;
3185 }
3186
3187 ts_base->base_priority = task->priority;
3188
3189 *task_info_count = POLICY_TIMESHARE_BASE_COUNT;
3190 break;
3191 }
3192
3193 case TASK_SECURITY_TOKEN:
3194 {
3195 register security_token_t *sec_token_p;
3196
3197 if (*task_info_count < TASK_SECURITY_TOKEN_COUNT) {
3198 error = KERN_INVALID_ARGUMENT;
3199 break;
3200 }
3201
3202 sec_token_p = (security_token_t *) task_info_out;
3203
3204 *sec_token_p = task->sec_token;
3205
3206 *task_info_count = TASK_SECURITY_TOKEN_COUNT;
3207 break;
3208 }
3209
3210 case TASK_AUDIT_TOKEN:
3211 {
3212 register audit_token_t *audit_token_p;
3213
3214 if (*task_info_count < TASK_AUDIT_TOKEN_COUNT) {
3215 error = KERN_INVALID_ARGUMENT;
3216 break;
3217 }
3218
3219 audit_token_p = (audit_token_t *) task_info_out;
3220
3221 *audit_token_p = task->audit_token;
3222
3223 *task_info_count = TASK_AUDIT_TOKEN_COUNT;
3224 break;
3225 }
3226
3227 case TASK_SCHED_INFO:
3228 error = KERN_INVALID_ARGUMENT;
3229 break;
3230
3231 case TASK_EVENTS_INFO:
3232 {
3233 register task_events_info_t events_info;
3234 register thread_t thread;
3235
3236 if (*task_info_count < TASK_EVENTS_INFO_COUNT) {
3237 error = KERN_INVALID_ARGUMENT;
3238 break;
3239 }
3240
3241 events_info = (task_events_info_t) task_info_out;
3242
3243
3244 events_info->faults = task->faults;
3245 events_info->pageins = task->pageins;
3246 events_info->cow_faults = task->cow_faults;
3247 events_info->messages_sent = task->messages_sent;
3248 events_info->messages_received = task->messages_received;
3249 events_info->syscalls_mach = task->syscalls_mach;
3250 events_info->syscalls_unix = task->syscalls_unix;
3251
3252 events_info->csw = task->c_switch;
3253
3254 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3255 events_info->csw += thread->c_switch;
3256 events_info->syscalls_mach += thread->syscalls_mach;
3257 events_info->syscalls_unix += thread->syscalls_unix;
3258 }
3259
3260
3261 *task_info_count = TASK_EVENTS_INFO_COUNT;
3262 break;
3263 }
3264 case TASK_AFFINITY_TAG_INFO:
3265 {
3266 if (*task_info_count < TASK_AFFINITY_TAG_INFO_COUNT) {
3267 error = KERN_INVALID_ARGUMENT;
3268 break;
3269 }
3270
3271 error = task_affinity_info(task, task_info_out, task_info_count);
3272 break;
3273 }
3274 case TASK_POWER_INFO:
3275 {
3276 if (*task_info_count < TASK_POWER_INFO_COUNT) {
3277 error = KERN_INVALID_ARGUMENT;
3278 break;
3279 }
3280
3281 task_power_info_locked(task, (task_power_info_t)task_info_out, NULL);
3282 break;
3283 }
3284
3285 case TASK_POWER_INFO_V2:
3286 {
3287 if (*task_info_count < TASK_POWER_INFO_V2_COUNT) {
3288 error = KERN_INVALID_ARGUMENT;
3289 break;
3290 }
3291 task_power_info_v2_t tpiv2 = (task_power_info_v2_t) task_info_out;
3292 task_power_info_locked(task, &tpiv2->cpu_energy, &tpiv2->gpu_energy);
3293 break;
3294 }
3295
3296 case TASK_VM_INFO:
3297 case TASK_VM_INFO_PURGEABLE:
3298 {
3299 task_vm_info_t vm_info;
3300 vm_map_t map;
3301
3302 if (*task_info_count < TASK_VM_INFO_REV0_COUNT) {
3303 error = KERN_INVALID_ARGUMENT;
3304 break;
3305 }
3306
3307 vm_info = (task_vm_info_t)task_info_out;
3308
3309 if (task == kernel_task) {
3310 map = kernel_map;
3311 /* no lock */
3312 } else {
3313 map = task->map;
3314 vm_map_lock_read(map);
3315 }
3316
3317 vm_info->virtual_size = (typeof(vm_info->virtual_size))map->size;
3318 vm_info->region_count = map->hdr.nentries;
3319 vm_info->page_size = vm_map_page_size(map);
3320
3321 vm_info->resident_size = pmap_resident_count(map->pmap);
3322 vm_info->resident_size *= PAGE_SIZE;
3323 vm_info->resident_size_peak = pmap_resident_max(map->pmap);
3324 vm_info->resident_size_peak *= PAGE_SIZE;
3325
3326 #define _VM_INFO(_name) \
3327 vm_info->_name = ((mach_vm_size_t) map->pmap->stats._name) * PAGE_SIZE
3328
3329 _VM_INFO(device);
3330 _VM_INFO(device_peak);
3331 _VM_INFO(external);
3332 _VM_INFO(external_peak);
3333 _VM_INFO(internal);
3334 _VM_INFO(internal_peak);
3335 _VM_INFO(reusable);
3336 _VM_INFO(reusable_peak);
3337 _VM_INFO(compressed);
3338 _VM_INFO(compressed_peak);
3339 _VM_INFO(compressed_lifetime);
3340
3341 vm_info->purgeable_volatile_pmap = 0;
3342 vm_info->purgeable_volatile_resident = 0;
3343 vm_info->purgeable_volatile_virtual = 0;
3344 if (task == kernel_task) {
3345 /*
3346 * We do not maintain the detailed stats for the
3347 * kernel_pmap, so just count everything as
3348 * "internal"...
3349 */
3350 vm_info->internal = vm_info->resident_size;
3351 /*
3352 * ... but since the memory held by the VM compressor
3353 * in the kernel address space ought to be attributed
3354 * to user-space tasks, we subtract it from "internal"
3355 * to give memory reporting tools a more accurate idea
3356 * of what the kernel itself is actually using, instead
3357 * of making it look like the kernel is leaking memory
3358 * when the system is under memory pressure.
3359 */
3360 vm_info->internal -= (VM_PAGE_COMPRESSOR_COUNT *
3361 PAGE_SIZE);
3362 } else {
3363 mach_vm_size_t volatile_virtual_size;
3364 mach_vm_size_t volatile_resident_size;
3365 mach_vm_size_t volatile_compressed_size;
3366 mach_vm_size_t volatile_pmap_size;
3367 mach_vm_size_t volatile_compressed_pmap_size;
3368 kern_return_t kr;
3369
3370 if (flavor == TASK_VM_INFO_PURGEABLE) {
3371 kr = vm_map_query_volatile(
3372 map,
3373 &volatile_virtual_size,
3374 &volatile_resident_size,
3375 &volatile_compressed_size,
3376 &volatile_pmap_size,
3377 &volatile_compressed_pmap_size);
3378 if (kr == KERN_SUCCESS) {
3379 vm_info->purgeable_volatile_pmap =
3380 volatile_pmap_size;
3381 if (radar_20146450) {
3382 vm_info->compressed -=
3383 volatile_compressed_pmap_size;
3384 }
3385 vm_info->purgeable_volatile_resident =
3386 volatile_resident_size;
3387 vm_info->purgeable_volatile_virtual =
3388 volatile_virtual_size;
3389 }
3390 }
3391 vm_map_unlock_read(map);
3392 }
3393
3394 if (*task_info_count >= TASK_VM_INFO_COUNT) {
3395 vm_info->phys_footprint = 0;
3396 *task_info_count = TASK_VM_INFO_COUNT;
3397 } else {
3398 *task_info_count = TASK_VM_INFO_REV0_COUNT;
3399 }
3400
3401 break;
3402 }
3403
3404 case TASK_WAIT_STATE_INFO:
3405 {
3406 /*
3407 * Deprecated flavor. Currently allowing some results until all users
3408 * stop calling it. The results may not be accurate.
3409 */
3410 task_wait_state_info_t wait_state_info;
3411 uint64_t total_sfi_ledger_val = 0;
3412
3413 if (*task_info_count < TASK_WAIT_STATE_INFO_COUNT) {
3414 error = KERN_INVALID_ARGUMENT;
3415 break;
3416 }
3417
3418 wait_state_info = (task_wait_state_info_t) task_info_out;
3419
3420 wait_state_info->total_wait_state_time = 0;
3421 bzero(wait_state_info->_reserved, sizeof(wait_state_info->_reserved));
3422
3423 #if CONFIG_SCHED_SFI
3424 int i, prev_lentry = -1;
3425 int64_t val_credit, val_debit;
3426
3427 for (i = 0; i < MAX_SFI_CLASS_ID; i++){
3428 val_credit = 0;
3429 /*
3430 * checking with prev_lentry != entry ensures adjacent classes
3431 * which share the same ledger do not add wait times twice.
3432 * Note: Use ledger() call to get data for each individual sfi class.
3433 */
3434 if (prev_lentry != task_ledgers.sfi_wait_times[i] &&
3435 KERN_SUCCESS == ledger_get_entries(task->ledger,
3436 task_ledgers.sfi_wait_times[i], &val_credit, &val_debit)) {
3437 total_sfi_ledger_val += val_credit;
3438 }
3439 prev_lentry = task_ledgers.sfi_wait_times[i];
3440 }
3441
3442 #endif /* CONFIG_SCHED_SFI */
3443 wait_state_info->total_wait_sfi_state_time = total_sfi_ledger_val;
3444 *task_info_count = TASK_WAIT_STATE_INFO_COUNT;
3445
3446 break;
3447 }
3448 case TASK_VM_INFO_PURGEABLE_ACCOUNT:
3449 {
3450 #if DEVELOPMENT || DEBUG
3451 pvm_account_info_t acnt_info;
3452
3453 if (*task_info_count < PVM_ACCOUNT_INFO_COUNT) {
3454 error = KERN_INVALID_ARGUMENT;
3455 break;
3456 }
3457
3458 if (task_info_out == NULL) {
3459 error = KERN_INVALID_ARGUMENT;
3460 break;
3461 }
3462
3463 acnt_info = (pvm_account_info_t) task_info_out;
3464
3465 error = vm_purgeable_account(task, acnt_info);
3466
3467 *task_info_count = PVM_ACCOUNT_INFO_COUNT;
3468
3469 break;
3470 #else /* DEVELOPMENT || DEBUG */
3471 error = KERN_NOT_SUPPORTED;
3472 break;
3473 #endif /* DEVELOPMENT || DEBUG */
3474 }
3475 case TASK_FLAGS_INFO:
3476 {
3477 task_flags_info_t flags_info;
3478
3479 if (*task_info_count < TASK_FLAGS_INFO_COUNT) {
3480 error = KERN_INVALID_ARGUMENT;
3481 break;
3482 }
3483
3484 flags_info = (task_flags_info_t)task_info_out;
3485
3486 /* only publish the 64-bit flag of the task */
3487 flags_info->flags = task->t_flags & TF_64B_ADDR;
3488
3489 *task_info_count = TASK_FLAGS_INFO_COUNT;
3490 break;
3491 }
3492
3493 case TASK_DEBUG_INFO_INTERNAL:
3494 {
3495 #if DEVELOPMENT || DEBUG
3496 task_debug_info_internal_t dbg_info;
3497 if (*task_info_count < TASK_DEBUG_INFO_INTERNAL_COUNT) {
3498 error = KERN_NOT_SUPPORTED;
3499 break;
3500 }
3501
3502 if (task_info_out == NULL) {
3503 error = KERN_INVALID_ARGUMENT;
3504 break;
3505 }
3506 dbg_info = (task_debug_info_internal_t) task_info_out;
3507 dbg_info->ipc_space_size = 0;
3508 if (task->itk_space){
3509 dbg_info->ipc_space_size = task->itk_space->is_table_size;
3510 }
3511
3512 error = KERN_SUCCESS;
3513 *task_info_count = TASK_DEBUG_INFO_INTERNAL_COUNT;
3514 break;
3515 #else /* DEVELOPMENT || DEBUG */
3516 error = KERN_NOT_SUPPORTED;
3517 break;
3518 #endif /* DEVELOPMENT || DEBUG */
3519 }
3520 default:
3521 error = KERN_INVALID_ARGUMENT;
3522 }
3523
3524 task_unlock(task);
3525 return (error);
3526 }
3527
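/*
 * Illustrative sketch (editorial addition): querying one of the flavors above
 * from user space.  The count argument is in/out: callers pass the capacity of
 * their buffer and get back the number of natural_t units actually filled in.
 * Minimal example, assuming <mach/mach.h>:
 *
 *	mach_task_basic_info_data_t info;
 *	mach_msg_type_number_t count = MACH_TASK_BASIC_INFO_COUNT;
 *
 *	if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO,
 *	    (task_info_t)&info, &count) == KERN_SUCCESS)
 *		printf("resident: %llu bytes\n",
 *		    (unsigned long long)info.resident_size);
 */
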
3528 /*
3529 * task_power_info
3530 *
3531 * Returns power stats for the task.
3532 * Note: Called with task locked.
3533 */
3534 void
3535 task_power_info_locked(
3536 task_t task,
3537 task_power_info_t info,
3538 gpu_energy_data_t ginfo)
3539 {
3540 thread_t thread;
3541 ledger_amount_t tmp;
3542
3543 task_lock_assert_owned(task);
3544
3545 ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
3546 (ledger_amount_t *)&info->task_interrupt_wakeups, &tmp);
3547 ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
3548 (ledger_amount_t *)&info->task_platform_idle_wakeups, &tmp);
3549
3550 info->task_timer_wakeups_bin_1 = task->task_timer_wakeups_bin_1;
3551 info->task_timer_wakeups_bin_2 = task->task_timer_wakeups_bin_2;
3552
3553 info->total_user = task->total_user_time;
3554 info->total_system = task->total_system_time;
3555
3556 if (ginfo) {
3557 ginfo->task_gpu_utilisation = task->task_gpu_ns;
3558 }
3559
3560 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3561 uint64_t tval;
3562 spl_t x;
3563
3564 if (thread->options & TH_OPT_IDLE_THREAD)
3565 continue;
3566
3567 x = splsched();
3568 thread_lock(thread);
3569
3570 info->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
3571 info->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
3572
3573 tval = timer_grab(&thread->user_timer);
3574 info->total_user += tval;
3575
3576 tval = timer_grab(&thread->system_timer);
3577 if (thread->precise_user_kernel_time) {
3578 info->total_system += tval;
3579 } else {
3580 /* system_timer may represent either sys or user */
3581 info->total_user += tval;
3582 }
3583
3584 if (ginfo) {
3585 ginfo->task_gpu_utilisation += ml_gpu_stat(thread);
3586 }
3587 thread_unlock(thread);
3588 splx(x);
3589 }
3590 }
3591
3592 /*
3593 * task_gpu_utilisation
3594 *
3595 * Returns the total gpu time used by all the threads of the task
3596 * (both dead and alive)
3597 */
3598 uint64_t
3599 task_gpu_utilisation(
3600 task_t task)
3601 {
3602 uint64_t gpu_time = 0;
3603 thread_t thread;
3604
3605 task_lock(task);
3606 gpu_time += task->task_gpu_ns;
3607
3608 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3609 spl_t x;
3610 x = splsched();
3611 thread_lock(thread);
3612 gpu_time += ml_gpu_stat(thread);
3613 thread_unlock(thread);
3614 splx(x);
3615 }
3616
3617 task_unlock(task);
3618 return gpu_time;
3619 }
3620
3621 kern_return_t
3622 task_purgable_info(
3623 task_t task,
3624 task_purgable_info_t *stats)
3625 {
3626 if (task == TASK_NULL || stats == NULL)
3627 return KERN_INVALID_ARGUMENT;
3628 /* Take task reference */
3629 task_reference(task);
3630 vm_purgeable_stats((vm_purgeable_info_t)stats, task);
3631 /* Drop task reference */
3632 task_deallocate(task);
3633 return KERN_SUCCESS;
3634 }
3635
3636 void
3637 task_vtimer_set(
3638 task_t task,
3639 integer_t which)
3640 {
3641 thread_t thread;
3642 spl_t x;
3643
3644 /* assert(task == current_task()); */ /* bogus assert 4803227 4807483 */
3645
3646 task_lock(task);
3647
3648 task->vtimers |= which;
3649
3650 switch (which) {
3651
3652 case TASK_VTIMER_USER:
3653 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3654 x = splsched();
3655 thread_lock(thread);
3656 if (thread->precise_user_kernel_time)
3657 thread->vtimer_user_save = timer_grab(&thread->user_timer);
3658 else
3659 thread->vtimer_user_save = timer_grab(&thread->system_timer);
3660 thread_unlock(thread);
3661 splx(x);
3662 }
3663 break;
3664
3665 case TASK_VTIMER_PROF:
3666 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3667 x = splsched();
3668 thread_lock(thread);
3669 thread->vtimer_prof_save = timer_grab(&thread->user_timer);
3670 thread->vtimer_prof_save += timer_grab(&thread->system_timer);
3671 thread_unlock(thread);
3672 splx(x);
3673 }
3674 break;
3675
3676 case TASK_VTIMER_RLIM:
3677 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3678 x = splsched();
3679 thread_lock(thread);
3680 thread->vtimer_rlim_save = timer_grab(&thread->user_timer);
3681 thread->vtimer_rlim_save += timer_grab(&thread->system_timer);
3682 thread_unlock(thread);
3683 splx(x);
3684 }
3685 break;
3686 }
3687
3688 task_unlock(task);
3689 }
3690
3691 void
3692 task_vtimer_clear(
3693 task_t task,
3694 integer_t which)
3695 {
3696 assert(task == current_task());
3697
3698 task_lock(task);
3699
3700 task->vtimers &= ~which;
3701
3702 task_unlock(task);
3703 }
3704
3705 void
3706 task_vtimer_update(
3707 __unused
3708 task_t task,
3709 integer_t which,
3710 uint32_t *microsecs)
3711 {
3712 thread_t thread = current_thread();
3713 uint32_t tdelt;
3714 clock_sec_t secs;
3715 uint64_t tsum;
3716
3717 assert(task == current_task());
3718
3719 assert(task->vtimers & which);
3720
3721 secs = tdelt = 0;
3722
3723 switch (which) {
3724
3725 case TASK_VTIMER_USER:
3726 if (thread->precise_user_kernel_time) {
3727 tdelt = (uint32_t)timer_delta(&thread->user_timer,
3728 &thread->vtimer_user_save);
3729 } else {
3730 tdelt = (uint32_t)timer_delta(&thread->system_timer,
3731 &thread->vtimer_user_save);
3732 }
3733 absolutetime_to_microtime(tdelt, &secs, microsecs);
3734 break;
3735
3736 case TASK_VTIMER_PROF:
3737 tsum = timer_grab(&thread->user_timer);
3738 tsum += timer_grab(&thread->system_timer);
3739 tdelt = (uint32_t)(tsum - thread->vtimer_prof_save);
3740 absolutetime_to_microtime(tdelt, &secs, microsecs);
3741 /* if the time delta is smaller than a usec, ignore */
3742 if (*microsecs != 0)
3743 thread->vtimer_prof_save = tsum;
3744 break;
3745
3746 case TASK_VTIMER_RLIM:
3747 tsum = timer_grab(&thread->user_timer);
3748 tsum += timer_grab(&thread->system_timer);
3749 tdelt = (uint32_t)(tsum - thread->vtimer_rlim_save);
3750 thread->vtimer_rlim_save = tsum;
3751 absolutetime_to_microtime(tdelt, &secs, microsecs);
3752 break;
3753 }
3754
3755 }
3756
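/*
 * Illustrative sketch (editorial addition): the intended vtimer pattern, as
 * assumed to be driven by the BSD setitimer() path.  The flag is armed once
 * per task; each thread then samples its own delta when it is about to return
 * to user mode:
 *
 *	task_vtimer_set(task, TASK_VTIMER_USER);       // arm ITIMER_VIRTUAL accounting
 *	...
 *	if (task->vtimers & TASK_VTIMER_USER) {
 *		uint32_t usecs;
 *		task_vtimer_update(task, TASK_VTIMER_USER, &usecs);
 *		// charge 'usecs' against the virtual interval timer
 *	}
 *	...
 *	task_vtimer_clear(task, TASK_VTIMER_USER);     // disarm when the timer is cleared
 */
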
3757 /*
3758 * task_assign:
3759 *
3760 * Change the assigned processor set for the task
3761 */
3762 kern_return_t
3763 task_assign(
3764 __unused task_t task,
3765 __unused processor_set_t new_pset,
3766 __unused boolean_t assign_threads)
3767 {
3768 return(KERN_FAILURE);
3769 }
3770
3771 /*
3772 * task_assign_default:
3773 *
3774 * Version of task_assign to assign to default processor set.
3775 */
3776 kern_return_t
3777 task_assign_default(
3778 task_t task,
3779 boolean_t assign_threads)
3780 {
3781 return (task_assign(task, &pset0, assign_threads));
3782 }
3783
3784 /*
3785 * task_get_assignment
3786 *
3787 * Return name of processor set that task is assigned to.
3788 */
3789 kern_return_t
3790 task_get_assignment(
3791 task_t task,
3792 processor_set_t *pset)
3793 {
3794 if (!task->active)
3795 return(KERN_FAILURE);
3796
3797 *pset = &pset0;
3798
3799 return (KERN_SUCCESS);
3800 }
3801
3802 uint64_t
3803 get_task_dispatchqueue_offset(
3804 task_t task)
3805 {
3806 return task->dispatchqueue_offset;
3807 }
3808
3809 /*
3810 * task_policy
3811 *
3812 * Set scheduling policy and parameters, both base and limit, for
3813 * the given task. Policy must be a policy which is enabled for the
3814 * processor set. Change contained threads if requested.
3815 */
3816 kern_return_t
3817 task_policy(
3818 __unused task_t task,
3819 __unused policy_t policy_id,
3820 __unused policy_base_t base,
3821 __unused mach_msg_type_number_t count,
3822 __unused boolean_t set_limit,
3823 __unused boolean_t change)
3824 {
3825 return(KERN_FAILURE);
3826 }
3827
3828 /*
3829 * task_set_policy
3830 *
3831 * Set scheduling policy and parameters, both base and limit, for
3832 * the given task. Policy can be any policy implemented by the
3833 * processor set, whether enabled or not. Change contained threads
3834 * if requested.
3835 */
3836 kern_return_t
3837 task_set_policy(
3838 __unused task_t task,
3839 __unused processor_set_t pset,
3840 __unused policy_t policy_id,
3841 __unused policy_base_t base,
3842 __unused mach_msg_type_number_t base_count,
3843 __unused policy_limit_t limit,
3844 __unused mach_msg_type_number_t limit_count,
3845 __unused boolean_t change)
3846 {
3847 return(KERN_FAILURE);
3848 }
3849
3850 kern_return_t
3851 task_set_ras_pc(
3852 __unused task_t task,
3853 __unused vm_offset_t pc,
3854 __unused vm_offset_t endpc)
3855 {
3856 return KERN_FAILURE;
3857 }
3858
3859 void
3860 task_synchronizer_destroy_all(task_t task)
3861 {
3862 /*
3863 * Destroy owned semaphores
3864 */
3865 semaphore_destroy_all(task);
3866 }
3867
3868 /*
3869 * Install default (machine-dependent) initial thread state
3870 * on the task. Subsequent thread creation will have this initial
3871 * state set on the thread by machine_thread_inherit_taskwide().
3872 * Flavors and structures are exactly the same as those to thread_set_state()
3873 */
3874 kern_return_t
3875 task_set_state(
3876 task_t task,
3877 int flavor,
3878 thread_state_t state,
3879 mach_msg_type_number_t state_count)
3880 {
3881 kern_return_t ret;
3882
3883 if (task == TASK_NULL) {
3884 return (KERN_INVALID_ARGUMENT);
3885 }
3886
3887 task_lock(task);
3888
3889 if (!task->active) {
3890 task_unlock(task);
3891 return (KERN_FAILURE);
3892 }
3893
3894 ret = machine_task_set_state(task, flavor, state, state_count);
3895
3896 task_unlock(task);
3897 return ret;
3898 }
3899
3900 /*
3901 * Examine the default (machine-dependent) initial thread state
3902 * on the task, as set by task_set_state(). Flavors and structures
3903 * are exactly the same as those passed to thread_get_state().
3904 */
3905 kern_return_t
3906 task_get_state(
3907 task_t task,
3908 int flavor,
3909 thread_state_t state,
3910 mach_msg_type_number_t *state_count)
3911 {
3912 kern_return_t ret;
3913
3914 if (task == TASK_NULL) {
3915 return (KERN_INVALID_ARGUMENT);
3916 }
3917
3918 task_lock(task);
3919
3920 if (!task->active) {
3921 task_unlock(task);
3922 return (KERN_FAILURE);
3923 }
3924
3925 ret = machine_task_get_state(task, flavor, state, state_count);
3926
3927 task_unlock(task);
3928 return ret;
3929 }
3930
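/*
 * Illustrative sketch (editorial addition): task_set_state() is the task-wide
 * analogue of thread_set_state(), useful for arming state (e.g. debug
 * registers) that should apply to threads not yet created.  Flavors are
 * machine dependent; x86_DEBUG_STATE64 is shown purely as an assumed example:
 *
 *	x86_debug_state64_t dbg = { ... };    // desired default debug state
 *
 *	kr = task_set_state(target_task, x86_DEBUG_STATE64,
 *	    (thread_state_t)&dbg, x86_DEBUG_STATE64_COUNT);
 */
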
3931 #if CONFIG_JETSAM
3932 #define HWM_USERCORE_MINSPACE 250 // free space (in MB) required *after* core file creation
3933
3934 void __attribute__((noinline))
3935 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb)
3936 {
3937 task_t task = current_task();
3938 int pid = 0;
3939 const char *procname = "unknown";
3940 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
3941
3942 #ifdef MACH_BSD
3943 pid = proc_selfpid();
3944
3945 if (pid == 1) {
3946 /*
3947 * Cannot have ReportCrash analyzing
3948 * a suspended initproc.
3949 */
3950 return;
3951 }
3952
3953 if (task->bsd_info != NULL)
3954 procname = proc_name_address(current_task()->bsd_info);
3955 #endif
3956
3957 if (hwm_user_cores) {
3958 int error;
3959 uint64_t starttime, end;
3960 clock_sec_t secs = 0;
3961 uint32_t microsecs = 0;
3962
3963 starttime = mach_absolute_time();
3964 /*
3965 * Trigger a coredump of this process. Don't proceed unless we know we won't
3966 * be filling up the disk, and ignore the core size resource limit for this
3967 * core file.
3968 */
3969 if ((error = coredump(current_task()->bsd_info, HWM_USERCORE_MINSPACE, COREDUMP_IGNORE_ULIMIT)) != 0) {
3970 printf("couldn't take coredump of %s[%d]: %d\n", procname, pid, error);
3971 }
3972 /*
3973 * coredump() leaves the task suspended.
3974 */
3975 task_resume_internal(current_task());
3976
3977 end = mach_absolute_time();
3978 absolutetime_to_microtime(end - starttime, &secs, &microsecs);
3979 printf("coredump of %s[%d] taken in %d secs %d microsecs\n",
3980 proc_name_address(current_task()->bsd_info), pid, (int)secs, microsecs);
3981 }
3982
3983 if (disable_exc_resource) {
3984 printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
3985 "supressed by a boot-arg.\n", procname, pid, max_footprint_mb);
3986 return;
3987 }
3988
3989 /*
3990 * A task that has triggered an EXC_RESOURCE should not be
3991 * jetsammed when the device is under memory pressure. Here
3992 * we set the P_MEMSTAT_TERMINATED flag so that the process
3993 * will be skipped if the memorystatus_thread wakes up.
3994 */
3995 proc_memstat_terminated(current_task()->bsd_info, TRUE);
3996
3997 printf("process %s[%d] crossed memory high watermark (%d MB); sending "
3998 "EXC_RESOURCE.\n", procname, pid, max_footprint_mb);
3999
4000 code[0] = code[1] = 0;
4001 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY);
4002 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK);
4003 EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb);
4004
4005 /*
4006 * Use the _internal_ variant so that no user-space
4007 * process can resume our task from under us.
4008 */
4009 task_suspend_internal(task);
4010 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
4011 task_resume_internal(task);
4012
4013 /*
4014 * After the EXC_RESOURCE has been handled, we must clear the
4015 * P_MEMSTAT_TERMINATED flag so that the process can again be
4016 * considered for jetsam if the memorystatus_thread wakes up.
4017 */
4018 proc_memstat_terminated(current_task()->bsd_info, FALSE); /* clear the flag */
4019 }
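/*
 * For reference, a hedged sketch of how an exception handler might pick
 * the fields encoded above back out of code[0].  The decode macros are
 * expected to come from <kern/exc_resource.h>; treat their exact names
 * as an assumption rather than a guarantee of this interface.
 *
 *	if (EXC_RESOURCE_DECODE_RESOURCE_TYPE(code[0]) == RESOURCE_TYPE_MEMORY &&
 *	    EXC_RESOURCE_DECODE_FLAVOR(code[0]) == FLAVOR_HIGH_WATERMARK) {
 *		int limit_mb = (int)EXC_RESOURCE_HWM_DECODE_LIMIT(code[0]);
 *		printf("task exceeded %d MB footprint limit\n", limit_mb);
 *	}
 */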
4020
4021 /*
4022 * Callback invoked when a task exceeds its physical footprint limit.
4023 */
4024 void
4025 task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1)
4026 {
4027 ledger_amount_t max_footprint, max_footprint_mb;
4028 ledger_amount_t footprint_after_purge;
4029 task_t task;
4030
4031 if (warning == LEDGER_WARNING_DIPPED_BELOW) {
4032 /*
4033 * Task memory limits only provide a warning on the way up.
4034 */
4035 return;
4036 }
4037
4038 task = current_task();
4039
4040 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &max_footprint);
4041 max_footprint_mb = max_footprint >> 20;
4042
4043 /*
4044 * Try to purge all "volatile" memory in that task first.
4045 */
4046 (void) task_purge_volatile_memory(task);
4047 /* Are we still over the limit? */
4048 ledger_get_balance(task->ledger,
4049 task_ledgers.phys_footprint,
4050 &footprint_after_purge);
4051 if ((!warning &&
4052 footprint_after_purge <= max_footprint) ||
4053 (warning &&
4054 footprint_after_purge <= ((max_footprint *
4055 PHYS_FOOTPRINT_WARNING_LEVEL) / 100))) {
4056 /* all better now */
4057 ledger_reset_callback_state(task->ledger,
4058 task_ledgers.phys_footprint);
4059 return;
4060 }
4061 /* still over the limit after purging... */
4062
4063 /*
4064 * If this is an actual violation (not a warning),
4065 * generate a non-fatal high watermark EXC_RESOURCE.
4066 */
4067 if ((warning == 0) && (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION)) {
4068 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND((int)max_footprint_mb);
4069 }
4070
4071 memorystatus_on_ledger_footprint_exceeded((warning == LEDGER_WARNING_ROSE_ABOVE) ? TRUE : FALSE,
4072 (int)max_footprint_mb);
4073 }
4074
4075 extern int proc_check_footprint_priv(void);
4076
4077 kern_return_t
4078 task_set_phys_footprint_limit(
4079 task_t task,
4080 int new_limit_mb,
4081 int *old_limit_mb)
4082 {
4083 kern_return_t error;
4084
4085 if ((error = proc_check_footprint_priv())) {
4086 return (KERN_NO_ACCESS);
4087 }
4088
4089 return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, FALSE);
4090 }
4091
4092 kern_return_t
4093 task_convert_phys_footprint_limit(
4094 int limit_mb,
4095 int *converted_limit_mb)
4096 {
4097 if (limit_mb == -1) {
4098 /*
4099 * No limit
4100 */
4101 if (max_task_footprint != 0) {
4102 *converted_limit_mb = (int)(max_task_footprint / 1024 / 1024); /* bytes to MB */
4103 } else {
4104 *converted_limit_mb = (int)(LEDGER_LIMIT_INFINITY >> 20);
4105 }
4106 } else {
4107 /* nothing to convert */
4108 *converted_limit_mb = limit_mb;
4109 }
4110 return (KERN_SUCCESS);
4111 }
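/*
 * Worked example, assuming a boot-arg has set max_task_footprint to
 * 500 MB: converting the "no limit" sentinel reports that global cap,
 * while a concrete request passes through unchanged.
 *
 *	int mb;
 *	(void) task_convert_phys_footprint_limit(-1, &mb);	yields mb == 500
 *	(void) task_convert_phys_footprint_limit(128, &mb);	yields mb == 128
 */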
4112
4113
4114 kern_return_t
4115 task_set_phys_footprint_limit_internal(
4116 task_t task,
4117 int new_limit_mb,
4118 int *old_limit_mb,
4119 boolean_t trigger_exception)
4120 {
4121 ledger_amount_t old;
4122
4123 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &old);
4124
4125 if (old_limit_mb) {
4126 /*
4127 * Check that limit >> 20 will not give an "unexpected" 32-bit
4128 * result. There are, however, implicit assumptions that a -1 MB limit
4129 * equates to LEDGER_LIMIT_INFINITY.
4130 */
4131 assert(((old & 0xFFF0000000000000LL) == 0) || (old == LEDGER_LIMIT_INFINITY));
4132 *old_limit_mb = (int)(old >> 20);
4133 }
4134
4135 if (new_limit_mb == -1) {
4136 /*
4137 * Caller wishes to remove the limit.
4138 */
4139 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
4140 max_task_footprint ? max_task_footprint : LEDGER_LIMIT_INFINITY,
4141 max_task_footprint ? PHYS_FOOTPRINT_WARNING_LEVEL : 0);
4142 return (KERN_SUCCESS);
4143 }
4144
4145 #ifdef CONFIG_NOMONITORS
4146 return (KERN_SUCCESS);
4147 #endif /* CONFIG_NOMONITORS */
4148
4149 task_lock(task);
4150
4151 if (trigger_exception) {
4152 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION;
4153 } else {
4154 task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION;
4155 }
4156
4157 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
4158 (ledger_amount_t)new_limit_mb << 20, PHYS_FOOTPRINT_WARNING_LEVEL);
4159
4160 if (task == current_task()) {
4161 ledger_check_new_balance(task->ledger, task_ledgers.phys_footprint);
4162 }
4163
4164 task_unlock(task);
4165
4166 return (KERN_SUCCESS);
4167 }
4168
4169 kern_return_t
4170 task_get_phys_footprint_limit(
4171 task_t task,
4172 int *limit_mb)
4173 {
4174 ledger_amount_t limit;
4175
4176 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &limit);
4177 /*
4178 * Check that limit >> 20 will not give an "unexpected" signed, 32-bit
4179 * result. There are, however, implicit assumptions that a -1 MB limit
4180 * equates to LEDGER_LIMIT_INFINITY.
4181 */
4182 assert(((limit & 0xFFF0000000000000LL) == 0) || (limit == LEDGER_LIMIT_INFINITY));
4183 *limit_mb = (int)(limit >> 20);
4184
4185 return (KERN_SUCCESS);
4186 }
4187 #else /* CONFIG_JETSAM */
4188 kern_return_t
4189 task_set_phys_footprint_limit(
4190 __unused task_t task,
4191 __unused int new_limit_mb,
4192 __unused int *old_limit_mb)
4193 {
4194 return (KERN_FAILURE);
4195 }
4196
4197 kern_return_t
4198 task_get_phys_footprint_limit(
4199 __unused task_t task,
4200 __unused int *limit_mb)
4201 {
4202 return (KERN_FAILURE);
4203 }
4204 #endif /* CONFIG_JETSAM */
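/*
 * Minimal sketch of driving the footprint-limit interfaces above (only
 * meaningful when CONFIG_JETSAM is configured; the stubs above return
 * KERN_FAILURE otherwise): lower the limit to a hypothetical 300 MB and
 * read it back.  Passing -1 as new_limit_mb removes the per-task limit
 * again, falling back to the global max_task_footprint handling in
 * task_set_phys_footprint_limit_internal().
 *
 *	int old_mb = 0, cur_mb = 0;
 *
 *	if (task_set_phys_footprint_limit(task, 300, &old_mb) == KERN_SUCCESS) {
 *		(void) task_get_phys_footprint_limit(task, &cur_mb);
 *		assert(cur_mb == 300);
 *	}
 */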
4205
4206 /*
4207 * We need to export some functions to other components that
4208 * are currently implemented in macros within the osfmk
4209 * component. Just export them as functions of the same name.
4210 */
4211 boolean_t is_kerneltask(task_t t)
4212 {
4213 if (t == kernel_task)
4214 return (TRUE);
4215
4216 return (FALSE);
4217 }
4218
4219 int
4220 check_for_tasksuspend(task_t task)
4221 {
4222
4223 if (task == TASK_NULL)
4224 return (0);
4225
4226 return (task->suspend_count > 0);
4227 }
4228
4229 #undef current_task
4230 task_t current_task(void);
4231 task_t current_task(void)
4232 {
4233 return (current_task_fast());
4234 }
4235
4236 #undef task_reference
4237 void task_reference(task_t task);
4238 void
4239 task_reference(
4240 task_t task)
4241 {
4242 if (task != TASK_NULL)
4243 task_reference_internal(task);
4244 }
4245
4246 /* defined in bsd/kern/kern_prot.c */
4247 extern int get_audit_token_pid(audit_token_t *audit_token);
4248
4249 int task_pid(task_t task)
4250 {
4251 if (task)
4252 return get_audit_token_pid(&task->audit_token);
4253 return -1;
4254 }
4255
4256
4257 /*
4258 * This routine is always called with the task lock held.
4259 * It returns a thread handle without taking a reference, since the
4260 * caller operates on it while the task lock is still held.
4261 */
4262 thread_t
4263 task_findtid(task_t task, uint64_t tid)
4264 {
4265 thread_t thread = THREAD_NULL;
4266
4267 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4268 if (thread->thread_id == tid)
4269 return(thread);
4270 }
4271 return(THREAD_NULL);
4272 }
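/*
 * Usage sketch (hypothetical caller): because task_findtid() returns an
 * unreferenced thread, the task lock must bracket both the lookup and
 * any use of the result.
 *
 *	thread_t thread;
 *
 *	task_lock(task);
 *	thread = task_findtid(task, tid);
 *	if (thread != THREAD_NULL) {
 *		... operate on thread while the task lock is still held ...
 *	}
 *	task_unlock(task);
 */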
4273
4274 /*
4275 * Control the CPU usage monitor for a task.
4276 */
4277 kern_return_t
4278 task_cpu_usage_monitor_ctl(task_t task, uint32_t *flags)
4279 {
4280 int error = KERN_SUCCESS;
4281
4282 if (*flags & CPUMON_MAKE_FATAL) {
4283 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_CPUMON;
4284 } else {
4285 error = KERN_INVALID_ARGUMENT;
4286 }
4287
4288 return error;
4289 }
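/*
 * Sketch of the one operation this control currently understands:
 * latching the CPU usage monitor to be fatal for the task.  Any other
 * flag combination is rejected with KERN_INVALID_ARGUMENT.
 *
 *	uint32_t flags = CPUMON_MAKE_FATAL;
 *	kern_return_t kr = task_cpu_usage_monitor_ctl(task, &flags);
 */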
4290
4291 /*
4292 * Control the wakeups monitor for a task.
4293 */
4294 kern_return_t
4295 task_wakeups_monitor_ctl(task_t task, uint32_t *flags, int32_t *rate_hz)
4296 {
4297 ledger_t ledger = task->ledger;
4298
4299 task_lock(task);
4300 if (*flags & WAKEMON_GET_PARAMS) {
4301 ledger_amount_t limit;
4302 uint64_t period;
4303
4304 ledger_get_limit(ledger, task_ledgers.interrupt_wakeups, &limit);
4305 ledger_get_period(ledger, task_ledgers.interrupt_wakeups, &period);
4306
4307 if (limit != LEDGER_LIMIT_INFINITY) {
4308 /*
4309 * An active limit means the wakeups monitor is enabled.
4310 */
4311 *rate_hz = (int32_t)(limit / (int64_t)(period / NSEC_PER_SEC));
4312 *flags = WAKEMON_ENABLE;
4313 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
4314 *flags |= WAKEMON_MAKE_FATAL;
4315 }
4316 } else {
4317 *flags = WAKEMON_DISABLE;
4318 *rate_hz = -1;
4319 }
4320
4321 /*
4322 * If WAKEMON_GET_PARAMS is present in flags, all other flags are ignored.
4323 */
4324 task_unlock(task);
4325 return KERN_SUCCESS;
4326 }
4327
4328 if (*flags & WAKEMON_ENABLE) {
4329 if (*flags & WAKEMON_SET_DEFAULTS) {
4330 *rate_hz = task_wakeups_monitor_rate;
4331 }
4332
4333 #ifndef CONFIG_NOMONITORS
4334 if (*flags & WAKEMON_MAKE_FATAL) {
4335 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
4336 }
4337 #endif /* CONFIG_NOMONITORS */
4338
4339 if (*rate_hz < 0) {
4340 task_unlock(task);
4341 return KERN_INVALID_ARGUMENT;
4342 }
4343
4344 #ifndef CONFIG_NOMONITORS
4345 ledger_set_limit(ledger, task_ledgers.interrupt_wakeups, *rate_hz * task_wakeups_monitor_interval,
4346 task_wakeups_monitor_ustackshots_trigger_pct);
4347 ledger_set_period(ledger, task_ledgers.interrupt_wakeups, task_wakeups_monitor_interval * NSEC_PER_SEC);
4348 ledger_enable_callback(ledger, task_ledgers.interrupt_wakeups);
4349 #endif /* CONFIG_NOMONITORS */
4350 } else if (*flags & WAKEMON_DISABLE) {
4351 /*
4352 * Caller wishes to disable wakeups monitor on the task.
4353 *
4354 * Disable telemetry if it was triggered by the wakeups monitor, and
4355 * remove the limit & callback on the wakeups ledger entry.
4356 */
4357 #if CONFIG_TELEMETRY
4358 telemetry_task_ctl_locked(current_task(), TF_WAKEMON_WARNING, 0);
4359 #endif
4360 ledger_disable_refill(ledger, task_ledgers.interrupt_wakeups);
4361 ledger_disable_callback(ledger, task_ledgers.interrupt_wakeups);
4362 }
4363
4364 task_unlock(task);
4365 return KERN_SUCCESS;
4366 }
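/*
 * Hedged example of the two common call patterns for the wakeups
 * monitor.  Enabling with WAKEMON_SET_DEFAULTS lets the routine
 * substitute the boot-time task_wakeups_monitor_rate; a subsequent
 * WAKEMON_GET_PARAMS call reads the effective rate back.
 *
 *	uint32_t flags;
 *	int32_t rate_hz = 0;
 *
 *	flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
 *	(void) task_wakeups_monitor_ctl(task, &flags, &rate_hz);
 *
 *	flags = WAKEMON_GET_PARAMS;
 *	(void) task_wakeups_monitor_ctl(task, &flags, &rate_hz);
 */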
4367
4368 void
4369 task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1)
4370 {
4371 if (warning == LEDGER_WARNING_ROSE_ABOVE) {
4372 #if CONFIG_TELEMETRY
4373 /*
4374 * This task is in danger of violating the wakeups monitor. Enable telemetry on this task
4375 * so there are micro-stackshots available if and when EXC_RESOURCE is triggered.
4376 */
4377 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 1);
4378 #endif
4379 return;
4380 }
4381
4382 #if CONFIG_TELEMETRY
4383 /*
4384 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
4385 * exceeded the limit, turn telemetry off for the task.
4386 */
4387 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 0);
4388 #endif
4389
4390 if (warning == 0) {
4391 THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE();
4392 }
4393 }
4394
4395 void __attribute__((noinline))
4396 THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE(void)
4397 {
4398 task_t task = current_task();
4399 int pid = 0;
4400 const char *procname = "unknown";
4401 uint64_t observed_wakeups_rate;
4402 uint64_t permitted_wakeups_rate;
4403 uint64_t observation_interval;
4404 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
4405 struct ledger_entry_info lei;
4406
4407 #ifdef MACH_BSD
4408 pid = proc_selfpid();
4409 if (task->bsd_info != NULL)
4410 procname = proc_name_address(current_task()->bsd_info);
4411 #endif
4412
4413 ledger_get_entry_info(task->ledger, task_ledgers.interrupt_wakeups, &lei);
4414
4415 /*
4416 * Disable the exception notification so we don't overwhelm
4417 * the listener with an endless stream of redundant exceptions.
4418 */
4419 uint32_t flags = WAKEMON_DISABLE;
4420 task_wakeups_monitor_ctl(task, &flags, NULL);
4421
4422 observed_wakeups_rate = (lei.lei_balance * (int64_t)NSEC_PER_SEC) / lei.lei_last_refill;
4423 permitted_wakeups_rate = lei.lei_limit / task_wakeups_monitor_interval;
4424 observation_interval = lei.lei_refill_period / NSEC_PER_SEC;
4425
4426 if (disable_exc_resource) {
4427 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
4428 "supressed by a boot-arg\n", procname, pid);
4429 return;
4430 }
4431 if (audio_active) {
4432 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
4433 "supressed due to audio playback\n", procname, pid);
4434 return;
4435 }
4436 printf("process %s[%d] caught causing excessive wakeups. Observed wakeups rate "
4437 "(per sec): %lld; Maximum permitted wakeups rate (per sec): %lld; Observation "
4438 "period: %lld seconds; Task lifetime number of wakeups: %lld\n",
4439 procname, pid, observed_wakeups_rate, permitted_wakeups_rate,
4440 observation_interval, lei.lei_credit);
4441
4442 code[0] = code[1] = 0;
4443 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_WAKEUPS);
4444 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_WAKEUPS_MONITOR);
4445 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_PERMITTED(code[0], task_wakeups_monitor_rate);
4446 EXC_RESOURCE_CPUMONITOR_ENCODE_OBSERVATION_INTERVAL(code[0], observation_interval);
4447 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_OBSERVED(code[1], lei.lei_balance * (int64_t)NSEC_PER_SEC / lei.lei_last_refill);
4448 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
4449
4450 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
4451 task_terminate_internal(task);
4452 }
4453 }
4454
4455 kern_return_t
4456 task_purge_volatile_memory(
4457 task_t task)
4458 {
4459 vm_map_t map;
4460 int num_object_purged;
4461
4462 if (task == TASK_NULL)
4463 return KERN_INVALID_TASK;
4464
4465 task_lock(task);
4466
4467 if (!task->active) {
4468 task_unlock(task);
4469 return KERN_INVALID_TASK;
4470 }
4471 map = task->map;
4472 if (map == VM_MAP_NULL) {
4473 task_unlock(task);
4474 return KERN_INVALID_TASK;
4475 }
4476 vm_map_reference(task->map);
4477
4478 task_unlock(task);
4479
4480 num_object_purged = vm_map_purge(map);
4481 vm_map_deallocate(map);
4482
4483 return KERN_SUCCESS;
4484 }
4485
4486 /* Placeholders for the task set/get voucher interfaces */
4487 kern_return_t
4488 task_get_mach_voucher(
4489 task_t task,
4490 mach_voucher_selector_t __unused which,
4491 ipc_voucher_t *voucher)
4492 {
4493 if (TASK_NULL == task)
4494 return KERN_INVALID_TASK;
4495
4496 *voucher = NULL;
4497 return KERN_SUCCESS;
4498 }
4499
4500 kern_return_t
4501 task_set_mach_voucher(
4502 task_t task,
4503 ipc_voucher_t __unused voucher)
4504 {
4505 if (TASK_NULL == task)
4506 return KERN_INVALID_TASK;
4507
4508 return KERN_SUCCESS;
4509 }
4510
4511 kern_return_t
4512 task_swap_mach_voucher(
4513 task_t task,
4514 ipc_voucher_t new_voucher,
4515 ipc_voucher_t *in_out_old_voucher)
4516 {
4517 if (TASK_NULL == task)
4518 return KERN_INVALID_TASK;
4519
4520 *in_out_old_voucher = new_voucher;
4521 return KERN_SUCCESS;
4522 }
4523
4524 void task_set_gpu_denied(task_t task, boolean_t denied)
4525 {
4526 task_lock(task);
4527
4528 if (denied) {
4529 task->t_flags |= TF_GPU_DENIED;
4530 } else {
4531 task->t_flags &= ~TF_GPU_DENIED;
4532 }
4533
4534 task_unlock(task);
4535 }
4536
4537 boolean_t task_is_gpu_denied(task_t task)
4538 {
4539 /* We don't need the lock to read this flag */
4540 return (task->t_flags & TF_GPU_DENIED) ? TRUE : FALSE;
4541 }
4542
4543 void task_update_logical_writes(task_t task, uint32_t io_size, int flags)
4544 {
4545 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_DATA_WRITE)) | DBG_FUNC_NONE, task_pid(task), io_size, flags, 0, 0);
4546 switch (flags) {
4547 case TASK_WRITE_IMMEDIATE:
4548 OSAddAtomic64(io_size, (SInt64 *)&(task->task_immediate_writes));
4549 break;
4550 case TASK_WRITE_DEFERRED:
4551 OSAddAtomic64(io_size, (SInt64 *)&(task->task_deferred_writes));
4552 break;
4553 case TASK_WRITE_INVALIDATED:
4554 OSAddAtomic64(io_size, (SInt64 *)&(task->task_invalidated_writes));
4555 break;
4556 case TASK_WRITE_METADATA:
4557 OSAddAtomic64(io_size, (SInt64 *)&(task->task_metadata_writes));
4558 break;
4559 }
4560 return;
4561 }
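/*
 * Illustrative call from an I/O accounting site (hypothetical sizes):
 * charge a 16 KB write that will be flushed later against the
 * deferred-write counter.  Unrecognized flag values simply fall through
 * the switch above and are ignored.
 *
 *	task_update_logical_writes(current_task(), 16 * 1024, TASK_WRITE_DEFERRED);
 */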