1 /*
2 * Copyright (c) 2000-2010, 2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_FREE_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 * File: kern/task.c
58 * Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub,
59 * David Black
60 *
61 * Task management primitives implementation.
62 */
63 /*
64 * Copyright (c) 1993 The University of Utah and
65 * the Computer Systems Laboratory (CSL). All rights reserved.
66 *
67 * Permission to use, copy, modify and distribute this software and its
68 * documentation is hereby granted, provided that both the copyright
69 * notice and this permission notice appear in all copies of the
70 * software, derivative works or modified versions, and any portions
71 * thereof, and that both notices appear in supporting documentation.
72 *
73 * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
74 * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
75 * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
76 *
77 * CSL requests users of this software to return to csl-dist@cs.utah.edu any
78 * improvements that they make and grant CSL redistribution rights.
79 *
80 */
81 /*
82 * NOTICE: This file was modified by McAfee Research in 2004 to introduce
83 * support for mandatory and extensible security protections. This notice
84 * is included in support of clause 2.2 (b) of the Apple Public License,
85 * Version 2.0.
86 * Copyright (c) 2005 SPARTA, Inc.
87 */
88
89 #include <mach/mach_types.h>
90 #include <mach/boolean.h>
91 #include <mach/host_priv.h>
92 #include <mach/machine/vm_types.h>
93 #include <mach/vm_param.h>
94 #include <mach/mach_vm.h>
95 #include <mach/semaphore.h>
96 #include <mach/task_info.h>
97 #include <mach/task_special_ports.h>
98
99 #include <ipc/ipc_importance.h>
100 #include <ipc/ipc_types.h>
101 #include <ipc/ipc_space.h>
102 #include <ipc/ipc_entry.h>
103 #include <ipc/ipc_hash.h>
104
105 #include <kern/kern_types.h>
106 #include <kern/mach_param.h>
107 #include <kern/misc_protos.h>
108 #include <kern/task.h>
109 #include <kern/thread.h>
110 #include <kern/coalition.h>
111 #include <kern/zalloc.h>
112 #include <kern/kalloc.h>
113 #include <kern/kern_cdata.h>
114 #include <kern/processor.h>
115 #include <kern/sched_prim.h> /* for thread_wakeup */
116 #include <kern/ipc_tt.h>
117 #include <kern/host.h>
118 #include <kern/clock.h>
119 #include <kern/timer.h>
120 #include <kern/assert.h>
121 #include <kern/sync_lock.h>
122 #include <kern/affinity.h>
123 #include <kern/exc_resource.h>
124 #include <kern/machine.h>
125 #include <corpses/task_corpse.h>
126 #if CONFIG_TELEMETRY
127 #include <kern/telemetry.h>
128 #endif
129
130 #include <vm/pmap.h>
131 #include <vm/vm_map.h>
132 #include <vm/vm_kern.h> /* for kernel_map, ipc_kernel_map */
133 #include <vm/vm_pageout.h>
134 #include <vm/vm_protos.h>
135 #include <vm/vm_purgeable_internal.h>
136
137 #include <sys/resource.h>
138 #include <sys/signalvar.h> /* for coredump */
139
140 /*
141 * Exported interfaces
142 */
143
144 #include <mach/task_server.h>
145 #include <mach/mach_host_server.h>
146 #include <mach/host_security_server.h>
147 #include <mach/mach_port_server.h>
148
149 #include <vm/vm_shared_region.h>
150
151 #include <libkern/OSDebug.h>
152 #include <libkern/OSAtomic.h>
153
154 #if CONFIG_ATM
155 #include <atm/atm_internal.h>
156 #endif
157
158 #include <kern/sfi.h>
159
160 #if KPERF
161 extern int kpc_force_all_ctrs(task_t, int);
162 #endif
163
164 uint32_t qos_override_mode;
165
166 task_t kernel_task;
167 zone_t task_zone;
168 lck_attr_t task_lck_attr;
169 lck_grp_t task_lck_grp;
170 lck_grp_attr_t task_lck_grp_attr;
171
172 /* Flag set by core audio when audio is playing. Used to stifle EXC_RESOURCE generation when active. */
173 int audio_active = 0;
174
175 zinfo_usage_store_t tasks_tkm_private;
176 zinfo_usage_store_t tasks_tkm_shared;
177
178 /* A container to accumulate statistics for expired tasks */
179 expired_task_statistics_t dead_task_statistics;
180 lck_spin_t dead_task_statistics_lock;
181
182 ledger_template_t task_ledger_template = NULL;
183
184 struct _task_ledger_indices task_ledgers __attribute__((used)) =
185 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
186 { 0 /* initialized at runtime */},
187 #ifdef CONFIG_BANK
188 -1, -1,
189 #endif
190 };
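/*
 * Every entry index above starts out as -1, meaning "not registered yet";
 * init_task_ledgers() fills in the real indices and panics if any entry
 * cannot be created.
 */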
191
192 /* System sleep state */
193 boolean_t tasks_suspend_state;
194
195
196 void init_task_ledgers(void);
197 void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1);
198 void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1);
199 void __attribute__((noinline)) THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE(void);
200 void __attribute__((noinline)) PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb);
201
202 kern_return_t task_suspend_internal(task_t);
203 kern_return_t task_resume_internal(task_t);
204 static kern_return_t task_start_halt_locked(task_t task, boolean_t should_mark_corpse);
205
206
207 void proc_init_cpumon_params(void);
208 extern kern_return_t exception_deliver(thread_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t, struct exception_action *, lck_mtx_t *);
209
210 // Warn tasks when they hit 80% of their memory limit.
211 #define PHYS_FOOTPRINT_WARNING_LEVEL 80
212
213 #define TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT 150 /* wakeups per second */
214 #define TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL 300 /* in seconds. */
215
216 /*
217 * Level (in terms of percentage of the limit) at which the wakeups monitor triggers telemetry.
218 *
219 * (i.e., when the task's wakeups rate exceeds 70% of the limit, start taking user
220 * stacktraces, aka micro-stackshots)
221 */
222 #define TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER 70
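/*
 * Illustrative arithmetic with the defaults above (numbers are only an
 * example of how the knobs combine): a limit of 150 wakeups/sec observed
 * over a 300 second interval is a budget of 150 * 300 = 45,000 wakeups
 * per window, and telemetry (micro-stackshots) begins once the observed
 * rate crosses 70% of the limit, i.e. a sustained 105 wakeups/sec.
 */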
223
224 int task_wakeups_monitor_interval; /* In seconds. Time period over which wakeups rate is observed */
225 int task_wakeups_monitor_rate; /* In wakeups per second. Maximum allowable wakeups per task before EXC_RESOURCE is sent */
226
227 int task_wakeups_monitor_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
228
229 int disable_exc_resource; /* Global override to suppress EXC_RESOURCE for resource monitor violations. */
230
231 ledger_amount_t max_task_footprint = 0; /* Per-task limit on physical memory consumption in bytes */
232 int max_task_footprint_mb = 0; /* Per-task limit on physical memory consumption in megabytes */
233
234 #if MACH_ASSERT
235 int pmap_ledgers_panic = 1;
236 #endif /* MACH_ASSERT */
237
238 int task_max = CONFIG_TASK_MAX; /* Max number of tasks */
239
240 int hwm_user_cores = 0; /* high watermark violations generate user core files */
241
242 #ifdef MACH_BSD
243 extern void proc_getexecutableuuid(void *, unsigned char *, unsigned long);
244 extern int proc_pid(struct proc *p);
245 extern int proc_selfpid(void);
246 extern char *proc_name_address(struct proc *p);
247 extern uint64_t get_dispatchqueue_offset_from_proc(void *);
248 #if CONFIG_JETSAM
249 extern void proc_memstat_terminated(struct proc* p, boolean_t set);
250 extern void memorystatus_on_ledger_footprint_exceeded(int warning, const int max_footprint_mb);
251 #endif
252 #endif
253 #if MACH_ASSERT
254 extern int pmap_ledgers_panic;
255 #endif /* MACH_ASSERT */
256
257 /* Forwards */
258
259 void task_hold_locked(
260 task_t task);
261 void task_wait_locked(
262 task_t task,
263 boolean_t until_not_runnable);
264 void task_release_locked(
265 task_t task);
266 void task_free(
267 task_t task );
268 void task_synchronizer_destroy_all(
269 task_t task);
270
271 int check_for_tasksuspend(
272 task_t task);
273
274 void
275 task_backing_store_privileged(
276 task_t task)
277 {
278 task_lock(task);
279 task->priv_flags |= VM_BACKING_STORE_PRIV;
280 task_unlock(task);
281 return;
282 }
283
284
285 void
286 task_set_64bit(
287 task_t task,
288 boolean_t is64bit)
289 {
290 #if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
291 thread_t thread;
292 #endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */
293
294 task_lock(task);
295
296 if (is64bit) {
297 if (task_has_64BitAddr(task))
298 goto out;
299 task_set_64BitAddr(task);
300 } else {
301 if ( !task_has_64BitAddr(task))
302 goto out;
303 task_clear_64BitAddr(task);
304 }
305 /* FIXME: On x86, the thread save state flavor can diverge from the
306 * task's 64-bit feature flag due to the 32-bit/64-bit register save
307 * state dichotomy. Since we can be pre-empted in this interval,
308 * certain routines may observe the thread as being in an inconsistent
309 * state with respect to its task's 64-bitness.
310 */
311
312 #if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
313 queue_iterate(&task->threads, thread, thread_t, task_threads) {
314 thread_mtx_lock(thread);
315 machine_thread_switch_addrmode(thread);
316 thread_mtx_unlock(thread);
317 }
318 #endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */
319
320 out:
321 task_unlock(task);
322 }
323
324
325 void
326 task_set_dyld_info(task_t task, mach_vm_address_t addr, mach_vm_size_t size)
327 {
328 task_lock(task);
329 task->all_image_info_addr = addr;
330 task->all_image_info_size = size;
331 task_unlock(task);
332 }
333
334 void
335 task_atm_reset(__unused task_t task) {
336
337 #if CONFIG_ATM
338 if (task->atm_context != NULL) {
339 atm_task_descriptor_destroy(task->atm_context);
340 task->atm_context = NULL;
341 }
342 #endif
343
344 }
345
346 void
347 task_bank_reset(__unused task_t task) {
348
349 #if CONFIG_BANK
350 if (task->bank_context != NULL) {
351 bank_task_destroy(task);
352 }
353 #endif
354
355 }
356
357 /*
358 * NOTE: This should only be called when the P_LINTRANSIT
359 * flag is set (the proc_trans lock is held) on the
360 * proc associated with the task.
361 */
362 void
363 task_bank_init(__unused task_t task) {
364
365 #if CONFIG_BANK
366 if (task->bank_context != NULL) {
367 panic("Task bank init called with non null bank context for task: %p and bank_context: %p", task, task->bank_context);
368 }
369 bank_task_initialize(task);
370 #endif
371
372 }
373
374 #if TASK_REFERENCE_LEAK_DEBUG
375 #include <kern/btlog.h>
376
377 decl_simple_lock_data(static,task_ref_lock);
378 static btlog_t *task_ref_btlog;
379 #define TASK_REF_OP_INCR 0x1
380 #define TASK_REF_OP_DECR 0x2
381
382 #define TASK_REF_BTDEPTH 7
383
384 static void
385 task_ref_lock_lock(void *context)
386 {
387 simple_lock((simple_lock_t)context);
388 }
389 static void
390 task_ref_lock_unlock(void *context)
391 {
392 simple_unlock((simple_lock_t)context);
393 }
394
395 void
396 task_reference_internal(task_t task)
397 {
398 void * bt[TASK_REF_BTDEPTH];
399 int numsaved = 0;
400
401 numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
402
403 (void)hw_atomic_add(&(task)->ref_count, 1);
404 btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_INCR,
405 bt, numsaved);
406 }
407
408 uint32_t
409 task_deallocate_internal(task_t task)
410 {
411 void * bt[TASK_REF_BTDEPTH];
412 int numsaved = 0;
413
414 numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
415
416 btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_DECR,
417 bt, numsaved);
418 return hw_atomic_sub(&(task)->ref_count, 1);
419 }
420
421 #endif /* TASK_REFERENCE_LEAK_DEBUG */
422
423 void
424 task_init(void)
425 {
426
427 lck_grp_attr_setdefault(&task_lck_grp_attr);
428 lck_grp_init(&task_lck_grp, "task", &task_lck_grp_attr);
429 lck_attr_setdefault(&task_lck_attr);
430 lck_mtx_init(&tasks_threads_lock, &task_lck_grp, &task_lck_attr);
431
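/*
 * Back all task structures with a dedicated zone: at most task_max
 * tasks, grown TASK_CHUNK tasks at a time; Z_NOENCRYPT marks the zone's
 * memory as not requiring encryption (e.g. in hibernation images).
 */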
432 task_zone = zinit(
433 sizeof(struct task),
434 task_max * sizeof(struct task),
435 TASK_CHUNK * sizeof(struct task),
436 "tasks");
437
438 zone_change(task_zone, Z_NOENCRYPT, TRUE);
439
440 /*
441 * Configure per-task memory limit.
442 * The boot-arg is interpreted as Megabytes,
443 * and takes precedence over the device tree.
444 * Setting the boot-arg to 0 disables task limits.
445 */
446 if (!PE_parse_boot_argn("max_task_pmem", &max_task_footprint_mb,
447 sizeof (max_task_footprint_mb))) {
448 /*
449 * No limit was found in boot-args, so go look in the device tree.
450 */
451 if (!PE_get_default("kern.max_task_pmem", &max_task_footprint_mb,
452 sizeof(max_task_footprint_mb))) {
453 /*
454 * No limit was found in device tree.
455 */
456 max_task_footprint_mb = 0;
457 }
458 }
459
460 if (max_task_footprint_mb != 0) {
461 #if CONFIG_JETSAM
462 if (max_task_footprint_mb < 50) {
463 printf("Warning: max_task_pmem %d below minimum.\n",
464 max_task_footprint_mb);
465 max_task_footprint_mb = 50;
466 }
467 printf("Limiting task physical memory footprint to %d MB\n",
468 max_task_footprint_mb);
469
470 max_task_footprint = (ledger_amount_t)max_task_footprint_mb * 1024 * 1024; // Convert MB to bytes
471 #else
472 printf("Warning: max_task_footprint specified, but jetsam not configured; ignoring.\n");
473 #endif
474 }
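/*
 * Example (the value is illustrative only): booting with
 * "max_task_pmem=650" yields a 650 MB per-task footprint limit; values
 * below 50 are clamped up to 50 MB, 0 (or no boot-arg/device-tree entry)
 * leaves tasks unlimited, and without CONFIG_JETSAM any setting is ignored.
 */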
475
476 #if MACH_ASSERT
477 PE_parse_boot_argn("pmap_ledgers_panic", &pmap_ledgers_panic,
478 sizeof (pmap_ledgers_panic));
479 #endif /* MACH_ASSERT */
480
481 if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores,
482 sizeof (hwm_user_cores))) {
483 hwm_user_cores = 0;
484 }
485
486 if (PE_parse_boot_argn("qos_override_mode", &qos_override_mode, sizeof(qos_override_mode))) {
487 printf("QOS override mode: 0x%08x\n", qos_override_mode);
488 } else {
489 qos_override_mode = QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE;
490 }
491
492 proc_init_cpumon_params();
493
494 if (!PE_parse_boot_argn("task_wakeups_monitor_rate", &task_wakeups_monitor_rate, sizeof (task_wakeups_monitor_rate))) {
495 task_wakeups_monitor_rate = TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT;
496 }
497
498 if (!PE_parse_boot_argn("task_wakeups_monitor_interval", &task_wakeups_monitor_interval, sizeof (task_wakeups_monitor_interval))) {
499 task_wakeups_monitor_interval = TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL;
500 }
501
502 if (!PE_parse_boot_argn("task_wakeups_monitor_ustackshots_trigger_pct", &task_wakeups_monitor_ustackshots_trigger_pct,
503 sizeof (task_wakeups_monitor_ustackshots_trigger_pct))) {
504 task_wakeups_monitor_ustackshots_trigger_pct = TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER;
505 }
506
507 if (!PE_parse_boot_argn("disable_exc_resource", &disable_exc_resource,
508 sizeof (disable_exc_resource))) {
509 disable_exc_resource = 0;
510 }
511
512 /*
513 * If we have coalitions, coalition_init() will call init_task_ledgers() as it
514 * sets up the ledgers for the default coalition. If we don't have coalitions,
515 * then we have to call it now.
516 */
517 #if CONFIG_COALITIONS
518 assert(task_ledger_template);
519 #else /* CONFIG_COALITIONS */
520 init_task_ledgers();
521 #endif /* CONFIG_COALITIONS */
522
523 #if TASK_REFERENCE_LEAK_DEBUG
524 simple_lock_init(&task_ref_lock, 0);
525 task_ref_btlog = btlog_create(100000,
526 TASK_REF_BTDEPTH,
527 task_ref_lock_lock,
528 task_ref_lock_unlock,
529 &task_ref_lock);
530 assert(task_ref_btlog);
531 #endif
532
533 /*
534 * Create the kernel task as the first task.
535 */
536 #ifdef __LP64__
537 if (task_create_internal(TASK_NULL, NULL, FALSE, TRUE, &kernel_task) != KERN_SUCCESS)
538 #else
539 if (task_create_internal(TASK_NULL, NULL, FALSE, FALSE, &kernel_task) != KERN_SUCCESS)
540 #endif
541 panic("task_init\n");
542
543 vm_map_deallocate(kernel_task->map);
544 kernel_task->map = kernel_map;
545 lck_spin_init(&dead_task_statistics_lock, &task_lck_grp, &task_lck_attr);
546
547 }
548
549 /*
550 * Create a task running in the kernel address space. It may
551 * have its own map of size mem_size and may have ipc privileges.
552 */
553 kern_return_t
554 kernel_task_create(
555 __unused task_t parent_task,
556 __unused vm_offset_t map_base,
557 __unused vm_size_t map_size,
558 __unused task_t *child_task)
559 {
560 return (KERN_INVALID_ARGUMENT);
561 }
562
563 kern_return_t
564 task_create(
565 task_t parent_task,
566 __unused ledger_port_array_t ledger_ports,
567 __unused mach_msg_type_number_t num_ledger_ports,
568 __unused boolean_t inherit_memory,
569 __unused task_t *child_task) /* OUT */
570 {
571 if (parent_task == TASK_NULL)
572 return(KERN_INVALID_ARGUMENT);
573
574 /*
575 * No longer supported: too many calls assume that a task has a valid
576 * process attached.
577 */
578 return(KERN_FAILURE);
579 }
580
581 kern_return_t
582 host_security_create_task_token(
583 host_security_t host_security,
584 task_t parent_task,
585 __unused security_token_t sec_token,
586 __unused audit_token_t audit_token,
587 __unused host_priv_t host_priv,
588 __unused ledger_port_array_t ledger_ports,
589 __unused mach_msg_type_number_t num_ledger_ports,
590 __unused boolean_t inherit_memory,
591 __unused task_t *child_task) /* OUT */
592 {
593 if (parent_task == TASK_NULL)
594 return(KERN_INVALID_ARGUMENT);
595
596 if (host_security == HOST_NULL)
597 return(KERN_INVALID_SECURITY);
598
599 /*
600 * No longer supported.
601 */
602 return(KERN_FAILURE);
603 }
604
605 /*
606 * Task ledgers
607 * ------------
608 *
609 * phys_footprint
610 * Physical footprint: This is the sum of:
611 * + (internal - alternate_accounting)
612 * + (internal_compressed - alternate_accounting_compressed)
613 * + iokit_mapped
614 * + purgeable_nonvolatile
615 * + purgeable_nonvolatile_compressed
616 *
617 * internal
618 * The task's anonymous memory, which on iOS is always resident.
619 *
620 * internal_compressed
621 * Amount of this task's internal memory which is held by the compressor.
622 * Such memory is no longer actually resident for the task [i.e., resident in its pmap],
623 * and could be either decompressed back into memory, or paged out to storage, depending
624 * on our implementation.
625 *
626 * iokit_mapped
627 * IOKit mappings: The total size of all IOKit mappings in this task, regardless of
628 * clean/dirty or internal/external state.
629 *
630 * alternate_accounting
631 * The number of internal dirty pages which are part of IOKit mappings. By definition, these pages
632 * are counted in both internal *and* iokit_mapped, so we must subtract them from the total to avoid
633 * double counting.
634 */
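/*
 * Worked example (illustrative numbers only): a task with 200 MB of
 * resident internal (anonymous) memory, a further 40 MB of internal
 * memory held by the compressor, 30 MB of IOKit mappings, 10 MB of
 * alternate_accounting, no alternate_accounting_compressed, and 15 MB
 * of nonvolatile purgeable memory (none of it compressed) has
 *
 *	phys_footprint = (200 - 10) + (40 - 0) + 30 + 15 + 0 = 275 MB
 */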
635 void
636 init_task_ledgers(void)
637 {
638 ledger_template_t t;
639
640 assert(task_ledger_template == NULL);
641 assert(kernel_task == TASK_NULL);
642
643 if ((t = ledger_template_create("Per-task ledger")) == NULL)
644 panic("couldn't create task ledger template");
645
646 task_ledgers.cpu_time = ledger_entry_add(t, "cpu_time", "sched", "ns");
647 task_ledgers.tkm_private = ledger_entry_add(t, "tkm_private",
648 "physmem", "bytes");
649 task_ledgers.tkm_shared = ledger_entry_add(t, "tkm_shared", "physmem",
650 "bytes");
651 task_ledgers.phys_mem = ledger_entry_add(t, "phys_mem", "physmem",
652 "bytes");
653 task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem",
654 "bytes");
655 task_ledgers.internal = ledger_entry_add(t, "internal", "physmem",
656 "bytes");
657 task_ledgers.iokit_mapped = ledger_entry_add(t, "iokit_mapped", "mappings",
658 "bytes");
659 task_ledgers.alternate_accounting = ledger_entry_add(t, "alternate_accounting", "physmem",
660 "bytes");
661 task_ledgers.alternate_accounting_compressed = ledger_entry_add(t, "alternate_accounting_compressed", "physmem",
662 "bytes");
663 task_ledgers.phys_footprint = ledger_entry_add(t, "phys_footprint", "physmem",
664 "bytes");
665 task_ledgers.internal_compressed = ledger_entry_add(t, "internal_compressed", "physmem",
666 "bytes");
667 task_ledgers.purgeable_volatile = ledger_entry_add(t, "purgeable_volatile", "physmem", "bytes");
668 task_ledgers.purgeable_nonvolatile = ledger_entry_add(t, "purgeable_nonvolatile", "physmem", "bytes");
669 task_ledgers.purgeable_volatile_compressed = ledger_entry_add(t, "purgeable_volatile_compress", "physmem", "bytes");
670 task_ledgers.purgeable_nonvolatile_compressed = ledger_entry_add(t, "purgeable_nonvolatile_compress", "physmem", "bytes");
671 task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power",
672 "count");
673 task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power",
674 "count");
675
676 #if CONFIG_SCHED_SFI
677 sfi_class_id_t class_id, ledger_alias;
678 for (class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
679 task_ledgers.sfi_wait_times[class_id] = -1;
680 }
681
682 /* don't account for UNSPECIFIED */
683 for (class_id = SFI_CLASS_UNSPECIFIED + 1; class_id < MAX_SFI_CLASS_ID; class_id++) {
684 ledger_alias = sfi_get_ledger_alias_for_class(class_id);
685 if (ledger_alias != SFI_CLASS_UNSPECIFIED) {
686 /* Check to see if alias has been registered yet */
687 if (task_ledgers.sfi_wait_times[ledger_alias] != -1) {
688 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias];
689 } else {
690 /* Otherwise, initialize it first */
691 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias] = sfi_ledger_entry_add(t, ledger_alias);
692 }
693 } else {
694 task_ledgers.sfi_wait_times[class_id] = sfi_ledger_entry_add(t, class_id);
695 }
696
697 if (task_ledgers.sfi_wait_times[class_id] < 0) {
698 panic("couldn't create entries for task ledger template for SFI class 0x%x", class_id);
699 }
700 }
701
702 assert(task_ledgers.sfi_wait_times[MAX_SFI_CLASS_ID -1] != -1);
703 #endif /* CONFIG_SCHED_SFI */
704
705 #ifdef CONFIG_BANK
706 task_ledgers.cpu_time_billed_to_me = ledger_entry_add(t, "cpu_time_billed_to_me", "sched", "ns");
707 task_ledgers.cpu_time_billed_to_others = ledger_entry_add(t, "cpu_time_billed_to_others", "sched", "ns");
708 #endif
709 if ((task_ledgers.cpu_time < 0) ||
710 (task_ledgers.tkm_private < 0) ||
711 (task_ledgers.tkm_shared < 0) ||
712 (task_ledgers.phys_mem < 0) ||
713 (task_ledgers.wired_mem < 0) ||
714 (task_ledgers.internal < 0) ||
715 (task_ledgers.iokit_mapped < 0) ||
716 (task_ledgers.alternate_accounting < 0) ||
717 (task_ledgers.alternate_accounting_compressed < 0) ||
718 (task_ledgers.phys_footprint < 0) ||
719 (task_ledgers.internal_compressed < 0) ||
720 (task_ledgers.purgeable_volatile < 0) ||
721 (task_ledgers.purgeable_nonvolatile < 0) ||
722 (task_ledgers.purgeable_volatile_compressed < 0) ||
723 (task_ledgers.purgeable_nonvolatile_compressed < 0) ||
724 (task_ledgers.platform_idle_wakeups < 0) ||
725 (task_ledgers.interrupt_wakeups < 0)
726 #ifdef CONFIG_BANK
727 || (task_ledgers.cpu_time_billed_to_me < 0) || (task_ledgers.cpu_time_billed_to_others < 0)
728 #endif
729 ) {
730 panic("couldn't create entries for task ledger template");
731 }
732
733 ledger_track_maximum(t, task_ledgers.phys_footprint, 60);
734 #if MACH_ASSERT
735 if (pmap_ledgers_panic) {
736 ledger_panic_on_negative(t, task_ledgers.phys_footprint);
737 ledger_panic_on_negative(t, task_ledgers.internal);
738 ledger_panic_on_negative(t, task_ledgers.internal_compressed);
739 ledger_panic_on_negative(t, task_ledgers.iokit_mapped);
740 ledger_panic_on_negative(t, task_ledgers.alternate_accounting);
741 ledger_panic_on_negative(t, task_ledgers.alternate_accounting_compressed);
742 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile);
743 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile);
744 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile_compressed);
745 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile_compressed);
746 }
747 #endif /* MACH_ASSERT */
748
749 #if CONFIG_JETSAM
750 ledger_set_callback(t, task_ledgers.phys_footprint, task_footprint_exceeded, NULL, NULL);
751 #endif
752
753 ledger_set_callback(t, task_ledgers.interrupt_wakeups,
754 task_wakeups_rate_exceeded, NULL, NULL);
755
756 task_ledger_template = t;
757 }
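/*
 * The finished template is what task_create_internal() below instantiates
 * (with LEDGER_CREATE_ACTIVE_ENTRIES) to give every new task its own
 * ledger, one entry per index registered above.
 */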
758
759 kern_return_t
760 task_create_internal(
761 task_t parent_task,
762 coalition_t *parent_coalitions __unused,
763 boolean_t inherit_memory,
764 boolean_t is_64bit,
765 task_t *child_task) /* OUT */
766 {
767 task_t new_task;
768 vm_shared_region_t shared_region;
769 ledger_t ledger = NULL;
770
771 new_task = (task_t) zalloc(task_zone);
772
773 if (new_task == TASK_NULL)
774 return(KERN_RESOURCE_SHORTAGE);
775
776 /* one ref for just being alive; one for our caller */
777 new_task->ref_count = 2;
778
779 /* allocate with active entries */
780 assert(task_ledger_template != NULL);
781 if ((ledger = ledger_instantiate(task_ledger_template,
782 LEDGER_CREATE_ACTIVE_ENTRIES)) == NULL) {
783 zfree(task_zone, new_task);
784 return(KERN_RESOURCE_SHORTAGE);
785 }
786
787 new_task->ledger = ledger;
788
789 #if defined(CONFIG_SCHED_MULTIQ)
790 new_task->sched_group = sched_group_create();
791 #endif
792
793 /* if inherit_memory is true, parent_task MUST not be NULL */
794 if (inherit_memory)
795 new_task->map = vm_map_fork(ledger, parent_task->map);
796 else
797 new_task->map = vm_map_create(pmap_create(ledger, 0, is_64bit),
798 (vm_map_offset_t)(VM_MIN_ADDRESS),
799 (vm_map_offset_t)(VM_MAX_ADDRESS), TRUE);
800
801 /* Inherit memlock limit from parent */
802 if (parent_task)
803 vm_map_set_user_wire_limit(new_task->map, (vm_size_t)parent_task->map->user_wire_limit);
804
805 lck_mtx_init(&new_task->lock, &task_lck_grp, &task_lck_attr);
806 queue_init(&new_task->threads);
807 new_task->suspend_count = 0;
808 new_task->thread_count = 0;
809 new_task->active_thread_count = 0;
810 new_task->user_stop_count = 0;
811 new_task->legacy_stop_count = 0;
812 new_task->active = TRUE;
813 new_task->halting = FALSE;
814 new_task->user_data = NULL;
815 new_task->faults = 0;
816 new_task->cow_faults = 0;
817 new_task->pageins = 0;
818 new_task->messages_sent = 0;
819 new_task->messages_received = 0;
820 new_task->syscalls_mach = 0;
821 new_task->priv_flags = 0;
822 new_task->syscalls_unix=0;
823 new_task->c_switch = new_task->p_switch = new_task->ps_switch = 0;
824 new_task->t_flags = 0;
825 new_task->importance = 0;
826
827 #if CONFIG_ATM
828 new_task->atm_context = NULL;
829 #endif
830 #if CONFIG_BANK
831 new_task->bank_context = NULL;
832 #endif
833
834 zinfo_task_init(new_task);
835
836 #ifdef MACH_BSD
837 new_task->bsd_info = NULL;
838 new_task->corpse_info = NULL;
839 #endif /* MACH_BSD */
840
841 #if CONFIG_JETSAM
842 if (max_task_footprint != 0) {
843 ledger_set_limit(ledger, task_ledgers.phys_footprint, max_task_footprint, PHYS_FOOTPRINT_WARNING_LEVEL);
844 }
845 #endif
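/*
 * Example with an assumed limit: if max_task_footprint were 500 MB, the
 * PHYS_FOOTPRINT_WARNING_LEVEL of 80 (percent) means the warning callback
 * (task_footprint_exceeded) fires when the phys_footprint ledger crosses
 * 400 MB, with the hard limit at 500 MB.
 */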
846
847 if (task_wakeups_monitor_rate != 0) {
848 uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
849 int32_t rate; // Ignored because of WAKEMON_SET_DEFAULTS
850 task_wakeups_monitor_ctl(new_task, &flags, &rate);
851 }
852
853 #if defined(__i386__) || defined(__x86_64__)
854 new_task->i386_ldt = 0;
855 #endif
856
857 new_task->task_debug = NULL;
858
859 queue_init(&new_task->semaphore_list);
860 new_task->semaphores_owned = 0;
861
862 ipc_task_init(new_task, parent_task);
863
864 new_task->total_user_time = 0;
865 new_task->total_system_time = 0;
866
867 new_task->vtimers = 0;
868
869 new_task->shared_region = NULL;
870
871 new_task->affinity_space = NULL;
872
873 new_task->pidsuspended = FALSE;
874 new_task->frozen = FALSE;
875 new_task->changing_freeze_state = FALSE;
876 new_task->rusage_cpu_flags = 0;
877 new_task->rusage_cpu_percentage = 0;
878 new_task->rusage_cpu_interval = 0;
879 new_task->rusage_cpu_deadline = 0;
880 new_task->rusage_cpu_callt = NULL;
881 #if MACH_ASSERT
882 new_task->suspends_outstanding = 0;
883 #endif
884
885 #if HYPERVISOR
886 new_task->hv_task_target = NULL;
887 #endif /* HYPERVISOR */
888
889
890 new_task->low_mem_notified_warn = 0;
891 new_task->low_mem_notified_critical = 0;
892 new_task->low_mem_privileged_listener = 0;
893 new_task->purged_memory_warn = 0;
894 new_task->purged_memory_critical = 0;
895 new_task->mem_notify_reserved = 0;
896 #if IMPORTANCE_INHERITANCE
897 new_task->task_imp_base = NULL;
898 #endif /* IMPORTANCE_INHERITANCE */
899
900 #if defined(__x86_64__)
901 new_task->uexc_range_start = new_task->uexc_range_size = new_task->uexc_handler = 0;
902 #endif
903
904 new_task->requested_policy = default_task_requested_policy;
905 new_task->effective_policy = default_task_effective_policy;
906 new_task->pended_policy = default_task_pended_policy;
907
908 if (parent_task != TASK_NULL) {
909 new_task->sec_token = parent_task->sec_token;
910 new_task->audit_token = parent_task->audit_token;
911
912 /* inherit the parent's shared region */
913 shared_region = vm_shared_region_get(parent_task);
914 vm_shared_region_set(new_task, shared_region);
915
916 if(task_has_64BitAddr(parent_task))
917 task_set_64BitAddr(new_task);
918 new_task->all_image_info_addr = parent_task->all_image_info_addr;
919 new_task->all_image_info_size = parent_task->all_image_info_size;
920
921 #if defined(__i386__) || defined(__x86_64__)
922 if (inherit_memory && parent_task->i386_ldt)
923 new_task->i386_ldt = user_ldt_copy(parent_task->i386_ldt);
924 #endif
925 if (inherit_memory && parent_task->affinity_space)
926 task_affinity_create(parent_task, new_task);
927
928 new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task);
929
930 #if IMPORTANCE_INHERITANCE
931 ipc_importance_task_t new_task_imp = IIT_NULL;
932
933 if (task_is_marked_importance_donor(parent_task)) {
934 new_task_imp = ipc_importance_for_task(new_task, FALSE);
935 assert(IIT_NULL != new_task_imp);
936 ipc_importance_task_mark_donor(new_task_imp, TRUE);
937 }
938 /* Embedded doesn't want this to inherit */
939 if (task_is_marked_importance_receiver(parent_task)) {
940 if (IIT_NULL == new_task_imp)
941 new_task_imp = ipc_importance_for_task(new_task, FALSE);
942 assert(IIT_NULL != new_task_imp);
943 ipc_importance_task_mark_receiver(new_task_imp, TRUE);
944 }
945 if (task_is_marked_importance_denap_receiver(parent_task)) {
946 if (IIT_NULL == new_task_imp)
947 new_task_imp = ipc_importance_for_task(new_task, FALSE);
948 assert(IIT_NULL != new_task_imp);
949 ipc_importance_task_mark_denap_receiver(new_task_imp, TRUE);
950 }
951
952 if (IIT_NULL != new_task_imp) {
953 assert(new_task->task_imp_base == new_task_imp);
954 ipc_importance_task_release(new_task_imp);
955 }
956 #endif /* IMPORTANCE_INHERITANCE */
957
958 new_task->priority = BASEPRI_DEFAULT;
959 new_task->max_priority = MAXPRI_USER;
960
961 new_task->requested_policy.t_apptype = parent_task->requested_policy.t_apptype;
962
963 new_task->requested_policy.int_darwinbg = parent_task->requested_policy.int_darwinbg;
964 new_task->requested_policy.ext_darwinbg = parent_task->requested_policy.ext_darwinbg;
965 new_task->requested_policy.int_iotier = parent_task->requested_policy.int_iotier;
966 new_task->requested_policy.ext_iotier = parent_task->requested_policy.ext_iotier;
967 new_task->requested_policy.int_iopassive = parent_task->requested_policy.int_iopassive;
968 new_task->requested_policy.ext_iopassive = parent_task->requested_policy.ext_iopassive;
969 new_task->requested_policy.bg_iotier = parent_task->requested_policy.bg_iotier;
970 new_task->requested_policy.terminated = parent_task->requested_policy.terminated;
971 new_task->requested_policy.t_qos_clamp = parent_task->requested_policy.t_qos_clamp;
972
973 task_policy_create(new_task, parent_task->requested_policy.t_boosted);
974 } else {
975 new_task->sec_token = KERNEL_SECURITY_TOKEN;
976 new_task->audit_token = KERNEL_AUDIT_TOKEN;
977 #ifdef __LP64__
978 if(is_64bit)
979 task_set_64BitAddr(new_task);
980 #endif
981 new_task->all_image_info_addr = (mach_vm_address_t)0;
982 new_task->all_image_info_size = (mach_vm_size_t)0;
983
984 new_task->pset_hint = PROCESSOR_SET_NULL;
985
986 if (kernel_task == TASK_NULL) {
987 new_task->priority = BASEPRI_KERNEL;
988 new_task->max_priority = MAXPRI_KERNEL;
989 } else {
990 new_task->priority = BASEPRI_DEFAULT;
991 new_task->max_priority = MAXPRI_USER;
992 }
993 }
994
995 bzero(new_task->coalition, sizeof(new_task->coalition));
996 for (int i = 0; i < COALITION_NUM_TYPES; i++)
997 queue_chain_init(new_task->task_coalition[i]);
998
999 /* Allocate I/O Statistics */
1000 new_task->task_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info));
1001 assert(new_task->task_io_stats != NULL);
1002 bzero(new_task->task_io_stats, sizeof(struct io_stat_info));
1003 new_task->task_immediate_writes = 0;
1004 new_task->task_deferred_writes = 0;
1005 new_task->task_invalidated_writes = 0;
1006 new_task->task_metadata_writes = 0;
1007
1008 bzero(&(new_task->cpu_time_qos_stats), sizeof(struct _cpu_time_qos_stats));
1009
1010 bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics));
1011 new_task->task_timer_wakeups_bin_1 = new_task->task_timer_wakeups_bin_2 = 0;
1012 new_task->task_gpu_ns = 0;
1013
1014 #if CONFIG_COALITIONS
1015
1016 /* TODO: there is no graceful failure path here... */
1017 if (parent_coalitions && parent_coalitions[COALITION_TYPE_RESOURCE]) {
1018 coalitions_adopt_task(parent_coalitions, new_task);
1019 } else if (parent_task && parent_task->coalition[COALITION_TYPE_RESOURCE]) {
1020 /*
1021 * all tasks at least have a resource coalition, so
1022 * if the parent has one then inherit all coalitions
1023 * the parent is a part of
1024 */
1025 coalitions_adopt_task(parent_task->coalition, new_task);
1026 } else {
1027 /* TODO: assert that new_task will be PID 1 (launchd) */
1028 coalitions_adopt_init_task(new_task);
1029 }
1030
1031 if (new_task->coalition[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
1032 panic("created task is not a member of a resource coalition");
1033 }
1034 #endif /* CONFIG_COALITIONS */
1035
1036 new_task->dispatchqueue_offset = 0;
1037 if (parent_task != NULL) {
1038 new_task->dispatchqueue_offset = parent_task->dispatchqueue_offset;
1039 }
1040
1041 if (vm_backing_store_low && parent_task != NULL)
1042 new_task->priv_flags |= (parent_task->priv_flags&VM_BACKING_STORE_PRIV);
1043
1044 new_task->task_volatile_objects = 0;
1045 new_task->task_nonvolatile_objects = 0;
1046 new_task->task_purgeable_disowning = FALSE;
1047 new_task->task_purgeable_disowned = FALSE;
1048
1049 ipc_task_enable(new_task);
1050
1051 lck_mtx_lock(&tasks_threads_lock);
1052 queue_enter(&tasks, new_task, task_t, tasks);
1053 tasks_count++;
1054 if (tasks_suspend_state) {
1055 task_suspend_internal(new_task);
1056 }
1057 lck_mtx_unlock(&tasks_threads_lock);
1058
1059 *child_task = new_task;
1060 return(KERN_SUCCESS);
1061 }
1062
1063 int task_dropped_imp_count = 0;
1064
1065 /*
1066 * task_deallocate:
1067 *
1068 * Drop a reference on a task.
1069 */
1070 void
1071 task_deallocate(
1072 task_t task)
1073 {
1074 ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups;
1075 uint32_t refs;
1076
1077 if (task == TASK_NULL)
1078 return;
1079
1080 refs = task_deallocate_internal(task);
1081
1082 #if IMPORTANCE_INHERITANCE
1083 if (refs > 1)
1084 return;
1085
1086 if (refs == 1) {
1087 /*
1088 * If last ref potentially comes from the task's importance,
1089 * disconnect it. But more task refs may be added before
1090 * that completes, so wait for the reference to go to zero
1091 * naturally (it may happen on a recursive task_deallocate()
1092 * from the ipc_importance_disconnect_task() call).
1093 */
1094 if (IIT_NULL != task->task_imp_base)
1095 ipc_importance_disconnect_task(task);
1096 return;
1097 }
1098 #else
1099 if (refs > 0)
1100 return;
1101 #endif /* IMPORTANCE_INHERITANCE */
1102
1103 lck_mtx_lock(&tasks_threads_lock);
1104 queue_remove(&terminated_tasks, task, task_t, tasks);
1105 terminated_tasks_count--;
1106 lck_mtx_unlock(&tasks_threads_lock);
1107
1108 /*
1109 * remove the reference on atm descriptor
1110 */
1111 task_atm_reset(task);
1112
1113 /*
1114 * remove the reference on bank context
1115 */
1116 task_bank_reset(task);
1117
1118 if (task->task_io_stats)
1119 kfree(task->task_io_stats, sizeof(struct io_stat_info));
1120
1121 /*
1122 * Give the machine dependent code a chance
1123 * to perform cleanup before ripping apart
1124 * the task.
1125 */
1126 machine_task_terminate(task);
1127
1128 ipc_task_terminate(task);
1129
1130 if (task->affinity_space)
1131 task_affinity_deallocate(task);
1132
1133 #if MACH_ASSERT
1134 if (task->ledger != NULL &&
1135 task->map != NULL &&
1136 task->map->pmap != NULL &&
1137 task->map->pmap->ledger != NULL) {
1138 assert(task->ledger == task->map->pmap->ledger);
1139 }
1140 #endif /* MACH_ASSERT */
1141
1142 vm_purgeable_disown(task);
1143 assert(task->task_purgeable_disowned);
1144 if (task->task_volatile_objects != 0 ||
1145 task->task_nonvolatile_objects != 0) {
1146 panic("task_deallocate(%p): "
1147 "volatile_objects=%d nonvolatile_objects=%d\n",
1148 task,
1149 task->task_volatile_objects,
1150 task->task_nonvolatile_objects);
1151 }
1152
1153 vm_map_deallocate(task->map);
1154 is_release(task->itk_space);
1155
1156 ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
1157 &interrupt_wakeups, &debit);
1158 ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
1159 &platform_idle_wakeups, &debit);
1160
1161 #if defined(CONFIG_SCHED_MULTIQ)
1162 sched_group_destroy(task->sched_group);
1163 #endif
1164
1165 /* Accumulate statistics for dead tasks */
1166 lck_spin_lock(&dead_task_statistics_lock);
1167 dead_task_statistics.total_user_time += task->total_user_time;
1168 dead_task_statistics.total_system_time += task->total_system_time;
1169
1170 dead_task_statistics.task_interrupt_wakeups += interrupt_wakeups;
1171 dead_task_statistics.task_platform_idle_wakeups += platform_idle_wakeups;
1172
1173 dead_task_statistics.task_timer_wakeups_bin_1 += task->task_timer_wakeups_bin_1;
1174 dead_task_statistics.task_timer_wakeups_bin_2 += task->task_timer_wakeups_bin_2;
1175
1176 lck_spin_unlock(&dead_task_statistics_lock);
1177 lck_mtx_destroy(&task->lock, &task_lck_grp);
1178
1179 if (!ledger_get_entries(task->ledger, task_ledgers.tkm_private, &credit,
1180 &debit)) {
1181 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc);
1182 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_private.free);
1183 }
1184 if (!ledger_get_entries(task->ledger, task_ledgers.tkm_shared, &credit,
1185 &debit)) {
1186 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_shared.alloc);
1187 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_shared.free);
1188 }
1189 ledger_dereference(task->ledger);
1190 zinfo_task_free(task);
1191
1192 #if TASK_REFERENCE_LEAK_DEBUG
1193 btlog_remove_entries_for_element(task_ref_btlog, task);
1194 #endif
1195
1196 #if CONFIG_COALITIONS
1197 if (!task->coalition[COALITION_TYPE_RESOURCE])
1198 panic("deallocating task was not a member of a resource coalition");
1199 task_release_coalitions(task);
1200 #endif /* CONFIG_COALITIONS */
1201
1202 bzero(task->coalition, sizeof(task->coalition));
1203
1204 #if MACH_BSD
1205 /* clean up collected information since last reference to task is gone */
1206 if (task->corpse_info) {
1207 task_crashinfo_destroy(task->corpse_info);
1208 task->corpse_info = NULL;
1209 }
1210 #endif
1211
1212 zfree(task_zone, task);
1213 }
1214
1215 /*
1216 * task_name_deallocate:
1217 *
1218 * Drop a reference on a task name.
1219 */
1220 void
1221 task_name_deallocate(
1222 task_name_t task_name)
1223 {
1224 return(task_deallocate((task_t)task_name));
1225 }
1226
1227 /*
1228 * task_suspension_token_deallocate:
1229 *
1230 * Drop a reference on a task suspension token.
1231 */
1232 void
1233 task_suspension_token_deallocate(
1234 task_suspension_token_t token)
1235 {
1236 return(task_deallocate((task_t)token));
1237 }
1238
1239
1240 /*
1241 * task_collect_crash_info:
1242 *
1243 * collect crash info from bsd and mach based data
1244 */
1245 kern_return_t
1246 task_collect_crash_info(task_t task)
1247 {
1248 kern_return_t kr = KERN_SUCCESS;
1249
1250 kcdata_descriptor_t crash_data = NULL;
1251 kcdata_descriptor_t crash_data_release = NULL;
1252 mach_msg_type_number_t size = CORPSEINFO_ALLOCATION_SIZE;
1253 mach_vm_offset_t crash_data_user_ptr = 0;
1254
1255 if (!corpses_enabled()) {
1256 return KERN_NOT_SUPPORTED;
1257 }
1258
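/*
 * The task lock is dropped around the allocation below, since
 * mach_vm_allocate() may block; any corpse_info installed in the
 * meantime is swapped out and destroyed once the new one is in place.
 */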
1259 task_lock(task);
1260 assert(task->bsd_info != NULL);
1261 if (task->corpse_info == NULL && task->bsd_info != NULL) {
1262 task_unlock(task);
1263 /* map crash data memory in task's vm map */
1264 kr = mach_vm_allocate(task->map, &crash_data_user_ptr, size, (VM_MAKE_TAG(VM_MEMORY_CORPSEINFO) | VM_FLAGS_ANYWHERE));
1265
1266 if (kr != KERN_SUCCESS)
1267 goto out_no_lock;
1268
1269 crash_data = task_crashinfo_alloc_init((mach_vm_address_t)crash_data_user_ptr, size);
1270 if (crash_data) {
1271 task_lock(task);
1272 crash_data_release = task->corpse_info;
1273 task->corpse_info = crash_data;
1274 task_unlock(task);
1275 kr = KERN_SUCCESS;
1276 } else {
1277 /* if failed to create corpse info, free the mapping */
1278 if (KERN_SUCCESS != mach_vm_deallocate(task->map, crash_data_user_ptr, size)) {
1279 printf("mach_vm_deallocate failed to clear corpse_data for pid %d.\n", task_pid(task));
1280 }
1281 kr = KERN_FAILURE;
1282 }
1283
1284 if (crash_data_release != NULL) {
1285 task_crashinfo_destroy(crash_data_release);
1286 }
1287 } else {
1288 task_unlock(task);
1289 }
1290
1291 out_no_lock:
1292 return kr;
1293 }
1294
1295 /*
1296 * task_deliver_crash_notification:
1297 *
1298 * Makes outcall to registered host port for a corpse.
1299 */
1300 kern_return_t
1301 task_deliver_crash_notification(task_t task)
1302 {
1303 kcdata_descriptor_t crash_info = task->corpse_info;
1304 thread_t th_iter = NULL;
1305 kern_return_t kr = KERN_SUCCESS;
1306 wait_interrupt_t wsave;
1307 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
1308
1309 if (crash_info == NULL)
1310 return KERN_FAILURE;
1311
1312 code[0] = crash_info->kcd_addr_begin;
1313 code[1] = crash_info->kcd_length;
1314
1315 task_lock(task);
1316 queue_iterate(&task->threads, th_iter, thread_t, task_threads)
1317 {
1318 ipc_thread_reset(th_iter);
1319 }
1320 task_unlock(task);
1321
1322 wsave = thread_interrupt_level(THREAD_UNINT);
1323 kr = exception_triage(EXC_CORPSE_NOTIFY, code, EXCEPTION_CODE_MAX);
1324 if (kr != KERN_SUCCESS) {
1325 printf("Failed to send exception EXC_CORPSE_NOTIFY. error code: %d for pid %d\n", kr, task_pid(task));
1326 }
1327
1328 /*
1329 * crash reporting is done. Now release threads
1330 * for reaping by thread_terminate_daemon
1331 */
1332 task_lock(task);
1333 assert(task->active_thread_count == 0);
1334 queue_iterate(&task->threads, th_iter, thread_t, task_threads)
1335 {
1336 thread_mtx_lock(th_iter);
1337 assert(th_iter->inspection == TRUE);
1338 th_iter->inspection = FALSE;
1339 /* now that the corpse has been autopsied, dispose of the thread name */
1340 uthread_cleanup_name(th_iter->uthread);
1341 thread_mtx_unlock(th_iter);
1342 }
1343
1344 thread_terminate_crashed_threads();
1345 /* remove the pending corpse report flag */
1346 task_clear_corpse_pending_report(task);
1347
1348 task_unlock(task);
1349
1350 (void)thread_interrupt_level(wsave);
1351 task_terminate_internal(task);
1352
1353 return kr;
1354 }
1355
1356 /*
1357 * task_terminate:
1358 *
1359 * Terminate the specified task. See comments on thread_terminate
1360 * (kern/thread.c) about problems with terminating the "current task."
1361 */
1362
1363 kern_return_t
1364 task_terminate(
1365 task_t task)
1366 {
1367 if (task == TASK_NULL)
1368 return (KERN_INVALID_ARGUMENT);
1369
1370 if (task->bsd_info)
1371 return (KERN_FAILURE);
1372
1373 return (task_terminate_internal(task));
1374 }
1375
1376 #if MACH_ASSERT
1377 extern int proc_pid(struct proc *);
1378 extern void proc_name_kdp(task_t t, char *buf, int size);
1379 #endif /* MACH_ASSERT */
1380
1381 #define VM_MAP_PARTIAL_REAP 0x54 /* 0x150 */
1382 static void
1383 __unused task_partial_reap(task_t task, __unused int pid)
1384 {
1385 unsigned int reclaimed_resident = 0;
1386 unsigned int reclaimed_compressed = 0;
1387 uint64_t task_page_count;
1388
1389 task_page_count = (get_task_phys_footprint(task) / PAGE_SIZE_64);
1390
1391 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_START),
1392 pid, task_page_count, 0, 0, 0);
1393
1394 vm_map_partial_reap(task->map, &reclaimed_resident, &reclaimed_compressed);
1395
1396 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_END),
1397 pid, reclaimed_resident, reclaimed_compressed, 0, 0);
1398 }
1399
1400 kern_return_t
1401 task_mark_corpse(task_t task)
1402 {
1403 kern_return_t kr = KERN_SUCCESS;
1404 thread_t self_thread;
1405 (void) self_thread;
1406 wait_interrupt_t wsave;
1407
1408 assert(task != kernel_task);
1409 assert(task == current_task());
1410 assert(!task_is_a_corpse(task));
1411
1412 kr = task_collect_crash_info(task);
1413 if (kr != KERN_SUCCESS) {
1414 return kr;
1415 }
1416
1417 self_thread = current_thread();
1418
1419 wsave = thread_interrupt_level(THREAD_UNINT);
1420 task_lock(task);
1421
1422 task_set_corpse_pending_report(task);
1423 task_set_corpse(task);
1424
1425 kr = task_start_halt_locked(task, TRUE);
1426 assert(kr == KERN_SUCCESS);
1427 ipc_task_reset(task);
1428 ipc_task_enable(task);
1429
1430 task_unlock(task);
1431 /* terminate the ipc space */
1432 ipc_space_terminate(task->itk_space);
1433
1434 task_start_halt(task);
1435 thread_terminate_internal(self_thread);
1436 (void) thread_interrupt_level(wsave);
1437 assert(task->halting == TRUE);
1438 return kr;
1439 }
1440
1441 kern_return_t
1442 task_terminate_internal(
1443 task_t task)
1444 {
1445 thread_t thread, self;
1446 task_t self_task;
1447 boolean_t interrupt_save;
1448 int pid = 0;
1449
1450 assert(task != kernel_task);
1451
1452 self = current_thread();
1453 self_task = self->task;
1454
1455 /*
1456 * Get the task locked and make sure that we are not racing
1457 * with someone else trying to terminate us.
1458 */
1459 if (task == self_task)
1460 task_lock(task);
1461 else
1462 if (task < self_task) {
1463 task_lock(task);
1464 task_lock(self_task);
1465 }
1466 else {
1467 task_lock(self_task);
1468 task_lock(task);
1469 }
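/*
 * Note: when both the target task and the caller's task must be locked,
 * they are taken in ascending address order above, so two threads
 * terminating each other's tasks cannot deadlock.
 */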
1470
1471 if (!task->active) {
1472 /*
1473 * Task is already being terminated.
1474 * Just return an error. If we are dying, this will
1475 * just get us to our AST special handler and that
1476 * will get us to finalize the termination of ourselves.
1477 */
1478 task_unlock(task);
1479 if (self_task != task)
1480 task_unlock(self_task);
1481
1482 return (KERN_FAILURE);
1483 }
1484
1485 if (task_corpse_pending_report(task)) {
1486 /*
1487 * Task is marked for reporting as corpse.
1488 * Just return an error. This will route us to our
1489 * AST special handler, which will finish the path
1490 * to death.
1491 */
1492 task_unlock(task);
1493 if (self_task != task)
1494 task_unlock(self_task);
1495
1496 return (KERN_FAILURE);
1497 }
1498
1499 if (self_task != task)
1500 task_unlock(self_task);
1501
1502 /*
1503 * Make sure the current thread does not get aborted out of
1504 * the waits inside these operations.
1505 */
1506 interrupt_save = thread_interrupt_level(THREAD_UNINT);
1507
1508 /*
1509 * Indicate that we want all the threads to stop executing
1510 * at user space by holding the task (we would have held
1511 * each thread independently in thread_terminate_internal -
1512 * but this way we may be more likely to already find it
1513 * held there). Mark the task inactive, and prevent
1514 * further task operations via the task port.
1515 */
1516 task_hold_locked(task);
1517 task->active = FALSE;
1518 ipc_task_disable(task);
1519
1520 #if CONFIG_TELEMETRY
1521 /*
1522 * Notify telemetry that this task is going away.
1523 */
1524 telemetry_task_ctl_locked(task, TF_TELEMETRY, 0);
1525 #endif
1526
1527 /*
1528 * Terminate each thread in the task.
1529 */
1530 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1531 thread_terminate_internal(thread);
1532 }
1533
1534 #ifdef MACH_BSD
1535 if (task->bsd_info != NULL) {
1536 pid = proc_pid(task->bsd_info);
1537 }
1538 #endif /* MACH_BSD */
1539
1540 task_unlock(task);
1541
1542 proc_set_task_policy(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE,
1543 TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);
1544
1545 /* Early object reap phase */
1546
1547 // PR-17045188: Revisit implementation
1548 // task_partial_reap(task, pid);
1549
1550
1551 /*
1552 * Destroy all synchronizers owned by the task.
1553 */
1554 task_synchronizer_destroy_all(task);
1555
1556 /*
1557 * Destroy the IPC space, leaving just a reference for it.
1558 */
1559 ipc_space_terminate(task->itk_space);
1560
1561 #if 00
1562 /* if some ledgers go negative on tear-down again... */
1563 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1564 task_ledgers.phys_footprint);
1565 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1566 task_ledgers.internal);
1567 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1568 task_ledgers.internal_compressed);
1569 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1570 task_ledgers.iokit_mapped);
1571 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1572 task_ledgers.alternate_accounting);
1573 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1574 task_ledgers.alternate_accounting_compressed);
1575 #endif
1576
1577 /*
1578 * If the current thread is a member of the task
1579 * being terminated, then the last reference to
1580 * the task will not be dropped until the thread
1581 * is finally reaped. To avoid incurring the
1582 * expense of removing the address space regions
1583 * at reap time, we do it explicitly here.
1584 */
1585
1586 vm_map_lock(task->map);
1587 vm_map_disable_hole_optimization(task->map);
1588 vm_map_unlock(task->map);
1589
1590 vm_map_remove(task->map,
1591 task->map->min_offset,
1592 task->map->max_offset,
1593 /* no unnesting on final cleanup: */
1594 VM_MAP_REMOVE_NO_UNNESTING);
1595
1596 /* release our shared region */
1597 vm_shared_region_set(task, NULL);
1598
1599
1600 #if MACH_ASSERT
1601 /*
1602 * Identify the pmap's process, in case the pmap ledgers drift
1603 * and we have to report it.
1604 */
1605 char procname[17];
1606 if (task->bsd_info) {
1607 pid = proc_pid(task->bsd_info);
1608 proc_name_kdp(task, procname, sizeof (procname));
1609 } else {
1610 pid = 0;
1611 strlcpy(procname, "<unknown>", sizeof (procname));
1612 }
1613 pmap_set_process(task->map->pmap, pid, procname);
1614 #endif /* MACH_ASSERT */
1615
1616 lck_mtx_lock(&tasks_threads_lock);
1617 queue_remove(&tasks, task, task_t, tasks);
1618 queue_enter(&terminated_tasks, task, task_t, tasks);
1619 tasks_count--;
1620 terminated_tasks_count++;
1621 lck_mtx_unlock(&tasks_threads_lock);
1622
1623 /*
1624 * We no longer need to guard against being aborted, so restore
1625 * the previous interruptible state.
1626 */
1627 thread_interrupt_level(interrupt_save);
1628
1629 #if KPERF
1630 /* force the task to release all ctrs */
1631 if (task->t_chud & TASK_KPC_FORCED_ALL_CTRS)
1632 kpc_force_all_ctrs(task, 0);
1633 #endif
1634
1635 #if CONFIG_COALITIONS
1636 /*
1637 * Leave our coalitions. (drop activation but not reference)
1638 */
1639 coalitions_remove_task(task);
1640 #endif
1641
1642 /*
1643 * Get rid of the task active reference on itself.
1644 */
1645 task_deallocate(task);
1646
1647 return (KERN_SUCCESS);
1648 }
1649
1650 void
1651 tasks_system_suspend(boolean_t suspend)
1652 {
1653 task_t task;
1654
1655 lck_mtx_lock(&tasks_threads_lock);
1656 assert(tasks_suspend_state != suspend);
1657 tasks_suspend_state = suspend;
1658 queue_iterate(&tasks, task, task_t, tasks) {
1659 if (task == kernel_task) {
1660 continue;
1661 }
1662 suspend ? task_suspend_internal(task) : task_resume_internal(task);
1663 }
1664 lck_mtx_unlock(&tasks_threads_lock);
1665 }
1666
1667 /*
1668 * task_start_halt:
1669 *
1670 * Shut the current task down (except for the current thread) in
1671 * preparation for dramatic changes to the task (probably exec).
1672 * We hold the task and mark all other threads in the task for
1673 * termination.
1674 */
1675 kern_return_t
1676 task_start_halt(task_t task)
1677 {
1678 kern_return_t kr = KERN_SUCCESS;
1679 task_lock(task);
1680 kr = task_start_halt_locked(task, FALSE);
1681 task_unlock(task);
1682 return kr;
1683 }
1684
1685 static kern_return_t
1686 task_start_halt_locked(task_t task, boolean_t should_mark_corpse)
1687 {
1688 thread_t thread, self;
1689 uint64_t dispatchqueue_offset;
1690
1691 assert(task != kernel_task);
1692
1693 self = current_thread();
1694
1695 if (task != self->task)
1696 return (KERN_INVALID_ARGUMENT);
1697
1698 if (task->halting || !task->active || !self->active) {
1699 /*
1700 * Task or current thread is already being terminated.
1701 * Hurry up and return out of the current kernel context
1702 * so that we run our AST special handler to terminate
1703 * ourselves.
1704 */
1705 return (KERN_FAILURE);
1706 }
1707
1708 task->halting = TRUE;
1709
1710 /*
1711 * Mark all the threads to keep them from starting any more
1712 * user-level execution. The thread_terminate_internal code
1713 * would do this on a thread by thread basis anyway, but this
1714 * gives us a better chance of not having to wait there.
1715 */
1716 task_hold_locked(task);
1717 dispatchqueue_offset = get_dispatchqueue_offset_from_proc(task->bsd_info);
1718
1719 /*
1720 * Terminate all the other threads in the task.
1721 */
1722 queue_iterate(&task->threads, thread, thread_t, task_threads)
1723 {
1724 if (should_mark_corpse) {
1725 thread_mtx_lock(thread);
1726 thread->inspection = TRUE;
1727 thread_mtx_unlock(thread);
1728 }
1729 if (thread != self)
1730 thread_terminate_internal(thread);
1731 }
1732 task->dispatchqueue_offset = dispatchqueue_offset;
1733
1734 task_release_locked(task);
1735
1736 return KERN_SUCCESS;
1737 }
1738
1739
1740 /*
1741 * task_complete_halt:
1742 *
1743 * Complete task halt by waiting for threads to terminate, then clean
1744 * up task resources (VM, port namespace, etc...) and then let the
1745 * current thread go in the (practically empty) task context.
1746 */
1747 void
1748 task_complete_halt(task_t task)
1749 {
1750 task_lock(task);
1751 assert(task->halting);
1752 assert(task == current_task());
1753
1754 /*
1755 * Wait for the other threads to get shut down.
1756 * When the last other thread is reaped, we'll be
1757 * woken up.
1758 */
1759 if (task->thread_count > 1) {
1760 assert_wait((event_t)&task->halting, THREAD_UNINT);
1761 task_unlock(task);
1762 thread_block(THREAD_CONTINUE_NULL);
1763 } else {
1764 task_unlock(task);
1765 }
1766
1767 /*
1768 * Give the machine dependent code a chance
1769 * to perform cleanup of task-level resources
1770 * associated with the current thread before
1771 * ripping apart the task.
1772 */
1773 machine_task_terminate(task);
1774
1775 /*
1776 * Destroy all synchronizers owned by the task.
1777 */
1778 task_synchronizer_destroy_all(task);
1779
1780 /*
1781 * Destroy the contents of the IPC space, leaving just
1782 * a reference for it.
1783 */
1784 ipc_space_clean(task->itk_space);
1785
1786 /*
1787 * Clean out the address space, as we are going to be
1788 * getting a new one.
1789 */
1790 vm_map_remove(task->map, task->map->min_offset,
1791 task->map->max_offset,
1792 /* no unnesting on final cleanup: */
1793 VM_MAP_REMOVE_NO_UNNESTING);
1794
1795 task->halting = FALSE;
1796 }
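/*
 * The two routines above are designed to be used as a pair.  A minimal
 * sketch of the expected calling pattern (hypothetical caller, e.g. an
 * exec-style path; not copied from any particular call site):
 *
 *	if (task_start_halt(task) == KERN_SUCCESS) {
 *		// every other thread is now marked for termination
 *		task_complete_halt(task);
 *		// the task is now practically empty and can be repopulated
 *	}
 */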
1797
1798 /*
1799 * task_hold_locked:
1800 *
1801 * Suspend execution of the specified task.
1802 * This is a recursive-style suspension of the task; a count of
1803 * suspends is maintained.
1804 *
1805 * CONDITIONS: the task is locked and active.
1806 */
1807 void
1808 task_hold_locked(
1809 register task_t task)
1810 {
1811 register thread_t thread;
1812
1813 assert(task->active);
1814
1815 if (task->suspend_count++ > 0)
1816 return;
1817
1818 /*
1819 * Iterate through all the threads and hold them.
1820 */
1821 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1822 thread_mtx_lock(thread);
1823 thread_hold(thread);
1824 thread_mtx_unlock(thread);
1825 }
1826 }
1827
1828 /*
1829 * task_hold:
1830 *
1831 * Same as the internal routine above, except that it must lock
1832 * and verify that the task is active. This differs from task_suspend
1833 * in that it places a kernel hold on the task rather than just a
1834 * user-level hold. This keeps users from over-resuming and setting
1835 * it running out from under the kernel.
1836 *
1837 * CONDITIONS: the caller holds a reference on the task
1838 */
1839 kern_return_t
1840 task_hold(
1841 register task_t task)
1842 {
1843 if (task == TASK_NULL)
1844 return (KERN_INVALID_ARGUMENT);
1845
1846 task_lock(task);
1847
1848 if (!task->active) {
1849 task_unlock(task);
1850
1851 return (KERN_FAILURE);
1852 }
1853
1854 task_hold_locked(task);
1855 task_unlock(task);
1856
1857 return (KERN_SUCCESS);
1858 }
1859
1860 kern_return_t
1861 task_wait(
1862 task_t task,
1863 boolean_t until_not_runnable)
1864 {
1865 if (task == TASK_NULL)
1866 return (KERN_INVALID_ARGUMENT);
1867
1868 task_lock(task);
1869
1870 if (!task->active) {
1871 task_unlock(task);
1872
1873 return (KERN_FAILURE);
1874 }
1875
1876 task_wait_locked(task, until_not_runnable);
1877 task_unlock(task);
1878
1879 return (KERN_SUCCESS);
1880 }
1881
1882 /*
1883 * task_wait_locked:
1884 *
1885 * Wait for all threads in task to stop.
1886 *
1887 * Conditions:
1888 * Called with task locked, active, and held.
1889 */
1890 void
1891 task_wait_locked(
1892 register task_t task,
1893 boolean_t until_not_runnable)
1894 {
1895 register thread_t thread, self;
1896
1897 assert(task->active);
1898 assert(task->suspend_count > 0);
1899
1900 self = current_thread();
1901
1902 /*
1903 * Iterate through all the threads and wait for them to
1904 * stop. Do not wait for the current thread if it is within
1905 * the task.
1906 */
1907 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1908 if (thread != self)
1909 thread_wait(thread, until_not_runnable);
1910 }
1911 }
1912
1913 /*
1914 * task_release_locked:
1915 *
1916 * Release a kernel hold on a task.
1917 *
1918 * CONDITIONS: the task is locked and active
1919 */
1920 void
1921 task_release_locked(
1922 register task_t task)
1923 {
1924 register thread_t thread;
1925
1926 assert(task->active);
1927 assert(task->suspend_count > 0);
1928
1929 if (--task->suspend_count > 0)
1930 return;
1931
1932 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1933 thread_mtx_lock(thread);
1934 thread_release(thread);
1935 thread_mtx_unlock(thread);
1936 }
1937 }
1938
1939 /*
1940 * task_release:
1941 *
1942 * Same as the internal routine above, except that it must lock
1943 * and verify that the task is active.
1944 *
1945 * CONDITIONS: The caller holds a reference to the task
1946 */
1947 kern_return_t
1948 task_release(
1949 task_t task)
1950 {
1951 if (task == TASK_NULL)
1952 return (KERN_INVALID_ARGUMENT);
1953
1954 task_lock(task);
1955
1956 if (!task->active) {
1957 task_unlock(task);
1958
1959 return (KERN_FAILURE);
1960 }
1961
1962 task_release_locked(task);
1963 task_unlock(task);
1964
1965 return (KERN_SUCCESS);
1966 }
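/*
 * A minimal sketch of how the kernel-level hold primitives above pair
 * up (hypothetical caller; per the conditions documented on each
 * routine, the caller must already hold a reference on the task):
 *
 *	if (task_hold(task) == KERN_SUCCESS) {
 *		task_wait(task, FALSE);		// wait for threads to stop running
 *		// ... inspect or manipulate the stopped task ...
 *		task_release(task);
 *	}
 */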
1967
1968 kern_return_t
1969 task_threads(
1970 task_t task,
1971 thread_act_array_t *threads_out,
1972 mach_msg_type_number_t *count)
1973 {
1974 mach_msg_type_number_t actual;
1975 thread_t *thread_list;
1976 thread_t thread;
1977 vm_size_t size, size_needed;
1978 void *addr;
1979 unsigned int i, j;
1980
1981 if (task == TASK_NULL)
1982 return (KERN_INVALID_ARGUMENT);
1983
1984 size = 0; addr = NULL;
1985
1986 for (;;) {
1987 task_lock(task);
1988 if (!task->active) {
1989 task_unlock(task);
1990
1991 if (size != 0)
1992 kfree(addr, size);
1993
1994 return (KERN_FAILURE);
1995 }
1996
1997 actual = task->thread_count;
1998
1999 /* do we have the memory we need? */
2000 size_needed = actual * sizeof (mach_port_t);
2001 if (size_needed <= size)
2002 break;
2003
2004 /* unlock the task and allocate more memory */
2005 task_unlock(task);
2006
2007 if (size != 0)
2008 kfree(addr, size);
2009
2010 assert(size_needed > 0);
2011 size = size_needed;
2012
2013 addr = kalloc(size);
2014 if (addr == 0)
2015 return (KERN_RESOURCE_SHORTAGE);
2016 }
2017
2018 /* OK, have memory and the task is locked & active */
2019 thread_list = (thread_t *)addr;
2020
2021 i = j = 0;
2022
2023 for (thread = (thread_t)queue_first(&task->threads); i < actual;
2024 ++i, thread = (thread_t)queue_next(&thread->task_threads)) {
2025 thread_reference_internal(thread);
2026 thread_list[j++] = thread;
2027 }
2028
2029 assert(queue_end(&task->threads, (queue_entry_t)thread));
2030
2031 actual = j;
2032 size_needed = actual * sizeof (mach_port_t);
2033
2034 /* can unlock task now that we've got the thread refs */
2035 task_unlock(task);
2036
2037 if (actual == 0) {
2038 /* no threads, so return null pointer and deallocate memory */
2039
2040 *threads_out = NULL;
2041 *count = 0;
2042
2043 if (size != 0)
2044 kfree(addr, size);
2045 }
2046 else {
2047 /* if we allocated too much, must copy */
2048
2049 if (size_needed < size) {
2050 void *newaddr;
2051
2052 newaddr = kalloc(size_needed);
2053 if (newaddr == 0) {
2054 for (i = 0; i < actual; ++i)
2055 thread_deallocate(thread_list[i]);
2056 kfree(addr, size);
2057 return (KERN_RESOURCE_SHORTAGE);
2058 }
2059
2060 bcopy(addr, newaddr, size_needed);
2061 kfree(addr, size);
2062 thread_list = (thread_t *)newaddr;
2063 }
2064
2065 *threads_out = thread_list;
2066 *count = actual;
2067
2068 /* do the conversion that Mig should handle */
2069
2070 for (i = 0; i < actual; ++i)
2071 ((ipc_port_t *) thread_list)[i] = convert_thread_to_port(thread_list[i]);
2072 }
2073
2074 return (KERN_SUCCESS);
2075 }
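/*
 * A minimal sketch of the user-space side of task_threads() (hypothetical
 * caller).  The MIG wrapper returns an out-of-line array of thread send
 * rights, so the caller is expected to release both the rights and the
 * array:
 *
 *	thread_act_array_t threads;
 *	mach_msg_type_number_t count;
 *
 *	if (task_threads(mach_task_self(), &threads, &count) == KERN_SUCCESS) {
 *		for (mach_msg_type_number_t i = 0; i < count; i++)
 *			mach_port_deallocate(mach_task_self(), threads[i]);
 *		vm_deallocate(mach_task_self(), (vm_address_t)threads,
 *		    count * sizeof(threads[0]));
 *	}
 */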
2076
2077 #define TASK_HOLD_NORMAL 0
2078 #define TASK_HOLD_PIDSUSPEND 1
2079 #define TASK_HOLD_LEGACY 2
2080 #define TASK_HOLD_LEGACY_ALL 3
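/*
 * Hold modes for place_task_hold()/release_task_hold() below:
 * TASK_HOLD_NORMAL backs task_suspend_internal()/task_resume_internal()
 * (and therefore task_suspend2()/task_resume2()), TASK_HOLD_PIDSUSPEND
 * backs task_pidsuspend()/task_pidresume(), TASK_HOLD_LEGACY backs the
 * old-style task_suspend()/task_resume(), and TASK_HOLD_LEGACY_ALL drops
 * every remaining legacy hold when the resume port loses its last sender.
 */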
2081
2082 static kern_return_t
2083 place_task_hold (
2084 register task_t task,
2085 int mode)
2086 {
2087 if (!task->active) {
2088 return (KERN_FAILURE);
2089 }
2090
2091 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2092 MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_SUSPEND) | DBG_FUNC_NONE,
2093 task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
2094 task->user_stop_count, task->user_stop_count + 1, 0);
2095
2096 #if MACH_ASSERT
2097 current_task()->suspends_outstanding++;
2098 #endif
2099
2100 if (mode == TASK_HOLD_LEGACY)
2101 task->legacy_stop_count++;
2102
2103 if (task->user_stop_count++ > 0) {
2104 /*
2105 * If the stop count was positive, the task is
2106 * already stopped and we can exit.
2107 */
2108 return (KERN_SUCCESS);
2109 }
2110
2111 /*
2112 * Put a kernel-level hold on the threads in the task (all
2113 * user-level task suspensions added together represent a
2114 * single kernel-level hold). We then wait for the threads
2115 * to stop executing user code.
2116 */
2117 task_hold_locked(task);
2118 task_wait_locked(task, FALSE);
2119
2120 return (KERN_SUCCESS);
2121 }
2122
2123 static kern_return_t
2124 release_task_hold (
2125 register task_t task,
2126 int mode)
2127 {
2128 register boolean_t release = FALSE;
2129
2130 if (!task->active) {
2131 return (KERN_FAILURE);
2132 }
2133
2134 if (mode == TASK_HOLD_PIDSUSPEND) {
2135 if (task->pidsuspended == FALSE) {
2136 return (KERN_FAILURE);
2137 }
2138 task->pidsuspended = FALSE;
2139 }
2140
2141 if (task->user_stop_count > (task->pidsuspended ? 1 : 0)) {
2142
2143 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2144 MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_RESUME) | DBG_FUNC_NONE,
2145 task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
2146 task->user_stop_count, mode, task->legacy_stop_count);
2147
2148 #if MACH_ASSERT
2149 /*
2150 * This is obviously not robust; if we suspend one task and then resume a different one,
2151 * we'll fly under the radar. This is only meant to catch the common case of a crashed
2152 * or buggy suspender.
2153 */
2154 current_task()->suspends_outstanding--;
2155 #endif
2156
2157 if (mode == TASK_HOLD_LEGACY_ALL) {
2158 if (task->legacy_stop_count >= task->user_stop_count) {
2159 task->user_stop_count = 0;
2160 release = TRUE;
2161 } else {
2162 task->user_stop_count -= task->legacy_stop_count;
2163 }
2164 task->legacy_stop_count = 0;
2165 } else {
2166 if (mode == TASK_HOLD_LEGACY && task->legacy_stop_count > 0)
2167 task->legacy_stop_count--;
2168 if (--task->user_stop_count == 0)
2169 release = TRUE;
2170 }
2171 }
2172 else {
2173 return (KERN_FAILURE);
2174 }
2175
2176 /*
2177 * Release the task if necessary.
2178 */
2179 if (release)
2180 task_release_locked(task);
2181
2182 return (KERN_SUCCESS);
2183 }
2184
2185
2186 /*
2187 * task_suspend:
2188 *
2189 * Implement an (old-fashioned) user-level suspension on a task.
2190 *
2191 * Because the user isn't expecting to have to manage a suspension
2192 * token, we'll track it for him in the kernel in the form of a naked
2193 * send right to the task's resume port. All such send rights
2194 * account for a single suspension against the task (unlike task_suspend2()
2195 * where each caller gets a unique suspension count represented by a
2196 * unique send-once right).
2197 *
2198 * Conditions:
2199 * The caller holds a reference to the task
2200 */
2201 kern_return_t
2202 task_suspend(
2203 register task_t task)
2204 {
2205 kern_return_t kr;
2206 mach_port_t port, send, old_notify;
2207 mach_port_name_t name;
2208
2209 if (task == TASK_NULL || task == kernel_task)
2210 return (KERN_INVALID_ARGUMENT);
2211
2212 task_lock(task);
2213
2214 /*
2215 * Claim a send right on the task resume port, and request a no-senders
2216 * notification on that port (if none outstanding).
2217 */
2218 if (task->itk_resume == IP_NULL) {
2219 task->itk_resume = ipc_port_alloc_kernel();
2220 if (!IP_VALID(task->itk_resume))
2221 panic("failed to create resume port");
2222 ipc_kobject_set(task->itk_resume, (ipc_kobject_t)task, IKOT_TASK_RESUME);
2223 }
2224
2225 port = task->itk_resume;
2226 ip_lock(port);
2227 assert(ip_active(port));
2228
2229 send = ipc_port_make_send_locked(port);
2230 assert(IP_VALID(send));
2231
2232 if (port->ip_nsrequest == IP_NULL) {
2233 ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
2234 assert(old_notify == IP_NULL);
2235 /* port unlocked */
2236 } else {
2237 ip_unlock(port);
2238 }
2239
2240 /*
2241 * place a legacy hold on the task.
2242 */
2243 kr = place_task_hold(task, TASK_HOLD_LEGACY);
2244 if (kr != KERN_SUCCESS) {
2245 task_unlock(task);
2246 ipc_port_release_send(send);
2247 return kr;
2248 }
2249
2250 task_unlock(task);
2251
2252 /*
2253 * Copyout the send right into the calling task's IPC space. It won't know it is there,
2254 * but we'll look it up when calling a traditional resume. Any IPC operations that
2255 * deallocate the send right will auto-release the suspension.
2256 */
2257 if ((kr = ipc_kmsg_copyout_object(current_task()->itk_space, (ipc_object_t)send,
2258 MACH_MSG_TYPE_MOVE_SEND, &name)) != KERN_SUCCESS) {
2259 printf("warning: %s(%d) failed to copyout suspension token for pid %d with error: %d\n",
2260 proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
2261 task_pid(task), kr);
2262 return (kr);
2263 }
2264
2265 return (kr);
2266 }
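/*
 * A minimal sketch of the user-space side of the old-fashioned interface
 * (hypothetical caller).  The kernel tracks the suspension token on the
 * caller's behalf, so the calls pair up directly:
 *
 *	task_t target = ...;	// a task port obtained elsewhere
 *
 *	if (task_suspend(target) == KERN_SUCCESS) {
 *		// target's threads are stopped
 *		task_resume(target);
 *	}
 */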
2267
2268 /*
2269 * task_resume:
2270 * Release a user hold on a task.
2271 *
2272 * Conditions:
2273 * The caller holds a reference to the task
2274 */
2275 kern_return_t
2276 task_resume(
2277 register task_t task)
2278 {
2279 kern_return_t kr;
2280 mach_port_name_t resume_port_name;
2281 ipc_entry_t resume_port_entry;
2282 ipc_space_t space = current_task()->itk_space;
2283
2284 if (task == TASK_NULL || task == kernel_task )
2285 return (KERN_INVALID_ARGUMENT);
2286
2287 /* release a legacy task hold */
2288 task_lock(task);
2289 kr = release_task_hold(task, TASK_HOLD_LEGACY);
2290 task_unlock(task);
2291
2292 is_write_lock(space);
2293 if (is_active(space) && IP_VALID(task->itk_resume) &&
2294 ipc_hash_lookup(space, (ipc_object_t)task->itk_resume, &resume_port_name, &resume_port_entry) == TRUE) {
2295 /*
2296 * We found a suspension token in the caller's IPC space. Release a send right to indicate that
2297 * we are holding one less legacy hold on the task from this caller. If the release failed,
2298 * go ahead and drop all the rights, as someone either already released our holds or the task
2299 * is gone.
2300 */
2301 if (kr == KERN_SUCCESS)
2302 ipc_right_dealloc(space, resume_port_name, resume_port_entry);
2303 else
2304 ipc_right_destroy(space, resume_port_name, resume_port_entry, FALSE, 0);
2305 /* space unlocked */
2306 } else {
2307 is_write_unlock(space);
2308 if (kr == KERN_SUCCESS)
2309 printf("warning: %s(%d) performed out-of-band resume on pid %d\n",
2310 proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
2311 task_pid(task));
2312 }
2313
2314 return kr;
2315 }
2316
2317 /*
2318 * Suspend the target task.
2319 * Making/holding a token/reference/port is the caller's responsibility.
2320 */
2321 kern_return_t
2322 task_suspend_internal(task_t task)
2323 {
2324 kern_return_t kr;
2325
2326 if (task == TASK_NULL || task == kernel_task)
2327 return (KERN_INVALID_ARGUMENT);
2328
2329 task_lock(task);
2330 kr = place_task_hold(task, TASK_HOLD_NORMAL);
2331 task_unlock(task);
2332 return (kr);
2333 }
2334
2335 /*
2336 * Suspend the target task, and return a suspension token. The token
2337 * represents a reference on the suspended task.
2338 */
2339 kern_return_t
2340 task_suspend2(
2341 register task_t task,
2342 task_suspension_token_t *suspend_token)
2343 {
2344 kern_return_t kr;
2345
2346 kr = task_suspend_internal(task);
2347 if (kr != KERN_SUCCESS) {
2348 *suspend_token = TASK_NULL;
2349 return (kr);
2350 }
2351
2352 /*
2353 * Take a reference on the target task and return that to the caller
2354 * as a "suspension token," which can be converted into an SO right to
2355 * the now-suspended task's resume port.
2356 */
2357 task_reference_internal(task);
2358 *suspend_token = task;
2359
2360 return (KERN_SUCCESS);
2361 }
2362
2363 /*
2364 * Resume the task
2365 * (reference/token/port management is caller's responsibility).
2366 */
2367 kern_return_t
2368 task_resume_internal(
2369 register task_suspension_token_t task)
2370 {
2371 kern_return_t kr;
2372
2373 if (task == TASK_NULL || task == kernel_task)
2374 return (KERN_INVALID_ARGUMENT);
2375
2376 task_lock(task);
2377 kr = release_task_hold(task, TASK_HOLD_NORMAL);
2378 task_unlock(task);
2379 return (kr);
2380 }
2381
2382 /*
2383 * Resume the task using a suspension token. Consumes the token's ref.
2384 */
2385 kern_return_t
2386 task_resume2(
2387 register task_suspension_token_t task)
2388 {
2389 kern_return_t kr;
2390
2391 kr = task_resume_internal(task);
2392 task_suspension_token_deallocate(task);
2393
2394 return (kr);
2395 }
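/*
 * A minimal sketch of the token-based user-space interface (hypothetical
 * caller; `target` as in the sketch after task_suspend() above).  Each
 * successful task_suspend2() yields its own suspension token, which is
 * handed back via task_resume2():
 *
 *	task_suspension_token_t token;
 *
 *	if (task_suspend2(target, &token) == KERN_SUCCESS) {
 *		// target remains suspended until this token is consumed
 *		task_resume2(token);
 *	}
 */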
2396
2397 boolean_t
2398 task_suspension_notify(mach_msg_header_t *request_header)
2399 {
2400 ipc_port_t port = (ipc_port_t) request_header->msgh_remote_port;
2401 task_t task = convert_port_to_task_suspension_token(port);
2402 mach_msg_type_number_t not_count;
2403
2404 if (task == TASK_NULL || task == kernel_task)
2405 return TRUE; /* nothing to do */
2406
2407 switch (request_header->msgh_id) {
2408
2409 case MACH_NOTIFY_SEND_ONCE:
2410 /* release the hold held by this specific send-once right */
2411 task_lock(task);
2412 release_task_hold(task, TASK_HOLD_NORMAL);
2413 task_unlock(task);
2414 break;
2415
2416 case MACH_NOTIFY_NO_SENDERS:
2417 not_count = ((mach_no_senders_notification_t *)request_header)->not_count;
2418
2419 task_lock(task);
2420 ip_lock(port);
2421 if (port->ip_mscount == not_count) {
2422
2423 /* release all the [remaining] outstanding legacy holds */
2424 assert(port->ip_nsrequest == IP_NULL);
2425 ip_unlock(port);
2426 release_task_hold(task, TASK_HOLD_LEGACY_ALL);
2427 task_unlock(task);
2428
2429 } else if (port->ip_nsrequest == IP_NULL) {
2430 ipc_port_t old_notify;
2431
2432 task_unlock(task);
2433 /* new send rights, re-arm notification at current make-send count */
2434 ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
2435 assert(old_notify == IP_NULL);
2436 /* port unlocked */
2437 } else {
2438 ip_unlock(port);
2439 task_unlock(task);
2440 }
2441 break;
2442
2443 default:
2444 break;
2445 }
2446
2447 task_suspension_token_deallocate(task); /* drop token reference */
2448 return TRUE;
2449 }
2450
2451 kern_return_t
2452 task_pidsuspend_locked(task_t task)
2453 {
2454 kern_return_t kr;
2455
2456 if (task->pidsuspended) {
2457 kr = KERN_FAILURE;
2458 goto out;
2459 }
2460
2461 task->pidsuspended = TRUE;
2462
2463 kr = place_task_hold(task, TASK_HOLD_PIDSUSPEND);
2464 if (kr != KERN_SUCCESS) {
2465 task->pidsuspended = FALSE;
2466 }
2467 out:
2468 return(kr);
2469 }
2470
2471
2472 /*
2473 * task_pidsuspend:
2474 *
2475 * Suspends a task by placing a hold on its threads.
2476 *
2477 * Conditions:
2478 * The caller holds a reference to the task
2479 */
2480 kern_return_t
2481 task_pidsuspend(
2482 register task_t task)
2483 {
2484 kern_return_t kr;
2485
2486 if (task == TASK_NULL || task == kernel_task)
2487 return (KERN_INVALID_ARGUMENT);
2488
2489 task_lock(task);
2490
2491 kr = task_pidsuspend_locked(task);
2492
2493 task_unlock(task);
2494
2495 return (kr);
2496 }
2497
2498 /* If enabled, we bring all the frozen pages back in prior to resumption; otherwise, they're faulted back in on demand */
2499 #define THAW_ON_RESUME 1
2500
2501 /*
2502 * task_pidresume:
2503 * Resumes a previously suspended task.
2504 *
2505 * Conditions:
2506 * The caller holds a reference to the task
2507 */
2508 kern_return_t
2509 task_pidresume(
2510 register task_t task)
2511 {
2512 kern_return_t kr;
2513
2514 if (task == TASK_NULL || task == kernel_task)
2515 return (KERN_INVALID_ARGUMENT);
2516
2517 task_lock(task);
2518
2519 #if (CONFIG_FREEZE && THAW_ON_RESUME)
2520
2521 while (task->changing_freeze_state) {
2522
2523 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
2524 task_unlock(task);
2525 thread_block(THREAD_CONTINUE_NULL);
2526
2527 task_lock(task);
2528 }
2529 task->changing_freeze_state = TRUE;
2530 #endif
2531
2532 kr = release_task_hold(task, TASK_HOLD_PIDSUSPEND);
2533
2534 task_unlock(task);
2535
2536 #if (CONFIG_FREEZE && THAW_ON_RESUME)
2537 if ((kr == KERN_SUCCESS) && (task->frozen == TRUE)) {
2538
2539 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
2540
2541 kr = KERN_SUCCESS;
2542 } else {
2543
2544 kr = vm_map_thaw(task->map);
2545 }
2546 }
2547 task_lock(task);
2548
2549 if (kr == KERN_SUCCESS)
2550 task->frozen = FALSE;
2551 task->changing_freeze_state = FALSE;
2552 thread_wakeup(&task->changing_freeze_state);
2553
2554 task_unlock(task);
2555 #endif
2556
2557 return (kr);
2558 }
2559
2560 #if CONFIG_FREEZE
2561
2562 /*
2563 * task_freeze:
2564 *
2565 * Freeze a task.
2566 *
2567 * Conditions:
2568 * The caller holds a reference to the task
2569 */
2570 extern void vm_wake_compactor_swapper();
2571 extern queue_head_t c_swapout_list_head;
2572
2573 kern_return_t
2574 task_freeze(
2575 register task_t task,
2576 uint32_t *purgeable_count,
2577 uint32_t *wired_count,
2578 uint32_t *clean_count,
2579 uint32_t *dirty_count,
2580 uint32_t dirty_budget,
2581 boolean_t *shared,
2582 boolean_t walk_only)
2583 {
2584 kern_return_t kr;
2585
2586 if (task == TASK_NULL || task == kernel_task)
2587 return (KERN_INVALID_ARGUMENT);
2588
2589 task_lock(task);
2590
2591 while (task->changing_freeze_state) {
2592
2593 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
2594 task_unlock(task);
2595 thread_block(THREAD_CONTINUE_NULL);
2596
2597 task_lock(task);
2598 }
2599 if (task->frozen) {
2600 task_unlock(task);
2601 return (KERN_FAILURE);
2602 }
2603 task->changing_freeze_state = TRUE;
2604
2605 task_unlock(task);
2606
2607 if (walk_only) {
2608 kr = vm_map_freeze_walk(task->map, purgeable_count, wired_count, clean_count, dirty_count, dirty_budget, shared);
2609 } else {
2610 kr = vm_map_freeze(task->map, purgeable_count, wired_count, clean_count, dirty_count, dirty_budget, shared);
2611 }
2612
2613 task_lock(task);
2614
2615 if (walk_only == FALSE && kr == KERN_SUCCESS)
2616 task->frozen = TRUE;
2617 task->changing_freeze_state = FALSE;
2618 thread_wakeup(&task->changing_freeze_state);
2619
2620 task_unlock(task);
2621
2622 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
2623 vm_wake_compactor_swapper();
2624 /*
2625 * We do an explicit wakeup of the swapout thread here
2626 * because the compact_and_swap routines don't have
2627 * knowledge about these kinds of "per-task packed c_segs"
2628 * and so will not be evaluating whether we need to do
2629 * a wakeup there.
2630 */
2631 thread_wakeup((event_t)&c_swapout_list_head);
2632 }
2633
2634 return (kr);
2635 }
2636
2637 /*
2638 * task_thaw:
2639 *
2640 * Thaw a currently frozen task.
2641 *
2642 * Conditions:
2643 * The caller holds a reference to the task
2644 */
2645 kern_return_t
2646 task_thaw(
2647 register task_t task)
2648 {
2649 kern_return_t kr;
2650
2651 if (task == TASK_NULL || task == kernel_task)
2652 return (KERN_INVALID_ARGUMENT);
2653
2654 task_lock(task);
2655
2656 while (task->changing_freeze_state) {
2657
2658 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
2659 task_unlock(task);
2660 thread_block(THREAD_CONTINUE_NULL);
2661
2662 task_lock(task);
2663 }
2664 if (!task->frozen) {
2665 task_unlock(task);
2666 return (KERN_FAILURE);
2667 }
2668 task->changing_freeze_state = TRUE;
2669
2670 if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE) {
2671 task_unlock(task);
2672
2673 kr = vm_map_thaw(task->map);
2674
2675 task_lock(task);
2676
2677 if (kr == KERN_SUCCESS)
2678 task->frozen = FALSE;
2679 } else {
2680 task->frozen = FALSE;
2681 kr = KERN_SUCCESS;
2682 }
2683
2684 task->changing_freeze_state = FALSE;
2685 thread_wakeup(&task->changing_freeze_state);
2686
2687 task_unlock(task);
2688
2689 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
2690 vm_wake_compactor_swapper();
2691 }
2692
2693 return (kr);
2694 }
2695
2696 #endif /* CONFIG_FREEZE */
2697
2698 kern_return_t
2699 host_security_set_task_token(
2700 host_security_t host_security,
2701 task_t task,
2702 security_token_t sec_token,
2703 audit_token_t audit_token,
2704 host_priv_t host_priv)
2705 {
2706 ipc_port_t host_port;
2707 kern_return_t kr;
2708
2709 if (task == TASK_NULL)
2710 return(KERN_INVALID_ARGUMENT);
2711
2712 if (host_security == HOST_NULL)
2713 return(KERN_INVALID_SECURITY);
2714
2715 task_lock(task);
2716 task->sec_token = sec_token;
2717 task->audit_token = audit_token;
2718
2719 task_unlock(task);
2720
2721 if (host_priv != HOST_PRIV_NULL) {
2722 kr = host_get_host_priv_port(host_priv, &host_port);
2723 } else {
2724 kr = host_get_host_port(host_priv_self(), &host_port);
2725 }
2726 assert(kr == KERN_SUCCESS);
2727 kr = task_set_special_port(task, TASK_HOST_PORT, host_port);
2728 return(kr);
2729 }
2730
2731 kern_return_t
2732 task_send_trace_memory(
2733 task_t target_task,
2734 __unused uint32_t pid,
2735 __unused uint64_t uniqueid)
2736 {
2737 kern_return_t kr = KERN_INVALID_ARGUMENT;
2738 if (target_task == TASK_NULL)
2739 return (KERN_INVALID_ARGUMENT);
2740
2741 #if CONFIG_ATM
2742 kr = atm_send_proc_inspect_notification(target_task,
2743 pid,
2744 uniqueid);
2745
2746 #endif
2747 return (kr);
2748 }
2749 /*
2750 * This routine was added, pretty much exclusively, for registering the
2751 * RPC glue vector for in-kernel short circuited tasks. Rather than
2752 * removing it completely, I have only disabled that feature (which was
2753 * the only feature at the time). It just appears that we are going to
2754 * want to add some user data to tasks in the future (i.e. bsd info,
2755 * task names, etc...), so I left it in the formal task interface.
2756 */
2757 kern_return_t
2758 task_set_info(
2759 task_t task,
2760 task_flavor_t flavor,
2761 __unused task_info_t task_info_in, /* pointer to IN array */
2762 __unused mach_msg_type_number_t task_info_count)
2763 {
2764 if (task == TASK_NULL)
2765 return(KERN_INVALID_ARGUMENT);
2766
2767 switch (flavor) {
2768
2769 #if CONFIG_ATM
2770 case TASK_TRACE_MEMORY_INFO:
2771 {
2772 if (task_info_count != TASK_TRACE_MEMORY_INFO_COUNT)
2773 return (KERN_INVALID_ARGUMENT);
2774
2775 assert(task_info_in != NULL);
2776 task_trace_memory_info_t mem_info;
2777 mem_info = (task_trace_memory_info_t) task_info_in;
2778 kern_return_t kr = atm_register_trace_memory(task,
2779 mem_info->user_memory_address,
2780 mem_info->buffer_size);
2781 return kr;
2782 break;
2783 }
2784
2785 #endif
2786 default:
2787 return (KERN_INVALID_ARGUMENT);
2788 }
2789 return (KERN_SUCCESS);
2790 }
2791
2792 int radar_20146450 = 1;
2793 kern_return_t
2794 task_info(
2795 task_t task,
2796 task_flavor_t flavor,
2797 task_info_t task_info_out,
2798 mach_msg_type_number_t *task_info_count)
2799 {
2800 kern_return_t error = KERN_SUCCESS;
2801
2802 if (task == TASK_NULL)
2803 return (KERN_INVALID_ARGUMENT);
2804
2805 task_lock(task);
2806
2807 if ((task != current_task()) && (!task->active)) {
2808 task_unlock(task);
2809 return (KERN_INVALID_ARGUMENT);
2810 }
2811
2812 switch (flavor) {
2813
2814 case TASK_BASIC_INFO_32:
2815 case TASK_BASIC2_INFO_32:
2816 {
2817 task_basic_info_32_t basic_info;
2818 vm_map_t map;
2819 clock_sec_t secs;
2820 clock_usec_t usecs;
2821
2822 if (*task_info_count < TASK_BASIC_INFO_32_COUNT) {
2823 error = KERN_INVALID_ARGUMENT;
2824 break;
2825 }
2826
2827 basic_info = (task_basic_info_32_t)task_info_out;
2828
2829 map = (task == kernel_task)? kernel_map: task->map;
2830 basic_info->virtual_size = (typeof(basic_info->virtual_size))map->size;
2831 if (flavor == TASK_BASIC2_INFO_32) {
2832 /*
2833 * The "BASIC2" flavor gets the maximum resident
2834 * size instead of the current resident size...
2835 */
2836 basic_info->resident_size = pmap_resident_max(map->pmap);
2837 } else {
2838 basic_info->resident_size = pmap_resident_count(map->pmap);
2839 }
2840 basic_info->resident_size *= PAGE_SIZE;
2841
2842 basic_info->policy = ((task != kernel_task)?
2843 POLICY_TIMESHARE: POLICY_RR);
2844 basic_info->suspend_count = task->user_stop_count;
2845
2846 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
2847 basic_info->user_time.seconds =
2848 (typeof(basic_info->user_time.seconds))secs;
2849 basic_info->user_time.microseconds = usecs;
2850
2851 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
2852 basic_info->system_time.seconds =
2853 (typeof(basic_info->system_time.seconds))secs;
2854 basic_info->system_time.microseconds = usecs;
2855
2856 *task_info_count = TASK_BASIC_INFO_32_COUNT;
2857 break;
2858 }
2859
2860 case TASK_BASIC_INFO_64:
2861 {
2862 task_basic_info_64_t basic_info;
2863 vm_map_t map;
2864 clock_sec_t secs;
2865 clock_usec_t usecs;
2866
2867 if (*task_info_count < TASK_BASIC_INFO_64_COUNT) {
2868 error = KERN_INVALID_ARGUMENT;
2869 break;
2870 }
2871
2872 basic_info = (task_basic_info_64_t)task_info_out;
2873
2874 map = (task == kernel_task)? kernel_map: task->map;
2875 basic_info->virtual_size = map->size;
2876 basic_info->resident_size =
2877 (mach_vm_size_t)(pmap_resident_count(map->pmap))
2878 * PAGE_SIZE_64;
2879
2880 basic_info->policy = ((task != kernel_task)?
2881 POLICY_TIMESHARE: POLICY_RR);
2882 basic_info->suspend_count = task->user_stop_count;
2883
2884 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
2885 basic_info->user_time.seconds =
2886 (typeof(basic_info->user_time.seconds))secs;
2887 basic_info->user_time.microseconds = usecs;
2888
2889 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
2890 basic_info->system_time.seconds =
2891 (typeof(basic_info->system_time.seconds))secs;
2892 basic_info->system_time.microseconds = usecs;
2893
2894 *task_info_count = TASK_BASIC_INFO_64_COUNT;
2895 break;
2896 }
2897
2898 case MACH_TASK_BASIC_INFO:
2899 {
2900 mach_task_basic_info_t basic_info;
2901 vm_map_t map;
2902 clock_sec_t secs;
2903 clock_usec_t usecs;
2904
2905 if (*task_info_count < MACH_TASK_BASIC_INFO_COUNT) {
2906 error = KERN_INVALID_ARGUMENT;
2907 break;
2908 }
2909
2910 basic_info = (mach_task_basic_info_t)task_info_out;
2911
2912 map = (task == kernel_task) ? kernel_map : task->map;
2913
2914 basic_info->virtual_size = map->size;
2915
2916 basic_info->resident_size =
2917 (mach_vm_size_t)(pmap_resident_count(map->pmap));
2918 basic_info->resident_size *= PAGE_SIZE_64;
2919
2920 basic_info->resident_size_max =
2921 (mach_vm_size_t)(pmap_resident_max(map->pmap));
2922 basic_info->resident_size_max *= PAGE_SIZE_64;
2923
2924 basic_info->policy = ((task != kernel_task) ?
2925 POLICY_TIMESHARE : POLICY_RR);
2926
2927 basic_info->suspend_count = task->user_stop_count;
2928
2929 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
2930 basic_info->user_time.seconds =
2931 (typeof(basic_info->user_time.seconds))secs;
2932 basic_info->user_time.microseconds = usecs;
2933
2934 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
2935 basic_info->system_time.seconds =
2936 (typeof(basic_info->system_time.seconds))secs;
2937 basic_info->system_time.microseconds = usecs;
2938
2939 *task_info_count = MACH_TASK_BASIC_INFO_COUNT;
2940 break;
2941 }
2942
2943 case TASK_THREAD_TIMES_INFO:
2944 {
2945 register task_thread_times_info_t times_info;
2946 register thread_t thread;
2947
2948 if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) {
2949 error = KERN_INVALID_ARGUMENT;
2950 break;
2951 }
2952
2953 times_info = (task_thread_times_info_t) task_info_out;
2954 times_info->user_time.seconds = 0;
2955 times_info->user_time.microseconds = 0;
2956 times_info->system_time.seconds = 0;
2957 times_info->system_time.microseconds = 0;
2958
2959
2960 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2961 time_value_t user_time, system_time;
2962
2963 if (thread->options & TH_OPT_IDLE_THREAD)
2964 continue;
2965
2966 thread_read_times(thread, &user_time, &system_time);
2967
2968 time_value_add(&times_info->user_time, &user_time);
2969 time_value_add(&times_info->system_time, &system_time);
2970 }
2971
2972 *task_info_count = TASK_THREAD_TIMES_INFO_COUNT;
2973 break;
2974 }
2975
2976 case TASK_ABSOLUTETIME_INFO:
2977 {
2978 task_absolutetime_info_t info;
2979 register thread_t thread;
2980
2981 if (*task_info_count < TASK_ABSOLUTETIME_INFO_COUNT) {
2982 error = KERN_INVALID_ARGUMENT;
2983 break;
2984 }
2985
2986 info = (task_absolutetime_info_t)task_info_out;
2987 info->threads_user = info->threads_system = 0;
2988
2989
2990 info->total_user = task->total_user_time;
2991 info->total_system = task->total_system_time;
2992
2993 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2994 uint64_t tval;
2995 spl_t x;
2996
2997 if (thread->options & TH_OPT_IDLE_THREAD)
2998 continue;
2999
3000 x = splsched();
3001 thread_lock(thread);
3002
3003 tval = timer_grab(&thread->user_timer);
3004 info->threads_user += tval;
3005 info->total_user += tval;
3006
3007 tval = timer_grab(&thread->system_timer);
3008 if (thread->precise_user_kernel_time) {
3009 info->threads_system += tval;
3010 info->total_system += tval;
3011 } else {
3012 /* system_timer may represent either sys or user */
3013 info->threads_user += tval;
3014 info->total_user += tval;
3015 }
3016
3017 thread_unlock(thread);
3018 splx(x);
3019 }
3020
3021
3022 *task_info_count = TASK_ABSOLUTETIME_INFO_COUNT;
3023 break;
3024 }
3025
3026 case TASK_DYLD_INFO:
3027 {
3028 task_dyld_info_t info;
3029
3030 /*
3031 * We added the format field to TASK_DYLD_INFO output. For
3032 * temporary backward compatibility, accept the fact that
3033 * clients may ask for the old version - distinguished by the
3034 * size of the expected result structure.
3035 */
3036 #define TASK_LEGACY_DYLD_INFO_COUNT \
3037 offsetof(struct task_dyld_info, all_image_info_format)/sizeof(natural_t)
3038
3039 if (*task_info_count < TASK_LEGACY_DYLD_INFO_COUNT) {
3040 error = KERN_INVALID_ARGUMENT;
3041 break;
3042 }
3043
3044 info = (task_dyld_info_t)task_info_out;
3045 info->all_image_info_addr = task->all_image_info_addr;
3046 info->all_image_info_size = task->all_image_info_size;
3047
3048 /* only set format on output for those expecting it */
3049 if (*task_info_count >= TASK_DYLD_INFO_COUNT) {
3050 info->all_image_info_format = task_has_64BitAddr(task) ?
3051 TASK_DYLD_ALL_IMAGE_INFO_64 :
3052 TASK_DYLD_ALL_IMAGE_INFO_32 ;
3053 *task_info_count = TASK_DYLD_INFO_COUNT;
3054 } else {
3055 *task_info_count = TASK_LEGACY_DYLD_INFO_COUNT;
3056 }
3057 break;
3058 }
3059
3060 case TASK_EXTMOD_INFO:
3061 {
3062 task_extmod_info_t info;
3063 void *p;
3064
3065 if (*task_info_count < TASK_EXTMOD_INFO_COUNT) {
3066 error = KERN_INVALID_ARGUMENT;
3067 break;
3068 }
3069
3070 info = (task_extmod_info_t)task_info_out;
3071
3072 p = get_bsdtask_info(task);
3073 if (p) {
3074 proc_getexecutableuuid(p, info->task_uuid, sizeof(info->task_uuid));
3075 } else {
3076 bzero(info->task_uuid, sizeof(info->task_uuid));
3077 }
3078 info->extmod_statistics = task->extmod_statistics;
3079 *task_info_count = TASK_EXTMOD_INFO_COUNT;
3080
3081 break;
3082 }
3083
3084 case TASK_KERNELMEMORY_INFO:
3085 {
3086 task_kernelmemory_info_t tkm_info;
3087 ledger_amount_t credit, debit;
3088
3089 if (*task_info_count < TASK_KERNELMEMORY_INFO_COUNT) {
3090 error = KERN_INVALID_ARGUMENT;
3091 break;
3092 }
3093
3094 tkm_info = (task_kernelmemory_info_t) task_info_out;
3095 tkm_info->total_palloc = 0;
3096 tkm_info->total_pfree = 0;
3097 tkm_info->total_salloc = 0;
3098 tkm_info->total_sfree = 0;
3099
3100 if (task == kernel_task) {
3101 /*
3102 * All shared allocs/frees from other tasks count against
3103 * the kernel private memory usage. If we are looking up
3104 * info for the kernel task, gather from everywhere.
3105 */
3106 task_unlock(task);
3107
3108 /* start by accounting for all the terminated tasks against the kernel */
3109 tkm_info->total_palloc = tasks_tkm_private.alloc + tasks_tkm_shared.alloc;
3110 tkm_info->total_pfree = tasks_tkm_private.free + tasks_tkm_shared.free;
3111
3112 /* count all other task/thread shared alloc/free against the kernel */
3113 lck_mtx_lock(&tasks_threads_lock);
3114
3115 /* XXX this really shouldn't be using the function parameter 'task' as a local var! */
3116 queue_iterate(&tasks, task, task_t, tasks) {
3117 if (task == kernel_task) {
3118 if (ledger_get_entries(task->ledger,
3119 task_ledgers.tkm_private, &credit,
3120 &debit) == KERN_SUCCESS) {
3121 tkm_info->total_palloc += credit;
3122 tkm_info->total_pfree += debit;
3123 }
3124 }
3125 if (!ledger_get_entries(task->ledger,
3126 task_ledgers.tkm_shared, &credit, &debit)) {
3127 tkm_info->total_palloc += credit;
3128 tkm_info->total_pfree += debit;
3129 }
3130 }
3131 lck_mtx_unlock(&tasks_threads_lock);
3132 } else {
3133 if (!ledger_get_entries(task->ledger,
3134 task_ledgers.tkm_private, &credit, &debit)) {
3135 tkm_info->total_palloc = credit;
3136 tkm_info->total_pfree = debit;
3137 }
3138 if (!ledger_get_entries(task->ledger,
3139 task_ledgers.tkm_shared, &credit, &debit)) {
3140 tkm_info->total_salloc = credit;
3141 tkm_info->total_sfree = debit;
3142 }
3143 task_unlock(task);
3144 }
3145
3146 *task_info_count = TASK_KERNELMEMORY_INFO_COUNT;
3147 return KERN_SUCCESS;
3148 }
3149
3150 /* OBSOLETE */
3151 case TASK_SCHED_FIFO_INFO:
3152 {
3153
3154 if (*task_info_count < POLICY_FIFO_BASE_COUNT) {
3155 error = KERN_INVALID_ARGUMENT;
3156 break;
3157 }
3158
3159 error = KERN_INVALID_POLICY;
3160 break;
3161 }
3162
3163 /* OBSOLETE */
3164 case TASK_SCHED_RR_INFO:
3165 {
3166 register policy_rr_base_t rr_base;
3167 uint32_t quantum_time;
3168 uint64_t quantum_ns;
3169
3170 if (*task_info_count < POLICY_RR_BASE_COUNT) {
3171 error = KERN_INVALID_ARGUMENT;
3172 break;
3173 }
3174
3175 rr_base = (policy_rr_base_t) task_info_out;
3176
3177 if (task != kernel_task) {
3178 error = KERN_INVALID_POLICY;
3179 break;
3180 }
3181
3182 rr_base->base_priority = task->priority;
3183
3184 quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
3185 absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
3186
3187 rr_base->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
3188
3189 *task_info_count = POLICY_RR_BASE_COUNT;
3190 break;
3191 }
3192
3193 /* OBSOLETE */
3194 case TASK_SCHED_TIMESHARE_INFO:
3195 {
3196 register policy_timeshare_base_t ts_base;
3197
3198 if (*task_info_count < POLICY_TIMESHARE_BASE_COUNT) {
3199 error = KERN_INVALID_ARGUMENT;
3200 break;
3201 }
3202
3203 ts_base = (policy_timeshare_base_t) task_info_out;
3204
3205 if (task == kernel_task) {
3206 error = KERN_INVALID_POLICY;
3207 break;
3208 }
3209
3210 ts_base->base_priority = task->priority;
3211
3212 *task_info_count = POLICY_TIMESHARE_BASE_COUNT;
3213 break;
3214 }
3215
3216 case TASK_SECURITY_TOKEN:
3217 {
3218 register security_token_t *sec_token_p;
3219
3220 if (*task_info_count < TASK_SECURITY_TOKEN_COUNT) {
3221 error = KERN_INVALID_ARGUMENT;
3222 break;
3223 }
3224
3225 sec_token_p = (security_token_t *) task_info_out;
3226
3227 *sec_token_p = task->sec_token;
3228
3229 *task_info_count = TASK_SECURITY_TOKEN_COUNT;
3230 break;
3231 }
3232
3233 case TASK_AUDIT_TOKEN:
3234 {
3235 register audit_token_t *audit_token_p;
3236
3237 if (*task_info_count < TASK_AUDIT_TOKEN_COUNT) {
3238 error = KERN_INVALID_ARGUMENT;
3239 break;
3240 }
3241
3242 audit_token_p = (audit_token_t *) task_info_out;
3243
3244 *audit_token_p = task->audit_token;
3245
3246 *task_info_count = TASK_AUDIT_TOKEN_COUNT;
3247 break;
3248 }
3249
3250 case TASK_SCHED_INFO:
3251 error = KERN_INVALID_ARGUMENT;
3252 break;
3253
3254 case TASK_EVENTS_INFO:
3255 {
3256 register task_events_info_t events_info;
3257 register thread_t thread;
3258
3259 if (*task_info_count < TASK_EVENTS_INFO_COUNT) {
3260 error = KERN_INVALID_ARGUMENT;
3261 break;
3262 }
3263
3264 events_info = (task_events_info_t) task_info_out;
3265
3266
3267 events_info->faults = task->faults;
3268 events_info->pageins = task->pageins;
3269 events_info->cow_faults = task->cow_faults;
3270 events_info->messages_sent = task->messages_sent;
3271 events_info->messages_received = task->messages_received;
3272 events_info->syscalls_mach = task->syscalls_mach;
3273 events_info->syscalls_unix = task->syscalls_unix;
3274
3275 events_info->csw = task->c_switch;
3276
3277 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3278 events_info->csw += thread->c_switch;
3279 events_info->syscalls_mach += thread->syscalls_mach;
3280 events_info->syscalls_unix += thread->syscalls_unix;
3281 }
3282
3283
3284 *task_info_count = TASK_EVENTS_INFO_COUNT;
3285 break;
3286 }
3287 case TASK_AFFINITY_TAG_INFO:
3288 {
3289 if (*task_info_count < TASK_AFFINITY_TAG_INFO_COUNT) {
3290 error = KERN_INVALID_ARGUMENT;
3291 break;
3292 }
3293
3294 error = task_affinity_info(task, task_info_out, task_info_count);
3295 break;
3296 }
3297 case TASK_POWER_INFO:
3298 {
3299 if (*task_info_count < TASK_POWER_INFO_COUNT) {
3300 error = KERN_INVALID_ARGUMENT;
3301 break;
3302 }
3303
3304 task_power_info_locked(task, (task_power_info_t)task_info_out, NULL);
3305 break;
3306 }
3307
3308 case TASK_POWER_INFO_V2:
3309 {
3310 if (*task_info_count < TASK_POWER_INFO_V2_COUNT) {
3311 error = KERN_INVALID_ARGUMENT;
3312 break;
3313 }
3314 task_power_info_v2_t tpiv2 = (task_power_info_v2_t) task_info_out;
3315 task_power_info_locked(task, &tpiv2->cpu_energy, &tpiv2->gpu_energy);
3316 break;
3317 }
3318
3319 case TASK_VM_INFO:
3320 case TASK_VM_INFO_PURGEABLE:
3321 {
3322 task_vm_info_t vm_info;
3323 vm_map_t map;
3324
3325 if (*task_info_count < TASK_VM_INFO_REV0_COUNT) {
3326 error = KERN_INVALID_ARGUMENT;
3327 break;
3328 }
3329
3330 vm_info = (task_vm_info_t)task_info_out;
3331
3332 if (task == kernel_task) {
3333 map = kernel_map;
3334 /* no lock */
3335 } else {
3336 map = task->map;
3337 vm_map_lock_read(map);
3338 }
3339
3340 vm_info->virtual_size = (typeof(vm_info->virtual_size))map->size;
3341 vm_info->region_count = map->hdr.nentries;
3342 vm_info->page_size = vm_map_page_size(map);
3343
3344 vm_info->resident_size = pmap_resident_count(map->pmap);
3345 vm_info->resident_size *= PAGE_SIZE;
3346 vm_info->resident_size_peak = pmap_resident_max(map->pmap);
3347 vm_info->resident_size_peak *= PAGE_SIZE;
3348
3349 #define _VM_INFO(_name) \
3350 vm_info->_name = ((mach_vm_size_t) map->pmap->stats._name) * PAGE_SIZE
3351
3352 _VM_INFO(device);
3353 _VM_INFO(device_peak);
3354 _VM_INFO(external);
3355 _VM_INFO(external_peak);
3356 _VM_INFO(internal);
3357 _VM_INFO(internal_peak);
3358 _VM_INFO(reusable);
3359 _VM_INFO(reusable_peak);
3360 _VM_INFO(compressed);
3361 _VM_INFO(compressed_peak);
3362 _VM_INFO(compressed_lifetime);
3363
3364 vm_info->purgeable_volatile_pmap = 0;
3365 vm_info->purgeable_volatile_resident = 0;
3366 vm_info->purgeable_volatile_virtual = 0;
3367 if (task == kernel_task) {
3368 /*
3369 * We do not maintain the detailed stats for the
3370 * kernel_pmap, so just count everything as
3371 * "internal"...
3372 */
3373 vm_info->internal = vm_info->resident_size;
3374 /*
3375 * ... but since the memory held by the VM compressor
3376 * in the kernel address space ought to be attributed
3377 * to user-space tasks, we subtract it from "internal"
3378 * to give memory reporting tools a more accurate idea
3379 * of what the kernel itself is actually using, instead
3380 * of making it look like the kernel is leaking memory
3381 * when the system is under memory pressure.
3382 */
3383 vm_info->internal -= (VM_PAGE_COMPRESSOR_COUNT *
3384 PAGE_SIZE);
3385 } else {
3386 mach_vm_size_t volatile_virtual_size;
3387 mach_vm_size_t volatile_resident_size;
3388 mach_vm_size_t volatile_compressed_size;
3389 mach_vm_size_t volatile_pmap_size;
3390 mach_vm_size_t volatile_compressed_pmap_size;
3391 kern_return_t kr;
3392
3393 if (flavor == TASK_VM_INFO_PURGEABLE) {
3394 kr = vm_map_query_volatile(
3395 map,
3396 &volatile_virtual_size,
3397 &volatile_resident_size,
3398 &volatile_compressed_size,
3399 &volatile_pmap_size,
3400 &volatile_compressed_pmap_size);
3401 if (kr == KERN_SUCCESS) {
3402 vm_info->purgeable_volatile_pmap =
3403 volatile_pmap_size;
3404 if (radar_20146450) {
3405 vm_info->compressed -=
3406 volatile_compressed_pmap_size;
3407 }
3408 vm_info->purgeable_volatile_resident =
3409 volatile_resident_size;
3410 vm_info->purgeable_volatile_virtual =
3411 volatile_virtual_size;
3412 }
3413 }
3414 vm_map_unlock_read(map);
3415 }
3416
3417 if (*task_info_count >= TASK_VM_INFO_COUNT) {
3418 vm_info->phys_footprint = 0;
3419 *task_info_count = TASK_VM_INFO_COUNT;
3420 } else {
3421 *task_info_count = TASK_VM_INFO_REV0_COUNT;
3422 }
3423
3424 break;
3425 }
3426
3427 case TASK_WAIT_STATE_INFO:
3428 {
3429 /*
3430 * Deprecated flavor. Currently allowing some results until all users
3431 * stop calling it. The results may not be accurate.
3432 */
3433 task_wait_state_info_t wait_state_info;
3434 uint64_t total_sfi_ledger_val = 0;
3435
3436 if (*task_info_count < TASK_WAIT_STATE_INFO_COUNT) {
3437 error = KERN_INVALID_ARGUMENT;
3438 break;
3439 }
3440
3441 wait_state_info = (task_wait_state_info_t) task_info_out;
3442
3443 wait_state_info->total_wait_state_time = 0;
3444 bzero(wait_state_info->_reserved, sizeof(wait_state_info->_reserved));
3445
3446 #if CONFIG_SCHED_SFI
3447 int i, prev_lentry = -1;
3448 int64_t val_credit, val_debit;
3449
3450 for (i = 0; i < MAX_SFI_CLASS_ID; i++){
3451 val_credit = 0;
3452 /*
3453 * checking with prev_lentry != entry ensures adjacent classes
3454 * which share the same ledger do not add wait times twice.
3455 * Note: Use ledger() call to get data for each individual sfi class.
3456 */
3457 if (prev_lentry != task_ledgers.sfi_wait_times[i] &&
3458 KERN_SUCCESS == ledger_get_entries(task->ledger,
3459 task_ledgers.sfi_wait_times[i], &val_credit, &val_debit)) {
3460 total_sfi_ledger_val += val_credit;
3461 }
3462 prev_lentry = task_ledgers.sfi_wait_times[i];
3463 }
3464
3465 #endif /* CONFIG_SCHED_SFI */
3466 wait_state_info->total_wait_sfi_state_time = total_sfi_ledger_val;
3467 *task_info_count = TASK_WAIT_STATE_INFO_COUNT;
3468
3469 break;
3470 }
3471 case TASK_VM_INFO_PURGEABLE_ACCOUNT:
3472 {
3473 #if DEVELOPMENT || DEBUG
3474 pvm_account_info_t acnt_info;
3475
3476 if (*task_info_count < PVM_ACCOUNT_INFO_COUNT) {
3477 error = KERN_INVALID_ARGUMENT;
3478 break;
3479 }
3480
3481 if (task_info_out == NULL) {
3482 error = KERN_INVALID_ARGUMENT;
3483 break;
3484 }
3485
3486 acnt_info = (pvm_account_info_t) task_info_out;
3487
3488 error = vm_purgeable_account(task, acnt_info);
3489
3490 *task_info_count = PVM_ACCOUNT_INFO_COUNT;
3491
3492 break;
3493 #else /* DEVELOPMENT || DEBUG */
3494 error = KERN_NOT_SUPPORTED;
3495 break;
3496 #endif /* DEVELOPMENT || DEBUG */
3497 }
3498 case TASK_FLAGS_INFO:
3499 {
3500 task_flags_info_t flags_info;
3501
3502 if (*task_info_count < TASK_FLAGS_INFO_COUNT) {
3503 error = KERN_INVALID_ARGUMENT;
3504 break;
3505 }
3506
3507 flags_info = (task_flags_info_t)task_info_out;
3508
3509 /* only publish the 64-bit flag of the task */
3510 flags_info->flags = task->t_flags & TF_64B_ADDR;
3511
3512 *task_info_count = TASK_FLAGS_INFO_COUNT;
3513 break;
3514 }
3515
3516 case TASK_DEBUG_INFO_INTERNAL:
3517 {
3518 #if DEVELOPMENT || DEBUG
3519 task_debug_info_internal_t dbg_info;
3520 if (*task_info_count < TASK_DEBUG_INFO_INTERNAL_COUNT) {
3521 error = KERN_NOT_SUPPORTED;
3522 break;
3523 }
3524
3525 if (task_info_out == NULL) {
3526 error = KERN_INVALID_ARGUMENT;
3527 break;
3528 }
3529 dbg_info = (task_debug_info_internal_t) task_info_out;
3530 dbg_info->ipc_space_size = 0;
3531 if (task->itk_space){
3532 dbg_info->ipc_space_size = task->itk_space->is_table_size;
3533 }
3534
3535 error = KERN_SUCCESS;
3536 *task_info_count = TASK_DEBUG_INFO_INTERNAL_COUNT;
3537 break;
3538 #else /* DEVELOPMENT || DEBUG */
3539 error = KERN_NOT_SUPPORTED;
3540 break;
3541 #endif /* DEVELOPMENT || DEBUG */
3542 }
3543 default:
3544 error = KERN_INVALID_ARGUMENT;
3545 }
3546
3547 task_unlock(task);
3548 return (error);
3549 }
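/*
 * A minimal user-space sketch of one of the flavors handled above,
 * MACH_TASK_BASIC_INFO, queried for the calling task (hypothetical
 * caller):
 *
 *	mach_task_basic_info_data_t info;
 *	mach_msg_type_number_t count = MACH_TASK_BASIC_INFO_COUNT;
 *
 *	if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO,
 *	    (task_info_t)&info, &count) == KERN_SUCCESS) {
 *		// info.resident_size and info.virtual_size are in bytes
 *	}
 */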
3550
3551 /*
3552 * task_power_info
3553 *
3554 * Returns power stats for the task.
3555 * Note: Called with task locked.
3556 */
3557 void
3558 task_power_info_locked(
3559 task_t task,
3560 task_power_info_t info,
3561 gpu_energy_data_t ginfo)
3562 {
3563 thread_t thread;
3564 ledger_amount_t tmp;
3565
3566 task_lock_assert_owned(task);
3567
3568 ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
3569 (ledger_amount_t *)&info->task_interrupt_wakeups, &tmp);
3570 ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
3571 (ledger_amount_t *)&info->task_platform_idle_wakeups, &tmp);
3572
3573 info->task_timer_wakeups_bin_1 = task->task_timer_wakeups_bin_1;
3574 info->task_timer_wakeups_bin_2 = task->task_timer_wakeups_bin_2;
3575
3576 info->total_user = task->total_user_time;
3577 info->total_system = task->total_system_time;
3578
3579 if (ginfo) {
3580 ginfo->task_gpu_utilisation = task->task_gpu_ns;
3581 }
3582
3583 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3584 uint64_t tval;
3585 spl_t x;
3586
3587 if (thread->options & TH_OPT_IDLE_THREAD)
3588 continue;
3589
3590 x = splsched();
3591 thread_lock(thread);
3592
3593 info->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
3594 info->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
3595
3596 tval = timer_grab(&thread->user_timer);
3597 info->total_user += tval;
3598
3599 tval = timer_grab(&thread->system_timer);
3600 if (thread->precise_user_kernel_time) {
3601 info->total_system += tval;
3602 } else {
3603 /* system_timer may represent either sys or user */
3604 info->total_user += tval;
3605 }
3606
3607 if (ginfo) {
3608 ginfo->task_gpu_utilisation += ml_gpu_stat(thread);
3609 }
3610 thread_unlock(thread);
3611 splx(x);
3612 }
3613 }
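/*
 * The routine above supplies the data returned by the TASK_POWER_INFO
 * and TASK_POWER_INFO_V2 flavors of task_info().  A minimal user-space
 * sketch for the former (hypothetical caller, assuming the standard
 * task_power_info layout from <mach/task_info.h>):
 *
 *	task_power_info_data_t pinfo;
 *	mach_msg_type_number_t pcount = TASK_POWER_INFO_COUNT;
 *
 *	if (task_info(mach_task_self(), TASK_POWER_INFO,
 *	    (task_info_t)&pinfo, &pcount) == KERN_SUCCESS) {
 *		// pinfo.task_interrupt_wakeups, pinfo.total_user, ...
 *	}
 */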
3614
3615 /*
3616 * task_gpu_utilisation
3617 *
3618 * Returns the total gpu time used by all the threads of the task
3619 * (both dead and alive)
3620 */
3621 uint64_t
3622 task_gpu_utilisation(
3623 task_t task)
3624 {
3625 uint64_t gpu_time = 0;
3626 thread_t thread;
3627
3628 task_lock(task);
3629 gpu_time += task->task_gpu_ns;
3630
3631 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3632 spl_t x;
3633 x = splsched();
3634 thread_lock(thread);
3635 gpu_time += ml_gpu_stat(thread);
3636 thread_unlock(thread);
3637 splx(x);
3638 }
3639
3640 task_unlock(task);
3641 return gpu_time;
3642 }
3643
3644 kern_return_t
3645 task_purgable_info(
3646 task_t task,
3647 task_purgable_info_t *stats)
3648 {
3649 if (task == TASK_NULL || stats == NULL)
3650 return KERN_INVALID_ARGUMENT;
3651 /* Take task reference */
3652 task_reference(task);
3653 vm_purgeable_stats((vm_purgeable_info_t)stats, task);
3654 /* Drop task reference */
3655 task_deallocate(task);
3656 return KERN_SUCCESS;
3657 }
3658
3659 void
3660 task_vtimer_set(
3661 task_t task,
3662 integer_t which)
3663 {
3664 thread_t thread;
3665 spl_t x;
3666
3667 /* assert(task == current_task()); */ /* bogus assert 4803227 4807483 */
3668
3669 task_lock(task);
3670
3671 task->vtimers |= which;
3672
3673 switch (which) {
3674
3675 case TASK_VTIMER_USER:
3676 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3677 x = splsched();
3678 thread_lock(thread);
3679 if (thread->precise_user_kernel_time)
3680 thread->vtimer_user_save = timer_grab(&thread->user_timer);
3681 else
3682 thread->vtimer_user_save = timer_grab(&thread->system_timer);
3683 thread_unlock(thread);
3684 splx(x);
3685 }
3686 break;
3687
3688 case TASK_VTIMER_PROF:
3689 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3690 x = splsched();
3691 thread_lock(thread);
3692 thread->vtimer_prof_save = timer_grab(&thread->user_timer);
3693 thread->vtimer_prof_save += timer_grab(&thread->system_timer);
3694 thread_unlock(thread);
3695 splx(x);
3696 }
3697 break;
3698
3699 case TASK_VTIMER_RLIM:
3700 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3701 x = splsched();
3702 thread_lock(thread);
3703 thread->vtimer_rlim_save = timer_grab(&thread->user_timer);
3704 thread->vtimer_rlim_save += timer_grab(&thread->system_timer);
3705 thread_unlock(thread);
3706 splx(x);
3707 }
3708 break;
3709 }
3710
3711 task_unlock(task);
3712 }
3713
3714 void
3715 task_vtimer_clear(
3716 task_t task,
3717 integer_t which)
3718 {
3719 assert(task == current_task());
3720
3721 task_lock(task);
3722
3723 task->vtimers &= ~which;
3724
3725 task_unlock(task);
3726 }
3727
3728 void
3729 task_vtimer_update(
3730 __unused
3731 task_t task,
3732 integer_t which,
3733 uint32_t *microsecs)
3734 {
3735 thread_t thread = current_thread();
3736 uint32_t tdelt;
3737 clock_sec_t secs;
3738 uint64_t tsum;
3739
3740 assert(task == current_task());
3741
3742 assert(task->vtimers & which);
3743
3744 secs = tdelt = 0;
3745
3746 switch (which) {
3747
3748 case TASK_VTIMER_USER:
3749 if (thread->precise_user_kernel_time) {
3750 tdelt = (uint32_t)timer_delta(&thread->user_timer,
3751 &thread->vtimer_user_save);
3752 } else {
3753 tdelt = (uint32_t)timer_delta(&thread->system_timer,
3754 &thread->vtimer_user_save);
3755 }
3756 absolutetime_to_microtime(tdelt, &secs, microsecs);
3757 break;
3758
3759 case TASK_VTIMER_PROF:
3760 tsum = timer_grab(&thread->user_timer);
3761 tsum += timer_grab(&thread->system_timer);
3762 tdelt = (uint32_t)(tsum - thread->vtimer_prof_save);
3763 absolutetime_to_microtime(tdelt, &secs, microsecs);
3764 /* if the time delta is smaller than a usec, ignore */
3765 if (*microsecs != 0)
3766 thread->vtimer_prof_save = tsum;
3767 break;
3768
3769 case TASK_VTIMER_RLIM:
3770 tsum = timer_grab(&thread->user_timer);
3771 tsum += timer_grab(&thread->system_timer);
3772 tdelt = (uint32_t)(tsum - thread->vtimer_rlim_save);
3773 thread->vtimer_rlim_save = tsum;
3774 absolutetime_to_microtime(tdelt, &secs, microsecs);
3775 break;
3776 }
3777
3778 }
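/*
 * The vtimer routines above work as a pair: task_vtimer_set() records a
 * per-thread baseline for the requested timer class, and
 * task_vtimer_update() later reports (via *microsecs) how much time has
 * elapsed since that baseline, advancing the PROF and RLIM baselines as
 * it goes.
 */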
3779
3780 /*
3781 * task_assign:
3782 *
3783 * Change the assigned processor set for the task
3784 */
3785 kern_return_t
3786 task_assign(
3787 __unused task_t task,
3788 __unused processor_set_t new_pset,
3789 __unused boolean_t assign_threads)
3790 {
3791 return(KERN_FAILURE);
3792 }
3793
3794 /*
3795 * task_assign_default:
3796 *
3797 * Version of task_assign to assign to default processor set.
3798 */
3799 kern_return_t
3800 task_assign_default(
3801 task_t task,
3802 boolean_t assign_threads)
3803 {
3804 return (task_assign(task, &pset0, assign_threads));
3805 }
3806
3807 /*
3808 * task_get_assignment
3809 *
3810 * Return name of processor set that task is assigned to.
3811 */
3812 kern_return_t
3813 task_get_assignment(
3814 task_t task,
3815 processor_set_t *pset)
3816 {
3817 if (!task->active)
3818 return(KERN_FAILURE);
3819
3820 *pset = &pset0;
3821
3822 return (KERN_SUCCESS);
3823 }
3824
3825 uint64_t
3826 get_task_dispatchqueue_offset(
3827 task_t task)
3828 {
3829 return task->dispatchqueue_offset;
3830 }
3831
3832 /*
3833 * task_policy
3834 *
3835 * Set scheduling policy and parameters, both base and limit, for
3836 * the given task. Policy must be a policy which is enabled for the
3837 * processor set. Change contained threads if requested.
3838 */
3839 kern_return_t
3840 task_policy(
3841 __unused task_t task,
3842 __unused policy_t policy_id,
3843 __unused policy_base_t base,
3844 __unused mach_msg_type_number_t count,
3845 __unused boolean_t set_limit,
3846 __unused boolean_t change)
3847 {
3848 return(KERN_FAILURE);
3849 }
3850
3851 /*
3852 * task_set_policy
3853 *
3854 * Set scheduling policy and parameters, both base and limit, for
3855 * the given task. Policy can be any policy implemented by the
3856 * processor set, whether enabled or not. Change contained threads
3857 * if requested.
3858 */
3859 kern_return_t
3860 task_set_policy(
3861 __unused task_t task,
3862 __unused processor_set_t pset,
3863 __unused policy_t policy_id,
3864 __unused policy_base_t base,
3865 __unused mach_msg_type_number_t base_count,
3866 __unused policy_limit_t limit,
3867 __unused mach_msg_type_number_t limit_count,
3868 __unused boolean_t change)
3869 {
3870 return(KERN_FAILURE);
3871 }
3872
3873 kern_return_t
3874 task_set_ras_pc(
3875 __unused task_t task,
3876 __unused vm_offset_t pc,
3877 __unused vm_offset_t endpc)
3878 {
3879 return KERN_FAILURE;
3880 }
3881
3882 void
3883 task_synchronizer_destroy_all(task_t task)
3884 {
3885 /*
3886 * Destroy owned semaphores
3887 */
3888 semaphore_destroy_all(task);
3889 }
3890
3891 /*
3892 * Install default (machine-dependent) initial thread state
3893 * on the task. Subsequent thread creation will have this initial
3894 * state set on the thread by machine_thread_inherit_taskwide().
3895 * Flavors and structures are exactly the same as those passed to thread_set_state().
3896 */
3897 kern_return_t
3898 task_set_state(
3899 task_t task,
3900 int flavor,
3901 thread_state_t state,
3902 mach_msg_type_number_t state_count)
3903 {
3904 kern_return_t ret;
3905
3906 if (task == TASK_NULL) {
3907 return (KERN_INVALID_ARGUMENT);
3908 }
3909
3910 task_lock(task);
3911
3912 if (!task->active) {
3913 task_unlock(task);
3914 return (KERN_FAILURE);
3915 }
3916
3917 ret = machine_task_set_state(task, flavor, state, state_count);
3918
3919 task_unlock(task);
3920 return ret;
3921 }
3922
3923 /*
3924 * Examine the default (machine-dependent) initial thread state
3925 * on the task, as set by task_set_state(). Flavors and structures
3926 * are exactly the same as those passed to thread_get_state().
3927 */
3928 kern_return_t
3929 task_get_state(
3930 task_t task,
3931 int flavor,
3932 thread_state_t state,
3933 mach_msg_type_number_t *state_count)
3934 {
3935 kern_return_t ret;
3936
3937 if (task == TASK_NULL) {
3938 return (KERN_INVALID_ARGUMENT);
3939 }
3940
3941 task_lock(task);
3942
3943 if (!task->active) {
3944 task_unlock(task);
3945 return (KERN_FAILURE);
3946 }
3947
3948 ret = machine_task_get_state(task, flavor, state, state_count);
3949
3950 task_unlock(task);
3951 return ret;
3952 }
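
/*
 * Illustrative sketch (not part of this file): a user-space caller holding
 * the task's control port could seed the default register state that new
 * threads in that task will inherit.  The flavor, structure and field names
 * below assume the x86_64 user-visible thread-state layout; any flavor
 * accepted by thread_set_state() on the target architecture would work the
 * same way.
 *
 *	#include <mach/mach.h>
 *
 *	kern_return_t
 *	seed_default_thread_state(task_t target)
 *	{
 *		x86_thread_state64_t regs;
 *		mach_msg_type_number_t count = x86_THREAD_STATE64_COUNT;
 *		kern_return_t kr;
 *
 *		kr = task_get_state(target, x86_THREAD_STATE64,
 *		    (thread_state_t)&regs, &count);
 *		if (kr != KERN_SUCCESS)
 *			return kr;
 *
 *		-- tweak the defaults, e.g. set the x86 trace flag:
 *		regs.__rflags |= 0x100;
 *
 *		return task_set_state(target, x86_THREAD_STATE64,
 *		    (thread_state_t)&regs, count);
 *	}
 */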
3953
3954 #if CONFIG_JETSAM
3955 #define HWM_USERCORE_MINSPACE 250 // free space (in MB) required *after* core file creation
3956
3957 void __attribute__((noinline))
3958 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb)
3959 {
3960 task_t task = current_task();
3961 int pid = 0;
3962 const char *procname = "unknown";
3963 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
3964
3965 #ifdef MACH_BSD
3966 pid = proc_selfpid();
3967
3968 if (pid == 1) {
3969 /*
3970 * Cannot have ReportCrash analyzing
3971 * a suspended initproc.
3972 */
3973 return;
3974 }
3975
3976 if (task->bsd_info != NULL)
3977 procname = proc_name_address(current_task()->bsd_info);
3978 #endif
3979
3980 if (hwm_user_cores) {
3981 int error;
3982 uint64_t starttime, end;
3983 clock_sec_t secs = 0;
3984 uint32_t microsecs = 0;
3985
3986 starttime = mach_absolute_time();
3987 /*
3988 * Trigger a coredump of this process. Don't proceed unless we know we won't
3989 * be filling up the disk; and ignore the core size resource limit for this
3990 * core file.
3991 */
3992 if ((error = coredump(current_task()->bsd_info, HWM_USERCORE_MINSPACE, COREDUMP_IGNORE_ULIMIT)) != 0) {
3993 printf("couldn't take coredump of %s[%d]: %d\n", procname, pid, error);
3994 }
3995 /*
3996 * coredump() leaves the task suspended.
3997 */
3998 task_resume_internal(current_task());
3999
4000 end = mach_absolute_time();
4001 absolutetime_to_microtime(end - starttime, &secs, &microsecs);
4002 printf("coredump of %s[%d] taken in %d secs %d microsecs\n",
4003 proc_name_address(current_task()->bsd_info), pid, (int)secs, microsecs);
4004 }
4005
4006 if (disable_exc_resource) {
4007 printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
4008 "supressed by a boot-arg.\n", procname, pid, max_footprint_mb);
4009 return;
4010 }
4011
4012 /*
4013 * A task that has triggered an EXC_RESOURCE should not be
4014 * jetsammed when the device is under memory pressure. Here
4015 * we set the P_MEMSTAT_TERMINATED flag so that the process
4016 * will be skipped if the memorystatus_thread wakes up.
4017 */
4018 proc_memstat_terminated(current_task()->bsd_info, TRUE);
4019
4020 printf("process %s[%d] crossed memory high watermark (%d MB); sending "
4021 "EXC_RESOURCE.\n", procname, pid, max_footprint_mb);
4022
4023 code[0] = code[1] = 0;
4024 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY);
4025 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK);
4026 EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb);
4027
4028 /*
4029 * Use the _internal_ variant so that no user-space
4030 * process can resume our task from under us.
4031 */
4032 task_suspend_internal(task);
4033 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
4034 task_resume_internal(task);
4035
4036 /*
4037 * After the EXC_RESOURCE has been handled, we must clear the
4038 * P_MEMSTAT_TERMINATED flag so that the process can again be
4039 * considered for jetsam if the memorystatus_thread wakes up.
4040 */
4041 proc_memstat_terminated(current_task()->bsd_info, FALSE); /* clear the flag */
4042 }
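
/*
 * Receiver-side sketch (illustrative only): a user-space exception handler
 * registered for EXC_RESOURCE can recover the fields encoded above from
 * code[0] with the matching decode macros.  This assumes the definitions
 * from <kern/exc_resource.h> are visible to the handler.
 *
 *	int type   = EXC_RESOURCE_DECODE_RESOURCE_TYPE(code[0]);
 *	int flavor = EXC_RESOURCE_DECODE_FLAVOR(code[0]);
 *
 *	if (type == RESOURCE_TYPE_MEMORY && flavor == FLAVOR_HIGH_WATERMARK) {
 *		int limit_mb = EXC_RESOURCE_HWM_DECODE_LIMIT(code[0]);
 *		-- the task crossed its limit_mb high watermark
 *	}
 */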
4043
4044 /*
4045 * Callback invoked when a task exceeds its physical footprint limit.
4046 */
4047 void
4048 task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1)
4049 {
4050 ledger_amount_t max_footprint, max_footprint_mb;
4051 ledger_amount_t footprint_after_purge;
4052 task_t task;
4053
4054 if (warning == LEDGER_WARNING_DIPPED_BELOW) {
4055 /*
4056 * Task memory limits only provide a warning on the way up.
4057 */
4058 return;
4059 }
4060
4061 task = current_task();
4062
4063 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &max_footprint);
4064 max_footprint_mb = max_footprint >> 20;
4065
4066 /*
4067 * Try to purge all "volatile" memory in that task first.
4068 */
4069 (void) task_purge_volatile_memory(task);
4070 /* are we still over the limit ? */
4071 ledger_get_balance(task->ledger,
4072 task_ledgers.phys_footprint,
4073 &footprint_after_purge);
4074 if ((!warning &&
4075 footprint_after_purge <= max_footprint) ||
4076 (warning &&
4077 footprint_after_purge <= ((max_footprint *
4078 PHYS_FOOTPRINT_WARNING_LEVEL) / 100))) {
4079 /* all better now */
4080 ledger_reset_callback_state(task->ledger,
4081 task_ledgers.phys_footprint);
4082 return;
4083 }
4084 /* still over the limit after purging... */
4085
4086 /*
4087 * If this is an actual violation (not a warning),
4088 * generate a non-fatal high watermark EXC_RESOURCE.
4089 */
4090 if ((warning == 0) && (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION)) {
4091 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND((int)max_footprint_mb);
4092 }
4093
4094 memorystatus_on_ledger_footprint_exceeded((warning == LEDGER_WARNING_ROSE_ABOVE) ? TRUE : FALSE,
4095 (int)max_footprint_mb);
4096 }
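
/*
 * Worked example of the thresholds above (a PHYS_FOOTPRINT_WARNING_LEVEL of
 * 90 is hypothetical, used only for illustration): with a 600 MB ledger
 * limit, a LEDGER_WARNING_ROSE_ABOVE callback is considered resolved once
 * purging brings the footprint to 600 * 90 / 100 = 540 MB or below, while
 * an actual limit violation (warning == 0) is resolved only at or below
 * 600 MB; otherwise the EXC_RESOURCE / jetsam path above is taken.
 */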
4097
4098 extern int proc_check_footprint_priv(void);
4099
4100 kern_return_t
4101 task_set_phys_footprint_limit(
4102 task_t task,
4103 int new_limit_mb,
4104 int *old_limit_mb)
4105 {
4106 kern_return_t error;
4107
4108 if ((error = proc_check_footprint_priv())) {
4109 return (KERN_NO_ACCESS);
4110 }
4111
4112 return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, FALSE);
4113 }
4114
4115 kern_return_t
4116 task_convert_phys_footprint_limit(
4117 int limit_mb,
4118 int *converted_limit_mb)
4119 {
4120 if (limit_mb == -1) {
4121 /*
4122 * No limit
4123 */
4124 if (max_task_footprint != 0) {
4125 *converted_limit_mb = (int)(max_task_footprint / 1024 / 1024); /* bytes to MB */
4126 } else {
4127 *converted_limit_mb = (int)(LEDGER_LIMIT_INFINITY >> 20);
4128 }
4129 } else {
4130 /* nothing to convert */
4131 *converted_limit_mb = limit_mb;
4132 }
4133 return (KERN_SUCCESS);
4134 }
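
/*
 * Worked example of the MB <-> bytes conversions used here ("MB" in these
 * interfaces means 2^20 bytes, hence the shifts by 20):
 *
 *	install a 512 MB limit:	(ledger_amount_t)512 << 20 == 536870912 bytes
 *	read it back:		536870912 >> 20            == 512 MB
 *	"no limit" (-1):	maps to max_task_footprint when a system-wide
 *				default exists, or to LEDGER_LIMIT_INFINITY >> 20
 */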
4135
4136
4137 kern_return_t
4138 task_set_phys_footprint_limit_internal(
4139 task_t task,
4140 int new_limit_mb,
4141 int *old_limit_mb,
4142 boolean_t trigger_exception)
4143 {
4144 ledger_amount_t old;
4145
4146 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &old);
4147
4148 if (old_limit_mb) {
4149 /*
4150 * Check that limit >> 20 will not give an "unexpected" 32-bit
4151 * result. There are, however, implicit assumptions that -1 mb limit
4152 * equates to LEDGER_LIMIT_INFINITY.
4153 */
4154 assert(((old & 0xFFF0000000000000LL) == 0) || (old == LEDGER_LIMIT_INFINITY));
4155 *old_limit_mb = (int)(old >> 20);
4156 }
4157
4158 if (new_limit_mb == -1) {
4159 /*
4160 * Caller wishes to remove the limit.
4161 */
4162 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
4163 max_task_footprint ? max_task_footprint : LEDGER_LIMIT_INFINITY,
4164 max_task_footprint ? PHYS_FOOTPRINT_WARNING_LEVEL : 0);
4165 return (KERN_SUCCESS);
4166 }
4167
4168 #ifdef CONFIG_NOMONITORS
4169 return (KERN_SUCCESS);
4170 #endif /* CONFIG_NOMONITORS */
4171
4172 task_lock(task);
4173
4174 if (trigger_exception) {
4175 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION;
4176 } else {
4177 task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION;
4178 }
4179
4180 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
4181 (ledger_amount_t)new_limit_mb << 20, PHYS_FOOTPRINT_WARNING_LEVEL);
4182
4183 if (task == current_task()) {
4184 ledger_check_new_balance(task->ledger, task_ledgers.phys_footprint);
4185 }
4186
4187 task_unlock(task);
4188
4189 return (KERN_SUCCESS);
4190 }
4191
4192 kern_return_t
4193 task_get_phys_footprint_limit(
4194 task_t task,
4195 int *limit_mb)
4196 {
4197 ledger_amount_t limit;
4198
4199 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &limit);
4200 /*
4201 * Check that limit >> 20 will not give an "unexpected" signed, 32-bit
4202 * result. There are, however, implicit assumptions that -1 mb limit
4203 * equates to LEDGER_LIMIT_INFINITY.
4204 */
4205 assert(((limit & 0xFFF0000000000000LL) == 0) || (limit == LEDGER_LIMIT_INFINITY));
4206 *limit_mb = (int)(limit >> 20);
4207
4208 return (KERN_SUCCESS);
4209 }
4210 #else /* CONFIG_JETSAM */
4211 kern_return_t
4212 task_set_phys_footprint_limit(
4213 __unused task_t task,
4214 __unused int new_limit_mb,
4215 __unused int *old_limit_mb)
4216 {
4217 return (KERN_FAILURE);
4218 }
4219
4220 kern_return_t
4221 task_get_phys_footprint_limit(
4222 __unused task_t task,
4223 __unused int *limit_mb)
4224 {
4225 return (KERN_FAILURE);
4226 }
4227 #endif /* CONFIG_JETSAM */
4228
4229 /*
4230 * We need to export some functions to other components that
4231 * are currently implemented in macros within the osfmk
4232 * component. Just export them as functions of the same name.
4233 */
4234 boolean_t is_kerneltask(task_t t)
4235 {
4236 if (t == kernel_task)
4237 return (TRUE);
4238
4239 return (FALSE);
4240 }
4241
4242 int
4243 check_for_tasksuspend(task_t task)
4244 {
4245
4246 if (task == TASK_NULL)
4247 return (0);
4248
4249 return (task->suspend_count > 0);
4250 }
4251
4252 #undef current_task
4253 task_t current_task(void);
4254 task_t current_task(void)
4255 {
4256 return (current_task_fast());
4257 }
4258
4259 #undef task_reference
4260 void task_reference(task_t task);
4261 void
4262 task_reference(
4263 task_t task)
4264 {
4265 if (task != TASK_NULL)
4266 task_reference_internal(task);
4267 }
4268
4269 /* defined in bsd/kern/kern_prot.c */
4270 extern int get_audit_token_pid(audit_token_t *audit_token);
4271
4272 int task_pid(task_t task)
4273 {
4274 if (task)
4275 return get_audit_token_pid(&task->audit_token);
4276 return -1;
4277 }
4278
4279
4280 /*
4281 * This routine is always called with the task lock held.
4282 * It returns a thread handle without taking a reference, because the
4283 * caller only operates on the thread while holding the task lock.
4284 */
4285 thread_t
4286 task_findtid(task_t task, uint64_t tid)
4287 {
4288 thread_t thread = THREAD_NULL;
4289
4290 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4291 if (thread->thread_id == tid)
4292 return(thread);
4293 }
4294 return(THREAD_NULL);
4295 }
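
/*
 * Illustrative caller pattern (not from this file): because the returned
 * thread carries no reference, the lookup and every use of the result must
 * stay under the task lock.
 *
 *	task_lock(task);
 *	thread_t th = task_findtid(task, tid);
 *	if (th != THREAD_NULL) {
 *		-- operate on th here, while the task lock is still held
 *	}
 *	task_unlock(task);
 */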
4296
4297 /*
4298 * Control the CPU usage monitor for a task.
4299 */
4300 kern_return_t
4301 task_cpu_usage_monitor_ctl(task_t task, uint32_t *flags)
4302 {
4303 int error = KERN_SUCCESS;
4304
4305 if (*flags & CPUMON_MAKE_FATAL) {
4306 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_CPUMON;
4307 } else {
4308 error = KERN_INVALID_ARGUMENT;
4309 }
4310
4311 return error;
4312 }
4313
4314 /*
4315 * Control the wakeups monitor for a task.
4316 */
4317 kern_return_t
4318 task_wakeups_monitor_ctl(task_t task, uint32_t *flags, int32_t *rate_hz)
4319 {
4320 ledger_t ledger = task->ledger;
4321
4322 task_lock(task);
4323 if (*flags & WAKEMON_GET_PARAMS) {
4324 ledger_amount_t limit;
4325 uint64_t period;
4326
4327 ledger_get_limit(ledger, task_ledgers.interrupt_wakeups, &limit);
4328 ledger_get_period(ledger, task_ledgers.interrupt_wakeups, &period);
4329
4330 if (limit != LEDGER_LIMIT_INFINITY) {
4331 /*
4332 * An active limit means the wakeups monitor is enabled.
4333 */
4334 *rate_hz = (int32_t)(limit / (int64_t)(period / NSEC_PER_SEC));
4335 *flags = WAKEMON_ENABLE;
4336 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
4337 *flags |= WAKEMON_MAKE_FATAL;
4338 }
4339 } else {
4340 *flags = WAKEMON_DISABLE;
4341 *rate_hz = -1;
4342 }
4343
4344 /*
4345 * If WAKEMON_GET_PARAMS is present in flags, all other flags are ignored.
4346 */
4347 task_unlock(task);
4348 return KERN_SUCCESS;
4349 }
4350
4351 if (*flags & WAKEMON_ENABLE) {
4352 if (*flags & WAKEMON_SET_DEFAULTS) {
4353 *rate_hz = task_wakeups_monitor_rate;
4354 }
4355
4356 #ifndef CONFIG_NOMONITORS
4357 if (*flags & WAKEMON_MAKE_FATAL) {
4358 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
4359 }
4360 #endif /* CONFIG_NOMONITORS */
4361
4362 if (*rate_hz < 0) {
4363 task_unlock(task);
4364 return KERN_INVALID_ARGUMENT;
4365 }
4366
4367 #ifndef CONFIG_NOMONITORS
4368 ledger_set_limit(ledger, task_ledgers.interrupt_wakeups, *rate_hz * task_wakeups_monitor_interval,
4369 task_wakeups_monitor_ustackshots_trigger_pct);
4370 ledger_set_period(ledger, task_ledgers.interrupt_wakeups, task_wakeups_monitor_interval * NSEC_PER_SEC);
4371 ledger_enable_callback(ledger, task_ledgers.interrupt_wakeups);
4372 #endif /* CONFIG_NOMONITORS */
4373 } else if (*flags & WAKEMON_DISABLE) {
4374 /*
4375 * Caller wishes to disable wakeups monitor on the task.
4376 *
4377 * Disable telemetry if it was triggered by the wakeups monitor, and
4378 * remove the limit & callback on the wakeups ledger entry.
4379 */
4380 #if CONFIG_TELEMETRY
4381 telemetry_task_ctl_locked(task, TF_WAKEMON_WARNING, 0);
4382 #endif
4383 ledger_disable_refill(ledger, task_ledgers.interrupt_wakeups);
4384 ledger_disable_callback(ledger, task_ledgers.interrupt_wakeups);
4385 }
4386
4387 task_unlock(task);
4388 return KERN_SUCCESS;
4389 }
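
/*
 * Usage sketch (illustrative): enabling the wakeups monitor with the system
 * defaults.  The ledger limit installed above is rate_hz wakeups per second
 * times task_wakeups_monitor_interval seconds, refilled every
 * task_wakeups_monitor_interval * NSEC_PER_SEC nanoseconds, which is why
 * WAKEMON_GET_PARAMS can recover the rate as limit / (period / NSEC_PER_SEC).
 *
 *	uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
 *	int32_t rate_hz = 0;	-- overwritten with task_wakeups_monitor_rate
 *	kern_return_t kr = task_wakeups_monitor_ctl(task, &flags, &rate_hz);
 */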
4390
4391 void
4392 task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1)
4393 {
4394 if (warning == LEDGER_WARNING_ROSE_ABOVE) {
4395 #if CONFIG_TELEMETRY
4396 /*
4397 * This task is in danger of violating the wakeups monitor. Enable telemetry on this task
4398 * so there are micro-stackshots available if and when EXC_RESOURCE is triggered.
4399 */
4400 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 1);
4401 #endif
4402 return;
4403 }
4404
4405 #if CONFIG_TELEMETRY
4406 /*
4407 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
4408 * exceeded the limit, turn telemetry off for the task.
4409 */
4410 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 0);
4411 #endif
4412
4413 if (warning == 0) {
4414 THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE();
4415 }
4416 }
4417
4418 void __attribute__((noinline))
4419 THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE(void)
4420 {
4421 task_t task = current_task();
4422 int pid = 0;
4423 const char *procname = "unknown";
4424 uint64_t observed_wakeups_rate;
4425 uint64_t permitted_wakeups_rate;
4426 uint64_t observation_interval;
4427 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
4428 struct ledger_entry_info lei;
4429
4430 #ifdef MACH_BSD
4431 pid = proc_selfpid();
4432 if (task->bsd_info != NULL)
4433 procname = proc_name_address(current_task()->bsd_info);
4434 #endif
4435
4436 ledger_get_entry_info(task->ledger, task_ledgers.interrupt_wakeups, &lei);
4437
4438 /*
4439 * Disable the exception notification so we don't overwhelm
4440 * the listener with an endless stream of redundant exceptions.
4441 */
4442 uint32_t flags = WAKEMON_DISABLE;
4443 task_wakeups_monitor_ctl(task, &flags, NULL);
4444
4445 observed_wakeups_rate = (lei.lei_balance * (int64_t)NSEC_PER_SEC) / lei.lei_last_refill;
4446 permitted_wakeups_rate = lei.lei_limit / task_wakeups_monitor_interval;
4447 observation_interval = lei.lei_refill_period / NSEC_PER_SEC;
4448
4449 if (disable_exc_resource) {
4450 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
4451 "supressed by a boot-arg\n", procname, pid);
4452 return;
4453 }
4454 if (audio_active) {
4455 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
4456 "supressed due to audio playback\n", procname, pid);
4457 return;
4458 }
4459 printf("process %s[%d] caught causing excessive wakeups. Observed wakeups rate "
4460 "(per sec): %lld; Maximum permitted wakeups rate (per sec): %lld; Observation "
4461 "period: %lld seconds; Task lifetime number of wakeups: %lld\n",
4462 procname, pid, observed_wakeups_rate, permitted_wakeups_rate,
4463 observation_interval, lei.lei_credit);
4464
4465 code[0] = code[1] = 0;
4466 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_WAKEUPS);
4467 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_WAKEUPS_MONITOR);
4468 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_PERMITTED(code[0], task_wakeups_monitor_rate);
4469 EXC_RESOURCE_CPUMONITOR_ENCODE_OBSERVATION_INTERVAL(code[0], observation_interval);
4470 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_OBSERVED(code[1], lei.lei_balance * (int64_t)NSEC_PER_SEC / lei.lei_last_refill);
4471 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
4472
4473 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
4474 task_terminate_internal(task);
4475 }
4476 }
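
/*
 * Worked example of the rate arithmetic above (all values hypothetical):
 * with lei_balance = 90000 wakeups accumulated over lei_last_refill =
 * 300 * NSEC_PER_SEC nanoseconds, the observed rate is
 * 90000 * NSEC_PER_SEC / (300 * NSEC_PER_SEC) = 300 wakeups/sec; with
 * lei_limit = 45000 and task_wakeups_monitor_interval = 300 seconds, the
 * permitted rate is 45000 / 300 = 150 wakeups/sec, so the limit was
 * exceeded and EXC_RESOURCE is raised.
 */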
4477
4478 kern_return_t
4479 task_purge_volatile_memory(
4480 task_t task)
4481 {
4482 vm_map_t map;
4483 int num_object_purged;
4484
4485 if (task == TASK_NULL)
4486 return KERN_INVALID_TASK;
4487
4488 task_lock(task);
4489
4490 if (!task->active) {
4491 task_unlock(task);
4492 return KERN_INVALID_TASK;
4493 }
4494 map = task->map;
4495 if (map == VM_MAP_NULL) {
4496 task_unlock(task);
4497 return KERN_INVALID_TASK;
4498 }
4499 vm_map_reference(task->map);
4500
4501 task_unlock(task);
4502
4503 num_object_purged = vm_map_purge(map);
4504 vm_map_deallocate(map);
4505
4506 return KERN_SUCCESS;
4507 }
4508
4509 /* Placeholders for the task set/get voucher interfaces */
4510 kern_return_t
4511 task_get_mach_voucher(
4512 task_t task,
4513 mach_voucher_selector_t __unused which,
4514 ipc_voucher_t *voucher)
4515 {
4516 if (TASK_NULL == task)
4517 return KERN_INVALID_TASK;
4518
4519 *voucher = NULL;
4520 return KERN_SUCCESS;
4521 }
4522
4523 kern_return_t
4524 task_set_mach_voucher(
4525 task_t task,
4526 ipc_voucher_t __unused voucher)
4527 {
4528 if (TASK_NULL == task)
4529 return KERN_INVALID_TASK;
4530
4531 return KERN_SUCCESS;
4532 }
4533
4534 kern_return_t
4535 task_swap_mach_voucher(
4536 task_t task,
4537 ipc_voucher_t new_voucher,
4538 ipc_voucher_t *in_out_old_voucher)
4539 {
4540 if (TASK_NULL == task)
4541 return KERN_INVALID_TASK;
4542
4543 *in_out_old_voucher = new_voucher;
4544 return KERN_SUCCESS;
4545 }
4546
4547 void task_set_gpu_denied(task_t task, boolean_t denied)
4548 {
4549 task_lock(task);
4550
4551 if (denied) {
4552 task->t_flags |= TF_GPU_DENIED;
4553 } else {
4554 task->t_flags &= ~TF_GPU_DENIED;
4555 }
4556
4557 task_unlock(task);
4558 }
4559
4560 boolean_t task_is_gpu_denied(task_t task)
4561 {
4562 /* We don't need the lock to read this flag */
4563 return (task->t_flags & TF_GPU_DENIED) ? TRUE : FALSE;
4564 }
4565
4566 void task_update_logical_writes(task_t task, uint32_t io_size, int flags)
4567 {
4568 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_DATA_WRITE)) | DBG_FUNC_NONE, task_pid(task), io_size, flags, 0, 0);
4569 switch (flags) {
4570 case TASK_WRITE_IMMEDIATE:
4571 OSAddAtomic64(io_size, (SInt64 *)&(task->task_immediate_writes));
4572 break;
4573 case TASK_WRITE_DEFERRED:
4574 OSAddAtomic64(io_size, (SInt64 *)&(task->task_deferred_writes));
4575 break;
4576 case TASK_WRITE_INVALIDATED:
4577 OSAddAtomic64(io_size, (SInt64 *)&(task->task_invalidated_writes));
4578 break;
4579 case TASK_WRITE_METADATA:
4580 OSAddAtomic64(io_size, (SInt64 *)&(task->task_metadata_writes));
4581 break;
4582 }
4583 return;
4584 }