[apple/xnu.git] / osfmk / kern / task.c (xnu-3248.60.10)
1 /*
2 * Copyright (c) 2000-2010, 2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_FREE_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 * File: kern/task.c
58 * Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub,
59 * David Black
60 *
61 * Task management primitives implementation.
62 */
63 /*
64 * Copyright (c) 1993 The University of Utah and
65 * the Computer Systems Laboratory (CSL). All rights reserved.
66 *
67 * Permission to use, copy, modify and distribute this software and its
68 * documentation is hereby granted, provided that both the copyright
69 * notice and this permission notice appear in all copies of the
70 * software, derivative works or modified versions, and any portions
71 * thereof, and that both notices appear in supporting documentation.
72 *
73 * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
74 * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
75 * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
76 *
77 * CSL requests users of this software to return to csl-dist@cs.utah.edu any
78 * improvements that they make and grant CSL redistribution rights.
79 *
80 */
81 /*
82 * NOTICE: This file was modified by McAfee Research in 2004 to introduce
83 * support for mandatory and extensible security protections. This notice
84 * is included in support of clause 2.2 (b) of the Apple Public License,
85 * Version 2.0.
86 * Copyright (c) 2005 SPARTA, Inc.
87 */
88
89 #include <mach/mach_types.h>
90 #include <mach/boolean.h>
91 #include <mach/host_priv.h>
92 #include <mach/machine/vm_types.h>
93 #include <mach/vm_param.h>
94 #include <mach/mach_vm.h>
95 #include <mach/semaphore.h>
96 #include <mach/task_info.h>
97 #include <mach/task_special_ports.h>
98
99 #include <ipc/ipc_importance.h>
100 #include <ipc/ipc_types.h>
101 #include <ipc/ipc_space.h>
102 #include <ipc/ipc_entry.h>
103 #include <ipc/ipc_hash.h>
104
105 #include <kern/kern_types.h>
106 #include <kern/mach_param.h>
107 #include <kern/misc_protos.h>
108 #include <kern/task.h>
109 #include <kern/thread.h>
110 #include <kern/coalition.h>
111 #include <kern/zalloc.h>
112 #include <kern/kalloc.h>
113 #include <kern/kern_cdata.h>
114 #include <kern/processor.h>
115 #include <kern/sched_prim.h> /* for thread_wakeup */
116 #include <kern/ipc_tt.h>
117 #include <kern/host.h>
118 #include <kern/clock.h>
119 #include <kern/timer.h>
120 #include <kern/assert.h>
121 #include <kern/sync_lock.h>
122 #include <kern/affinity.h>
123 #include <kern/exc_resource.h>
124 #include <kern/machine.h>
125 #include <corpses/task_corpse.h>
126 #if CONFIG_TELEMETRY
127 #include <kern/telemetry.h>
128 #endif
129
130 #include <vm/pmap.h>
131 #include <vm/vm_map.h>
132 #include <vm/vm_kern.h> /* for kernel_map, ipc_kernel_map */
133 #include <vm/vm_pageout.h>
134 #include <vm/vm_protos.h>
135 #include <vm/vm_purgeable_internal.h>
136
137 #include <sys/resource.h>
138 #include <sys/signalvar.h> /* for coredump */
139
140 /*
141 * Exported interfaces
142 */
143
144 #include <mach/task_server.h>
145 #include <mach/mach_host_server.h>
146 #include <mach/host_security_server.h>
147 #include <mach/mach_port_server.h>
148
149 #include <vm/vm_shared_region.h>
150
151 #include <libkern/OSDebug.h>
152 #include <libkern/OSAtomic.h>
153
154 #if CONFIG_ATM
155 #include <atm/atm_internal.h>
156 #endif
157
158 #include <kern/sfi.h>
159
160 #if KPERF
161 extern int kpc_force_all_ctrs(task_t, int);
162 #endif
163
164 uint32_t qos_override_mode;
165
166 task_t kernel_task;
167 zone_t task_zone;
168 lck_attr_t task_lck_attr;
169 lck_grp_t task_lck_grp;
170 lck_grp_attr_t task_lck_grp_attr;
171
172 /* Flag set by core audio when audio is playing. Used to stifle EXC_RESOURCE generation when active. */
173 int audio_active = 0;
174
175 zinfo_usage_store_t tasks_tkm_private;
176 zinfo_usage_store_t tasks_tkm_shared;
177
178 /* A container to accumulate statistics for expired tasks */
179 expired_task_statistics_t dead_task_statistics;
180 lck_spin_t dead_task_statistics_lock;
181
182 ledger_template_t task_ledger_template = NULL;
183
184 struct _task_ledger_indices task_ledgers __attribute__((used)) =
185 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
186 { 0 /* initialized at runtime */},
187 #ifdef CONFIG_BANK
188 -1, -1,
189 #endif
190 };
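/*
 * Note: the -1 entries above are sentinels meaning "ledger index not yet
 * registered".  init_task_ledgers() replaces each of them with the index
 * returned by ledger_entry_add() (or sfi_ledger_entry_add() for the SFI
 * wait-time entries) and panics if any of them is still negative afterwards.
 */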
191
192 /* System sleep state */
193 boolean_t tasks_suspend_state;
194
195
196 void init_task_ledgers(void);
197 void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1);
198 void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1);
199 void __attribute__((noinline)) THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE(void);
200 void __attribute__((noinline)) PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb);
201
202 kern_return_t task_suspend_internal(task_t);
203 kern_return_t task_resume_internal(task_t);
204 static kern_return_t task_start_halt_locked(task_t task, boolean_t should_mark_corpse);
205
206 extern kern_return_t iokit_task_terminate(task_t task);
207
208 void proc_init_cpumon_params(void);
209 extern kern_return_t exception_deliver(thread_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t, struct exception_action *, lck_mtx_t *);
210
211 // Warn tasks when they hit 80% of their memory limit.
212 #define PHYS_FOOTPRINT_WARNING_LEVEL 80
213
214 #define TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT 150 /* wakeups per second */
215 #define TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL 300 /* in seconds. */
216
217 /*
218 * Level (in terms of percentage of the limit) at which the wakeups monitor triggers telemetry.
219 *
220 * (i.e., when the task's wakeups rate exceeds 70% of the limit, start taking user
221 * stacktraces, aka micro-stackshots)
222 */
223 #define TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER 70
224
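/*
 * Worked example with the defaults above (illustrative only): with a limit of
 * 150 wakeups/sec observed over a 300 second interval and a 70% trigger,
 * telemetry (micro-stackshots) starts once a task's wakeups rate passes
 * roughly 0.70 * 150 = 105 wakeups/sec, and EXC_RESOURCE handling kicks in
 * once the 150/sec limit itself is exceeded.
 */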
225 int task_wakeups_monitor_interval; /* In seconds. Time period over which wakeups rate is observed */
226 int task_wakeups_monitor_rate; /* In hz. Maximum allowable wakeups per task before EXC_RESOURCE is sent */
227
228 int task_wakeups_monitor_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
229
230 int disable_exc_resource; /* Global override to suppress EXC_RESOURCE for resource monitor violations. */
231
232 ledger_amount_t max_task_footprint = 0; /* Per-task limit on physical memory consumption in bytes */
233 int max_task_footprint_mb = 0; /* Per-task limit on physical memory consumption in megabytes */
234
235 #if MACH_ASSERT
236 int pmap_ledgers_panic = 1;
237 #endif /* MACH_ASSERT */
238
239 int task_max = CONFIG_TASK_MAX; /* Max number of tasks */
240
241 int hwm_user_cores = 0; /* high watermark violations generate user core files */
242
243 #ifdef MACH_BSD
244 extern void proc_getexecutableuuid(void *, unsigned char *, unsigned long);
245 extern int proc_pid(struct proc *p);
246 extern int proc_selfpid(void);
247 extern char *proc_name_address(struct proc *p);
248 extern uint64_t get_dispatchqueue_offset_from_proc(void *);
249 #if CONFIG_JETSAM
250 extern void proc_memstat_terminated(struct proc* p, boolean_t set);
251 extern void memorystatus_on_ledger_footprint_exceeded(int warning, const int max_footprint_mb);
252 #endif
253 #endif
254 #if MACH_ASSERT
255 extern int pmap_ledgers_panic;
256 #endif /* MACH_ASSERT */
257
258 /* Forwards */
259
260 void task_hold_locked(
261 task_t task);
262 void task_wait_locked(
263 task_t task,
264 boolean_t until_not_runnable);
265 void task_release_locked(
266 task_t task);
267 void task_free(
268 task_t task );
269 void task_synchronizer_destroy_all(
270 task_t task);
271
272 int check_for_tasksuspend(
273 task_t task);
274
275 void
276 task_backing_store_privileged(
277 task_t task)
278 {
279 task_lock(task);
280 task->priv_flags |= VM_BACKING_STORE_PRIV;
281 task_unlock(task);
282 return;
283 }
284
285
286 void
287 task_set_64bit(
288 task_t task,
289 boolean_t is64bit)
290 {
291 #if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
292 thread_t thread;
293 #endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */
294
295 task_lock(task);
296
297 if (is64bit) {
298 if (task_has_64BitAddr(task))
299 goto out;
300 task_set_64BitAddr(task);
301 } else {
302 if ( !task_has_64BitAddr(task))
303 goto out;
304 task_clear_64BitAddr(task);
305 }
306 /* FIXME: On x86, the thread save state flavor can diverge from the
307 * task's 64-bit feature flag due to the 32-bit/64-bit register save
308 * state dichotomy. Since we can be pre-empted in this interval,
309 * certain routines may observe the thread as being in an inconsistent
310 * state with respect to its task's 64-bitness.
311 */
312
313 #if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
314 queue_iterate(&task->threads, thread, thread_t, task_threads) {
315 thread_mtx_lock(thread);
316 machine_thread_switch_addrmode(thread);
317 thread_mtx_unlock(thread);
318 }
319 #endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */
320
321 out:
322 task_unlock(task);
323 }
324
325
326 void
327 task_set_dyld_info(task_t task, mach_vm_address_t addr, mach_vm_size_t size)
328 {
329 task_lock(task);
330 task->all_image_info_addr = addr;
331 task->all_image_info_size = size;
332 task_unlock(task);
333 }
334
335 void
336 task_atm_reset(__unused task_t task) {
337
338 #if CONFIG_ATM
339 if (task->atm_context != NULL) {
340 atm_task_descriptor_destroy(task->atm_context);
341 task->atm_context = NULL;
342 }
343 #endif
344
345 }
346
347 void
348 task_bank_reset(__unused task_t task) {
349
350 #if CONFIG_BANK
351 if (task->bank_context != NULL) {
352 bank_task_destroy(task);
353 }
354 #endif
355
356 }
357
358 /*
359 * NOTE: This should only be called when the P_LINTRANSIT
360 * flag is set (the proc_trans lock is held) on the
361 * proc associated with the task.
362 */
363 void
364 task_bank_init(__unused task_t task) {
365
366 #if CONFIG_BANK
367 if (task->bank_context != NULL) {
368 panic("Task bank init called with non null bank context for task: %p and bank_context: %p", task, task->bank_context);
369 }
370 bank_task_initialize(task);
371 #endif
372
373 }
374
375 #if TASK_REFERENCE_LEAK_DEBUG
376 #include <kern/btlog.h>
377
378 decl_simple_lock_data(static,task_ref_lock);
379 static btlog_t *task_ref_btlog;
380 #define TASK_REF_OP_INCR 0x1
381 #define TASK_REF_OP_DECR 0x2
382
383 #define TASK_REF_BTDEPTH 7
384
385 static void
386 task_ref_lock_lock(void *context)
387 {
388 simple_lock((simple_lock_t)context);
389 }
390 static void
391 task_ref_lock_unlock(void *context)
392 {
393 simple_unlock((simple_lock_t)context);
394 }
395
396 void
397 task_reference_internal(task_t task)
398 {
399 void * bt[TASK_REF_BTDEPTH];
400 int numsaved = 0;
401
402 numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
403
404 (void)hw_atomic_add(&(task)->ref_count, 1);
405 btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_INCR,
406 bt, numsaved);
407 }
408
409 uint32_t
410 task_deallocate_internal(task_t task)
411 {
412 void * bt[TASK_REF_BTDEPTH];
413 int numsaved = 0;
414
415 numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
416
417 btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_DECR,
418 bt, numsaved);
419 return hw_atomic_sub(&(task)->ref_count, 1);
420 }
421
422 #endif /* TASK_REFERENCE_LEAK_DEBUG */
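/*
 * When TASK_REFERENCE_LEAK_DEBUG is not configured, task_reference_internal()
 * and task_deallocate_internal() presumably reduce to the plain
 * hw_atomic_add()/hw_atomic_sub() forms without the btlog bookkeeping; the
 * variants above additionally record a backtrace for every reference-count
 * operation so that leaked task references can be attributed.
 */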
423
424 void
425 task_init(void)
426 {
427
428 lck_grp_attr_setdefault(&task_lck_grp_attr);
429 lck_grp_init(&task_lck_grp, "task", &task_lck_grp_attr);
430 lck_attr_setdefault(&task_lck_attr);
431 lck_mtx_init(&tasks_threads_lock, &task_lck_grp, &task_lck_attr);
432
433 task_zone = zinit(
434 sizeof(struct task),
435 task_max * sizeof(struct task),
436 TASK_CHUNK * sizeof(struct task),
437 "tasks");
438
439 zone_change(task_zone, Z_NOENCRYPT, TRUE);
440
441 /*
442 * Configure per-task memory limit.
443 * The boot-arg is interpreted as Megabytes,
444 * and takes precedence over the device tree.
445 * Setting the boot-arg to 0 disables task limits.
446 */
447 if (!PE_parse_boot_argn("max_task_pmem", &max_task_footprint_mb,
448 sizeof (max_task_footprint_mb))) {
449 /*
450 * No limit was found in boot-args, so go look in the device tree.
451 */
452 if (!PE_get_default("kern.max_task_pmem", &max_task_footprint_mb,
453 sizeof(max_task_footprint_mb))) {
454 /*
455 * No limit was found in device tree.
456 */
457 max_task_footprint_mb = 0;
458 }
459 }
460
461 if (max_task_footprint_mb != 0) {
462 #if CONFIG_JETSAM
463 if (max_task_footprint_mb < 50) {
464 printf("Warning: max_task_pmem %d below minimum.\n",
465 max_task_footprint_mb);
466 max_task_footprint_mb = 50;
467 }
468 printf("Limiting task physical memory footprint to %d MB\n",
469 max_task_footprint_mb);
470
471 max_task_footprint = (ledger_amount_t)max_task_footprint_mb * 1024 * 1024; // Convert MB to bytes
472 #else
473 printf("Warning: max_task_footprint specified, but jetsam not configured; ignoring.\n");
474 #endif
475 }
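	/*
	 * Example (hypothetical boot-args value): "max_task_pmem=512" yields
	 * max_task_footprint_mb = 512 and, with jetsam configured,
	 * max_task_footprint = 512 * 1024 * 1024 = 536870912 bytes; the
	 * phys_footprint warning callback then fires at
	 * PHYS_FOOTPRINT_WARNING_LEVEL (80%) of that limit, around 410 MB.
	 */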
476
477 #if MACH_ASSERT
478 PE_parse_boot_argn("pmap_ledgers_panic", &pmap_ledgers_panic,
479 sizeof (pmap_ledgers_panic));
480 #endif /* MACH_ASSERT */
481
482 if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores,
483 sizeof (hwm_user_cores))) {
484 hwm_user_cores = 0;
485 }
486
487 if (PE_parse_boot_argn("qos_override_mode", &qos_override_mode, sizeof(qos_override_mode))) {
488 printf("QOS override mode: 0x%08x\n", qos_override_mode);
489 } else {
490 qos_override_mode = QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE;
491 }
492
493 proc_init_cpumon_params();
494
495 if (!PE_parse_boot_argn("task_wakeups_monitor_rate", &task_wakeups_monitor_rate, sizeof (task_wakeups_monitor_rate))) {
496 task_wakeups_monitor_rate = TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT;
497 }
498
499 if (!PE_parse_boot_argn("task_wakeups_monitor_interval", &task_wakeups_monitor_interval, sizeof (task_wakeups_monitor_interval))) {
500 task_wakeups_monitor_interval = TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL;
501 }
502
503 if (!PE_parse_boot_argn("task_wakeups_monitor_ustackshots_trigger_pct", &task_wakeups_monitor_ustackshots_trigger_pct,
504 sizeof (task_wakeups_monitor_ustackshots_trigger_pct))) {
505 task_wakeups_monitor_ustackshots_trigger_pct = TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER;
506 }
507
508 if (!PE_parse_boot_argn("disable_exc_resource", &disable_exc_resource,
509 sizeof (disable_exc_resource))) {
510 disable_exc_resource = 0;
511 }
512
513 /*
514 * If we have coalitions, coalition_init() will call init_task_ledgers() as it
515 * sets up the ledgers for the default coalition. If we don't have coalitions,
516 * then we have to call it now.
517 */
518 #if CONFIG_COALITIONS
519 assert(task_ledger_template);
520 #else /* CONFIG_COALITIONS */
521 init_task_ledgers();
522 #endif /* CONFIG_COALITIONS */
523
524 #if TASK_REFERENCE_LEAK_DEBUG
525 simple_lock_init(&task_ref_lock, 0);
526 task_ref_btlog = btlog_create(100000,
527 TASK_REF_BTDEPTH,
528 task_ref_lock_lock,
529 task_ref_lock_unlock,
530 &task_ref_lock);
531 assert(task_ref_btlog);
532 #endif
533
534 /*
535 * Create the kernel task as the first task.
536 */
537 #ifdef __LP64__
538 if (task_create_internal(TASK_NULL, NULL, FALSE, TRUE, &kernel_task) != KERN_SUCCESS)
539 #else
540 if (task_create_internal(TASK_NULL, NULL, FALSE, FALSE, &kernel_task) != KERN_SUCCESS)
541 #endif
542 panic("task_init\n");
543
544 vm_map_deallocate(kernel_task->map);
545 kernel_task->map = kernel_map;
546 lck_spin_init(&dead_task_statistics_lock, &task_lck_grp, &task_lck_attr);
547
548 }
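/*
 * Note: task_create_internal() gave kernel_task a freshly created VM map;
 * task_init() drops that map above and points kernel_task at the global
 * kernel_map instead, since the kernel's address space already exists.
 */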
549
550 /*
551 * Create a task running in the kernel address space. It may
552 * have its own map of size mem_size and may have ipc privileges.
553 */
554 kern_return_t
555 kernel_task_create(
556 __unused task_t parent_task,
557 __unused vm_offset_t map_base,
558 __unused vm_size_t map_size,
559 __unused task_t *child_task)
560 {
561 return (KERN_INVALID_ARGUMENT);
562 }
563
564 kern_return_t
565 task_create(
566 task_t parent_task,
567 __unused ledger_port_array_t ledger_ports,
568 __unused mach_msg_type_number_t num_ledger_ports,
569 __unused boolean_t inherit_memory,
570 __unused task_t *child_task) /* OUT */
571 {
572 if (parent_task == TASK_NULL)
573 return(KERN_INVALID_ARGUMENT);
574
575 /*
576 * No longer supported: too many calls assume that a task has a valid
577 * process attached.
578 */
579 return(KERN_FAILURE);
580 }
581
582 kern_return_t
583 host_security_create_task_token(
584 host_security_t host_security,
585 task_t parent_task,
586 __unused security_token_t sec_token,
587 __unused audit_token_t audit_token,
588 __unused host_priv_t host_priv,
589 __unused ledger_port_array_t ledger_ports,
590 __unused mach_msg_type_number_t num_ledger_ports,
591 __unused boolean_t inherit_memory,
592 __unused task_t *child_task) /* OUT */
593 {
594 if (parent_task == TASK_NULL)
595 return(KERN_INVALID_ARGUMENT);
596
597 if (host_security == HOST_NULL)
598 return(KERN_INVALID_SECURITY);
599
600 /*
601 * No longer supported.
602 */
603 return(KERN_FAILURE);
604 }
605
606 /*
607 * Task ledgers
608 * ------------
609 *
610 * phys_footprint
611 * Physical footprint: This is the sum of:
612 * + (internal - alternate_accounting)
613 * + (internal_compressed - alternate_accounting_compressed)
614 * + iokit_mapped
615 * + purgeable_nonvolatile
616 * + purgeable_nonvolatile_compressed
617 *
618 * internal
619 * The task's anonymous memory, which on iOS is always resident.
620 *
621 * internal_compressed
622 * Amount of this task's internal memory which is held by the compressor.
623 * Such memory is no longer actually resident for the task [i.e., resident in its pmap],
624 * and could be either decompressed back into memory, or paged out to storage, depending
625 * on our implementation.
626 *
627 * iokit_mapped
628 * IOKit mappings: The total size of all IOKit mappings in this task, regardless of
629  *		clean/dirty or internal/external state.
630 *
631 * alternate_accounting
632 * The number of internal dirty pages which are part of IOKit mappings. By definition, these pages
633 * are counted in both internal *and* iokit_mapped, so we must subtract them from the total to avoid
634 * double counting.
635 */
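/*
 * Illustrative footprint arithmetic (made-up numbers): a task with
 * internal = 100 MB, internal_compressed = 20 MB, iokit_mapped = 10 MB,
 * alternate_accounting = 4 MB, alternate_accounting_compressed = 1 MB,
 * purgeable_nonvolatile = 5 MB and purgeable_nonvolatile_compressed = 2 MB
 * has phys_footprint = (100 - 4) + (20 - 1) + 10 + 5 + 2 = 132 MB.
 */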
636 void
637 init_task_ledgers(void)
638 {
639 ledger_template_t t;
640
641 assert(task_ledger_template == NULL);
642 assert(kernel_task == TASK_NULL);
643
644 if ((t = ledger_template_create("Per-task ledger")) == NULL)
645 panic("couldn't create task ledger template");
646
647 task_ledgers.cpu_time = ledger_entry_add(t, "cpu_time", "sched", "ns");
648 task_ledgers.tkm_private = ledger_entry_add(t, "tkm_private",
649 "physmem", "bytes");
650 task_ledgers.tkm_shared = ledger_entry_add(t, "tkm_shared", "physmem",
651 "bytes");
652 task_ledgers.phys_mem = ledger_entry_add(t, "phys_mem", "physmem",
653 "bytes");
654 task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem",
655 "bytes");
656 task_ledgers.internal = ledger_entry_add(t, "internal", "physmem",
657 "bytes");
658 task_ledgers.iokit_mapped = ledger_entry_add(t, "iokit_mapped", "mappings",
659 "bytes");
660 task_ledgers.alternate_accounting = ledger_entry_add(t, "alternate_accounting", "physmem",
661 "bytes");
662 task_ledgers.alternate_accounting_compressed = ledger_entry_add(t, "alternate_accounting_compressed", "physmem",
663 "bytes");
664 task_ledgers.phys_footprint = ledger_entry_add(t, "phys_footprint", "physmem",
665 "bytes");
666 task_ledgers.internal_compressed = ledger_entry_add(t, "internal_compressed", "physmem",
667 "bytes");
668 task_ledgers.purgeable_volatile = ledger_entry_add(t, "purgeable_volatile", "physmem", "bytes");
669 task_ledgers.purgeable_nonvolatile = ledger_entry_add(t, "purgeable_nonvolatile", "physmem", "bytes");
670 task_ledgers.purgeable_volatile_compressed = ledger_entry_add(t, "purgeable_volatile_compress", "physmem", "bytes");
671 task_ledgers.purgeable_nonvolatile_compressed = ledger_entry_add(t, "purgeable_nonvolatile_compress", "physmem", "bytes");
672 task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power",
673 "count");
674 task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power",
675 "count");
676
677 #if CONFIG_SCHED_SFI
678 sfi_class_id_t class_id, ledger_alias;
679 for (class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
680 task_ledgers.sfi_wait_times[class_id] = -1;
681 }
682
683 /* don't account for UNSPECIFIED */
684 for (class_id = SFI_CLASS_UNSPECIFIED + 1; class_id < MAX_SFI_CLASS_ID; class_id++) {
685 ledger_alias = sfi_get_ledger_alias_for_class(class_id);
686 if (ledger_alias != SFI_CLASS_UNSPECIFIED) {
687 /* Check to see if alias has been registered yet */
688 if (task_ledgers.sfi_wait_times[ledger_alias] != -1) {
689 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias];
690 } else {
691 /* Otherwise, initialize it first */
692 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias] = sfi_ledger_entry_add(t, ledger_alias);
693 }
694 } else {
695 task_ledgers.sfi_wait_times[class_id] = sfi_ledger_entry_add(t, class_id);
696 }
697
698 if (task_ledgers.sfi_wait_times[class_id] < 0) {
699 panic("couldn't create entries for task ledger template for SFI class 0x%x", class_id);
700 }
701 }
702
703 assert(task_ledgers.sfi_wait_times[MAX_SFI_CLASS_ID -1] != -1);
704 #endif /* CONFIG_SCHED_SFI */
705
706 #ifdef CONFIG_BANK
707 task_ledgers.cpu_time_billed_to_me = ledger_entry_add(t, "cpu_time_billed_to_me", "sched", "ns");
708 task_ledgers.cpu_time_billed_to_others = ledger_entry_add(t, "cpu_time_billed_to_others", "sched", "ns");
709 #endif
710 if ((task_ledgers.cpu_time < 0) ||
711 (task_ledgers.tkm_private < 0) ||
712 (task_ledgers.tkm_shared < 0) ||
713 (task_ledgers.phys_mem < 0) ||
714 (task_ledgers.wired_mem < 0) ||
715 (task_ledgers.internal < 0) ||
716 (task_ledgers.iokit_mapped < 0) ||
717 (task_ledgers.alternate_accounting < 0) ||
718 (task_ledgers.alternate_accounting_compressed < 0) ||
719 (task_ledgers.phys_footprint < 0) ||
720 (task_ledgers.internal_compressed < 0) ||
721 (task_ledgers.purgeable_volatile < 0) ||
722 (task_ledgers.purgeable_nonvolatile < 0) ||
723 (task_ledgers.purgeable_volatile_compressed < 0) ||
724 (task_ledgers.purgeable_nonvolatile_compressed < 0) ||
725 (task_ledgers.platform_idle_wakeups < 0) ||
726 (task_ledgers.interrupt_wakeups < 0)
727 #ifdef CONFIG_BANK
728 || (task_ledgers.cpu_time_billed_to_me < 0) || (task_ledgers.cpu_time_billed_to_others < 0)
729 #endif
730 ) {
731 panic("couldn't create entries for task ledger template");
732 }
733
734 ledger_track_maximum(t, task_ledgers.phys_footprint, 60);
735 #if MACH_ASSERT
736 if (pmap_ledgers_panic) {
737 ledger_panic_on_negative(t, task_ledgers.phys_footprint);
738 ledger_panic_on_negative(t, task_ledgers.internal);
739 ledger_panic_on_negative(t, task_ledgers.internal_compressed);
740 ledger_panic_on_negative(t, task_ledgers.iokit_mapped);
741 ledger_panic_on_negative(t, task_ledgers.alternate_accounting);
742 ledger_panic_on_negative(t, task_ledgers.alternate_accounting_compressed);
743 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile);
744 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile);
745 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile_compressed);
746 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile_compressed);
747 }
748 #endif /* MACH_ASSERT */
749
750 #if CONFIG_JETSAM
751 ledger_set_callback(t, task_ledgers.phys_footprint, task_footprint_exceeded, NULL, NULL);
752 #endif
753
754 ledger_set_callback(t, task_ledgers.interrupt_wakeups,
755 task_wakeups_rate_exceeded, NULL, NULL);
756
757 task_ledger_template = t;
758 }
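/*
 * The template built here is consumed by task_create_internal(), roughly:
 *
 *	ledger = ledger_instantiate(task_ledger_template,
 *	                            LEDGER_CREATE_ACTIVE_ENTRIES);
 *	new_task->ledger = ledger;
 *
 * so each task gets one live ledger entry per index registered above.
 */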
759
760 kern_return_t
761 task_create_internal(
762 task_t parent_task,
763 coalition_t *parent_coalitions __unused,
764 boolean_t inherit_memory,
765 boolean_t is_64bit,
766 task_t *child_task) /* OUT */
767 {
768 task_t new_task;
769 vm_shared_region_t shared_region;
770 ledger_t ledger = NULL;
771
772 new_task = (task_t) zalloc(task_zone);
773
774 if (new_task == TASK_NULL)
775 return(KERN_RESOURCE_SHORTAGE);
776
777 /* one ref for just being alive; one for our caller */
778 new_task->ref_count = 2;
779
780 /* allocate with active entries */
781 assert(task_ledger_template != NULL);
782 if ((ledger = ledger_instantiate(task_ledger_template,
783 LEDGER_CREATE_ACTIVE_ENTRIES)) == NULL) {
784 zfree(task_zone, new_task);
785 return(KERN_RESOURCE_SHORTAGE);
786 }
787
788 new_task->ledger = ledger;
789
790 #if defined(CONFIG_SCHED_MULTIQ)
791 new_task->sched_group = sched_group_create();
792 #endif
793
794 /* if inherit_memory is true, parent_task MUST not be NULL */
795 if (inherit_memory)
796 new_task->map = vm_map_fork(ledger, parent_task->map);
797 else
798 new_task->map = vm_map_create(pmap_create(ledger, 0, is_64bit),
799 (vm_map_offset_t)(VM_MIN_ADDRESS),
800 (vm_map_offset_t)(VM_MAX_ADDRESS), TRUE);
801
802 /* Inherit memlock limit from parent */
803 if (parent_task)
804 vm_map_set_user_wire_limit(new_task->map, (vm_size_t)parent_task->map->user_wire_limit);
805
806 lck_mtx_init(&new_task->lock, &task_lck_grp, &task_lck_attr);
807 queue_init(&new_task->threads);
808 new_task->suspend_count = 0;
809 new_task->thread_count = 0;
810 new_task->active_thread_count = 0;
811 new_task->user_stop_count = 0;
812 new_task->legacy_stop_count = 0;
813 new_task->active = TRUE;
814 new_task->halting = FALSE;
815 new_task->user_data = NULL;
816 new_task->faults = 0;
817 new_task->cow_faults = 0;
818 new_task->pageins = 0;
819 new_task->messages_sent = 0;
820 new_task->messages_received = 0;
821 new_task->syscalls_mach = 0;
822 new_task->priv_flags = 0;
823 new_task->syscalls_unix=0;
824 new_task->c_switch = new_task->p_switch = new_task->ps_switch = 0;
825 new_task->t_flags = 0;
826 new_task->importance = 0;
827
828 #if CONFIG_ATM
829 new_task->atm_context = NULL;
830 #endif
831 #if CONFIG_BANK
832 new_task->bank_context = NULL;
833 #endif
834
835 zinfo_task_init(new_task);
836
837 #ifdef MACH_BSD
838 new_task->bsd_info = NULL;
839 new_task->corpse_info = NULL;
840 #endif /* MACH_BSD */
841
842 #if CONFIG_JETSAM
843 if (max_task_footprint != 0) {
844 ledger_set_limit(ledger, task_ledgers.phys_footprint, max_task_footprint, PHYS_FOOTPRINT_WARNING_LEVEL);
845 }
846 #endif
847
848 if (task_wakeups_monitor_rate != 0) {
849 uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
850 int32_t rate; // Ignored because of WAKEMON_SET_DEFAULTS
851 task_wakeups_monitor_ctl(new_task, &flags, &rate);
852 }
853
854 #if defined(__i386__) || defined(__x86_64__)
855 new_task->i386_ldt = 0;
856 #endif
857
858 new_task->task_debug = NULL;
859
860 queue_init(&new_task->semaphore_list);
861 new_task->semaphores_owned = 0;
862
863 ipc_task_init(new_task, parent_task);
864
865 new_task->total_user_time = 0;
866 new_task->total_system_time = 0;
867
868 new_task->vtimers = 0;
869
870 new_task->shared_region = NULL;
871
872 new_task->affinity_space = NULL;
873
874 new_task->pidsuspended = FALSE;
875 new_task->frozen = FALSE;
876 new_task->changing_freeze_state = FALSE;
877 new_task->rusage_cpu_flags = 0;
878 new_task->rusage_cpu_percentage = 0;
879 new_task->rusage_cpu_interval = 0;
880 new_task->rusage_cpu_deadline = 0;
881 new_task->rusage_cpu_callt = NULL;
882 #if MACH_ASSERT
883 new_task->suspends_outstanding = 0;
884 #endif
885
886 #if HYPERVISOR
887 new_task->hv_task_target = NULL;
888 #endif /* HYPERVISOR */
889
890
891 new_task->low_mem_notified_warn = 0;
892 new_task->low_mem_notified_critical = 0;
893 new_task->low_mem_privileged_listener = 0;
894 new_task->purged_memory_warn = 0;
895 new_task->purged_memory_critical = 0;
896 new_task->mem_notify_reserved = 0;
897 #if IMPORTANCE_INHERITANCE
898 new_task->task_imp_base = NULL;
899 #endif /* IMPORTANCE_INHERITANCE */
900
901 #if defined(__x86_64__)
902 new_task->uexc_range_start = new_task->uexc_range_size = new_task->uexc_handler = 0;
903 #endif
904
905 new_task->requested_policy = default_task_requested_policy;
906 new_task->effective_policy = default_task_effective_policy;
907 new_task->pended_policy = default_task_pended_policy;
908
909 if (parent_task != TASK_NULL) {
910 new_task->sec_token = parent_task->sec_token;
911 new_task->audit_token = parent_task->audit_token;
912
913 /* inherit the parent's shared region */
914 shared_region = vm_shared_region_get(parent_task);
915 vm_shared_region_set(new_task, shared_region);
916
917 if(task_has_64BitAddr(parent_task))
918 task_set_64BitAddr(new_task);
919 new_task->all_image_info_addr = parent_task->all_image_info_addr;
920 new_task->all_image_info_size = parent_task->all_image_info_size;
921
922 #if defined(__i386__) || defined(__x86_64__)
923 if (inherit_memory && parent_task->i386_ldt)
924 new_task->i386_ldt = user_ldt_copy(parent_task->i386_ldt);
925 #endif
926 if (inherit_memory && parent_task->affinity_space)
927 task_affinity_create(parent_task, new_task);
928
929 new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task);
930
931 #if IMPORTANCE_INHERITANCE
932 ipc_importance_task_t new_task_imp = IIT_NULL;
933
934 if (task_is_marked_importance_donor(parent_task)) {
935 new_task_imp = ipc_importance_for_task(new_task, FALSE);
936 assert(IIT_NULL != new_task_imp);
937 ipc_importance_task_mark_donor(new_task_imp, TRUE);
938 }
939 /* Embedded doesn't want this to inherit */
940 if (task_is_marked_importance_receiver(parent_task)) {
941 if (IIT_NULL == new_task_imp)
942 new_task_imp = ipc_importance_for_task(new_task, FALSE);
943 assert(IIT_NULL != new_task_imp);
944 ipc_importance_task_mark_receiver(new_task_imp, TRUE);
945 }
946 if (task_is_marked_importance_denap_receiver(parent_task)) {
947 if (IIT_NULL == new_task_imp)
948 new_task_imp = ipc_importance_for_task(new_task, FALSE);
949 assert(IIT_NULL != new_task_imp);
950 ipc_importance_task_mark_denap_receiver(new_task_imp, TRUE);
951 }
952
953 if (IIT_NULL != new_task_imp) {
954 assert(new_task->task_imp_base == new_task_imp);
955 ipc_importance_task_release(new_task_imp);
956 }
957 #endif /* IMPORTANCE_INHERITANCE */
958
959 new_task->priority = BASEPRI_DEFAULT;
960 new_task->max_priority = MAXPRI_USER;
961
962 new_task->requested_policy.t_apptype = parent_task->requested_policy.t_apptype;
963
964 new_task->requested_policy.int_darwinbg = parent_task->requested_policy.int_darwinbg;
965 new_task->requested_policy.ext_darwinbg = parent_task->requested_policy.ext_darwinbg;
966 new_task->requested_policy.int_iotier = parent_task->requested_policy.int_iotier;
967 new_task->requested_policy.ext_iotier = parent_task->requested_policy.ext_iotier;
968 new_task->requested_policy.int_iopassive = parent_task->requested_policy.int_iopassive;
969 new_task->requested_policy.ext_iopassive = parent_task->requested_policy.ext_iopassive;
970 new_task->requested_policy.bg_iotier = parent_task->requested_policy.bg_iotier;
971 new_task->requested_policy.terminated = parent_task->requested_policy.terminated;
972 new_task->requested_policy.t_qos_clamp = parent_task->requested_policy.t_qos_clamp;
973
974 task_policy_create(new_task, parent_task->requested_policy.t_boosted);
975 } else {
976 new_task->sec_token = KERNEL_SECURITY_TOKEN;
977 new_task->audit_token = KERNEL_AUDIT_TOKEN;
978 #ifdef __LP64__
979 if(is_64bit)
980 task_set_64BitAddr(new_task);
981 #endif
982 new_task->all_image_info_addr = (mach_vm_address_t)0;
983 new_task->all_image_info_size = (mach_vm_size_t)0;
984
985 new_task->pset_hint = PROCESSOR_SET_NULL;
986
987 if (kernel_task == TASK_NULL) {
988 new_task->priority = BASEPRI_KERNEL;
989 new_task->max_priority = MAXPRI_KERNEL;
990 } else {
991 new_task->priority = BASEPRI_DEFAULT;
992 new_task->max_priority = MAXPRI_USER;
993 }
994 }
995
996 bzero(new_task->coalition, sizeof(new_task->coalition));
997 for (int i = 0; i < COALITION_NUM_TYPES; i++)
998 queue_chain_init(new_task->task_coalition[i]);
999
1000 /* Allocate I/O Statistics */
1001 new_task->task_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info));
1002 assert(new_task->task_io_stats != NULL);
1003 bzero(new_task->task_io_stats, sizeof(struct io_stat_info));
1004 new_task->task_immediate_writes = 0;
1005 new_task->task_deferred_writes = 0;
1006 new_task->task_invalidated_writes = 0;
1007 new_task->task_metadata_writes = 0;
1008
1009 bzero(&(new_task->cpu_time_qos_stats), sizeof(struct _cpu_time_qos_stats));
1010
1011 bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics));
1012 new_task->task_timer_wakeups_bin_1 = new_task->task_timer_wakeups_bin_2 = 0;
1013 new_task->task_gpu_ns = 0;
1014
1015 #if CONFIG_COALITIONS
1016
1017 /* TODO: there is no graceful failure path here... */
1018 if (parent_coalitions && parent_coalitions[COALITION_TYPE_RESOURCE]) {
1019 coalitions_adopt_task(parent_coalitions, new_task);
1020 } else if (parent_task && parent_task->coalition[COALITION_TYPE_RESOURCE]) {
1021 /*
1022 * all tasks at least have a resource coalition, so
1023 * if the parent has one then inherit all coalitions
1024 * the parent is a part of
1025 */
1026 coalitions_adopt_task(parent_task->coalition, new_task);
1027 } else {
1028 /* TODO: assert that new_task will be PID 1 (launchd) */
1029 coalitions_adopt_init_task(new_task);
1030 }
1031
1032 if (new_task->coalition[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
1033 panic("created task is not a member of a resource coalition");
1034 }
1035 #endif /* CONFIG_COALITIONS */
1036
1037 new_task->dispatchqueue_offset = 0;
1038 if (parent_task != NULL) {
1039 new_task->dispatchqueue_offset = parent_task->dispatchqueue_offset;
1040 }
1041
1042 if (vm_backing_store_low && parent_task != NULL)
1043 new_task->priv_flags |= (parent_task->priv_flags&VM_BACKING_STORE_PRIV);
1044
1045 new_task->task_volatile_objects = 0;
1046 new_task->task_nonvolatile_objects = 0;
1047 new_task->task_purgeable_disowning = FALSE;
1048 new_task->task_purgeable_disowned = FALSE;
1049
1050 queue_init(&new_task->io_user_clients);
1051
1052 ipc_task_enable(new_task);
1053
1054 lck_mtx_lock(&tasks_threads_lock);
1055 queue_enter(&tasks, new_task, task_t, tasks);
1056 tasks_count++;
1057 if (tasks_suspend_state) {
1058 task_suspend_internal(new_task);
1059 }
1060 lck_mtx_unlock(&tasks_threads_lock);
1061
1062 *child_task = new_task;
1063 return(KERN_SUCCESS);
1064 }
1065
1066 int task_dropped_imp_count = 0;
1067
1068 /*
1069 * task_deallocate:
1070 *
1071 * Drop a reference on a task.
1072 */
1073 void
1074 task_deallocate(
1075 task_t task)
1076 {
1077 ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups;
1078 uint32_t refs;
1079
1080 if (task == TASK_NULL)
1081 return;
1082
1083 refs = task_deallocate_internal(task);
1084
1085 #if IMPORTANCE_INHERITANCE
1086 if (refs > 1)
1087 return;
1088
1089 if (refs == 1) {
1090 /*
1091 * If last ref potentially comes from the task's importance,
1092 * disconnect it. But more task refs may be added before
1093 * that completes, so wait for the reference to go to zero
1094 * naturally (it may happen on a recursive task_deallocate()
1095 * from the ipc_importance_disconnect_task() call).
1096 */
1097 if (IIT_NULL != task->task_imp_base)
1098 ipc_importance_disconnect_task(task);
1099 return;
1100 }
1101 #else
1102 if (refs > 0)
1103 return;
1104 #endif /* IMPORTANCE_INHERITANCE */
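	/*
	 * Past this point the reference count has actually reached zero (any
	 * importance-related reference was disconnected above, which may
	 * re-enter task_deallocate() for the final release), so the task can
	 * be torn down.
	 */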
1105
1106 lck_mtx_lock(&tasks_threads_lock);
1107 queue_remove(&terminated_tasks, task, task_t, tasks);
1108 terminated_tasks_count--;
1109 lck_mtx_unlock(&tasks_threads_lock);
1110
1111 /*
1112 * remove the reference on atm descriptor
1113 */
1114 task_atm_reset(task);
1115
1116 /*
1117 * remove the reference on bank context
1118 */
1119 task_bank_reset(task);
1120
1121 if (task->task_io_stats)
1122 kfree(task->task_io_stats, sizeof(struct io_stat_info));
1123
1124 /*
1125 * Give the machine dependent code a chance
1126 * to perform cleanup before ripping apart
1127 * the task.
1128 */
1129 machine_task_terminate(task);
1130
1131 ipc_task_terminate(task);
1132
1133 /* let iokit know */
1134 iokit_task_terminate(task);
1135
1136 if (task->affinity_space)
1137 task_affinity_deallocate(task);
1138
1139 #if MACH_ASSERT
1140 if (task->ledger != NULL &&
1141 task->map != NULL &&
1142 task->map->pmap != NULL &&
1143 task->map->pmap->ledger != NULL) {
1144 assert(task->ledger == task->map->pmap->ledger);
1145 }
1146 #endif /* MACH_ASSERT */
1147
1148 vm_purgeable_disown(task);
1149 assert(task->task_purgeable_disowned);
1150 if (task->task_volatile_objects != 0 ||
1151 task->task_nonvolatile_objects != 0) {
1152 panic("task_deallocate(%p): "
1153 "volatile_objects=%d nonvolatile_objects=%d\n",
1154 task,
1155 task->task_volatile_objects,
1156 task->task_nonvolatile_objects);
1157 }
1158
1159 vm_map_deallocate(task->map);
1160 is_release(task->itk_space);
1161
1162 ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
1163 &interrupt_wakeups, &debit);
1164 ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
1165 &platform_idle_wakeups, &debit);
1166
1167 #if defined(CONFIG_SCHED_MULTIQ)
1168 sched_group_destroy(task->sched_group);
1169 #endif
1170
1171 /* Accumulate statistics for dead tasks */
1172 lck_spin_lock(&dead_task_statistics_lock);
1173 dead_task_statistics.total_user_time += task->total_user_time;
1174 dead_task_statistics.total_system_time += task->total_system_time;
1175
1176 dead_task_statistics.task_interrupt_wakeups += interrupt_wakeups;
1177 dead_task_statistics.task_platform_idle_wakeups += platform_idle_wakeups;
1178
1179 dead_task_statistics.task_timer_wakeups_bin_1 += task->task_timer_wakeups_bin_1;
1180 dead_task_statistics.task_timer_wakeups_bin_2 += task->task_timer_wakeups_bin_2;
1181
1182 lck_spin_unlock(&dead_task_statistics_lock);
1183 lck_mtx_destroy(&task->lock, &task_lck_grp);
1184
1185 if (!ledger_get_entries(task->ledger, task_ledgers.tkm_private, &credit,
1186 &debit)) {
1187 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc);
1188 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_private.free);
1189 }
1190 if (!ledger_get_entries(task->ledger, task_ledgers.tkm_shared, &credit,
1191 &debit)) {
1192 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_shared.alloc);
1193 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_shared.free);
1194 }
1195 ledger_dereference(task->ledger);
1196 zinfo_task_free(task);
1197
1198 #if TASK_REFERENCE_LEAK_DEBUG
1199 btlog_remove_entries_for_element(task_ref_btlog, task);
1200 #endif
1201
1202 #if CONFIG_COALITIONS
1203 if (!task->coalition[COALITION_TYPE_RESOURCE])
1204 panic("deallocating task was not a member of a resource coalition");
1205 task_release_coalitions(task);
1206 #endif /* CONFIG_COALITIONS */
1207
1208 bzero(task->coalition, sizeof(task->coalition));
1209
1210 #if MACH_BSD
1211 /* clean up collected information since last reference to task is gone */
1212 if (task->corpse_info) {
1213 task_crashinfo_destroy(task->corpse_info);
1214 task->corpse_info = NULL;
1215 }
1216 #endif
1217
1218 zfree(task_zone, task);
1219 }
1220
1221 /*
1222 * task_name_deallocate:
1223 *
1224 * Drop a reference on a task name.
1225 */
1226 void
1227 task_name_deallocate(
1228 task_name_t task_name)
1229 {
1230 return(task_deallocate((task_t)task_name));
1231 }
1232
1233 /*
1234 * task_suspension_token_deallocate:
1235 *
1236 * Drop a reference on a task suspension token.
1237 */
1238 void
1239 task_suspension_token_deallocate(
1240 task_suspension_token_t token)
1241 {
1242 return(task_deallocate((task_t)token));
1243 }
1244
1245
1246 /*
1247 * task_collect_crash_info:
1248 *
1249 * collect crash info from bsd and mach based data
1250 */
1251 kern_return_t
1252 task_collect_crash_info(task_t task)
1253 {
1254 kern_return_t kr = KERN_SUCCESS;
1255
1256 kcdata_descriptor_t crash_data = NULL;
1257 kcdata_descriptor_t crash_data_release = NULL;
1258 mach_msg_type_number_t size = CORPSEINFO_ALLOCATION_SIZE;
1259 mach_vm_offset_t crash_data_user_ptr = 0;
1260
1261 if (!corpses_enabled()) {
1262 return KERN_NOT_SUPPORTED;
1263 }
1264
1265 task_lock(task);
1266 assert(task->bsd_info != NULL);
1267 if (task->corpse_info == NULL && task->bsd_info != NULL) {
1268 task_unlock(task);
1269 /* map crash data memory in task's vm map */
1270 kr = mach_vm_allocate(task->map, &crash_data_user_ptr, size, (VM_MAKE_TAG(VM_MEMORY_CORPSEINFO) | VM_FLAGS_ANYWHERE));
1271
1272 if (kr != KERN_SUCCESS)
1273 goto out_no_lock;
1274
1275 crash_data = task_crashinfo_alloc_init((mach_vm_address_t)crash_data_user_ptr, size);
1276 if (crash_data) {
1277 task_lock(task);
1278 crash_data_release = task->corpse_info;
1279 task->corpse_info = crash_data;
1280 task_unlock(task);
1281 kr = KERN_SUCCESS;
1282 } else {
1283 /* if failed to create corpse info, free the mapping */
1284 if (KERN_SUCCESS != mach_vm_deallocate(task->map, crash_data_user_ptr, size)) {
1285 printf("mach_vm_deallocate failed to clear corpse_data for pid %d.\n", task_pid(task));
1286 }
1287 kr = KERN_FAILURE;
1288 }
1289
1290 if (crash_data_release != NULL) {
1291 task_crashinfo_destroy(crash_data_release);
1292 }
1293 } else {
1294 task_unlock(task);
1295 }
1296
1297 out_no_lock:
1298 return kr;
1299 }
1300
1301 /*
1302 * task_deliver_crash_notification:
1303 *
1304 * Makes outcall to registered host port for a corpse.
1305 */
1306 kern_return_t
1307 task_deliver_crash_notification(task_t task)
1308 {
1309 kcdata_descriptor_t crash_info = task->corpse_info;
1310 thread_t th_iter = NULL;
1311 kern_return_t kr = KERN_SUCCESS;
1312 wait_interrupt_t wsave;
1313 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
1314
1315 if (crash_info == NULL)
1316 return KERN_FAILURE;
1317
1318 code[0] = crash_info->kcd_addr_begin;
1319 code[1] = crash_info->kcd_length;
1320
1321 task_lock(task);
1322 queue_iterate(&task->threads, th_iter, thread_t, task_threads)
1323 {
1324 ipc_thread_reset(th_iter);
1325 }
1326 task_unlock(task);
1327
1328 wsave = thread_interrupt_level(THREAD_UNINT);
1329 kr = exception_triage(EXC_CORPSE_NOTIFY, code, EXCEPTION_CODE_MAX);
1330 if (kr != KERN_SUCCESS) {
1331 printf("Failed to send exception EXC_CORPSE_NOTIFY. error code: %d for pid %d\n", kr, task_pid(task));
1332 }
1333
1334 /*
1335 * crash reporting is done. Now release threads
1336 * for reaping by thread_terminate_daemon
1337 */
1338 task_lock(task);
1339 assert(task->active_thread_count == 0);
1340 queue_iterate(&task->threads, th_iter, thread_t, task_threads)
1341 {
1342 thread_mtx_lock(th_iter);
1343 assert(th_iter->inspection == TRUE);
1344 th_iter->inspection = FALSE;
1345 /* now that the corpse has been autopsied, dispose of the thread name */
1346 uthread_cleanup_name(th_iter->uthread);
1347 thread_mtx_unlock(th_iter);
1348 }
1349
1350 thread_terminate_crashed_threads();
1351 /* remove the pending corpse report flag */
1352 task_clear_corpse_pending_report(task);
1353
1354 task_unlock(task);
1355
1356 (void)thread_interrupt_level(wsave);
1357 task_terminate_internal(task);
1358
1359 return kr;
1360 }
1361
1362 /*
1363 * task_terminate:
1364 *
1365 * Terminate the specified task. See comments on thread_terminate
1366 * (kern/thread.c) about problems with terminating the "current task."
1367 */
1368
1369 kern_return_t
1370 task_terminate(
1371 task_t task)
1372 {
1373 if (task == TASK_NULL)
1374 return (KERN_INVALID_ARGUMENT);
1375
1376 if (task->bsd_info)
1377 return (KERN_FAILURE);
1378
1379 return (task_terminate_internal(task));
1380 }
1381
1382 #if MACH_ASSERT
1383 extern int proc_pid(struct proc *);
1384 extern void proc_name_kdp(task_t t, char *buf, int size);
1385 #endif /* MACH_ASSERT */
1386
1387 #define VM_MAP_PARTIAL_REAP 0x54 /* 0x150 */
1388 static void
1389 __unused task_partial_reap(task_t task, __unused int pid)
1390 {
1391 unsigned int reclaimed_resident = 0;
1392 unsigned int reclaimed_compressed = 0;
1393 uint64_t task_page_count;
1394
1395 task_page_count = (get_task_phys_footprint(task) / PAGE_SIZE_64);
1396
1397 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_START),
1398 pid, task_page_count, 0, 0, 0);
1399
1400 vm_map_partial_reap(task->map, &reclaimed_resident, &reclaimed_compressed);
1401
1402 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_END),
1403 pid, reclaimed_resident, reclaimed_compressed, 0, 0);
1404 }
1405
1406 kern_return_t
1407 task_mark_corpse(task_t task)
1408 {
1409 kern_return_t kr = KERN_SUCCESS;
1410 thread_t self_thread;
1411 (void) self_thread;
1412 wait_interrupt_t wsave;
1413
1414 assert(task != kernel_task);
1415 assert(task == current_task());
1416 assert(!task_is_a_corpse(task));
1417
1418 kr = task_collect_crash_info(task);
1419 if (kr != KERN_SUCCESS) {
1420 return kr;
1421 }
1422
1423 self_thread = current_thread();
1424
1425 wsave = thread_interrupt_level(THREAD_UNINT);
1426 task_lock(task);
1427
1428 task_set_corpse_pending_report(task);
1429 task_set_corpse(task);
1430
1431 kr = task_start_halt_locked(task, TRUE);
1432 assert(kr == KERN_SUCCESS);
1433 ipc_task_reset(task);
1434 ipc_task_enable(task);
1435
1436 task_unlock(task);
1437 /* terminate the ipc space */
1438 ipc_space_terminate(task->itk_space);
1439
1440 task_start_halt(task);
1441 thread_terminate_internal(self_thread);
1442 (void) thread_interrupt_level(wsave);
1443 assert(task->halting == TRUE);
1444 return kr;
1445 }
1446
1447 kern_return_t
1448 task_terminate_internal(
1449 task_t task)
1450 {
1451 thread_t thread, self;
1452 task_t self_task;
1453 boolean_t interrupt_save;
1454 int pid = 0;
1455
1456 assert(task != kernel_task);
1457
1458 self = current_thread();
1459 self_task = self->task;
1460
1461 /*
1462 * Get the task locked and make sure that we are not racing
1463 * with someone else trying to terminate us.
1464 */
1465 if (task == self_task)
1466 task_lock(task);
1467 else
1468 if (task < self_task) {
1469 task_lock(task);
1470 task_lock(self_task);
1471 }
1472 else {
1473 task_lock(self_task);
1474 task_lock(task);
1475 }
1476
1477 if (!task->active) {
1478 /*
1479 * Task is already being terminated.
1480 * Just return an error. If we are dying, this will
1481 * just get us to our AST special handler and that
1482 * will get us to finalize the termination of ourselves.
1483 */
1484 task_unlock(task);
1485 if (self_task != task)
1486 task_unlock(self_task);
1487
1488 return (KERN_FAILURE);
1489 }
1490
1491 if (task_corpse_pending_report(task)) {
1492 /*
1493 * Task is marked for reporting as corpse.
1494 * Just return an error. This will
1495 * just get us to our AST special handler and that
1496 * will get us to finish the path to death
1497 */
1498 task_unlock(task);
1499 if (self_task != task)
1500 task_unlock(self_task);
1501
1502 return (KERN_FAILURE);
1503 }
1504
1505 if (self_task != task)
1506 task_unlock(self_task);
1507
1508 /*
1509 * Make sure the current thread does not get aborted out of
1510 * the waits inside these operations.
1511 */
1512 interrupt_save = thread_interrupt_level(THREAD_UNINT);
1513
1514 /*
1515 * Indicate that we want all the threads to stop executing
1516 * at user space by holding the task (we would have held
1517 * each thread independently in thread_terminate_internal -
1518 * but this way we may be more likely to already find it
1519 * held there). Mark the task inactive, and prevent
1520 * further task operations via the task port.
1521 */
1522 task_hold_locked(task);
1523 task->active = FALSE;
1524 ipc_task_disable(task);
1525
1526 #if CONFIG_TELEMETRY
1527 /*
1528 * Notify telemetry that this task is going away.
1529 */
1530 telemetry_task_ctl_locked(task, TF_TELEMETRY, 0);
1531 #endif
1532
1533 /*
1534 * Terminate each thread in the task.
1535 */
1536 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1537 thread_terminate_internal(thread);
1538 }
1539
1540 #ifdef MACH_BSD
1541 if (task->bsd_info != NULL) {
1542 pid = proc_pid(task->bsd_info);
1543 }
1544 #endif /* MACH_BSD */
1545
1546 task_unlock(task);
1547
1548 proc_set_task_policy(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE,
1549 TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);
1550
1551 /* Early object reap phase */
1552
1553 // PR-17045188: Revisit implementation
1554 // task_partial_reap(task, pid);
1555
1556
1557 /*
1558 * Destroy all synchronizers owned by the task.
1559 */
1560 task_synchronizer_destroy_all(task);
1561
1562 /*
1563 * Destroy the IPC space, leaving just a reference for it.
1564 */
1565 ipc_space_terminate(task->itk_space);
1566
1567 #if 00
1568 /* if some ledgers go negative on tear-down again... */
1569 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1570 task_ledgers.phys_footprint);
1571 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1572 task_ledgers.internal);
1573 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1574 task_ledgers.internal_compressed);
1575 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1576 task_ledgers.iokit_mapped);
1577 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1578 task_ledgers.alternate_accounting);
1579 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1580 task_ledgers.alternate_accounting_compressed);
1581 #endif
1582
1583 /*
1584 * If the current thread is a member of the task
1585 * being terminated, then the last reference to
1586 * the task will not be dropped until the thread
1587 * is finally reaped. To avoid incurring the
1588 * expense of removing the address space regions
1589 * at reap time, we do it explicitly here.
1590 */
1591
1592 vm_map_lock(task->map);
1593 vm_map_disable_hole_optimization(task->map);
1594 vm_map_unlock(task->map);
1595
1596 vm_map_remove(task->map,
1597 task->map->min_offset,
1598 task->map->max_offset,
1599 /* no unnesting on final cleanup: */
1600 VM_MAP_REMOVE_NO_UNNESTING);
1601
1602 /* release our shared region */
1603 vm_shared_region_set(task, NULL);
1604
1605
1606 #if MACH_ASSERT
1607 /*
1608 * Identify the pmap's process, in case the pmap ledgers drift
1609 * and we have to report it.
1610 */
1611 char procname[17];
1612 if (task->bsd_info) {
1613 pid = proc_pid(task->bsd_info);
1614 proc_name_kdp(task, procname, sizeof (procname));
1615 } else {
1616 pid = 0;
1617 strlcpy(procname, "<unknown>", sizeof (procname));
1618 }
1619 pmap_set_process(task->map->pmap, pid, procname);
1620 #endif /* MACH_ASSERT */
1621
1622 lck_mtx_lock(&tasks_threads_lock);
1623 queue_remove(&tasks, task, task_t, tasks);
1624 queue_enter(&terminated_tasks, task, task_t, tasks);
1625 tasks_count--;
1626 terminated_tasks_count++;
1627 lck_mtx_unlock(&tasks_threads_lock);
1628
1629 /*
1630 * We no longer need to guard against being aborted, so restore
1631 * the previous interruptible state.
1632 */
1633 thread_interrupt_level(interrupt_save);
1634
1635 #if KPERF
1636 /* force the task to release all ctrs */
1637 if (task->t_chud & TASK_KPC_FORCED_ALL_CTRS)
1638 kpc_force_all_ctrs(task, 0);
1639 #endif
1640
1641 #if CONFIG_COALITIONS
1642 /*
1643 * Leave our coalitions. (drop activation but not reference)
1644 */
1645 coalitions_remove_task(task);
1646 #endif
1647
1648 /*
1649 * Get rid of the task active reference on itself.
1650 */
1651 task_deallocate(task);
1652
1653 return (KERN_SUCCESS);
1654 }
1655
1656 void
1657 tasks_system_suspend(boolean_t suspend)
1658 {
1659 task_t task;
1660
1661 lck_mtx_lock(&tasks_threads_lock);
1662 assert(tasks_suspend_state != suspend);
1663 tasks_suspend_state = suspend;
1664 queue_iterate(&tasks, task, task_t, tasks) {
1665 if (task == kernel_task) {
1666 continue;
1667 }
1668 suspend ? task_suspend_internal(task) : task_resume_internal(task);
1669 }
1670 lck_mtx_unlock(&tasks_threads_lock);
1671 }
1672
1673 /*
1674 * task_start_halt:
1675 *
1676 * Shut the current task down (except for the current thread) in
1677 * preparation for dramatic changes to the task (probably exec).
1678 * We hold the task and mark all other threads in the task for
1679 * termination.
1680 */
1681 kern_return_t
1682 task_start_halt(task_t task)
1683 {
1684 kern_return_t kr = KERN_SUCCESS;
1685 task_lock(task);
1686 kr = task_start_halt_locked(task, FALSE);
1687 task_unlock(task);
1688 return kr;
1689 }
1690
1691 static kern_return_t
1692 task_start_halt_locked(task_t task, boolean_t should_mark_corpse)
1693 {
1694 thread_t thread, self;
1695 uint64_t dispatchqueue_offset;
1696
1697 assert(task != kernel_task);
1698
1699 self = current_thread();
1700
1701 if (task != self->task)
1702 return (KERN_INVALID_ARGUMENT);
1703
1704 if (task->halting || !task->active || !self->active) {
1705 /*
1706 * Task or current thread is already being terminated.
1707 * Hurry up and return out of the current kernel context
1708 * so that we run our AST special handler to terminate
1709 * ourselves.
1710 */
1711 return (KERN_FAILURE);
1712 }
1713
1714 task->halting = TRUE;
1715
1716 /*
1717 * Mark all the threads to keep them from starting any more
1718 * user-level execution. The thread_terminate_internal code
1719 * would do this on a thread by thread basis anyway, but this
1720 * gives us a better chance of not having to wait there.
1721 */
1722 task_hold_locked(task);
1723 dispatchqueue_offset = get_dispatchqueue_offset_from_proc(task->bsd_info);
1724
1725 /*
1726 * Terminate all the other threads in the task.
1727 */
1728 queue_iterate(&task->threads, thread, thread_t, task_threads)
1729 {
1730 if (should_mark_corpse) {
1731 thread_mtx_lock(thread);
1732 thread->inspection = TRUE;
1733 thread_mtx_unlock(thread);
1734 }
1735 if (thread != self)
1736 thread_terminate_internal(thread);
1737 }
1738 task->dispatchqueue_offset = dispatchqueue_offset;
1739
1740 task_release_locked(task);
1741
1742 return KERN_SUCCESS;
1743 }
1744
1745
1746 /*
1747 * task_complete_halt:
1748 *
1749 * Complete task halt by waiting for threads to terminate, then clean
1750 * up task resources (VM, port namespace, etc...) and then let the
1751 * current thread go in the (practically empty) task context.
1752 */
1753 void
1754 task_complete_halt(task_t task)
1755 {
1756 task_lock(task);
1757 assert(task->halting);
1758 assert(task == current_task());
1759
1760 /*
1761 * Wait for the other threads to get shut down.
1762 * When the last other thread is reaped, we'll be
1763 * woken up.
1764 */
1765 if (task->thread_count > 1) {
1766 assert_wait((event_t)&task->halting, THREAD_UNINT);
1767 task_unlock(task);
1768 thread_block(THREAD_CONTINUE_NULL);
1769 } else {
1770 task_unlock(task);
1771 }
1772
1773 /*
1774 * Give the machine dependent code a chance
1775 * to perform cleanup of task-level resources
1776 * associated with the current thread before
1777 * ripping apart the task.
1778 */
1779 machine_task_terminate(task);
1780
1781 /*
1782 * Destroy all synchronizers owned by the task.
1783 */
1784 task_synchronizer_destroy_all(task);
1785
1786 /*
1787 * Destroy the contents of the IPC space, leaving just
1788 * a reference for it.
1789 */
1790 ipc_space_clean(task->itk_space);
1791
1792 /*
1793 * Clean out the address space, as we are going to be
1794 * getting a new one.
1795 */
1796 vm_map_remove(task->map, task->map->min_offset,
1797 task->map->max_offset,
1798 /* no unnesting on final cleanup: */
1799 VM_MAP_REMOVE_NO_UNNESTING);
1800
1801 task->halting = FALSE;
1802 }
1803
1804 /*
1805 * task_hold_locked:
1806 *
1807 * Suspend execution of the specified task.
1808 * This is a recursive-style suspension of the task, a count of
1809 * suspends is maintained.
1810 *
1811 * CONDITIONS: the task is locked and active.
1812 */
1813 void
1814 task_hold_locked(
1815 register task_t task)
1816 {
1817 register thread_t thread;
1818
1819 assert(task->active);
1820
1821 if (task->suspend_count++ > 0)
1822 return;
1823
1824 /*
1825 * Iterate through all the threads and hold them.
1826 */
1827 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1828 thread_mtx_lock(thread);
1829 thread_hold(thread);
1830 thread_mtx_unlock(thread);
1831 }
1832 }
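/*
 * A rough sketch of the counting behaviour: only the 0 -> 1 transition
 * of suspend_count actually holds the threads, and only the 1 -> 0
 * transition in task_release_locked() below releases them.
 *
 *	task_hold_locked(task);		// suspend_count 0 -> 1, threads held
 *	task_hold_locked(task);		// suspend_count 1 -> 2, no thread work
 *	task_release_locked(task);	// suspend_count 2 -> 1, no thread work
 *	task_release_locked(task);	// suspend_count 1 -> 0, threads released
 */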
1833
1834 /*
1835 * task_hold:
1836 *
1837 * Same as the internal routine above, except that it must lock
1838 * and verify that the task is active. This differs from task_suspend
1839 * in that it places a kernel hold on the task rather than just a
1840 * user-level hold. This keeps users from over-resuming and setting
1841 * it running out from under the kernel.
1842 *
1843 * CONDITIONS: the caller holds a reference on the task
1844 */
1845 kern_return_t
1846 task_hold(
1847 register task_t task)
1848 {
1849 if (task == TASK_NULL)
1850 return (KERN_INVALID_ARGUMENT);
1851
1852 task_lock(task);
1853
1854 if (!task->active) {
1855 task_unlock(task);
1856
1857 return (KERN_FAILURE);
1858 }
1859
1860 task_hold_locked(task);
1861 task_unlock(task);
1862
1863 return (KERN_SUCCESS);
1864 }
1865
1866 kern_return_t
1867 task_wait(
1868 task_t task,
1869 boolean_t until_not_runnable)
1870 {
1871 if (task == TASK_NULL)
1872 return (KERN_INVALID_ARGUMENT);
1873
1874 task_lock(task);
1875
1876 if (!task->active) {
1877 task_unlock(task);
1878
1879 return (KERN_FAILURE);
1880 }
1881
1882 task_wait_locked(task, until_not_runnable);
1883 task_unlock(task);
1884
1885 return (KERN_SUCCESS);
1886 }
1887
1888 /*
1889 * task_wait_locked:
1890 *
1891 * Wait for all threads in task to stop.
1892 *
1893 * Conditions:
1894 * Called with task locked, active, and held.
1895 */
1896 void
1897 task_wait_locked(
1898 register task_t task,
1899 boolean_t until_not_runnable)
1900 {
1901 register thread_t thread, self;
1902
1903 assert(task->active);
1904 assert(task->suspend_count > 0);
1905
1906 self = current_thread();
1907
1908 /*
1909 * Iterate through all the threads and wait for them to
1910 * stop. Do not wait for the current thread if it is within
1911 * the task.
1912 */
1913 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1914 if (thread != self)
1915 thread_wait(thread, until_not_runnable);
1916 }
1917 }
1918
1919 /*
1920 * task_release_locked:
1921 *
1922 * Release a kernel hold on a task.
1923 *
1924 * CONDITIONS: the task is locked and active
1925 */
1926 void
1927 task_release_locked(
1928 register task_t task)
1929 {
1930 register thread_t thread;
1931
1932 assert(task->active);
1933 assert(task->suspend_count > 0);
1934
1935 if (--task->suspend_count > 0)
1936 return;
1937
1938 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1939 thread_mtx_lock(thread);
1940 thread_release(thread);
1941 thread_mtx_unlock(thread);
1942 }
1943 }
1944
1945 /*
1946 * task_release:
1947 *
1948 * Same as the internal routine above, except that it must lock
1949 * and verify that the task is active.
1950 *
1951 * CONDITIONS: The caller holds a reference to the task
1952 */
1953 kern_return_t
1954 task_release(
1955 task_t task)
1956 {
1957 if (task == TASK_NULL)
1958 return (KERN_INVALID_ARGUMENT);
1959
1960 task_lock(task);
1961
1962 if (!task->active) {
1963 task_unlock(task);
1964
1965 return (KERN_FAILURE);
1966 }
1967
1968 task_release_locked(task);
1969 task_unlock(task);
1970
1971 return (KERN_SUCCESS);
1972 }
1973
1974 kern_return_t
1975 task_threads(
1976 task_t task,
1977 thread_act_array_t *threads_out,
1978 mach_msg_type_number_t *count)
1979 {
1980 mach_msg_type_number_t actual;
1981 thread_t *thread_list;
1982 thread_t thread;
1983 vm_size_t size, size_needed;
1984 void *addr;
1985 unsigned int i, j;
1986
1987 if (task == TASK_NULL)
1988 return (KERN_INVALID_ARGUMENT);
1989
1990 size = 0; addr = NULL;
1991
1992 for (;;) {
1993 task_lock(task);
1994 if (!task->active) {
1995 task_unlock(task);
1996
1997 if (size != 0)
1998 kfree(addr, size);
1999
2000 return (KERN_FAILURE);
2001 }
2002
2003 actual = task->thread_count;
2004
2005 /* do we have the memory we need? */
2006 size_needed = actual * sizeof (mach_port_t);
2007 if (size_needed <= size)
2008 break;
2009
2010 /* unlock the task and allocate more memory */
2011 task_unlock(task);
2012
2013 if (size != 0)
2014 kfree(addr, size);
2015
2016 assert(size_needed > 0);
2017 size = size_needed;
2018
2019 addr = kalloc(size);
2020 if (addr == 0)
2021 return (KERN_RESOURCE_SHORTAGE);
2022 }
2023
2024 /* OK, have memory and the task is locked & active */
2025 thread_list = (thread_t *)addr;
2026
2027 i = j = 0;
2028
2029 for (thread = (thread_t)queue_first(&task->threads); i < actual;
2030 ++i, thread = (thread_t)queue_next(&thread->task_threads)) {
2031 thread_reference_internal(thread);
2032 thread_list[j++] = thread;
2033 }
2034
2035 assert(queue_end(&task->threads, (queue_entry_t)thread));
2036
2037 actual = j;
2038 size_needed = actual * sizeof (mach_port_t);
2039
2040 /* can unlock task now that we've got the thread refs */
2041 task_unlock(task);
2042
2043 if (actual == 0) {
2044 /* no threads, so return null pointer and deallocate memory */
2045
2046 *threads_out = NULL;
2047 *count = 0;
2048
2049 if (size != 0)
2050 kfree(addr, size);
2051 }
2052 else {
2053 /* if we allocated too much, must copy */
2054
2055 if (size_needed < size) {
2056 void *newaddr;
2057
2058 newaddr = kalloc(size_needed);
2059 if (newaddr == 0) {
2060 for (i = 0; i < actual; ++i)
2061 thread_deallocate(thread_list[i]);
2062 kfree(addr, size);
2063 return (KERN_RESOURCE_SHORTAGE);
2064 }
2065
2066 bcopy(addr, newaddr, size_needed);
2067 kfree(addr, size);
2068 thread_list = (thread_t *)newaddr;
2069 }
2070
2071 *threads_out = thread_list;
2072 *count = actual;
2073
2074 /* do the conversion that Mig should handle */
2075
2076 for (i = 0; i < actual; ++i)
2077 ((ipc_port_t *) thread_list)[i] = convert_thread_to_port(thread_list[i]);
2078 }
2079
2080 return (KERN_SUCCESS);
2081 }
2082
2083 #define TASK_HOLD_NORMAL 0
2084 #define TASK_HOLD_PIDSUSPEND 1
2085 #define TASK_HOLD_LEGACY 2
2086 #define TASK_HOLD_LEGACY_ALL 3
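/*
 * Rough summary of the hold modes consumed by place_task_hold() and
 * release_task_hold() below:
 *
 *	TASK_HOLD_NORMAL	kernel-internal hold, placed by
 *				task_suspend_internal() and dropped by
 *				task_resume_internal().
 *	TASK_HOLD_PIDSUSPEND	hold placed by task_pidsuspend(), released
 *				only by task_pidresume().
 *	TASK_HOLD_LEGACY	old-style task_suspend() hold, tracked by a
 *				naked send right to the task's resume port.
 *	TASK_HOLD_LEGACY_ALL	drop every remaining legacy hold at once,
 *				used when the resume port has no more senders.
 */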
2087
2088 static kern_return_t
2089 place_task_hold (
2090 register task_t task,
2091 int mode)
2092 {
2093 if (!task->active) {
2094 return (KERN_FAILURE);
2095 }
2096
2097 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2098 MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_SUSPEND) | DBG_FUNC_NONE,
2099 task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
2100 task->user_stop_count, task->user_stop_count + 1, 0);
2101
2102 #if MACH_ASSERT
2103 current_task()->suspends_outstanding++;
2104 #endif
2105
2106 if (mode == TASK_HOLD_LEGACY)
2107 task->legacy_stop_count++;
2108
2109 if (task->user_stop_count++ > 0) {
2110 /*
2111 * If the stop count was positive, the task is
2112 * already stopped and we can exit.
2113 */
2114 return (KERN_SUCCESS);
2115 }
2116
2117 /*
2118 * Put a kernel-level hold on the threads in the task (all
2119 * user-level task suspensions added together represent a
2120 * single kernel-level hold). We then wait for the threads
2121 * to stop executing user code.
2122 */
2123 task_hold_locked(task);
2124 task_wait_locked(task, FALSE);
2125
2126 return (KERN_SUCCESS);
2127 }
2128
2129 static kern_return_t
2130 release_task_hold (
2131 register task_t task,
2132 int mode)
2133 {
2134 register boolean_t release = FALSE;
2135
2136 if (!task->active) {
2137 return (KERN_FAILURE);
2138 }
2139
2140 if (mode == TASK_HOLD_PIDSUSPEND) {
2141 if (task->pidsuspended == FALSE) {
2142 return (KERN_FAILURE);
2143 }
2144 task->pidsuspended = FALSE;
2145 }
2146
2147 if (task->user_stop_count > (task->pidsuspended ? 1 : 0)) {
2148
2149 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2150 MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_RESUME) | DBG_FUNC_NONE,
2151 task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
2152 task->user_stop_count, mode, task->legacy_stop_count);
2153
2154 #if MACH_ASSERT
2155 /*
2156 * This is obviously not robust; if we suspend one task and then resume a different one,
2157 * we'll fly under the radar. This is only meant to catch the common case of a crashed
2158 * or buggy suspender.
2159 */
2160 current_task()->suspends_outstanding--;
2161 #endif
2162
2163 if (mode == TASK_HOLD_LEGACY_ALL) {
2164 if (task->legacy_stop_count >= task->user_stop_count) {
2165 task->user_stop_count = 0;
2166 release = TRUE;
2167 } else {
2168 task->user_stop_count -= task->legacy_stop_count;
2169 }
2170 task->legacy_stop_count = 0;
2171 } else {
2172 if (mode == TASK_HOLD_LEGACY && task->legacy_stop_count > 0)
2173 task->legacy_stop_count--;
2174 if (--task->user_stop_count == 0)
2175 release = TRUE;
2176 }
2177 }
2178 else {
2179 return (KERN_FAILURE);
2180 }
2181
2182 /*
2183 * Release the task if necessary.
2184 */
2185 if (release)
2186 task_release_locked(task);
2187
2188 return (KERN_SUCCESS);
2189 }
2190
2191
2192 /*
2193 * task_suspend:
2194 *
2195 * Implement an (old-fashioned) user-level suspension on a task.
2196 *
2197 * Because the user isn't expecting to have to manage a suspension
2198 * token, we'll track it for him in the kernel in the form of a naked
2199 * send right to the task's resume port. All such send rights
2200 * account for a single suspension against the task (unlike task_suspend2()
2201 * where each caller gets a unique suspension count represented by a
2202 * unique send-once right).
2203 *
2204 * Conditions:
2205 * The caller holds a reference to the task
2206 */
2207 kern_return_t
2208 task_suspend(
2209 register task_t task)
2210 {
2211 kern_return_t kr;
2212 mach_port_t port, send, old_notify;
2213 mach_port_name_t name;
2214
2215 if (task == TASK_NULL || task == kernel_task)
2216 return (KERN_INVALID_ARGUMENT);
2217
2218 task_lock(task);
2219
2220 /*
2221 * Claim a send right on the task resume port, and request a no-senders
2222 * notification on that port (if none outstanding).
2223 */
2224 if (task->itk_resume == IP_NULL) {
2225 task->itk_resume = ipc_port_alloc_kernel();
2226 if (!IP_VALID(task->itk_resume))
2227 panic("failed to create resume port");
2228 ipc_kobject_set(task->itk_resume, (ipc_kobject_t)task, IKOT_TASK_RESUME);
2229 }
2230
2231 port = task->itk_resume;
2232 ip_lock(port);
2233 assert(ip_active(port));
2234
2235 send = ipc_port_make_send_locked(port);
2236 assert(IP_VALID(send));
2237
2238 if (port->ip_nsrequest == IP_NULL) {
2239 ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
2240 assert(old_notify == IP_NULL);
2241 /* port unlocked */
2242 } else {
2243 ip_unlock(port);
2244 }
2245
2246 /*
2247 * place a legacy hold on the task.
2248 */
2249 kr = place_task_hold(task, TASK_HOLD_LEGACY);
2250 if (kr != KERN_SUCCESS) {
2251 task_unlock(task);
2252 ipc_port_release_send(send);
2253 return kr;
2254 }
2255
2256 task_unlock(task);
2257
2258 /*
2259 * Copyout the send right into the calling task's IPC space. It won't know it is there,
2260 * but we'll look it up when calling a traditional resume. Any IPC operations that
2261 * deallocate the send right will auto-release the suspension.
2262 */
2263 if ((kr = ipc_kmsg_copyout_object(current_task()->itk_space, (ipc_object_t)send,
2264 MACH_MSG_TYPE_MOVE_SEND, &name)) != KERN_SUCCESS) {
2265 printf("warning: %s(%d) failed to copyout suspension token for pid %d with error: %d\n",
2266 proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
2267 task_pid(task), kr);
2268 return (kr);
2269 }
2270
2271 return (kr);
2272 }
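/*
 * Illustrative (uncompiled) sketch of the legacy pairing from a
 * user-space caller's point of view; "target" is a hypothetical task
 * port the caller already holds:
 *
 *	if (task_suspend(target) == KERN_SUCCESS) {
 *		// ... inspect the stopped task ...
 *		(void) task_resume(target);	// locates and releases the
 *						// send right stashed above
 *	}
 *
 * If the caller never resumes, the stashed send right dies with its IPC
 * space and the resulting no-senders notification drops the legacy holds
 * (see task_suspension_notify() below).
 */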
2273
2274 /*
2275 * task_resume:
2276 * Release a user hold on a task.
2277 *
2278 * Conditions:
2279 * The caller holds a reference to the task
2280 */
2281 kern_return_t
2282 task_resume(
2283 register task_t task)
2284 {
2285 kern_return_t kr;
2286 mach_port_name_t resume_port_name;
2287 ipc_entry_t resume_port_entry;
2288 ipc_space_t space = current_task()->itk_space;
2289
2290 if (task == TASK_NULL || task == kernel_task )
2291 return (KERN_INVALID_ARGUMENT);
2292
2293 /* release a legacy task hold */
2294 task_lock(task);
2295 kr = release_task_hold(task, TASK_HOLD_LEGACY);
2296 task_unlock(task);
2297
2298 is_write_lock(space);
2299 if (is_active(space) && IP_VALID(task->itk_resume) &&
2300 ipc_hash_lookup(space, (ipc_object_t)task->itk_resume, &resume_port_name, &resume_port_entry) == TRUE) {
2301 /*
2302 * We found a suspension token in the caller's IPC space. Release a send right to indicate that
2303 * we are holding one less legacy hold on the task from this caller. If the release failed,
2304 * go ahead and drop all the rights, as someone either already released our holds or the task
2305 * is gone.
2306 */
2307 if (kr == KERN_SUCCESS)
2308 ipc_right_dealloc(space, resume_port_name, resume_port_entry);
2309 else
2310 ipc_right_destroy(space, resume_port_name, resume_port_entry, FALSE, 0);
2311 /* space unlocked */
2312 } else {
2313 is_write_unlock(space);
2314 if (kr == KERN_SUCCESS)
2315 printf("warning: %s(%d) performed out-of-band resume on pid %d\n",
2316 proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
2317 task_pid(task));
2318 }
2319
2320 return kr;
2321 }
2322
2323 /*
2324 * Suspend the target task.
2325 * Making/holding a token/reference/port is the caller's responsibility.
2326 */
2327 kern_return_t
2328 task_suspend_internal(task_t task)
2329 {
2330 kern_return_t kr;
2331
2332 if (task == TASK_NULL || task == kernel_task)
2333 return (KERN_INVALID_ARGUMENT);
2334
2335 task_lock(task);
2336 kr = place_task_hold(task, TASK_HOLD_NORMAL);
2337 task_unlock(task);
2338 return (kr);
2339 }
2340
2341 /*
2342 * Suspend the target task, and return a suspension token. The token
2343 * represents a reference on the suspended task.
2344 */
2345 kern_return_t
2346 task_suspend2(
2347 register task_t task,
2348 task_suspension_token_t *suspend_token)
2349 {
2350 kern_return_t kr;
2351
2352 kr = task_suspend_internal(task);
2353 if (kr != KERN_SUCCESS) {
2354 *suspend_token = TASK_NULL;
2355 return (kr);
2356 }
2357
2358 /*
2359 * Take a reference on the target task and return that to the caller
2360 * as a "suspension token," which can be converted into an SO right to
2361 * the now-suspended task's resume port.
2362 */
2363 task_reference_internal(task);
2364 *suspend_token = task;
2365
2366 return (KERN_SUCCESS);
2367 }
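/*
 * Hedged sketch of the token flow for in-kernel callers: the token is a
 * task reference that task_resume2() below both uses and consumes.
 *
 *	task_suspension_token_t token;
 *
 *	if (task_suspend2(task, &token) == KERN_SUCCESS) {
 *		// ... task is stopped; token carries a task reference ...
 *		(void) task_resume2(token);	// drops the hold and the
 *						// token's reference
 *	}
 */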
2368
2369 /*
2370 * Resume the task
2371 * (reference/token/port management is caller's responsibility).
2372 */
2373 kern_return_t
2374 task_resume_internal(
2375 register task_suspension_token_t task)
2376 {
2377 kern_return_t kr;
2378
2379 if (task == TASK_NULL || task == kernel_task)
2380 return (KERN_INVALID_ARGUMENT);
2381
2382 task_lock(task);
2383 kr = release_task_hold(task, TASK_HOLD_NORMAL);
2384 task_unlock(task);
2385 return (kr);
2386 }
2387
2388 /*
2389 * Resume the task using a suspension token. Consumes the token's ref.
2390 */
2391 kern_return_t
2392 task_resume2(
2393 register task_suspension_token_t task)
2394 {
2395 kern_return_t kr;
2396
2397 kr = task_resume_internal(task);
2398 task_suspension_token_deallocate(task);
2399
2400 return (kr);
2401 }
2402
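/*
 * task_suspension_notify:
 *
 * Handle IPC notifications arriving on a task's resume port: a dead
 * send-once right (e.g. a task_suspend2() token converted to a
 * send-once right) drops one normal hold, while a no-senders
 * notification (every legacy task_suspend() token gone) drops all
 * remaining legacy holds, or re-arms the notification if new send
 * rights appeared in the meantime.
 */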
2403 boolean_t
2404 task_suspension_notify(mach_msg_header_t *request_header)
2405 {
2406 ipc_port_t port = (ipc_port_t) request_header->msgh_remote_port;
2407 task_t task = convert_port_to_task_suspension_token(port);
2408 mach_msg_type_number_t not_count;
2409
2410 if (task == TASK_NULL || task == kernel_task)
2411 return TRUE; /* nothing to do */
2412
2413 switch (request_header->msgh_id) {
2414
2415 case MACH_NOTIFY_SEND_ONCE:
2416 /* release the hold held by this specific send-once right */
2417 task_lock(task);
2418 release_task_hold(task, TASK_HOLD_NORMAL);
2419 task_unlock(task);
2420 break;
2421
2422 case MACH_NOTIFY_NO_SENDERS:
2423 not_count = ((mach_no_senders_notification_t *)request_header)->not_count;
2424
2425 task_lock(task);
2426 ip_lock(port);
2427 if (port->ip_mscount == not_count) {
2428
2429 /* release all the [remaining] outstanding legacy holds */
2430 assert(port->ip_nsrequest == IP_NULL);
2431 ip_unlock(port);
2432 release_task_hold(task, TASK_HOLD_LEGACY_ALL);
2433 task_unlock(task);
2434
2435 } else if (port->ip_nsrequest == IP_NULL) {
2436 ipc_port_t old_notify;
2437
2438 task_unlock(task);
2439 /* new send rights, re-arm notification at current make-send count */
2440 ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
2441 assert(old_notify == IP_NULL);
2442 /* port unlocked */
2443 } else {
2444 ip_unlock(port);
2445 task_unlock(task);
2446 }
2447 break;
2448
2449 default:
2450 break;
2451 }
2452
2453 task_suspension_token_deallocate(task); /* drop token reference */
2454 return TRUE;
2455 }
2456
2457 kern_return_t
2458 task_pidsuspend_locked(task_t task)
2459 {
2460 kern_return_t kr;
2461
2462 if (task->pidsuspended) {
2463 kr = KERN_FAILURE;
2464 goto out;
2465 }
2466
2467 task->pidsuspended = TRUE;
2468
2469 kr = place_task_hold(task, TASK_HOLD_PIDSUSPEND);
2470 if (kr != KERN_SUCCESS) {
2471 task->pidsuspended = FALSE;
2472 }
2473 out:
2474 return(kr);
2475 }
2476
2477
2478 /*
2479 * task_pidsuspend:
2480 *
2481 * Suspends a task by placing a hold on its threads.
2482 *
2483 * Conditions:
2484 * The caller holds a reference to the task
2485 */
2486 kern_return_t
2487 task_pidsuspend(
2488 register task_t task)
2489 {
2490 kern_return_t kr;
2491
2492 if (task == TASK_NULL || task == kernel_task)
2493 return (KERN_INVALID_ARGUMENT);
2494
2495 task_lock(task);
2496
2497 kr = task_pidsuspend_locked(task);
2498
2499 task_unlock(task);
2500
2501 return (kr);
2502 }
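/*
 * Minimal sketch of the expected pairing; pidsuspended is a single flag,
 * so a second task_pidsuspend() fails rather than nesting:
 *
 *	if (task_pidsuspend(task) == KERN_SUCCESS) {
 *		// ... task held with TASK_HOLD_PIDSUSPEND ...
 *		(void) task_pidresume(task);
 *	}
 */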
2503
2504 /* If enabled, we bring all the frozen pages back in prior to resumption; otherwise, they're faulted back in on demand */
2505 #define THAW_ON_RESUME 1
2506
2507 /*
2508 * task_pidresume:
2509 * Resumes a previously suspended task.
2510 *
2511 * Conditions:
2512 * The caller holds a reference to the task
2513 */
2514 kern_return_t
2515 task_pidresume(
2516 register task_t task)
2517 {
2518 kern_return_t kr;
2519
2520 if (task == TASK_NULL || task == kernel_task)
2521 return (KERN_INVALID_ARGUMENT);
2522
2523 task_lock(task);
2524
2525 #if (CONFIG_FREEZE && THAW_ON_RESUME)
2526
2527 while (task->changing_freeze_state) {
2528
2529 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
2530 task_unlock(task);
2531 thread_block(THREAD_CONTINUE_NULL);
2532
2533 task_lock(task);
2534 }
2535 task->changing_freeze_state = TRUE;
2536 #endif
2537
2538 kr = release_task_hold(task, TASK_HOLD_PIDSUSPEND);
2539
2540 task_unlock(task);
2541
2542 #if (CONFIG_FREEZE && THAW_ON_RESUME)
2543 if ((kr == KERN_SUCCESS) && (task->frozen == TRUE)) {
2544
2545 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
2546
2547 kr = KERN_SUCCESS;
2548 } else {
2549
2550 kr = vm_map_thaw(task->map);
2551 }
2552 }
2553 task_lock(task);
2554
2555 if (kr == KERN_SUCCESS)
2556 task->frozen = FALSE;
2557 task->changing_freeze_state = FALSE;
2558 thread_wakeup(&task->changing_freeze_state);
2559
2560 task_unlock(task);
2561 #endif
2562
2563 return (kr);
2564 }
2565
2566 #if CONFIG_FREEZE
2567
2568 /*
2569 * task_freeze:
2570 *
2571 * Freeze a task.
2572 *
2573 * Conditions:
2574 * The caller holds a reference to the task
2575 */
2576 extern void vm_wake_compactor_swapper();
2577 extern queue_head_t c_swapout_list_head;
2578
2579 kern_return_t
2580 task_freeze(
2581 register task_t task,
2582 uint32_t *purgeable_count,
2583 uint32_t *wired_count,
2584 uint32_t *clean_count,
2585 uint32_t *dirty_count,
2586 uint32_t dirty_budget,
2587 boolean_t *shared,
2588 boolean_t walk_only)
2589 {
2590 kern_return_t kr;
2591
2592 if (task == TASK_NULL || task == kernel_task)
2593 return (KERN_INVALID_ARGUMENT);
2594
2595 task_lock(task);
2596
2597 while (task->changing_freeze_state) {
2598
2599 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
2600 task_unlock(task);
2601 thread_block(THREAD_CONTINUE_NULL);
2602
2603 task_lock(task);
2604 }
2605 if (task->frozen) {
2606 task_unlock(task);
2607 return (KERN_FAILURE);
2608 }
2609 task->changing_freeze_state = TRUE;
2610
2611 task_unlock(task);
2612
2613 if (walk_only) {
2614 kr = vm_map_freeze_walk(task->map, purgeable_count, wired_count, clean_count, dirty_count, dirty_budget, shared);
2615 } else {
2616 kr = vm_map_freeze(task->map, purgeable_count, wired_count, clean_count, dirty_count, dirty_budget, shared);
2617 }
2618
2619 task_lock(task);
2620
2621 if (walk_only == FALSE && kr == KERN_SUCCESS)
2622 task->frozen = TRUE;
2623 task->changing_freeze_state = FALSE;
2624 thread_wakeup(&task->changing_freeze_state);
2625
2626 task_unlock(task);
2627
2628 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
2629 vm_wake_compactor_swapper();
2630 /*
2631 * We do an explicit wakeup of the swapout thread here
2632 * because the compact_and_swap routines don't have
2633 * knowledge about these kind of "per-task packed c_segs"
2634 * and so will not be evaluating whether we need to do
2635 * a wakeup there.
2636 */
2637 thread_wakeup((event_t)&c_swapout_list_head);
2638 }
2639
2640 return (kr);
2641 }
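/*
 * Rough outline of the changing_freeze_state handshake used above and in
 * task_thaw() below; it keeps the task lock from being held across the
 * potentially long vm_map_freeze()/vm_map_thaw() calls:
 *
 *	task_lock(task);
 *	while (task->changing_freeze_state)	// someone else mid-change
 *		wait and re-lock;
 *	task->changing_freeze_state = TRUE;
 *	task_unlock(task);
 *	... vm_map_freeze() or vm_map_thaw() ...
 *	task_lock(task);
 *	update task->frozen;
 *	task->changing_freeze_state = FALSE;
 *	thread_wakeup(&task->changing_freeze_state);
 *	task_unlock(task);
 */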
2642
2643 /*
2644 * task_thaw:
2645 *
2646 * Thaw a currently frozen task.
2647 *
2648 * Conditions:
2649 * The caller holds a reference to the task
2650 */
2651 kern_return_t
2652 task_thaw(
2653 register task_t task)
2654 {
2655 kern_return_t kr;
2656
2657 if (task == TASK_NULL || task == kernel_task)
2658 return (KERN_INVALID_ARGUMENT);
2659
2660 task_lock(task);
2661
2662 while (task->changing_freeze_state) {
2663
2664 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
2665 task_unlock(task);
2666 thread_block(THREAD_CONTINUE_NULL);
2667
2668 task_lock(task);
2669 }
2670 if (!task->frozen) {
2671 task_unlock(task);
2672 return (KERN_FAILURE);
2673 }
2674 task->changing_freeze_state = TRUE;
2675
2676 if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE) {
2677 task_unlock(task);
2678
2679 kr = vm_map_thaw(task->map);
2680
2681 task_lock(task);
2682
2683 if (kr == KERN_SUCCESS)
2684 task->frozen = FALSE;
2685 } else {
2686 task->frozen = FALSE;
2687 kr = KERN_SUCCESS;
2688 }
2689
2690 task->changing_freeze_state = FALSE;
2691 thread_wakeup(&task->changing_freeze_state);
2692
2693 task_unlock(task);
2694
2695 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
2696 vm_wake_compactor_swapper();
2697 }
2698
2699 return (kr);
2700 }
2701
2702 #endif /* CONFIG_FREEZE */
2703
2704 kern_return_t
2705 host_security_set_task_token(
2706 host_security_t host_security,
2707 task_t task,
2708 security_token_t sec_token,
2709 audit_token_t audit_token,
2710 host_priv_t host_priv)
2711 {
2712 ipc_port_t host_port;
2713 kern_return_t kr;
2714
2715 if (task == TASK_NULL)
2716 return(KERN_INVALID_ARGUMENT);
2717
2718 if (host_security == HOST_NULL)
2719 return(KERN_INVALID_SECURITY);
2720
2721 task_lock(task);
2722 task->sec_token = sec_token;
2723 task->audit_token = audit_token;
2724
2725 task_unlock(task);
2726
2727 if (host_priv != HOST_PRIV_NULL) {
2728 kr = host_get_host_priv_port(host_priv, &host_port);
2729 } else {
2730 kr = host_get_host_port(host_priv_self(), &host_port);
2731 }
2732 assert(kr == KERN_SUCCESS);
2733 kr = task_set_special_port(task, TASK_HOST_PORT, host_port);
2734 return(kr);
2735 }
2736
2737 kern_return_t
2738 task_send_trace_memory(
2739 task_t target_task,
2740 __unused uint32_t pid,
2741 __unused uint64_t uniqueid)
2742 {
2743 kern_return_t kr = KERN_INVALID_ARGUMENT;
2744 if (target_task == TASK_NULL)
2745 return (KERN_INVALID_ARGUMENT);
2746
2747 #if CONFIG_ATM
2748 kr = atm_send_proc_inspect_notification(target_task,
2749 pid,
2750 uniqueid);
2751
2752 #endif
2753 return (kr);
2754 }
2755 /*
2756 * This routine was added, pretty much exclusively, for registering the
2757 * RPC glue vector for in-kernel short circuited tasks. Rather than
2758 * removing it completely, I have only disabled that feature (which was
2759 * the only feature at the time). It just appears that we are going to
2760 * want to add some user data to tasks in the future (e.g. bsd info,
2761 * task names, etc...), so I left it in the formal task interface.
2762 */
2763 kern_return_t
2764 task_set_info(
2765 task_t task,
2766 task_flavor_t flavor,
2767 __unused task_info_t task_info_in, /* pointer to IN array */
2768 __unused mach_msg_type_number_t task_info_count)
2769 {
2770 if (task == TASK_NULL)
2771 return(KERN_INVALID_ARGUMENT);
2772
2773 switch (flavor) {
2774
2775 #if CONFIG_ATM
2776 case TASK_TRACE_MEMORY_INFO:
2777 {
2778 if (task_info_count != TASK_TRACE_MEMORY_INFO_COUNT)
2779 return (KERN_INVALID_ARGUMENT);
2780
2781 assert(task_info_in != NULL);
2782 task_trace_memory_info_t mem_info;
2783 mem_info = (task_trace_memory_info_t) task_info_in;
2784 kern_return_t kr = atm_register_trace_memory(task,
2785 mem_info->user_memory_address,
2786 mem_info->buffer_size);
2787 return kr;
2788 break;
2789 }
2790
2791 #endif
2792 default:
2793 return (KERN_INVALID_ARGUMENT);
2794 }
2795 return (KERN_SUCCESS);
2796 }
2797
2798 int radar_20146450 = 1;
2799 kern_return_t
2800 task_info(
2801 task_t task,
2802 task_flavor_t flavor,
2803 task_info_t task_info_out,
2804 mach_msg_type_number_t *task_info_count)
2805 {
2806 kern_return_t error = KERN_SUCCESS;
2807
2808 if (task == TASK_NULL)
2809 return (KERN_INVALID_ARGUMENT);
2810
2811 task_lock(task);
2812
2813 if ((task != current_task()) && (!task->active)) {
2814 task_unlock(task);
2815 return (KERN_INVALID_ARGUMENT);
2816 }
2817
2818 switch (flavor) {
2819
2820 case TASK_BASIC_INFO_32:
2821 case TASK_BASIC2_INFO_32:
2822 {
2823 task_basic_info_32_t basic_info;
2824 vm_map_t map;
2825 clock_sec_t secs;
2826 clock_usec_t usecs;
2827
2828 if (*task_info_count < TASK_BASIC_INFO_32_COUNT) {
2829 error = KERN_INVALID_ARGUMENT;
2830 break;
2831 }
2832
2833 basic_info = (task_basic_info_32_t)task_info_out;
2834
2835 map = (task == kernel_task)? kernel_map: task->map;
2836 basic_info->virtual_size = (typeof(basic_info->virtual_size))map->size;
2837 if (flavor == TASK_BASIC2_INFO_32) {
2838 /*
2839 * The "BASIC2" flavor gets the maximum resident
2840 * size instead of the current resident size...
2841 */
2842 basic_info->resident_size = pmap_resident_max(map->pmap);
2843 } else {
2844 basic_info->resident_size = pmap_resident_count(map->pmap);
2845 }
2846 basic_info->resident_size *= PAGE_SIZE;
2847
2848 basic_info->policy = ((task != kernel_task)?
2849 POLICY_TIMESHARE: POLICY_RR);
2850 basic_info->suspend_count = task->user_stop_count;
2851
2852 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
2853 basic_info->user_time.seconds =
2854 (typeof(basic_info->user_time.seconds))secs;
2855 basic_info->user_time.microseconds = usecs;
2856
2857 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
2858 basic_info->system_time.seconds =
2859 (typeof(basic_info->system_time.seconds))secs;
2860 basic_info->system_time.microseconds = usecs;
2861
2862 *task_info_count = TASK_BASIC_INFO_32_COUNT;
2863 break;
2864 }
2865
2866 case TASK_BASIC_INFO_64:
2867 {
2868 task_basic_info_64_t basic_info;
2869 vm_map_t map;
2870 clock_sec_t secs;
2871 clock_usec_t usecs;
2872
2873 if (*task_info_count < TASK_BASIC_INFO_64_COUNT) {
2874 error = KERN_INVALID_ARGUMENT;
2875 break;
2876 }
2877
2878 basic_info = (task_basic_info_64_t)task_info_out;
2879
2880 map = (task == kernel_task)? kernel_map: task->map;
2881 basic_info->virtual_size = map->size;
2882 basic_info->resident_size =
2883 (mach_vm_size_t)(pmap_resident_count(map->pmap))
2884 * PAGE_SIZE_64;
2885
2886 basic_info->policy = ((task != kernel_task)?
2887 POLICY_TIMESHARE: POLICY_RR);
2888 basic_info->suspend_count = task->user_stop_count;
2889
2890 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
2891 basic_info->user_time.seconds =
2892 (typeof(basic_info->user_time.seconds))secs;
2893 basic_info->user_time.microseconds = usecs;
2894
2895 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
2896 basic_info->system_time.seconds =
2897 (typeof(basic_info->system_time.seconds))secs;
2898 basic_info->system_time.microseconds = usecs;
2899
2900 *task_info_count = TASK_BASIC_INFO_64_COUNT;
2901 break;
2902 }
2903
2904 case MACH_TASK_BASIC_INFO:
2905 {
2906 mach_task_basic_info_t basic_info;
2907 vm_map_t map;
2908 clock_sec_t secs;
2909 clock_usec_t usecs;
2910
2911 if (*task_info_count < MACH_TASK_BASIC_INFO_COUNT) {
2912 error = KERN_INVALID_ARGUMENT;
2913 break;
2914 }
2915
2916 basic_info = (mach_task_basic_info_t)task_info_out;
2917
2918 map = (task == kernel_task) ? kernel_map : task->map;
2919
2920 basic_info->virtual_size = map->size;
2921
2922 basic_info->resident_size =
2923 (mach_vm_size_t)(pmap_resident_count(map->pmap));
2924 basic_info->resident_size *= PAGE_SIZE_64;
2925
2926 basic_info->resident_size_max =
2927 (mach_vm_size_t)(pmap_resident_max(map->pmap));
2928 basic_info->resident_size_max *= PAGE_SIZE_64;
2929
2930 basic_info->policy = ((task != kernel_task) ?
2931 POLICY_TIMESHARE : POLICY_RR);
2932
2933 basic_info->suspend_count = task->user_stop_count;
2934
2935 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
2936 basic_info->user_time.seconds =
2937 (typeof(basic_info->user_time.seconds))secs;
2938 basic_info->user_time.microseconds = usecs;
2939
2940 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
2941 basic_info->system_time.seconds =
2942 (typeof(basic_info->system_time.seconds))secs;
2943 basic_info->system_time.microseconds = usecs;
2944
2945 *task_info_count = MACH_TASK_BASIC_INFO_COUNT;
2946 break;
2947 }
2948
2949 case TASK_THREAD_TIMES_INFO:
2950 {
2951 register task_thread_times_info_t times_info;
2952 register thread_t thread;
2953
2954 if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) {
2955 error = KERN_INVALID_ARGUMENT;
2956 break;
2957 }
2958
2959 times_info = (task_thread_times_info_t) task_info_out;
2960 times_info->user_time.seconds = 0;
2961 times_info->user_time.microseconds = 0;
2962 times_info->system_time.seconds = 0;
2963 times_info->system_time.microseconds = 0;
2964
2965
2966 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2967 time_value_t user_time, system_time;
2968
2969 if (thread->options & TH_OPT_IDLE_THREAD)
2970 continue;
2971
2972 thread_read_times(thread, &user_time, &system_time);
2973
2974 time_value_add(&times_info->user_time, &user_time);
2975 time_value_add(&times_info->system_time, &system_time);
2976 }
2977
2978 *task_info_count = TASK_THREAD_TIMES_INFO_COUNT;
2979 break;
2980 }
2981
2982 case TASK_ABSOLUTETIME_INFO:
2983 {
2984 task_absolutetime_info_t info;
2985 register thread_t thread;
2986
2987 if (*task_info_count < TASK_ABSOLUTETIME_INFO_COUNT) {
2988 error = KERN_INVALID_ARGUMENT;
2989 break;
2990 }
2991
2992 info = (task_absolutetime_info_t)task_info_out;
2993 info->threads_user = info->threads_system = 0;
2994
2995
2996 info->total_user = task->total_user_time;
2997 info->total_system = task->total_system_time;
2998
2999 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3000 uint64_t tval;
3001 spl_t x;
3002
3003 if (thread->options & TH_OPT_IDLE_THREAD)
3004 continue;
3005
3006 x = splsched();
3007 thread_lock(thread);
3008
3009 tval = timer_grab(&thread->user_timer);
3010 info->threads_user += tval;
3011 info->total_user += tval;
3012
3013 tval = timer_grab(&thread->system_timer);
3014 if (thread->precise_user_kernel_time) {
3015 info->threads_system += tval;
3016 info->total_system += tval;
3017 } else {
3018 /* system_timer may represent either sys or user */
3019 info->threads_user += tval;
3020 info->total_user += tval;
3021 }
3022
3023 thread_unlock(thread);
3024 splx(x);
3025 }
3026
3027
3028 *task_info_count = TASK_ABSOLUTETIME_INFO_COUNT;
3029 break;
3030 }
3031
3032 case TASK_DYLD_INFO:
3033 {
3034 task_dyld_info_t info;
3035
3036 /*
3037 * We added the format field to TASK_DYLD_INFO output. For
3038 * temporary backward compatibility, accept the fact that
3039 * clients may ask for the old version - distinguished by the
3040 * size of the expected result structure.
3041 */
3042 #define TASK_LEGACY_DYLD_INFO_COUNT \
3043 offsetof(struct task_dyld_info, all_image_info_format)/sizeof(natural_t)
3044
3045 if (*task_info_count < TASK_LEGACY_DYLD_INFO_COUNT) {
3046 error = KERN_INVALID_ARGUMENT;
3047 break;
3048 }
3049
3050 info = (task_dyld_info_t)task_info_out;
3051 info->all_image_info_addr = task->all_image_info_addr;
3052 info->all_image_info_size = task->all_image_info_size;
3053
3054 /* only set format on output for those expecting it */
3055 if (*task_info_count >= TASK_DYLD_INFO_COUNT) {
3056 info->all_image_info_format = task_has_64BitAddr(task) ?
3057 TASK_DYLD_ALL_IMAGE_INFO_64 :
3058 TASK_DYLD_ALL_IMAGE_INFO_32 ;
3059 *task_info_count = TASK_DYLD_INFO_COUNT;
3060 } else {
3061 *task_info_count = TASK_LEGACY_DYLD_INFO_COUNT;
3062 }
3063 break;
3064 }
3065
3066 case TASK_EXTMOD_INFO:
3067 {
3068 task_extmod_info_t info;
3069 void *p;
3070
3071 if (*task_info_count < TASK_EXTMOD_INFO_COUNT) {
3072 error = KERN_INVALID_ARGUMENT;
3073 break;
3074 }
3075
3076 info = (task_extmod_info_t)task_info_out;
3077
3078 p = get_bsdtask_info(task);
3079 if (p) {
3080 proc_getexecutableuuid(p, info->task_uuid, sizeof(info->task_uuid));
3081 } else {
3082 bzero(info->task_uuid, sizeof(info->task_uuid));
3083 }
3084 info->extmod_statistics = task->extmod_statistics;
3085 *task_info_count = TASK_EXTMOD_INFO_COUNT;
3086
3087 break;
3088 }
3089
3090 case TASK_KERNELMEMORY_INFO:
3091 {
3092 task_kernelmemory_info_t tkm_info;
3093 ledger_amount_t credit, debit;
3094
3095 if (*task_info_count < TASK_KERNELMEMORY_INFO_COUNT) {
3096 error = KERN_INVALID_ARGUMENT;
3097 break;
3098 }
3099
3100 tkm_info = (task_kernelmemory_info_t) task_info_out;
3101 tkm_info->total_palloc = 0;
3102 tkm_info->total_pfree = 0;
3103 tkm_info->total_salloc = 0;
3104 tkm_info->total_sfree = 0;
3105
3106 if (task == kernel_task) {
3107 /*
3108 * All shared allocs/frees from other tasks count against
3109 * the kernel private memory usage. If we are looking up
3110 * info for the kernel task, gather from everywhere.
3111 */
3112 task_unlock(task);
3113
3114 /* start by accounting for all the terminated tasks against the kernel */
3115 tkm_info->total_palloc = tasks_tkm_private.alloc + tasks_tkm_shared.alloc;
3116 tkm_info->total_pfree = tasks_tkm_private.free + tasks_tkm_shared.free;
3117
3118 /* count all other task/thread shared alloc/free against the kernel */
3119 lck_mtx_lock(&tasks_threads_lock);
3120
3121 /* XXX this really shouldn't be using the function parameter 'task' as a local var! */
3122 queue_iterate(&tasks, task, task_t, tasks) {
3123 if (task == kernel_task) {
3124 if (ledger_get_entries(task->ledger,
3125 task_ledgers.tkm_private, &credit,
3126 &debit) == KERN_SUCCESS) {
3127 tkm_info->total_palloc += credit;
3128 tkm_info->total_pfree += debit;
3129 }
3130 }
3131 if (!ledger_get_entries(task->ledger,
3132 task_ledgers.tkm_shared, &credit, &debit)) {
3133 tkm_info->total_palloc += credit;
3134 tkm_info->total_pfree += debit;
3135 }
3136 }
3137 lck_mtx_unlock(&tasks_threads_lock);
3138 } else {
3139 if (!ledger_get_entries(task->ledger,
3140 task_ledgers.tkm_private, &credit, &debit)) {
3141 tkm_info->total_palloc = credit;
3142 tkm_info->total_pfree = debit;
3143 }
3144 if (!ledger_get_entries(task->ledger,
3145 task_ledgers.tkm_shared, &credit, &debit)) {
3146 tkm_info->total_salloc = credit;
3147 tkm_info->total_sfree = debit;
3148 }
3149 task_unlock(task);
3150 }
3151
3152 *task_info_count = TASK_KERNELMEMORY_INFO_COUNT;
3153 return KERN_SUCCESS;
3154 }
3155
3156 /* OBSOLETE */
3157 case TASK_SCHED_FIFO_INFO:
3158 {
3159
3160 if (*task_info_count < POLICY_FIFO_BASE_COUNT) {
3161 error = KERN_INVALID_ARGUMENT;
3162 break;
3163 }
3164
3165 error = KERN_INVALID_POLICY;
3166 break;
3167 }
3168
3169 /* OBSOLETE */
3170 case TASK_SCHED_RR_INFO:
3171 {
3172 register policy_rr_base_t rr_base;
3173 uint32_t quantum_time;
3174 uint64_t quantum_ns;
3175
3176 if (*task_info_count < POLICY_RR_BASE_COUNT) {
3177 error = KERN_INVALID_ARGUMENT;
3178 break;
3179 }
3180
3181 rr_base = (policy_rr_base_t) task_info_out;
3182
3183 if (task != kernel_task) {
3184 error = KERN_INVALID_POLICY;
3185 break;
3186 }
3187
3188 rr_base->base_priority = task->priority;
3189
3190 quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
3191 absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
3192
3193 rr_base->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
3194
3195 *task_info_count = POLICY_RR_BASE_COUNT;
3196 break;
3197 }
3198
3199 /* OBSOLETE */
3200 case TASK_SCHED_TIMESHARE_INFO:
3201 {
3202 register policy_timeshare_base_t ts_base;
3203
3204 if (*task_info_count < POLICY_TIMESHARE_BASE_COUNT) {
3205 error = KERN_INVALID_ARGUMENT;
3206 break;
3207 }
3208
3209 ts_base = (policy_timeshare_base_t) task_info_out;
3210
3211 if (task == kernel_task) {
3212 error = KERN_INVALID_POLICY;
3213 break;
3214 }
3215
3216 ts_base->base_priority = task->priority;
3217
3218 *task_info_count = POLICY_TIMESHARE_BASE_COUNT;
3219 break;
3220 }
3221
3222 case TASK_SECURITY_TOKEN:
3223 {
3224 register security_token_t *sec_token_p;
3225
3226 if (*task_info_count < TASK_SECURITY_TOKEN_COUNT) {
3227 error = KERN_INVALID_ARGUMENT;
3228 break;
3229 }
3230
3231 sec_token_p = (security_token_t *) task_info_out;
3232
3233 *sec_token_p = task->sec_token;
3234
3235 *task_info_count = TASK_SECURITY_TOKEN_COUNT;
3236 break;
3237 }
3238
3239 case TASK_AUDIT_TOKEN:
3240 {
3241 register audit_token_t *audit_token_p;
3242
3243 if (*task_info_count < TASK_AUDIT_TOKEN_COUNT) {
3244 error = KERN_INVALID_ARGUMENT;
3245 break;
3246 }
3247
3248 audit_token_p = (audit_token_t *) task_info_out;
3249
3250 *audit_token_p = task->audit_token;
3251
3252 *task_info_count = TASK_AUDIT_TOKEN_COUNT;
3253 break;
3254 }
3255
3256 case TASK_SCHED_INFO:
3257 error = KERN_INVALID_ARGUMENT;
3258 break;
3259
3260 case TASK_EVENTS_INFO:
3261 {
3262 register task_events_info_t events_info;
3263 register thread_t thread;
3264
3265 if (*task_info_count < TASK_EVENTS_INFO_COUNT) {
3266 error = KERN_INVALID_ARGUMENT;
3267 break;
3268 }
3269
3270 events_info = (task_events_info_t) task_info_out;
3271
3272
3273 events_info->faults = task->faults;
3274 events_info->pageins = task->pageins;
3275 events_info->cow_faults = task->cow_faults;
3276 events_info->messages_sent = task->messages_sent;
3277 events_info->messages_received = task->messages_received;
3278 events_info->syscalls_mach = task->syscalls_mach;
3279 events_info->syscalls_unix = task->syscalls_unix;
3280
3281 events_info->csw = task->c_switch;
3282
3283 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3284 events_info->csw += thread->c_switch;
3285 events_info->syscalls_mach += thread->syscalls_mach;
3286 events_info->syscalls_unix += thread->syscalls_unix;
3287 }
3288
3289
3290 *task_info_count = TASK_EVENTS_INFO_COUNT;
3291 break;
3292 }
3293 case TASK_AFFINITY_TAG_INFO:
3294 {
3295 if (*task_info_count < TASK_AFFINITY_TAG_INFO_COUNT) {
3296 error = KERN_INVALID_ARGUMENT;
3297 break;
3298 }
3299
3300 error = task_affinity_info(task, task_info_out, task_info_count);
3301 break;
3302 }
3303 case TASK_POWER_INFO:
3304 {
3305 if (*task_info_count < TASK_POWER_INFO_COUNT) {
3306 error = KERN_INVALID_ARGUMENT;
3307 break;
3308 }
3309
3310 task_power_info_locked(task, (task_power_info_t)task_info_out, NULL);
3311 break;
3312 }
3313
3314 case TASK_POWER_INFO_V2:
3315 {
3316 if (*task_info_count < TASK_POWER_INFO_V2_COUNT) {
3317 error = KERN_INVALID_ARGUMENT;
3318 break;
3319 }
3320 task_power_info_v2_t tpiv2 = (task_power_info_v2_t) task_info_out;
3321 task_power_info_locked(task, &tpiv2->cpu_energy, &tpiv2->gpu_energy);
3322 break;
3323 }
3324
3325 case TASK_VM_INFO:
3326 case TASK_VM_INFO_PURGEABLE:
3327 {
3328 task_vm_info_t vm_info;
3329 vm_map_t map;
3330
3331 if (*task_info_count < TASK_VM_INFO_REV0_COUNT) {
3332 error = KERN_INVALID_ARGUMENT;
3333 break;
3334 }
3335
3336 vm_info = (task_vm_info_t)task_info_out;
3337
3338 if (task == kernel_task) {
3339 map = kernel_map;
3340 /* no lock */
3341 } else {
3342 map = task->map;
3343 vm_map_lock_read(map);
3344 }
3345
3346 vm_info->virtual_size = (typeof(vm_info->virtual_size))map->size;
3347 vm_info->region_count = map->hdr.nentries;
3348 vm_info->page_size = vm_map_page_size(map);
3349
3350 vm_info->resident_size = pmap_resident_count(map->pmap);
3351 vm_info->resident_size *= PAGE_SIZE;
3352 vm_info->resident_size_peak = pmap_resident_max(map->pmap);
3353 vm_info->resident_size_peak *= PAGE_SIZE;
3354
3355 #define _VM_INFO(_name) \
3356 vm_info->_name = ((mach_vm_size_t) map->pmap->stats._name) * PAGE_SIZE
3357
3358 _VM_INFO(device);
3359 _VM_INFO(device_peak);
3360 _VM_INFO(external);
3361 _VM_INFO(external_peak);
3362 _VM_INFO(internal);
3363 _VM_INFO(internal_peak);
3364 _VM_INFO(reusable);
3365 _VM_INFO(reusable_peak);
3366 _VM_INFO(compressed);
3367 _VM_INFO(compressed_peak);
3368 _VM_INFO(compressed_lifetime);
3369
3370 vm_info->purgeable_volatile_pmap = 0;
3371 vm_info->purgeable_volatile_resident = 0;
3372 vm_info->purgeable_volatile_virtual = 0;
3373 if (task == kernel_task) {
3374 /*
3375 * We do not maintain the detailed stats for the
3376 * kernel_pmap, so just count everything as
3377 * "internal"...
3378 */
3379 vm_info->internal = vm_info->resident_size;
3380 /*
3381 * ... but since the memory held by the VM compressor
3382 * in the kernel address space ought to be attributed
3383 * to user-space tasks, we subtract it from "internal"
3384 * to give memory reporting tools a more accurate idea
3385 * of what the kernel itself is actually using, instead
3386 * of making it look like the kernel is leaking memory
3387 * when the system is under memory pressure.
3388 */
3389 vm_info->internal -= (VM_PAGE_COMPRESSOR_COUNT *
3390 PAGE_SIZE);
3391 } else {
3392 mach_vm_size_t volatile_virtual_size;
3393 mach_vm_size_t volatile_resident_size;
3394 mach_vm_size_t volatile_compressed_size;
3395 mach_vm_size_t volatile_pmap_size;
3396 mach_vm_size_t volatile_compressed_pmap_size;
3397 kern_return_t kr;
3398
3399 if (flavor == TASK_VM_INFO_PURGEABLE) {
3400 kr = vm_map_query_volatile(
3401 map,
3402 &volatile_virtual_size,
3403 &volatile_resident_size,
3404 &volatile_compressed_size,
3405 &volatile_pmap_size,
3406 &volatile_compressed_pmap_size);
3407 if (kr == KERN_SUCCESS) {
3408 vm_info->purgeable_volatile_pmap =
3409 volatile_pmap_size;
3410 if (radar_20146450) {
3411 vm_info->compressed -=
3412 volatile_compressed_pmap_size;
3413 }
3414 vm_info->purgeable_volatile_resident =
3415 volatile_resident_size;
3416 vm_info->purgeable_volatile_virtual =
3417 volatile_virtual_size;
3418 }
3419 }
3420 vm_map_unlock_read(map);
3421 }
3422
3423 if (*task_info_count >= TASK_VM_INFO_COUNT) {
3424 vm_info->phys_footprint = 0;
3425 *task_info_count = TASK_VM_INFO_COUNT;
3426 } else {
3427 *task_info_count = TASK_VM_INFO_REV0_COUNT;
3428 }
3429
3430 break;
3431 }
3432
3433 case TASK_WAIT_STATE_INFO:
3434 {
3435 /*
3436 * Deprecated flavor. Currently allowing some results until all users
3437 * stop calling it. The results may not be accurate.
3438 */
3439 task_wait_state_info_t wait_state_info;
3440 uint64_t total_sfi_ledger_val = 0;
3441
3442 if (*task_info_count < TASK_WAIT_STATE_INFO_COUNT) {
3443 error = KERN_INVALID_ARGUMENT;
3444 break;
3445 }
3446
3447 wait_state_info = (task_wait_state_info_t) task_info_out;
3448
3449 wait_state_info->total_wait_state_time = 0;
3450 bzero(wait_state_info->_reserved, sizeof(wait_state_info->_reserved));
3451
3452 #if CONFIG_SCHED_SFI
3453 int i, prev_lentry = -1;
3454 int64_t val_credit, val_debit;
3455
3456 for (i = 0; i < MAX_SFI_CLASS_ID; i++){
3457 val_credit = 0;
3458 /*
3459 * checking with prev_lentry != entry ensures adjacent classes
3460 * which share the same ledger do not add wait times twice.
3461 * Note: Use ledger() call to get data for each individual sfi class.
3462 */
3463 if (prev_lentry != task_ledgers.sfi_wait_times[i] &&
3464 KERN_SUCCESS == ledger_get_entries(task->ledger,
3465 task_ledgers.sfi_wait_times[i], &val_credit, &val_debit)) {
3466 total_sfi_ledger_val += val_credit;
3467 }
3468 prev_lentry = task_ledgers.sfi_wait_times[i];
3469 }
3470
3471 #endif /* CONFIG_SCHED_SFI */
3472 wait_state_info->total_wait_sfi_state_time = total_sfi_ledger_val;
3473 *task_info_count = TASK_WAIT_STATE_INFO_COUNT;
3474
3475 break;
3476 }
3477 case TASK_VM_INFO_PURGEABLE_ACCOUNT:
3478 {
3479 #if DEVELOPMENT || DEBUG
3480 pvm_account_info_t acnt_info;
3481
3482 if (*task_info_count < PVM_ACCOUNT_INFO_COUNT) {
3483 error = KERN_INVALID_ARGUMENT;
3484 break;
3485 }
3486
3487 if (task_info_out == NULL) {
3488 error = KERN_INVALID_ARGUMENT;
3489 break;
3490 }
3491
3492 acnt_info = (pvm_account_info_t) task_info_out;
3493
3494 error = vm_purgeable_account(task, acnt_info);
3495
3496 *task_info_count = PVM_ACCOUNT_INFO_COUNT;
3497
3498 break;
3499 #else /* DEVELOPMENT || DEBUG */
3500 error = KERN_NOT_SUPPORTED;
3501 break;
3502 #endif /* DEVELOPMENT || DEBUG */
3503 }
3504 case TASK_FLAGS_INFO:
3505 {
3506 task_flags_info_t flags_info;
3507
3508 if (*task_info_count < TASK_FLAGS_INFO_COUNT) {
3509 error = KERN_INVALID_ARGUMENT;
3510 break;
3511 }
3512
3513 flags_info = (task_flags_info_t)task_info_out;
3514
3515 /* only publish the 64-bit flag of the task */
3516 flags_info->flags = task->t_flags & TF_64B_ADDR;
3517
3518 *task_info_count = TASK_FLAGS_INFO_COUNT;
3519 break;
3520 }
3521
3522 case TASK_DEBUG_INFO_INTERNAL:
3523 {
3524 #if DEVELOPMENT || DEBUG
3525 task_debug_info_internal_t dbg_info;
3526 if (*task_info_count < TASK_DEBUG_INFO_INTERNAL_COUNT) {
3527 error = KERN_NOT_SUPPORTED;
3528 break;
3529 }
3530
3531 if (task_info_out == NULL) {
3532 error = KERN_INVALID_ARGUMENT;
3533 break;
3534 }
3535 dbg_info = (task_debug_info_internal_t) task_info_out;
3536 dbg_info->ipc_space_size = 0;
3537 if (task->itk_space){
3538 dbg_info->ipc_space_size = task->itk_space->is_table_size;
3539 }
3540
3541 error = KERN_SUCCESS;
3542 *task_info_count = TASK_DEBUG_INFO_INTERNAL_COUNT;
3543 break;
3544 #else /* DEVELOPMENT || DEBUG */
3545 error = KERN_NOT_SUPPORTED;
3546 break;
3547 #endif /* DEVELOPMENT || DEBUG */
3548 }
3549 default:
3550 error = KERN_INVALID_ARGUMENT;
3551 }
3552
3553 task_unlock(task);
3554 return (error);
3555 }
3556
3557 /*
3558 * task_power_info
3559 *
3560 * Returns power stats for the task.
3561 * Note: Called with task locked.
3562 */
3563 void
3564 task_power_info_locked(
3565 task_t task,
3566 task_power_info_t info,
3567 gpu_energy_data_t ginfo)
3568 {
3569 thread_t thread;
3570 ledger_amount_t tmp;
3571
3572 task_lock_assert_owned(task);
3573
3574 ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
3575 (ledger_amount_t *)&info->task_interrupt_wakeups, &tmp);
3576 ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
3577 (ledger_amount_t *)&info->task_platform_idle_wakeups, &tmp);
3578
3579 info->task_timer_wakeups_bin_1 = task->task_timer_wakeups_bin_1;
3580 info->task_timer_wakeups_bin_2 = task->task_timer_wakeups_bin_2;
3581
3582 info->total_user = task->total_user_time;
3583 info->total_system = task->total_system_time;
3584
3585 if (ginfo) {
3586 ginfo->task_gpu_utilisation = task->task_gpu_ns;
3587 }
3588
3589 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3590 uint64_t tval;
3591 spl_t x;
3592
3593 if (thread->options & TH_OPT_IDLE_THREAD)
3594 continue;
3595
3596 x = splsched();
3597 thread_lock(thread);
3598
3599 info->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
3600 info->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
3601
3602 tval = timer_grab(&thread->user_timer);
3603 info->total_user += tval;
3604
3605 tval = timer_grab(&thread->system_timer);
3606 if (thread->precise_user_kernel_time) {
3607 info->total_system += tval;
3608 } else {
3609 /* system_timer may represent either sys or user */
3610 info->total_user += tval;
3611 }
3612
3613 if (ginfo) {
3614 ginfo->task_gpu_utilisation += ml_gpu_stat(thread);
3615 }
3616 thread_unlock(thread);
3617 splx(x);
3618 }
3619 }
3620
3621 /*
3622 * task_gpu_utilisation
3623 *
3624 * Returns the total gpu time used by all the threads of the task
3625 * (both dead and alive)
3626 */
3627 uint64_t
3628 task_gpu_utilisation(
3629 task_t task)
3630 {
3631 uint64_t gpu_time = 0;
3632 thread_t thread;
3633
3634 task_lock(task);
3635 gpu_time += task->task_gpu_ns;
3636
3637 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3638 spl_t x;
3639 x = splsched();
3640 thread_lock(thread);
3641 gpu_time += ml_gpu_stat(thread);
3642 thread_unlock(thread);
3643 splx(x);
3644 }
3645
3646 task_unlock(task);
3647 return gpu_time;
3648 }
3649
3650 kern_return_t
3651 task_purgable_info(
3652 task_t task,
3653 task_purgable_info_t *stats)
3654 {
3655 if (task == TASK_NULL || stats == NULL)
3656 return KERN_INVALID_ARGUMENT;
3657 /* Take task reference */
3658 task_reference(task);
3659 vm_purgeable_stats((vm_purgeable_info_t)stats, task);
3660 /* Drop task reference */
3661 task_deallocate(task);
3662 return KERN_SUCCESS;
3663 }
3664
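/*
 * task_vtimer_set:
 *
 * Enable a vtimer on the task and snapshot each thread's current timer
 * values as the baseline that task_vtimer_update() later measures
 * deltas against.
 */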
3665 void
3666 task_vtimer_set(
3667 task_t task,
3668 integer_t which)
3669 {
3670 thread_t thread;
3671 spl_t x;
3672
3673 /* assert(task == current_task()); */ /* bogus assert 4803227 4807483 */
3674
3675 task_lock(task);
3676
3677 task->vtimers |= which;
3678
3679 switch (which) {
3680
3681 case TASK_VTIMER_USER:
3682 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3683 x = splsched();
3684 thread_lock(thread);
3685 if (thread->precise_user_kernel_time)
3686 thread->vtimer_user_save = timer_grab(&thread->user_timer);
3687 else
3688 thread->vtimer_user_save = timer_grab(&thread->system_timer);
3689 thread_unlock(thread);
3690 splx(x);
3691 }
3692 break;
3693
3694 case TASK_VTIMER_PROF:
3695 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3696 x = splsched();
3697 thread_lock(thread);
3698 thread->vtimer_prof_save = timer_grab(&thread->user_timer);
3699 thread->vtimer_prof_save += timer_grab(&thread->system_timer);
3700 thread_unlock(thread);
3701 splx(x);
3702 }
3703 break;
3704
3705 case TASK_VTIMER_RLIM:
3706 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3707 x = splsched();
3708 thread_lock(thread);
3709 thread->vtimer_rlim_save = timer_grab(&thread->user_timer);
3710 thread->vtimer_rlim_save += timer_grab(&thread->system_timer);
3711 thread_unlock(thread);
3712 splx(x);
3713 }
3714 break;
3715 }
3716
3717 task_unlock(task);
3718 }
3719
3720 void
3721 task_vtimer_clear(
3722 task_t task,
3723 integer_t which)
3724 {
3725 assert(task == current_task());
3726
3727 task_lock(task);
3728
3729 task->vtimers &= ~which;
3730
3731 task_unlock(task);
3732 }
3733
3734 void
3735 task_vtimer_update(
3736 __unused
3737 task_t task,
3738 integer_t which,
3739 uint32_t *microsecs)
3740 {
3741 thread_t thread = current_thread();
3742 uint32_t tdelt;
3743 clock_sec_t secs;
3744 uint64_t tsum;
3745
3746 assert(task == current_task());
3747
3748 assert(task->vtimers & which);
3749
3750 secs = tdelt = 0;
3751
3752 switch (which) {
3753
3754 case TASK_VTIMER_USER:
3755 if (thread->precise_user_kernel_time) {
3756 tdelt = (uint32_t)timer_delta(&thread->user_timer,
3757 &thread->vtimer_user_save);
3758 } else {
3759 tdelt = (uint32_t)timer_delta(&thread->system_timer,
3760 &thread->vtimer_user_save);
3761 }
3762 absolutetime_to_microtime(tdelt, &secs, microsecs);
3763 break;
3764
3765 case TASK_VTIMER_PROF:
3766 tsum = timer_grab(&thread->user_timer);
3767 tsum += timer_grab(&thread->system_timer);
3768 tdelt = (uint32_t)(tsum - thread->vtimer_prof_save);
3769 absolutetime_to_microtime(tdelt, &secs, microsecs);
3770 /* if the time delta is smaller than a usec, ignore */
3771 if (*microsecs != 0)
3772 thread->vtimer_prof_save = tsum;
3773 break;
3774
3775 case TASK_VTIMER_RLIM:
3776 tsum = timer_grab(&thread->user_timer);
3777 tsum += timer_grab(&thread->system_timer);
3778 tdelt = (uint32_t)(tsum - thread->vtimer_rlim_save);
3779 thread->vtimer_rlim_save = tsum;
3780 absolutetime_to_microtime(tdelt, &secs, microsecs);
3781 break;
3782 }
3783
3784 }
3785
3786 /*
3787 * task_assign:
3788 *
3789 * Change the assigned processor set for the task
3790 */
3791 kern_return_t
3792 task_assign(
3793 __unused task_t task,
3794 __unused processor_set_t new_pset,
3795 __unused boolean_t assign_threads)
3796 {
3797 return(KERN_FAILURE);
3798 }
3799
3800 /*
3801 * task_assign_default:
3802 *
3803 * Version of task_assign to assign to default processor set.
3804 */
3805 kern_return_t
3806 task_assign_default(
3807 task_t task,
3808 boolean_t assign_threads)
3809 {
3810 return (task_assign(task, &pset0, assign_threads));
3811 }
3812
3813 /*
3814 * task_get_assignment
3815 *
3816 * Return name of processor set that task is assigned to.
3817 */
3818 kern_return_t
3819 task_get_assignment(
3820 task_t task,
3821 processor_set_t *pset)
3822 {
3823 if (!task || !task->active)
3824 return KERN_FAILURE;
3825
3826 *pset = &pset0;
3827
3828 return KERN_SUCCESS;
3829 }
3830
3831 uint64_t
3832 get_task_dispatchqueue_offset(
3833 task_t task)
3834 {
3835 return task->dispatchqueue_offset;
3836 }
3837
3838 /*
3839 * task_policy
3840 *
3841 * Set scheduling policy and parameters, both base and limit, for
3842 * the given task. Policy must be a policy which is enabled for the
3843 * processor set. Change contained threads if requested.
3844 */
3845 kern_return_t
3846 task_policy(
3847 __unused task_t task,
3848 __unused policy_t policy_id,
3849 __unused policy_base_t base,
3850 __unused mach_msg_type_number_t count,
3851 __unused boolean_t set_limit,
3852 __unused boolean_t change)
3853 {
3854 return(KERN_FAILURE);
3855 }
3856
3857 /*
3858 * task_set_policy
3859 *
3860 * Set scheduling policy and parameters, both base and limit, for
3861 * the given task. Policy can be any policy implemented by the
3862 * processor set, whether enabled or not. Change contained threads
3863 * if requested.
3864 */
3865 kern_return_t
3866 task_set_policy(
3867 __unused task_t task,
3868 __unused processor_set_t pset,
3869 __unused policy_t policy_id,
3870 __unused policy_base_t base,
3871 __unused mach_msg_type_number_t base_count,
3872 __unused policy_limit_t limit,
3873 __unused mach_msg_type_number_t limit_count,
3874 __unused boolean_t change)
3875 {
3876 return(KERN_FAILURE);
3877 }
3878
3879 kern_return_t
3880 task_set_ras_pc(
3881 __unused task_t task,
3882 __unused vm_offset_t pc,
3883 __unused vm_offset_t endpc)
3884 {
3885 return KERN_FAILURE;
3886 }
3887
3888 void
3889 task_synchronizer_destroy_all(task_t task)
3890 {
3891 /*
3892 * Destroy owned semaphores
3893 */
3894 semaphore_destroy_all(task);
3895 }
3896
3897 /*
3898 * Install default (machine-dependent) initial thread state
3899 * on the task. Subsequent thread creation will have this initial
3900 * state set on the thread by machine_thread_inherit_taskwide().
3901 * Flavors and structures are exactly the same as those passed to thread_set_state()
3902 */
3903 kern_return_t
3904 task_set_state(
3905 task_t task,
3906 int flavor,
3907 thread_state_t state,
3908 mach_msg_type_number_t state_count)
3909 {
3910 kern_return_t ret;
3911
3912 if (task == TASK_NULL) {
3913 return (KERN_INVALID_ARGUMENT);
3914 }
3915
3916 task_lock(task);
3917
3918 if (!task->active) {
3919 task_unlock(task);
3920 return (KERN_FAILURE);
3921 }
3922
3923 ret = machine_task_set_state(task, flavor, state, state_count);
3924
3925 task_unlock(task);
3926 return ret;
3927 }
3928
3929 /*
3930 * Examine the default (machine-dependent) initial thread state
3931 * on the task, as set by task_set_state(). Flavors and structures
3932 * are exactly the same as those passed to thread_get_state().
3933 */
3934 kern_return_t
3935 task_get_state(
3936 task_t task,
3937 int flavor,
3938 thread_state_t state,
3939 mach_msg_type_number_t *state_count)
3940 {
3941 kern_return_t ret;
3942
3943 if (task == TASK_NULL) {
3944 return (KERN_INVALID_ARGUMENT);
3945 }
3946
3947 task_lock(task);
3948
3949 if (!task->active) {
3950 task_unlock(task);
3951 return (KERN_FAILURE);
3952 }
3953
3954 ret = machine_task_get_state(task, flavor, state, state_count);
3955
3956 task_unlock(task);
3957 return ret;
3958 }
3959
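/*
 * Illustrative sketch, not part of the original file: a hypothetical
 * in-kernel caller could preset a machine-dependent state template on a
 * task so that threads created afterwards inherit it, then read it back.
 * The flavor, structure and count are the same ones thread_set_state() and
 * thread_get_state() accept, e.g. x86_DEBUG_STATE64 on x86_64:
 *
 *      x86_debug_state64_t dbg;
 *      mach_msg_type_number_t count = x86_DEBUG_STATE64_COUNT;
 *      kern_return_t kr;
 *
 *      bzero(&dbg, sizeof(dbg));
 *      kr = task_set_state(task, x86_DEBUG_STATE64,
 *          (thread_state_t)&dbg, x86_DEBUG_STATE64_COUNT);
 *      if (kr == KERN_SUCCESS)
 *              kr = task_get_state(task, x86_DEBUG_STATE64,
 *                  (thread_state_t)&dbg, &count);
 */
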
3960 #if CONFIG_JETSAM
3961 #define HWM_USERCORE_MINSPACE 250 // free space (in MB) required *after* core file creation
3962
3963 void __attribute__((noinline))
3964 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb)
3965 {
3966 task_t task = current_task();
3967 int pid = 0;
3968 const char *procname = "unknown";
3969 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
3970
3971 #ifdef MACH_BSD
3972 pid = proc_selfpid();
3973
3974 if (pid == 1) {
3975 /*
3976 * Cannot have ReportCrash analyzing
3977 * a suspended initproc.
3978 */
3979 return;
3980 }
3981
3982 if (task->bsd_info != NULL)
3983 procname = proc_name_address(current_task()->bsd_info);
3984 #endif
3985
3986 if (hwm_user_cores) {
3987 int error;
3988 uint64_t starttime, end;
3989 clock_sec_t secs = 0;
3990 uint32_t microsecs = 0;
3991
3992 starttime = mach_absolute_time();
3993 /*
3994 * Trigger a coredump of this process. Don't proceed unless we know we won't
3995 * be filling up the disk; and ignore the core size resource limit for this
3996 * core file.
3997 */
3998 if ((error = coredump(current_task()->bsd_info, HWM_USERCORE_MINSPACE, COREDUMP_IGNORE_ULIMIT)) != 0) {
3999 printf("couldn't take coredump of %s[%d]: %d\n", procname, pid, error);
4000 }
4001 /*
4002 * coredump() leaves the task suspended.
4003 */
4004 task_resume_internal(current_task());
4005
4006 end = mach_absolute_time();
4007 absolutetime_to_microtime(end - starttime, &secs, &microsecs);
4008 printf("coredump of %s[%d] taken in %d secs %d microsecs\n",
4009 proc_name_address(current_task()->bsd_info), pid, (int)secs, microsecs);
4010 }
4011
4012 if (disable_exc_resource) {
4013 printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
4014 "suppressed by a boot-arg.\n", procname, pid, max_footprint_mb);
4015 return;
4016 }
4017
4018 /*
4019 * A task that has triggered an EXC_RESOURCE should not be
4020 * jetsammed when the device is under memory pressure. Here
4021 * we set the P_MEMSTAT_TERMINATED flag so that the process
4022 * will be skipped if the memorystatus_thread wakes up.
4023 */
4024 proc_memstat_terminated(current_task()->bsd_info, TRUE);
4025
4026 printf("process %s[%d] crossed memory high watermark (%d MB); sending "
4027 "EXC_RESOURCE.\n", procname, pid, max_footprint_mb);
4028
4029 code[0] = code[1] = 0;
4030 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY);
4031 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK);
4032 EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb);
4033
4034 /*
4035 * Use the _internal_ variant so that no user-space
4036 * process can resume our task from under us.
4037 */
4038 task_suspend_internal(task);
4039 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
4040 task_resume_internal(task);
4041
4042 /*
4043 * After the EXC_RESOURCE has been handled, we must clear the
4044 * P_MEMSTAT_TERMINATED flag so that the process can again be
4045 * considered for jetsam if the memorystatus_thread wakes up.
4046 */
4047 proc_memstat_terminated(current_task()->bsd_info, FALSE); /* clear the flag */
4048 }
4049
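/*
 * Illustrative sketch, not part of the original file: an EXC_RESOURCE
 * listener could recover the fields packed into code[0] above with the
 * decode macros assumed to mirror the encoders in <kern/exc_resource.h>:
 *
 *      if (EXC_RESOURCE_DECODE_RESOURCE_TYPE(code[0]) == RESOURCE_TYPE_MEMORY &&
 *          EXC_RESOURCE_DECODE_FLAVOR(code[0]) == FLAVOR_HIGH_WATERMARK) {
 *              int limit_mb = EXC_RESOURCE_HWM_DECODE_LIMIT(code[0]);
 *              ...
 *      }
 *
 * where limit_mb recovers the max_footprint_mb encoded by the sender.
 */
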
4050 /*
4051 * Callback invoked when a task exceeds its physical footprint limit.
4052 */
4053 void
4054 task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1)
4055 {
4056 ledger_amount_t max_footprint, max_footprint_mb;
4057 ledger_amount_t footprint_after_purge;
4058 task_t task;
4059
4060 if (warning == LEDGER_WARNING_DIPPED_BELOW) {
4061 /*
4062 * Task memory limits only provide a warning on the way up.
4063 */
4064 return;
4065 }
4066
4067 task = current_task();
4068
4069 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &max_footprint);
4070 max_footprint_mb = max_footprint >> 20;
4071
4072 /*
4073 * Try to purge all "volatile" memory in that task first.
4074 */
4075 (void) task_purge_volatile_memory(task);
4076 /* are we still over the limit ? */
4077 ledger_get_balance(task->ledger,
4078 task_ledgers.phys_footprint,
4079 &footprint_after_purge);
4080 if ((!warning &&
4081 footprint_after_purge <= max_footprint) ||
4082 (warning &&
4083 footprint_after_purge <= ((max_footprint *
4084 PHYS_FOOTPRINT_WARNING_LEVEL) / 100))) {
4085 /* all better now */
4086 ledger_reset_callback_state(task->ledger,
4087 task_ledgers.phys_footprint);
4088 return;
4089 }
4090 /* still over the limit after purging... */
4091
4092 /*
4093 * If this is an actual violation (not a warning),
4094 * generate a non-fatal high watermark EXC_RESOURCE.
4095 */
4096 if ((warning == 0) && (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION)) {
4097 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND((int)max_footprint_mb);
4098 }
4099
4100 memorystatus_on_ledger_footprint_exceeded((warning == LEDGER_WARNING_ROSE_ABOVE) ? TRUE : FALSE,
4101 (int)max_footprint_mb);
4102 }
4103
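/*
 * Worked example of the post-purge check above (values illustrative;
 * PHYS_FOOTPRINT_WARNING_LEVEL assumed to be 80 here): with a 500 MB limit,
 * a warning-level callback is treated as cleared once the footprint after
 * purging drops to (500 * 80) / 100 = 400 MB or below, while an actual
 * violation (warning == 0) only clears once the footprint is back at or
 * below the full 500 MB limit.
 */
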
4104 extern int proc_check_footprint_priv(void);
4105
4106 kern_return_t
4107 task_set_phys_footprint_limit(
4108 task_t task,
4109 int new_limit_mb,
4110 int *old_limit_mb)
4111 {
4112 kern_return_t error;
4113
4114 if ((error = proc_check_footprint_priv())) {
4115 return (KERN_NO_ACCESS);
4116 }
4117
4118 return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, FALSE);
4119 }
4120
4121 kern_return_t
4122 task_convert_phys_footprint_limit(
4123 int limit_mb,
4124 int *converted_limit_mb)
4125 {
4126 if (limit_mb == -1) {
4127 /*
4128 * No limit
4129 */
4130 if (max_task_footprint != 0) {
4131 *converted_limit_mb = (int)(max_task_footprint / 1024 / 1024); /* bytes to MB */
4132 } else {
4133 *converted_limit_mb = (int)(LEDGER_LIMIT_INFINITY >> 20);
4134 }
4135 } else {
4136 /* nothing to convert */
4137 *converted_limit_mb = limit_mb;
4138 }
4139 return (KERN_SUCCESS);
4140 }
4141
4142
4143 kern_return_t
4144 task_set_phys_footprint_limit_internal(
4145 task_t task,
4146 int new_limit_mb,
4147 int *old_limit_mb,
4148 boolean_t trigger_exception)
4149 {
4150 ledger_amount_t old;
4151
4152 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &old);
4153
4154 if (old_limit_mb) {
4155 /*
4156 * Check that limit >> 20 will not give an "unexpected" 32-bit
4157 * result. There are, however, implicit assumptions that -1 mb limit
4158 * equates to LEDGER_LIMIT_INFINITY.
4159 */
4160 assert(((old & 0xFFF0000000000000LL) == 0) || (old == LEDGER_LIMIT_INFINITY));
4161 *old_limit_mb = (int)(old >> 20);
4162 }
4163
4164 if (new_limit_mb == -1) {
4165 /*
4166 * Caller wishes to remove the limit.
4167 */
4168 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
4169 max_task_footprint ? max_task_footprint : LEDGER_LIMIT_INFINITY,
4170 max_task_footprint ? PHYS_FOOTPRINT_WARNING_LEVEL : 0);
4171 return (KERN_SUCCESS);
4172 }
4173
4174 #ifdef CONFIG_NOMONITORS
4175 return (KERN_SUCCESS);
4176 #endif /* CONFIG_NOMONITORS */
4177
4178 task_lock(task);
4179
4180 if (trigger_exception) {
4181 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION;
4182 } else {
4183 task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION;
4184 }
4185
4186 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
4187 (ledger_amount_t)new_limit_mb << 20, PHYS_FOOTPRINT_WARNING_LEVEL);
4188
4189 if (task == current_task()) {
4190 ledger_check_new_balance(task->ledger, task_ledgers.phys_footprint);
4191 }
4192
4193 task_unlock(task);
4194
4195 return (KERN_SUCCESS);
4196 }
4197
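/*
 * Illustrative sketch, not part of the original file: an in-kernel policy
 * path could move a task to a 500 MB footprint limit without the
 * EXC_RESOURCE trigger and later restore the previous value (limits are in
 * MB; the routine above converts them to ledger units with "<< 20"):
 *
 *      int old_limit_mb;
 *
 *      task_set_phys_footprint_limit_internal(task, 500, &old_limit_mb, FALSE);
 *      ...
 *      task_set_phys_footprint_limit_internal(task, old_limit_mb, NULL, FALSE);
 */
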
4198 kern_return_t
4199 task_get_phys_footprint_limit(
4200 task_t task,
4201 int *limit_mb)
4202 {
4203 ledger_amount_t limit;
4204
4205 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &limit);
4206 /*
4207 * Check that limit >> 20 will not give an "unexpected" signed, 32-bit
4208 * result. There are, however, implicit assumptions that -1 mb limit
4209 * equates to LEDGER_LIMIT_INFINITY.
4210 */
4211 assert(((limit & 0xFFF0000000000000LL) == 0) || (limit == LEDGER_LIMIT_INFINITY));
4212 *limit_mb = (int)(limit >> 20);
4213
4214 return (KERN_SUCCESS);
4215 }
4216 #else /* CONFIG_JETSAM */
4217 kern_return_t
4218 task_set_phys_footprint_limit(
4219 __unused task_t task,
4220 __unused int new_limit_mb,
4221 __unused int *old_limit_mb)
4222 {
4223 return (KERN_FAILURE);
4224 }
4225
4226 kern_return_t
4227 task_get_phys_footprint_limit(
4228 __unused task_t task,
4229 __unused int *limit_mb)
4230 {
4231 return (KERN_FAILURE);
4232 }
4233 #endif /* CONFIG_JETSAM */
4234
4235 /*
4236 * We need to export some functions to other components that
4237 * are currently implemented in macros within the osfmk
4238 * component. Just export them as functions of the same name.
4239 */
4240 boolean_t is_kerneltask(task_t t)
4241 {
4242 if (t == kernel_task)
4243 return (TRUE);
4244
4245 return (FALSE);
4246 }
4247
4248 int
4249 check_for_tasksuspend(task_t task)
4250 {
4251
4252 if (task == TASK_NULL)
4253 return (0);
4254
4255 return (task->suspend_count > 0);
4256 }
4257
4258 #undef current_task
4259 task_t current_task(void);
4260 task_t current_task(void)
4261 {
4262 return (current_task_fast());
4263 }
4264
4265 #undef task_reference
4266 void task_reference(task_t task);
4267 void
4268 task_reference(
4269 task_t task)
4270 {
4271 if (task != TASK_NULL)
4272 task_reference_internal(task);
4273 }
4274
4275 /* defined in bsd/kern/kern_prot.c */
4276 extern int get_audit_token_pid(audit_token_t *audit_token);
4277
4278 int task_pid(task_t task)
4279 {
4280 if (task)
4281 return get_audit_token_pid(&task->audit_token);
4282 return -1;
4283 }
4284
4285
4286 /*
4287 * This routine is always called with the task lock held.
4288 * It returns a thread handle without taking a reference, since the
4289 * caller operates on the thread while still holding the task lock.
4290 */
4291 thread_t
4292 task_findtid(task_t task, uint64_t tid)
4293 {
4294 thread_t thread = THREAD_NULL;
4295
4296 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4297 if (thread->thread_id == tid)
4298 return(thread);
4299 }
4300 return(THREAD_NULL);
4301 }
4302
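/*
 * Illustrative sketch of the expected calling convention (hypothetical
 * caller): the task lock protects both the iteration and the returned,
 * unreferenced thread pointer, so the thread must only be used while the
 * lock is still held.
 *
 *      task_lock(task);
 *      thread_t thread = task_findtid(task, tid);
 *      if (thread != THREAD_NULL) {
 *              ... operate on thread under the task lock ...
 *      }
 *      task_unlock(task);
 */
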
4303 /*
4304 * Control the CPU usage monitor for a task.
4305 */
4306 kern_return_t
4307 task_cpu_usage_monitor_ctl(task_t task, uint32_t *flags)
4308 {
4309 int error = KERN_SUCCESS;
4310
4311 if (*flags & CPUMON_MAKE_FATAL) {
4312 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_CPUMON;
4313 } else {
4314 error = KERN_INVALID_ARGUMENT;
4315 }
4316
4317 return error;
4318 }
4319
4320 /*
4321 * Control the wakeups monitor for a task.
4322 */
4323 kern_return_t
4324 task_wakeups_monitor_ctl(task_t task, uint32_t *flags, int32_t *rate_hz)
4325 {
4326 ledger_t ledger = task->ledger;
4327
4328 task_lock(task);
4329 if (*flags & WAKEMON_GET_PARAMS) {
4330 ledger_amount_t limit;
4331 uint64_t period;
4332
4333 ledger_get_limit(ledger, task_ledgers.interrupt_wakeups, &limit);
4334 ledger_get_period(ledger, task_ledgers.interrupt_wakeups, &period);
4335
4336 if (limit != LEDGER_LIMIT_INFINITY) {
4337 /*
4338 * An active limit means the wakeups monitor is enabled.
4339 */
4340 *rate_hz = (int32_t)(limit / (int64_t)(period / NSEC_PER_SEC));
4341 *flags = WAKEMON_ENABLE;
4342 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
4343 *flags |= WAKEMON_MAKE_FATAL;
4344 }
4345 } else {
4346 *flags = WAKEMON_DISABLE;
4347 *rate_hz = -1;
4348 }
4349
4350 /*
4351 * If WAKEMON_GET_PARAMS is present in flags, all other flags are ignored.
4352 */
4353 task_unlock(task);
4354 return KERN_SUCCESS;
4355 }
4356
4357 if (*flags & WAKEMON_ENABLE) {
4358 if (*flags & WAKEMON_SET_DEFAULTS) {
4359 *rate_hz = task_wakeups_monitor_rate;
4360 }
4361
4362 #ifndef CONFIG_NOMONITORS
4363 if (*flags & WAKEMON_MAKE_FATAL) {
4364 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
4365 }
4366 #endif /* CONFIG_NOMONITORS */
4367
4368 if (*rate_hz < 0) {
4369 task_unlock(task);
4370 return KERN_INVALID_ARGUMENT;
4371 }
4372
4373 #ifndef CONFIG_NOMONITORS
4374 ledger_set_limit(ledger, task_ledgers.interrupt_wakeups, *rate_hz * task_wakeups_monitor_interval,
4375 task_wakeups_monitor_ustackshots_trigger_pct);
4376 ledger_set_period(ledger, task_ledgers.interrupt_wakeups, task_wakeups_monitor_interval * NSEC_PER_SEC);
4377 ledger_enable_callback(ledger, task_ledgers.interrupt_wakeups);
4378 #endif /* CONFIG_NOMONITORS */
4379 } else if (*flags & WAKEMON_DISABLE) {
4380 /*
4381 * Caller wishes to disable wakeups monitor on the task.
4382 *
4383 * Disable telemetry if it was triggered by the wakeups monitor, and
4384 * remove the limit & callback on the wakeups ledger entry.
4385 */
4386 #if CONFIG_TELEMETRY
4387 telemetry_task_ctl_locked(task, TF_WAKEMON_WARNING, 0);
4388 #endif
4389 ledger_disable_refill(ledger, task_ledgers.interrupt_wakeups);
4390 ledger_disable_callback(ledger, task_ledgers.interrupt_wakeups);
4391 }
4392
4393 task_unlock(task);
4394 return KERN_SUCCESS;
4395 }
4396
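/*
 * Illustrative sketch, not part of the original file: a hypothetical caller
 * could enable the wakeups monitor at the default rate and make overruns
 * fatal.  The routine takes the task lock itself, so it is called unlocked;
 * WAKEMON_SET_DEFAULTS overwrites rate_hz with task_wakeups_monitor_rate
 * before the ledger limit of rate_hz * task_wakeups_monitor_interval is
 * installed.
 *
 *      uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS | WAKEMON_MAKE_FATAL;
 *      int32_t rate_hz = 0;
 *      kern_return_t kr = task_wakeups_monitor_ctl(task, &flags, &rate_hz);
 */
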
4397 void
4398 task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1)
4399 {
4400 if (warning == LEDGER_WARNING_ROSE_ABOVE) {
4401 #if CONFIG_TELEMETRY
4402 /*
4403 * This task is in danger of violating the wakeups monitor. Enable telemetry on this task
4404 * so there are micro-stackshots available if and when EXC_RESOURCE is triggered.
4405 */
4406 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 1);
4407 #endif
4408 return;
4409 }
4410
4411 #if CONFIG_TELEMETRY
4412 /*
4413 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
4414 * exceeded the limit, turn telemetry off for the task.
4415 */
4416 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 0);
4417 #endif
4418
4419 if (warning == 0) {
4420 THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE();
4421 }
4422 }
4423
4424 void __attribute__((noinline))
4425 THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE(void)
4426 {
4427 task_t task = current_task();
4428 int pid = 0;
4429 const char *procname = "unknown";
4430 uint64_t observed_wakeups_rate;
4431 uint64_t permitted_wakeups_rate;
4432 uint64_t observation_interval;
4433 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
4434 struct ledger_entry_info lei;
4435
4436 #ifdef MACH_BSD
4437 pid = proc_selfpid();
4438 if (task->bsd_info != NULL)
4439 procname = proc_name_address(current_task()->bsd_info);
4440 #endif
4441
4442 ledger_get_entry_info(task->ledger, task_ledgers.interrupt_wakeups, &lei);
4443
4444 /*
4445 * Disable the exception notification so we don't overwhelm
4446 * the listener with an endless stream of redundant exceptions.
4447 */
4448 uint32_t flags = WAKEMON_DISABLE;
4449 task_wakeups_monitor_ctl(task, &flags, NULL);
4450
4451 observed_wakeups_rate = (lei.lei_balance * (int64_t)NSEC_PER_SEC) / lei.lei_last_refill;
4452 permitted_wakeups_rate = lei.lei_limit / task_wakeups_monitor_interval;
4453 observation_interval = lei.lei_refill_period / NSEC_PER_SEC;
4454
4455 if (disable_exc_resource) {
4456 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
4457 "suppressed by a boot-arg\n", procname, pid);
4458 return;
4459 }
4460 if (audio_active) {
4461 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
4462 "suppressed due to audio playback\n", procname, pid);
4463 return;
4464 }
4465 printf("process %s[%d] caught causing excessive wakeups. Observed wakeups rate "
4466 "(per sec): %lld; Maximum permitted wakeups rate (per sec): %lld; Observation "
4467 "period: %lld seconds; Task lifetime number of wakeups: %lld\n",
4468 procname, pid, observed_wakeups_rate, permitted_wakeups_rate,
4469 observation_interval, lei.lei_credit);
4470
4471 code[0] = code[1] = 0;
4472 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_WAKEUPS);
4473 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_WAKEUPS_MONITOR);
4474 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_PERMITTED(code[0], task_wakeups_monitor_rate);
4475 EXC_RESOURCE_CPUMONITOR_ENCODE_OBSERVATION_INTERVAL(code[0], observation_interval);
4476 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_OBSERVED(code[1], lei.lei_balance * (int64_t)NSEC_PER_SEC / lei.lei_last_refill);
4477 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
4478
4479 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
4480 task_terminate_internal(task);
4481 }
4482 }
4483
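/*
 * Worked example of the rates reported above (illustrative numbers): with
 * lei_balance == 4500 wakeups accumulated over lei_last_refill ==
 * 30 * NSEC_PER_SEC, the observed rate is 4500 / 30 = 150 wakeups/sec;
 * with lei_limit == 45000 and task_wakeups_monitor_interval == 300 seconds,
 * the permitted rate is 45000 / 300 = 150 wakeups/sec.
 */
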
4484 kern_return_t
4485 task_purge_volatile_memory(
4486 task_t task)
4487 {
4488 vm_map_t map;
4489 int num_object_purged;
4490
4491 if (task == TASK_NULL)
4492 return KERN_INVALID_TASK;
4493
4494 task_lock(task);
4495
4496 if (!task->active) {
4497 task_unlock(task);
4498 return KERN_INVALID_TASK;
4499 }
4500 map = task->map;
4501 if (map == VM_MAP_NULL) {
4502 task_unlock(task);
4503 return KERN_INVALID_TASK;
4504 }
4505 vm_map_reference(task->map);
4506
4507 task_unlock(task);
4508
4509 num_object_purged = vm_map_purge(map);
4510 vm_map_deallocate(map);
4511
4512 return KERN_SUCCESS;
4513 }
4514
4515 /* Placeholders for the task set/get voucher interfaces */
4516 kern_return_t
4517 task_get_mach_voucher(
4518 task_t task,
4519 mach_voucher_selector_t __unused which,
4520 ipc_voucher_t *voucher)
4521 {
4522 if (TASK_NULL == task)
4523 return KERN_INVALID_TASK;
4524
4525 *voucher = NULL;
4526 return KERN_SUCCESS;
4527 }
4528
4529 kern_return_t
4530 task_set_mach_voucher(
4531 task_t task,
4532 ipc_voucher_t __unused voucher)
4533 {
4534 if (TASK_NULL == task)
4535 return KERN_INVALID_TASK;
4536
4537 return KERN_SUCCESS;
4538 }
4539
4540 kern_return_t
4541 task_swap_mach_voucher(
4542 task_t task,
4543 ipc_voucher_t new_voucher,
4544 ipc_voucher_t *in_out_old_voucher)
4545 {
4546 if (TASK_NULL == task)
4547 return KERN_INVALID_TASK;
4548
4549 *in_out_old_voucher = new_voucher;
4550 return KERN_SUCCESS;
4551 }
4552
4553 void task_set_gpu_denied(task_t task, boolean_t denied)
4554 {
4555 task_lock(task);
4556
4557 if (denied) {
4558 task->t_flags |= TF_GPU_DENIED;
4559 } else {
4560 task->t_flags &= ~TF_GPU_DENIED;
4561 }
4562
4563 task_unlock(task);
4564 }
4565
4566 boolean_t task_is_gpu_denied(task_t task)
4567 {
4568 /* We don't need the lock to read this flag */
4569 return (task->t_flags & TF_GPU_DENIED) ? TRUE : FALSE;
4570 }
4571
4572 void task_update_logical_writes(task_t task, uint32_t io_size, int flags)
4573 {
4574 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_DATA_WRITE)) | DBG_FUNC_NONE, task_pid(task), io_size, flags, 0, 0);
4575 switch(flags) {
4576 case TASK_WRITE_IMMEDIATE:
4577 OSAddAtomic64(io_size, (SInt64 *)&(task->task_immediate_writes));
4578 break;
4579 case TASK_WRITE_DEFERRED:
4580 OSAddAtomic64(io_size, (SInt64 *)&(task->task_deferred_writes));
4581 break;
4582 case TASK_WRITE_INVALIDATED:
4583 OSAddAtomic64(io_size, (SInt64 *)&(task->task_invalidated_writes));
4584 break;
4585 case TASK_WRITE_METADATA:
4586 OSAddAtomic64(io_size, (SInt64 *)&(task->task_metadata_writes));
4587 break;
4588 }
4589 return;
4590 }
4591
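/*
 * Illustrative sketch, not part of the original file: a hypothetical I/O
 * accounting call site could charge a deferred logical write against the
 * current task (io_size is assumed to be a byte count; the flag selects
 * which counter is bumped above):
 *
 *      task_update_logical_writes(current_task(), (uint32_t)write_size,
 *          TASK_WRITE_DEFERRED);
 */
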
4592 queue_head_t *
4593 task_io_user_clients(task_t task)
4594 {
4595 return (&task->io_user_clients);
4596 }