1 /*
2 * Copyright (c) 2000-2010, 2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_FREE_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 * File: kern/task.c
58 * Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub,
59 * David Black
60 *
61 * Task management primitives implementation.
62 */
63 /*
64 * Copyright (c) 1993 The University of Utah and
65 * the Computer Systems Laboratory (CSL). All rights reserved.
66 *
67 * Permission to use, copy, modify and distribute this software and its
68 * documentation is hereby granted, provided that both the copyright
69 * notice and this permission notice appear in all copies of the
70 * software, derivative works or modified versions, and any portions
71 * thereof, and that both notices appear in supporting documentation.
72 *
73 * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
74 * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
75 * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
76 *
77 * CSL requests users of this software to return to csl-dist@cs.utah.edu any
78 * improvements that they make and grant CSL redistribution rights.
79 *
80 */
81 /*
82 * NOTICE: This file was modified by McAfee Research in 2004 to introduce
83 * support for mandatory and extensible security protections. This notice
84 * is included in support of clause 2.2 (b) of the Apple Public License,
85 * Version 2.0.
86 * Copyright (c) 2005 SPARTA, Inc.
87 */
88
89 #include <mach/mach_types.h>
90 #include <mach/boolean.h>
91 #include <mach/host_priv.h>
92 #include <mach/machine/vm_types.h>
93 #include <mach/vm_param.h>
94 #include <mach/mach_vm.h>
95 #include <mach/semaphore.h>
96 #include <mach/task_info.h>
97 #include <mach/task_special_ports.h>
98
99 #include <ipc/ipc_importance.h>
100 #include <ipc/ipc_types.h>
101 #include <ipc/ipc_space.h>
102 #include <ipc/ipc_entry.h>
103 #include <ipc/ipc_hash.h>
104
105 #include <kern/kern_types.h>
106 #include <kern/mach_param.h>
107 #include <kern/misc_protos.h>
108 #include <kern/task.h>
109 #include <kern/thread.h>
110 #include <kern/coalition.h>
111 #include <kern/zalloc.h>
112 #include <kern/kalloc.h>
113 #include <kern/kern_cdata.h>
114 #include <kern/processor.h>
115 #include <kern/sched_prim.h> /* for thread_wakeup */
116 #include <kern/ipc_tt.h>
117 #include <kern/host.h>
118 #include <kern/clock.h>
119 #include <kern/timer.h>
120 #include <kern/assert.h>
121 #include <kern/sync_lock.h>
122 #include <kern/affinity.h>
123 #include <kern/exc_resource.h>
124 #include <kern/machine.h>
125 #include <corpses/task_corpse.h>
126 #if CONFIG_TELEMETRY
127 #include <kern/telemetry.h>
128 #endif
129
130 #include <vm/pmap.h>
131 #include <vm/vm_map.h>
132 #include <vm/vm_kern.h> /* for kernel_map, ipc_kernel_map */
133 #include <vm/vm_pageout.h>
134 #include <vm/vm_protos.h>
135 #include <vm/vm_purgeable_internal.h>
136
137 #include <sys/resource.h>
138 #include <sys/signalvar.h> /* for coredump */
139
140 /*
141 * Exported interfaces
142 */
143
144 #include <mach/task_server.h>
145 #include <mach/mach_host_server.h>
146 #include <mach/host_security_server.h>
147 #include <mach/mach_port_server.h>
148
149 #include <vm/vm_shared_region.h>
150
151 #include <libkern/OSDebug.h>
152 #include <libkern/OSAtomic.h>
153
154 #if CONFIG_ATM
155 #include <atm/atm_internal.h>
156 #endif
157
158 #include <kern/sfi.h>
159
160 #if KPERF
161 extern int kpc_force_all_ctrs(task_t, int);
162 #endif
163
164 uint32_t qos_override_mode;
165
166 task_t kernel_task;
167 zone_t task_zone;
168 lck_attr_t task_lck_attr;
169 lck_grp_t task_lck_grp;
170 lck_grp_attr_t task_lck_grp_attr;
171
172 /* Flag set by core audio when audio is playing. Used to stifle EXC_RESOURCE generation when active. */
173 int audio_active = 0;
174
175 zinfo_usage_store_t tasks_tkm_private;
176 zinfo_usage_store_t tasks_tkm_shared;
177
178 /* A container to accumulate statistics for expired tasks */
179 expired_task_statistics_t dead_task_statistics;
180 lck_spin_t dead_task_statistics_lock;
181
182 ledger_template_t task_ledger_template = NULL;
183
184 struct _task_ledger_indices task_ledgers __attribute__((used)) =
185 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
186 { 0 /* initialized at runtime */},
187 #ifdef CONFIG_BANK
188 -1, -1,
189 #endif
190 };
191
192 void init_task_ledgers(void);
193 void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1);
194 void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1);
195 void __attribute__((noinline)) THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE(void);
196 void __attribute__((noinline)) PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb);
197
198 kern_return_t task_suspend_internal(task_t);
199 kern_return_t task_resume_internal(task_t);
200 static kern_return_t task_start_halt_locked(task_t task, boolean_t should_mark_corpse);
201
202
203 void proc_init_cpumon_params(void);
204 extern kern_return_t exception_deliver(thread_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t, struct exception_action *, lck_mtx_t *);
205
206 // Warn tasks when they hit 80% of their memory limit.
207 #define PHYS_FOOTPRINT_WARNING_LEVEL 80
208
209 #define TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT 150 /* wakeups per second */
210 #define TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL 300 /* in seconds. */
211
212 /*
213 * Level (in terms of percentage of the limit) at which the wakeups monitor triggers telemetry.
214 *
215 * (i.e., when the task's wakeups rate exceeds 70% of the limit, start taking user
216 * stacktraces, aka micro-stackshots)
217 */
218 #define TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER 70
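/*
 * Worked example (illustrative, using the defaults above): the monitor
 * observes wakeups over a 300-second window against a limit of 150 wakeups
 * per second, i.e. a budget of 150 * 300 = 45,000 wakeups per window, and
 * telemetry (micro-stackshots) starts once the observed rate crosses 70%
 * of the limit, 0.70 * 150 = 105 wakeups per second.
 */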
219
220 int task_wakeups_monitor_interval; /* In seconds. Time period over which wakeups rate is observed */
221 int task_wakeups_monitor_rate; /* In hz. Maximum allowable wakeups per task before EXC_RESOURCE is sent */
222
223 int task_wakeups_monitor_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
224
225 int disable_exc_resource; /* Global override to suppress EXC_RESOURCE for resource monitor violations. */
226
227 ledger_amount_t max_task_footprint = 0; /* Per-task limit on physical memory consumption in bytes */
228 int max_task_footprint_mb = 0; /* Per-task limit on physical memory consumption in megabytes */
229
230 #if MACH_ASSERT
231 int pmap_ledgers_panic = 1;
232 #endif /* MACH_ASSERT */
233
234 int task_max = CONFIG_TASK_MAX; /* Max number of tasks */
235
236 int hwm_user_cores = 0; /* high watermark violations generate user core files */
237
238 #ifdef MACH_BSD
239 extern void proc_getexecutableuuid(void *, unsigned char *, unsigned long);
240 extern int proc_pid(struct proc *p);
241 extern int proc_selfpid(void);
242 extern char *proc_name_address(struct proc *p);
243 extern uint64_t get_dispatchqueue_offset_from_proc(void *);
244 #if CONFIG_JETSAM
245 extern void proc_memstat_terminated(struct proc* p, boolean_t set);
246 extern void memorystatus_on_ledger_footprint_exceeded(int warning, const int max_footprint_mb);
247 #endif
248 #endif
249 #if MACH_ASSERT
250 extern int pmap_ledgers_panic;
251 #endif /* MACH_ASSERT */
252
253 /* Forwards */
254
255 void task_hold_locked(
256 task_t task);
257 void task_wait_locked(
258 task_t task,
259 boolean_t until_not_runnable);
260 void task_release_locked(
261 task_t task);
262 void task_free(
263 task_t task );
264 void task_synchronizer_destroy_all(
265 task_t task);
266
267 int check_for_tasksuspend(
268 task_t task);
269
270 void
271 task_backing_store_privileged(
272 task_t task)
273 {
274 task_lock(task);
275 task->priv_flags |= VM_BACKING_STORE_PRIV;
276 task_unlock(task);
277 return;
278 }
279
280
281 void
282 task_set_64bit(
283 task_t task,
284 boolean_t is64bit)
285 {
286 #if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
287 thread_t thread;
288 #endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */
289
290 task_lock(task);
291
292 if (is64bit) {
293 if (task_has_64BitAddr(task))
294 goto out;
295 task_set_64BitAddr(task);
296 } else {
297 if ( !task_has_64BitAddr(task))
298 goto out;
299 task_clear_64BitAddr(task);
300 }
301 /* FIXME: On x86, the thread save state flavor can diverge from the
302 * task's 64-bit feature flag due to the 32-bit/64-bit register save
303 * state dichotomy. Since we can be pre-empted in this interval,
304 * certain routines may observe the thread as being in an inconsistent
305 * state with respect to its task's 64-bitness.
306 */
307
308 #if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
309 queue_iterate(&task->threads, thread, thread_t, task_threads) {
310 thread_mtx_lock(thread);
311 machine_thread_switch_addrmode(thread);
312 thread_mtx_unlock(thread);
313 }
314 #endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */
315
316 out:
317 task_unlock(task);
318 }
319
320
321 void
322 task_set_dyld_info(task_t task, mach_vm_address_t addr, mach_vm_size_t size)
323 {
324 task_lock(task);
325 task->all_image_info_addr = addr;
326 task->all_image_info_size = size;
327 task_unlock(task);
328 }
329
330 void
331 task_atm_reset(__unused task_t task) {
332
333 #if CONFIG_ATM
334 if (task->atm_context != NULL) {
335 atm_task_descriptor_destroy(task->atm_context);
336 task->atm_context = NULL;
337 }
338 #endif
339
340 }
341
342 #if TASK_REFERENCE_LEAK_DEBUG
343 #include <kern/btlog.h>
344
345 decl_simple_lock_data(static,task_ref_lock);
346 static btlog_t *task_ref_btlog;
347 #define TASK_REF_OP_INCR 0x1
348 #define TASK_REF_OP_DECR 0x2
349
350 #define TASK_REF_BTDEPTH 7
351
352 static void
353 task_ref_lock_lock(void *context)
354 {
355 simple_lock((simple_lock_t)context);
356 }
357 static void
358 task_ref_lock_unlock(void *context)
359 {
360 simple_unlock((simple_lock_t)context);
361 }
362
363 void
364 task_reference_internal(task_t task)
365 {
366 void * bt[TASK_REF_BTDEPTH];
367 int numsaved = 0;
368
369 numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
370
371 (void)hw_atomic_add(&(task)->ref_count, 1);
372 btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_INCR,
373 bt, numsaved);
374 }
375
376 uint32_t
377 task_deallocate_internal(task_t task)
378 {
379 void * bt[TASK_REF_BTDEPTH];
380 int numsaved = 0;
381
382 numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
383
384 btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_DECR,
385 bt, numsaved);
386 return hw_atomic_sub(&(task)->ref_count, 1);
387 }
388
389 #endif /* TASK_REFERENCE_LEAK_DEBUG */
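/*
 * Leak-hunting note (added for clarity): every reference taken through
 * task_reference_internal() above records a TASK_REF_OP_INCR entry in
 * task_ref_btlog, and every task_deallocate_internal() records a
 * TASK_REF_OP_DECR, both keyed by the task pointer plus a short backtrace.
 * A task whose INCR entries outnumber its DECR entries after it should
 * have been freed is the leak candidate.
 */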
390
391 void
392 task_init(void)
393 {
394
395 lck_grp_attr_setdefault(&task_lck_grp_attr);
396 lck_grp_init(&task_lck_grp, "task", &task_lck_grp_attr);
397 lck_attr_setdefault(&task_lck_attr);
398 lck_mtx_init(&tasks_threads_lock, &task_lck_grp, &task_lck_attr);
399
400 task_zone = zinit(
401 sizeof(struct task),
402 task_max * sizeof(struct task),
403 TASK_CHUNK * sizeof(struct task),
404 "tasks");
405
406 zone_change(task_zone, Z_NOENCRYPT, TRUE);
407
408 /*
409 * Configure per-task memory limit.
410 * The boot-arg is interpreted as Megabytes,
411 * and takes precedence over the device tree.
412 * Setting the boot-arg to 0 disables task limits.
413 */
414 if (!PE_parse_boot_argn("max_task_pmem", &max_task_footprint_mb,
415 sizeof (max_task_footprint_mb))) {
416 /*
417 * No limit was found in boot-args, so go look in the device tree.
418 */
419 if (!PE_get_default("kern.max_task_pmem", &max_task_footprint_mb,
420 sizeof(max_task_footprint_mb))) {
421 /*
422 * No limit was found in device tree.
423 */
424 max_task_footprint_mb = 0;
425 }
426 }
427
428 if (max_task_footprint_mb != 0) {
429 #if CONFIG_JETSAM
430 if (max_task_footprint_mb < 50) {
431 printf("Warning: max_task_pmem %d below minimum.\n",
432 max_task_footprint_mb);
433 max_task_footprint_mb = 50;
434 }
435 printf("Limiting task physical memory footprint to %d MB\n",
436 max_task_footprint_mb);
437
438 max_task_footprint = (ledger_amount_t)max_task_footprint_mb * 1024 * 1024; // Convert MB to bytes
439 #else
440 printf("Warning: max_task_footprint specified, but jetsam not configured; ignoring.\n");
441 #endif
442 }
443
444 #if MACH_ASSERT
445 PE_parse_boot_argn("pmap_ledgers_panic", &pmap_ledgers_panic,
446 sizeof (pmap_ledgers_panic));
447 #endif /* MACH_ASSERT */
448
449 if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores,
450 sizeof (hwm_user_cores))) {
451 hwm_user_cores = 0;
452 }
453
454 if (PE_parse_boot_argn("qos_override_mode", &qos_override_mode, sizeof(qos_override_mode))) {
455 printf("QOS override mode: 0x%08x\n", qos_override_mode);
456 } else {
457 qos_override_mode = QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE;
458 }
459
460 proc_init_cpumon_params();
461
462 if (!PE_parse_boot_argn("task_wakeups_monitor_rate", &task_wakeups_monitor_rate, sizeof (task_wakeups_monitor_rate))) {
463 task_wakeups_monitor_rate = TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT;
464 }
465
466 if (!PE_parse_boot_argn("task_wakeups_monitor_interval", &task_wakeups_monitor_interval, sizeof (task_wakeups_monitor_interval))) {
467 task_wakeups_monitor_interval = TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL;
468 }
469
470 if (!PE_parse_boot_argn("task_wakeups_monitor_ustackshots_trigger_pct", &task_wakeups_monitor_ustackshots_trigger_pct,
471 sizeof (task_wakeups_monitor_ustackshots_trigger_pct))) {
472 task_wakeups_monitor_ustackshots_trigger_pct = TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER;
473 }
474
475 if (!PE_parse_boot_argn("disable_exc_resource", &disable_exc_resource,
476 sizeof (disable_exc_resource))) {
477 disable_exc_resource = 0;
478 }
479
480 /*
481 * If we have coalitions, coalition_init() will call init_task_ledgers() as it
482 * sets up the ledgers for the default coalition. If we don't have coalitions,
483 * then we have to call it now.
484 */
485 #if CONFIG_COALITIONS
486 assert(task_ledger_template);
487 #else /* CONFIG_COALITIONS */
488 init_task_ledgers();
489 #endif /* CONFIG_COALITIONS */
490
491 #if TASK_REFERENCE_LEAK_DEBUG
492 simple_lock_init(&task_ref_lock, 0);
493 task_ref_btlog = btlog_create(100000,
494 TASK_REF_BTDEPTH,
495 task_ref_lock_lock,
496 task_ref_lock_unlock,
497 &task_ref_lock);
498 assert(task_ref_btlog);
499 #endif
500
501 /*
502 * Create the kernel task as the first task.
503 */
504 #ifdef __LP64__
505 if (task_create_internal(TASK_NULL, NULL, FALSE, TRUE, &kernel_task) != KERN_SUCCESS)
506 #else
507 if (task_create_internal(TASK_NULL, NULL, FALSE, FALSE, &kernel_task) != KERN_SUCCESS)
508 #endif
509 panic("task_init\n");
510
511 vm_map_deallocate(kernel_task->map);
512 kernel_task->map = kernel_map;
513 lck_spin_init(&dead_task_statistics_lock, &task_lck_grp, &task_lck_attr);
514
515 }
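/*
 * Tuning sketch (illustrative; values hypothetical): the boot-args consumed
 * by task_init() can be supplied via the nvram boot-args string, e.g.
 *
 *     max_task_pmem=1024 task_wakeups_monitor_rate=300 disable_exc_resource=1
 *
 * which would cap each task's physical footprint at 1024 MB (on jetsam
 * configurations), double the default wakeups-per-second limit, and suppress
 * EXC_RESOURCE delivery for resource-monitor violations.
 */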
516
517 /*
518 * Create a task running in the kernel address space. It may
519 * have its own map of size mem_size and may have ipc privileges.
520 */
521 kern_return_t
522 kernel_task_create(
523 __unused task_t parent_task,
524 __unused vm_offset_t map_base,
525 __unused vm_size_t map_size,
526 __unused task_t *child_task)
527 {
528 return (KERN_INVALID_ARGUMENT);
529 }
530
531 kern_return_t
532 task_create(
533 task_t parent_task,
534 __unused ledger_port_array_t ledger_ports,
535 __unused mach_msg_type_number_t num_ledger_ports,
536 __unused boolean_t inherit_memory,
537 __unused task_t *child_task) /* OUT */
538 {
539 if (parent_task == TASK_NULL)
540 return(KERN_INVALID_ARGUMENT);
541
542 /*
543 * No longer supported: too many calls assume that a task has a valid
544 * process attached.
545 */
546 return(KERN_FAILURE);
547 }
548
549 kern_return_t
550 host_security_create_task_token(
551 host_security_t host_security,
552 task_t parent_task,
553 __unused security_token_t sec_token,
554 __unused audit_token_t audit_token,
555 __unused host_priv_t host_priv,
556 __unused ledger_port_array_t ledger_ports,
557 __unused mach_msg_type_number_t num_ledger_ports,
558 __unused boolean_t inherit_memory,
559 __unused task_t *child_task) /* OUT */
560 {
561 if (parent_task == TASK_NULL)
562 return(KERN_INVALID_ARGUMENT);
563
564 if (host_security == HOST_NULL)
565 return(KERN_INVALID_SECURITY);
566
567 /*
568 * No longer supported.
569 */
570 return(KERN_FAILURE);
571 }
572
573 /*
574 * Task ledgers
575 * ------------
576 *
577 * phys_footprint
578 * Physical footprint: This is the sum of:
579 * + (internal - alternate_accounting)
580 * + (internal_compressed - alternate_accounting_compressed)
581 * + iokit_mapped
582 * + purgeable_nonvolatile
583 * + purgeable_nonvolatile_compressed
584 *
585 * internal
586 * The task's anonymous memory, which on iOS is always resident.
587 *
588 * internal_compressed
589 * Amount of this task's internal memory which is held by the compressor.
590 * Such memory is no longer actually resident for the task [i.e., resident in its pmap],
591 * and could be either decompressed back into memory, or paged out to storage, depending
592 * on our implementation.
593 *
594 * iokit_mapped
595 * IOKit mappings: The total size of all IOKit mappings in this task, regardless of
596 * clean/dirty or internal/external state.
597 *
598 * alternate_accounting
599 * The number of internal dirty pages which are part of IOKit mappings. By definition, these pages
600 * are counted in both internal *and* iokit_mapped, so we must subtract them from the total to avoid
601 * double counting.
602 */
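/*
 * Worked example (numbers hypothetical): a task with 100 MB internal,
 * 20 MB internal_compressed, 10 MB iokit_mapped, 8 MB alternate_accounting,
 * 0 MB alternate_accounting_compressed, 5 MB purgeable_nonvolatile and
 * 0 MB purgeable_nonvolatile_compressed has
 *
 *     phys_footprint = (100 - 8) + (20 - 0) + 10 + 5 + 0 = 127 MB
 */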
603 void
604 init_task_ledgers(void)
605 {
606 ledger_template_t t;
607
608 assert(task_ledger_template == NULL);
609 assert(kernel_task == TASK_NULL);
610
611 if ((t = ledger_template_create("Per-task ledger")) == NULL)
612 panic("couldn't create task ledger template");
613
614 task_ledgers.cpu_time = ledger_entry_add(t, "cpu_time", "sched", "ns");
615 task_ledgers.tkm_private = ledger_entry_add(t, "tkm_private",
616 "physmem", "bytes");
617 task_ledgers.tkm_shared = ledger_entry_add(t, "tkm_shared", "physmem",
618 "bytes");
619 task_ledgers.phys_mem = ledger_entry_add(t, "phys_mem", "physmem",
620 "bytes");
621 task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem",
622 "bytes");
623 task_ledgers.internal = ledger_entry_add(t, "internal", "physmem",
624 "bytes");
625 task_ledgers.iokit_mapped = ledger_entry_add(t, "iokit_mapped", "mappings",
626 "bytes");
627 task_ledgers.alternate_accounting = ledger_entry_add(t, "alternate_accounting", "physmem",
628 "bytes");
629 task_ledgers.alternate_accounting_compressed = ledger_entry_add(t, "alternate_accounting_compressed", "physmem",
630 "bytes");
631 task_ledgers.phys_footprint = ledger_entry_add(t, "phys_footprint", "physmem",
632 "bytes");
633 task_ledgers.internal_compressed = ledger_entry_add(t, "internal_compressed", "physmem",
634 "bytes");
635 task_ledgers.purgeable_volatile = ledger_entry_add(t, "purgeable_volatile", "physmem", "bytes");
636 task_ledgers.purgeable_nonvolatile = ledger_entry_add(t, "purgeable_nonvolatile", "physmem", "bytes");
637 task_ledgers.purgeable_volatile_compressed = ledger_entry_add(t, "purgeable_volatile_compress", "physmem", "bytes");
638 task_ledgers.purgeable_nonvolatile_compressed = ledger_entry_add(t, "purgeable_nonvolatile_compress", "physmem", "bytes");
639 task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power",
640 "count");
641 task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power",
642 "count");
643
644 #if CONFIG_SCHED_SFI
645 sfi_class_id_t class_id, ledger_alias;
646 for (class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
647 task_ledgers.sfi_wait_times[class_id] = -1;
648 }
649
650 /* don't account for UNSPECIFIED */
651 for (class_id = SFI_CLASS_UNSPECIFIED + 1; class_id < MAX_SFI_CLASS_ID; class_id++) {
652 ledger_alias = sfi_get_ledger_alias_for_class(class_id);
653 if (ledger_alias != SFI_CLASS_UNSPECIFIED) {
654 /* Check to see if alias has been registered yet */
655 if (task_ledgers.sfi_wait_times[ledger_alias] != -1) {
656 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias];
657 } else {
658 /* Otherwise, initialize it first */
659 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias] = sfi_ledger_entry_add(t, ledger_alias);
660 }
661 } else {
662 task_ledgers.sfi_wait_times[class_id] = sfi_ledger_entry_add(t, class_id);
663 }
664
665 if (task_ledgers.sfi_wait_times[class_id] < 0) {
666 panic("couldn't create entries for task ledger template for SFI class 0x%x", class_id);
667 }
668 }
669
670 assert(task_ledgers.sfi_wait_times[MAX_SFI_CLASS_ID -1] != -1);
671 #endif /* CONFIG_SCHED_SFI */
672
673 #ifdef CONFIG_BANK
674 task_ledgers.cpu_time_billed_to_me = ledger_entry_add(t, "cpu_time_billed_to_me", "sched", "ns");
675 task_ledgers.cpu_time_billed_to_others = ledger_entry_add(t, "cpu_time_billed_to_others", "sched", "ns");
676 #endif
677 if ((task_ledgers.cpu_time < 0) ||
678 (task_ledgers.tkm_private < 0) ||
679 (task_ledgers.tkm_shared < 0) ||
680 (task_ledgers.phys_mem < 0) ||
681 (task_ledgers.wired_mem < 0) ||
682 (task_ledgers.internal < 0) ||
683 (task_ledgers.iokit_mapped < 0) ||
684 (task_ledgers.alternate_accounting < 0) ||
685 (task_ledgers.alternate_accounting_compressed < 0) ||
686 (task_ledgers.phys_footprint < 0) ||
687 (task_ledgers.internal_compressed < 0) ||
688 (task_ledgers.purgeable_volatile < 0) ||
689 (task_ledgers.purgeable_nonvolatile < 0) ||
690 (task_ledgers.purgeable_volatile_compressed < 0) ||
691 (task_ledgers.purgeable_nonvolatile_compressed < 0) ||
692 (task_ledgers.platform_idle_wakeups < 0) ||
693 (task_ledgers.interrupt_wakeups < 0)
694 #ifdef CONFIG_BANK
695 || (task_ledgers.cpu_time_billed_to_me < 0) || (task_ledgers.cpu_time_billed_to_others < 0)
696 #endif
697 ) {
698 panic("couldn't create entries for task ledger template");
699 }
700
701 ledger_track_maximum(t, task_ledgers.phys_footprint, 60);
702 #if MACH_ASSERT
703 if (pmap_ledgers_panic) {
704 ledger_panic_on_negative(t, task_ledgers.phys_footprint);
705 ledger_panic_on_negative(t, task_ledgers.internal);
706 ledger_panic_on_negative(t, task_ledgers.internal_compressed);
707 ledger_panic_on_negative(t, task_ledgers.iokit_mapped);
708 ledger_panic_on_negative(t, task_ledgers.alternate_accounting);
709 ledger_panic_on_negative(t, task_ledgers.alternate_accounting_compressed);
710 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile);
711 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile);
712 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile_compressed);
713 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile_compressed);
714 }
715 #endif /* MACH_ASSERT */
716
717 #if CONFIG_JETSAM
718 ledger_set_callback(t, task_ledgers.phys_footprint, task_footprint_exceeded, NULL, NULL);
719 #endif
720
721 ledger_set_callback(t, task_ledgers.interrupt_wakeups,
722 task_wakeups_rate_exceeded, NULL, NULL);
723
724 task_ledger_template = t;
725 }
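/*
 * Usage sketch (mirrors task_create_internal() below; illustrative only):
 * once init_task_ledgers() has built task_ledger_template, each new task
 * instantiates a private ledger from it and addresses individual entries
 * through the task_ledgers indices.
 */
#if 0	/* illustrative only */
	ledger_t l = ledger_instantiate(task_ledger_template,
					LEDGER_CREATE_ACTIVE_ENTRIES);
	if (l != NULL)
		ledger_credit(l, task_ledgers.cpu_time, 1000 /* ns, hypothetical */);
#endif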
726
727 kern_return_t
728 task_create_internal(
729 task_t parent_task,
730 coalition_t *parent_coalitions __unused,
731 boolean_t inherit_memory,
732 boolean_t is_64bit,
733 task_t *child_task) /* OUT */
734 {
735 task_t new_task;
736 vm_shared_region_t shared_region;
737 ledger_t ledger = NULL;
738
739 new_task = (task_t) zalloc(task_zone);
740
741 if (new_task == TASK_NULL)
742 return(KERN_RESOURCE_SHORTAGE);
743
744 /* one ref for just being alive; one for our caller */
745 new_task->ref_count = 2;
746
747 /* allocate with active entries */
748 assert(task_ledger_template != NULL);
749 if ((ledger = ledger_instantiate(task_ledger_template,
750 LEDGER_CREATE_ACTIVE_ENTRIES)) == NULL) {
751 zfree(task_zone, new_task);
752 return(KERN_RESOURCE_SHORTAGE);
753 }
754
755 new_task->ledger = ledger;
756
757 #if defined(CONFIG_SCHED_MULTIQ)
758 new_task->sched_group = sched_group_create();
759 #endif
760
761 /* if inherit_memory is true, parent_task MUST not be NULL */
762 if (inherit_memory)
763 new_task->map = vm_map_fork(ledger, parent_task->map);
764 else
765 new_task->map = vm_map_create(pmap_create(ledger, 0, is_64bit),
766 (vm_map_offset_t)(VM_MIN_ADDRESS),
767 (vm_map_offset_t)(VM_MAX_ADDRESS), TRUE);
768
769 /* Inherit memlock limit from parent */
770 if (parent_task)
771 vm_map_set_user_wire_limit(new_task->map, (vm_size_t)parent_task->map->user_wire_limit);
772
773 lck_mtx_init(&new_task->lock, &task_lck_grp, &task_lck_attr);
774 queue_init(&new_task->threads);
775 new_task->suspend_count = 0;
776 new_task->thread_count = 0;
777 new_task->active_thread_count = 0;
778 new_task->user_stop_count = 0;
779 new_task->legacy_stop_count = 0;
780 new_task->active = TRUE;
781 new_task->halting = FALSE;
782 new_task->user_data = NULL;
783 new_task->faults = 0;
784 new_task->cow_faults = 0;
785 new_task->pageins = 0;
786 new_task->messages_sent = 0;
787 new_task->messages_received = 0;
788 new_task->syscalls_mach = 0;
789 new_task->priv_flags = 0;
790 new_task->syscalls_unix=0;
791 new_task->c_switch = new_task->p_switch = new_task->ps_switch = 0;
792 new_task->t_flags = 0;
793 new_task->importance = 0;
794
795 #if CONFIG_ATM
796 new_task->atm_context = NULL;
797 #endif
798 #if CONFIG_BANK
799 new_task->bank_context = NULL;
800 #endif
801
802 zinfo_task_init(new_task);
803
804 #ifdef MACH_BSD
805 new_task->bsd_info = NULL;
806 new_task->corpse_info = NULL;
807 #endif /* MACH_BSD */
808
809 #if CONFIG_JETSAM
810 if (max_task_footprint != 0) {
811 ledger_set_limit(ledger, task_ledgers.phys_footprint, max_task_footprint, PHYS_FOOTPRINT_WARNING_LEVEL);
812 }
813 #endif
814
815 if (task_wakeups_monitor_rate != 0) {
816 uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
817 int32_t rate; // Ignored because of WAKEMON_SET_DEFAULTS
818 task_wakeups_monitor_ctl(new_task, &flags, &rate);
819 }
820
821 #if defined(__i386__) || defined(__x86_64__)
822 new_task->i386_ldt = 0;
823 #endif
824
825 new_task->task_debug = NULL;
826
827 queue_init(&new_task->semaphore_list);
828 new_task->semaphores_owned = 0;
829
830 ipc_task_init(new_task, parent_task);
831
832 new_task->total_user_time = 0;
833 new_task->total_system_time = 0;
834
835 new_task->vtimers = 0;
836
837 new_task->shared_region = NULL;
838
839 new_task->affinity_space = NULL;
840
841 new_task->pidsuspended = FALSE;
842 new_task->frozen = FALSE;
843 new_task->changing_freeze_state = FALSE;
844 new_task->rusage_cpu_flags = 0;
845 new_task->rusage_cpu_percentage = 0;
846 new_task->rusage_cpu_interval = 0;
847 new_task->rusage_cpu_deadline = 0;
848 new_task->rusage_cpu_callt = NULL;
849 #if MACH_ASSERT
850 new_task->suspends_outstanding = 0;
851 #endif
852
853 #if HYPERVISOR
854 new_task->hv_task_target = NULL;
855 #endif /* HYPERVISOR */
856
857
858 new_task->low_mem_notified_warn = 0;
859 new_task->low_mem_notified_critical = 0;
860 new_task->low_mem_privileged_listener = 0;
861 new_task->purged_memory_warn = 0;
862 new_task->purged_memory_critical = 0;
863 new_task->mem_notify_reserved = 0;
864 #if IMPORTANCE_INHERITANCE
865 new_task->task_imp_base = NULL;
866 #endif /* IMPORTANCE_INHERITANCE */
867
868 #if defined(__x86_64__)
869 new_task->uexc_range_start = new_task->uexc_range_size = new_task->uexc_handler = 0;
870 #endif
871
872 new_task->requested_policy = default_task_requested_policy;
873 new_task->effective_policy = default_task_effective_policy;
874 new_task->pended_policy = default_task_pended_policy;
875
876 if (parent_task != TASK_NULL) {
877 new_task->sec_token = parent_task->sec_token;
878 new_task->audit_token = parent_task->audit_token;
879
880 /* inherit the parent's shared region */
881 shared_region = vm_shared_region_get(parent_task);
882 vm_shared_region_set(new_task, shared_region);
883
884 if(task_has_64BitAddr(parent_task))
885 task_set_64BitAddr(new_task);
886 new_task->all_image_info_addr = parent_task->all_image_info_addr;
887 new_task->all_image_info_size = parent_task->all_image_info_size;
888
889 #if defined(__i386__) || defined(__x86_64__)
890 if (inherit_memory && parent_task->i386_ldt)
891 new_task->i386_ldt = user_ldt_copy(parent_task->i386_ldt);
892 #endif
893 if (inherit_memory && parent_task->affinity_space)
894 task_affinity_create(parent_task, new_task);
895
896 new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task);
897
898 #if IMPORTANCE_INHERITANCE
899 ipc_importance_task_t new_task_imp = IIT_NULL;
900
901 if (task_is_marked_importance_donor(parent_task)) {
902 new_task_imp = ipc_importance_for_task(new_task, FALSE);
903 assert(IIT_NULL != new_task_imp);
904 ipc_importance_task_mark_donor(new_task_imp, TRUE);
905 }
906 /* Embedded doesn't want this to inherit */
907 if (task_is_marked_importance_receiver(parent_task)) {
908 if (IIT_NULL == new_task_imp)
909 new_task_imp = ipc_importance_for_task(new_task, FALSE);
910 assert(IIT_NULL != new_task_imp);
911 ipc_importance_task_mark_receiver(new_task_imp, TRUE);
912 }
913 if (task_is_marked_importance_denap_receiver(parent_task)) {
914 if (IIT_NULL == new_task_imp)
915 new_task_imp = ipc_importance_for_task(new_task, FALSE);
916 assert(IIT_NULL != new_task_imp);
917 ipc_importance_task_mark_denap_receiver(new_task_imp, TRUE);
918 }
919
920 if (IIT_NULL != new_task_imp) {
921 assert(new_task->task_imp_base == new_task_imp);
922 ipc_importance_task_release(new_task_imp);
923 }
924 #endif /* IMPORTANCE_INHERITANCE */
925
926 new_task->priority = BASEPRI_DEFAULT;
927 new_task->max_priority = MAXPRI_USER;
928
929 new_task->requested_policy.t_apptype = parent_task->requested_policy.t_apptype;
930
931 new_task->requested_policy.int_darwinbg = parent_task->requested_policy.int_darwinbg;
932 new_task->requested_policy.ext_darwinbg = parent_task->requested_policy.ext_darwinbg;
933 new_task->requested_policy.int_iotier = parent_task->requested_policy.int_iotier;
934 new_task->requested_policy.ext_iotier = parent_task->requested_policy.ext_iotier;
935 new_task->requested_policy.int_iopassive = parent_task->requested_policy.int_iopassive;
936 new_task->requested_policy.ext_iopassive = parent_task->requested_policy.ext_iopassive;
937 new_task->requested_policy.bg_iotier = parent_task->requested_policy.bg_iotier;
938 new_task->requested_policy.terminated = parent_task->requested_policy.terminated;
939 new_task->requested_policy.t_qos_clamp = parent_task->requested_policy.t_qos_clamp;
940
941 task_policy_create(new_task, parent_task->requested_policy.t_boosted);
942 } else {
943 new_task->sec_token = KERNEL_SECURITY_TOKEN;
944 new_task->audit_token = KERNEL_AUDIT_TOKEN;
945 #ifdef __LP64__
946 if(is_64bit)
947 task_set_64BitAddr(new_task);
948 #endif
949 new_task->all_image_info_addr = (mach_vm_address_t)0;
950 new_task->all_image_info_size = (mach_vm_size_t)0;
951
952 new_task->pset_hint = PROCESSOR_SET_NULL;
953
954 if (kernel_task == TASK_NULL) {
955 new_task->priority = BASEPRI_KERNEL;
956 new_task->max_priority = MAXPRI_KERNEL;
957 } else {
958 new_task->priority = BASEPRI_DEFAULT;
959 new_task->max_priority = MAXPRI_USER;
960 }
961 }
962
963 bzero(new_task->coalition, sizeof(new_task->coalition));
964 for (int i = 0; i < COALITION_NUM_TYPES; i++)
965 queue_chain_init(new_task->task_coalition[i]);
966
967 /* Allocate I/O Statistics */
968 new_task->task_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info));
969 assert(new_task->task_io_stats != NULL);
970 bzero(new_task->task_io_stats, sizeof(struct io_stat_info));
971
972 bzero(&(new_task->cpu_time_qos_stats), sizeof(struct _cpu_time_qos_stats));
973
974 bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics));
975 new_task->task_timer_wakeups_bin_1 = new_task->task_timer_wakeups_bin_2 = 0;
976 new_task->task_gpu_ns = 0;
977
978 #if CONFIG_COALITIONS
979
980 /* TODO: there is no graceful failure path here... */
981 if (parent_coalitions && parent_coalitions[COALITION_TYPE_RESOURCE]) {
982 coalitions_adopt_task(parent_coalitions, new_task);
983 } else if (parent_task && parent_task->coalition[COALITION_TYPE_RESOURCE]) {
984 /*
985 * all tasks at least have a resource coalition, so
986 * if the parent has one then inherit all coalitions
987 * the parent is a part of
988 */
989 coalitions_adopt_task(parent_task->coalition, new_task);
990 } else {
991 /* TODO: assert that new_task will be PID 1 (launchd) */
992 coalitions_adopt_init_task(new_task);
993 }
994
995 if (new_task->coalition[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
996 panic("created task is not a member of a resource coalition");
997 }
998 #endif /* CONFIG_COALITIONS */
999
1000 new_task->dispatchqueue_offset = 0;
1001 if (parent_task != NULL) {
1002 new_task->dispatchqueue_offset = parent_task->dispatchqueue_offset;
1003 }
1004
1005 if (vm_backing_store_low && parent_task != NULL)
1006 new_task->priv_flags |= (parent_task->priv_flags&VM_BACKING_STORE_PRIV);
1007
1008 new_task->task_volatile_objects = 0;
1009 new_task->task_nonvolatile_objects = 0;
1010 new_task->task_purgeable_disowning = FALSE;
1011 new_task->task_purgeable_disowned = FALSE;
1012
1013 ipc_task_enable(new_task);
1014
1015 lck_mtx_lock(&tasks_threads_lock);
1016 queue_enter(&tasks, new_task, task_t, tasks);
1017 tasks_count++;
1018 lck_mtx_unlock(&tasks_threads_lock);
1019
1020 *child_task = new_task;
1021 return(KERN_SUCCESS);
1022 }
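/*
 * Call sketch (taken from task_init() above): the kernel task itself is
 * created with no parent, no explicit coalitions, no inherited memory and,
 * on LP64 kernels, 64-bit addressing:
 *
 *     task_create_internal(TASK_NULL, NULL, FALSE, TRUE, &kernel_task);
 *
 * User tasks pass a parent task instead and inherit its shared region,
 * security/audit tokens and policy, per the parent_task != TASK_NULL branch
 * above.
 */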
1023
1024 int task_dropped_imp_count = 0;
1025
1026 /*
1027 * task_deallocate:
1028 *
1029 * Drop a reference on a task.
1030 */
1031 void
1032 task_deallocate(
1033 task_t task)
1034 {
1035 ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups;
1036 uint32_t refs;
1037
1038 if (task == TASK_NULL)
1039 return;
1040
1041 refs = task_deallocate_internal(task);
1042
1043 #if IMPORTANCE_INHERITANCE
1044 if (refs > 1)
1045 return;
1046
1047 if (refs == 1) {
1048 /*
1049 * If last ref potentially comes from the task's importance,
1050 * disconnect it. But more task refs may be added before
1051 * that completes, so wait for the reference to go to zero
1052 * naturally (it may happen on a recursive task_deallocate()
1053 * from the ipc_importance_disconnect_task() call).
1054 */
1055 if (IIT_NULL != task->task_imp_base)
1056 ipc_importance_disconnect_task(task);
1057 return;
1058 }
1059 #else
1060 if (refs > 0)
1061 return;
1062 #endif /* IMPORTANCE_INHERITANCE */
1063
1064 lck_mtx_lock(&tasks_threads_lock);
1065 queue_remove(&terminated_tasks, task, task_t, tasks);
1066 terminated_tasks_count--;
1067 lck_mtx_unlock(&tasks_threads_lock);
1068
1069 /*
1070 * remove the reference on atm descriptor
1071 */
1072 task_atm_reset(task);
1073
1074 #if CONFIG_BANK
1075 /*
1076 * remove the reference on bank context
1077 */
1078 if (task->bank_context != NULL) {
1079 bank_task_destroy(task->bank_context);
1080 task->bank_context = NULL;
1081 }
1082 #endif
1083
1084 if (task->task_io_stats)
1085 kfree(task->task_io_stats, sizeof(struct io_stat_info));
1086
1087 /*
1088 * Give the machine dependent code a chance
1089 * to perform cleanup before ripping apart
1090 * the task.
1091 */
1092 machine_task_terminate(task);
1093
1094 ipc_task_terminate(task);
1095
1096 if (task->affinity_space)
1097 task_affinity_deallocate(task);
1098
1099 #if MACH_ASSERT
1100 if (task->ledger != NULL &&
1101 task->map != NULL &&
1102 task->map->pmap != NULL &&
1103 task->map->pmap->ledger != NULL) {
1104 assert(task->ledger == task->map->pmap->ledger);
1105 }
1106 #endif /* MACH_ASSERT */
1107
1108 vm_purgeable_disown(task);
1109 assert(task->task_purgeable_disowned);
1110 if (task->task_volatile_objects != 0 ||
1111 task->task_nonvolatile_objects != 0) {
1112 panic("task_deallocate(%p): "
1113 "volatile_objects=%d nonvolatile_objects=%d\n",
1114 task,
1115 task->task_volatile_objects,
1116 task->task_nonvolatile_objects);
1117 }
1118
1119 vm_map_deallocate(task->map);
1120 is_release(task->itk_space);
1121
1122 ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
1123 &interrupt_wakeups, &debit);
1124 ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
1125 &platform_idle_wakeups, &debit);
1126
1127 #if defined(CONFIG_SCHED_MULTIQ)
1128 sched_group_destroy(task->sched_group);
1129 #endif
1130
1131 /* Accumulate statistics for dead tasks */
1132 lck_spin_lock(&dead_task_statistics_lock);
1133 dead_task_statistics.total_user_time += task->total_user_time;
1134 dead_task_statistics.total_system_time += task->total_system_time;
1135
1136 dead_task_statistics.task_interrupt_wakeups += interrupt_wakeups;
1137 dead_task_statistics.task_platform_idle_wakeups += platform_idle_wakeups;
1138
1139 dead_task_statistics.task_timer_wakeups_bin_1 += task->task_timer_wakeups_bin_1;
1140 dead_task_statistics.task_timer_wakeups_bin_2 += task->task_timer_wakeups_bin_2;
1141
1142 lck_spin_unlock(&dead_task_statistics_lock);
1143 lck_mtx_destroy(&task->lock, &task_lck_grp);
1144
1145 if (!ledger_get_entries(task->ledger, task_ledgers.tkm_private, &credit,
1146 &debit)) {
1147 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc);
1148 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_private.free);
1149 }
1150 if (!ledger_get_entries(task->ledger, task_ledgers.tkm_shared, &credit,
1151 &debit)) {
1152 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_shared.alloc);
1153 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_shared.free);
1154 }
1155 ledger_dereference(task->ledger);
1156 zinfo_task_free(task);
1157
1158 #if TASK_REFERENCE_LEAK_DEBUG
1159 btlog_remove_entries_for_element(task_ref_btlog, task);
1160 #endif
1161
1162 #if CONFIG_COALITIONS
1163 if (!task->coalition[COALITION_TYPE_RESOURCE])
1164 panic("deallocating task was not a member of a resource coalition");
1165 task_release_coalitions(task);
1166 #endif /* CONFIG_COALITIONS */
1167
1168 bzero(task->coalition, sizeof(task->coalition));
1169
1170 #if MACH_BSD
1171 /* clean up collected information since last reference to task is gone */
1172 if (task->corpse_info) {
1173 task_crashinfo_destroy(task->corpse_info);
1174 task->corpse_info = NULL;
1175 }
1176 #endif
1177
1178 zfree(task_zone, task);
1179 }
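/*
 * Reference pairing sketch (illustrative): every task_reference() must be
 * balanced by a task_deallocate().  With IMPORTANCE_INHERITANCE, the drop to
 * a single remaining reference is special-cased above: if that last
 * reference is held by the task's importance structure,
 * ipc_importance_disconnect_task() is called and the final teardown happens
 * on a later, possibly recursive, task_deallocate().
 */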
1180
1181 /*
1182 * task_name_deallocate:
1183 *
1184 * Drop a reference on a task name.
1185 */
1186 void
1187 task_name_deallocate(
1188 task_name_t task_name)
1189 {
1190 return(task_deallocate((task_t)task_name));
1191 }
1192
1193 /*
1194 * task_suspension_token_deallocate:
1195 *
1196 * Drop a reference on a task suspension token.
1197 */
1198 void
1199 task_suspension_token_deallocate(
1200 task_suspension_token_t token)
1201 {
1202 return(task_deallocate((task_t)token));
1203 }
1204
1205
1206 /*
1207 * task_collect_crash_info:
1208 *
1209 * Collect crash info from BSD- and Mach-based data.
1210 */
1211 kern_return_t
1212 task_collect_crash_info(task_t task)
1213 {
1214 kern_return_t kr = KERN_SUCCESS;
1215
1216 kcdata_descriptor_t crash_data = NULL;
1217 kcdata_descriptor_t crash_data_release = NULL;
1218 mach_msg_type_number_t size = CORPSEINFO_ALLOCATION_SIZE;
1219 mach_vm_offset_t crash_data_user_ptr = 0;
1220
1221 if (!corpses_enabled()) {
1222 return KERN_NOT_SUPPORTED;
1223 }
1224
1225 task_lock(task);
1226 assert(task->bsd_info != NULL);
1227 if (task->corpse_info == NULL && task->bsd_info != NULL) {
1228 task_unlock(task);
1229 /* map crash data memory in task's vm map */
1230 kr = mach_vm_allocate(task->map, &crash_data_user_ptr, size, (VM_MAKE_TAG(VM_MEMORY_CORPSEINFO) | VM_FLAGS_ANYWHERE));
1231
1232 if (kr != KERN_SUCCESS)
1233 goto out_no_lock;
1234
1235 crash_data = task_crashinfo_alloc_init((mach_vm_address_t)crash_data_user_ptr, size);
1236 if (crash_data) {
1237 task_lock(task);
1238 crash_data_release = task->corpse_info;
1239 task->corpse_info = crash_data;
1240 task_unlock(task);
1241 kr = KERN_SUCCESS;
1242 } else {
1243 /* if failed to create corpse info, free the mapping */
1244 if (KERN_SUCCESS != mach_vm_deallocate(task->map, crash_data_user_ptr, size)) {
1245 printf("mach_vm_deallocate failed to clear corpse_data for pid %d.\n", task_pid(task));
1246 }
1247 kr = KERN_FAILURE;
1248 }
1249
1250 if (crash_data_release != NULL) {
1251 task_crashinfo_destroy(crash_data_release);
1252 }
1253 } else {
1254 task_unlock(task);
1255 }
1256
1257 out_no_lock:
1258 return kr;
1259 }
1260
1261 /*
1262 * task_deliver_crash_notification:
1263 *
1264 * Makes outcall to registered host port for a corpse.
1265 */
1266 kern_return_t
1267 task_deliver_crash_notification(task_t task)
1268 {
1269 kcdata_descriptor_t crash_info = task->corpse_info;
1270 thread_t th_iter = NULL;
1271 kern_return_t kr = KERN_SUCCESS;
1272 wait_interrupt_t wsave;
1273 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
1274
1275 if (crash_info == NULL)
1276 return KERN_FAILURE;
1277
1278 code[0] = crash_info->kcd_addr_begin;
1279 code[1] = crash_info->kcd_length;
1280
1281 task_lock(task);
1282 queue_iterate(&task->threads, th_iter, thread_t, task_threads)
1283 {
1284 ipc_thread_reset(th_iter);
1285 }
1286 task_unlock(task);
1287
1288 wsave = thread_interrupt_level(THREAD_UNINT);
1289 kr = exception_triage(EXC_CORPSE_NOTIFY, code, EXCEPTION_CODE_MAX);
1290 if (kr != KERN_SUCCESS) {
1291 printf("Failed to send exception EXC_CORPSE_NOTIFY. error code: %d for pid %d\n", kr, task_pid(task));
1292 }
1293
1294 /*
1295 * crash reporting is done. Now release threads
1296 * for reaping by thread_terminate_daemon
1297 */
1298 task_lock(task);
1299 assert(task->active_thread_count == 0);
1300 queue_iterate(&task->threads, th_iter, thread_t, task_threads)
1301 {
1302 thread_mtx_lock(th_iter);
1303 assert(th_iter->inspection == TRUE);
1304 th_iter->inspection = FALSE;
1305 /* now that the corpse has been autopsied, dispose of the thread name */
1306 uthread_cleanup_name(th_iter->uthread);
1307 thread_mtx_unlock(th_iter);
1308 }
1309
1310 thread_terminate_crashed_threads();
1311 /* remove the pending corpse report flag */
1312 task_clear_corpse_pending_report(task);
1313
1314 task_unlock(task);
1315
1316 (void)thread_interrupt_level(wsave);
1317 task_terminate_internal(task);
1318
1319 return kr;
1320 }
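/*
 * Note (added for clarity): the EXC_CORPSE_NOTIFY exception raised above
 * carries the corpse's crash-info blob by reference -- code[0] is the
 * kcdata buffer's begin address and code[1] its length -- so the registered
 * handler on the host port can locate and parse the collected crash data.
 */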
1321
1322 /*
1323 * task_terminate:
1324 *
1325 * Terminate the specified task. See comments on thread_terminate
1326 * (kern/thread.c) about problems with terminating the "current task."
1327 */
1328
1329 kern_return_t
1330 task_terminate(
1331 task_t task)
1332 {
1333 if (task == TASK_NULL)
1334 return (KERN_INVALID_ARGUMENT);
1335
1336 if (task->bsd_info)
1337 return (KERN_FAILURE);
1338
1339 return (task_terminate_internal(task));
1340 }
1341
1342 #if MACH_ASSERT
1343 extern int proc_pid(struct proc *);
1344 extern void proc_name_kdp(task_t t, char *buf, int size);
1345 #endif /* MACH_ASSERT */
1346
1347 #define VM_MAP_PARTIAL_REAP 0x54 /* 0x150 */
1348 static void
1349 __unused task_partial_reap(task_t task, __unused int pid)
1350 {
1351 unsigned int reclaimed_resident = 0;
1352 unsigned int reclaimed_compressed = 0;
1353 uint64_t task_page_count;
1354
1355 task_page_count = (get_task_phys_footprint(task) / PAGE_SIZE_64);
1356
1357 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_START),
1358 pid, task_page_count, 0, 0, 0);
1359
1360 vm_map_partial_reap(task->map, &reclaimed_resident, &reclaimed_compressed);
1361
1362 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_END),
1363 pid, reclaimed_resident, reclaimed_compressed, 0, 0);
1364 }
1365
1366 kern_return_t
1367 task_mark_corpse(task_t task)
1368 {
1369 kern_return_t kr = KERN_SUCCESS;
1370 thread_t self_thread;
1371 (void) self_thread;
1372 wait_interrupt_t wsave;
1373
1374 assert(task != kernel_task);
1375 assert(task == current_task());
1376 assert(!task_is_a_corpse(task));
1377
1378 kr = task_collect_crash_info(task);
1379 if (kr != KERN_SUCCESS) {
1380 return kr;
1381 }
1382
1383 self_thread = current_thread();
1384
1385 wsave = thread_interrupt_level(THREAD_UNINT);
1386 task_lock(task);
1387
1388 task_set_corpse_pending_report(task);
1389 task_set_corpse(task);
1390
1391 kr = task_start_halt_locked(task, TRUE);
1392 assert(kr == KERN_SUCCESS);
1393 ipc_task_reset(task);
1394 ipc_task_enable(task);
1395
1396 task_unlock(task);
1397 /* terminate the ipc space */
1398 ipc_space_terminate(task->itk_space);
1399
1400 task_start_halt(task);
1401 thread_terminate_internal(self_thread);
1402 (void) thread_interrupt_level(wsave);
1403 assert(task->halting == TRUE);
1404 return kr;
1405 }
1406
1407 kern_return_t
1408 task_terminate_internal(
1409 task_t task)
1410 {
1411 thread_t thread, self;
1412 task_t self_task;
1413 boolean_t interrupt_save;
1414 int pid = 0;
1415
1416 assert(task != kernel_task);
1417
1418 self = current_thread();
1419 self_task = self->task;
1420
1421 /*
1422 * Get the task locked and make sure that we are not racing
1423 * with someone else trying to terminate us.
1424 */
1425 if (task == self_task)
1426 task_lock(task);
1427 else
1428 if (task < self_task) {
1429 task_lock(task);
1430 task_lock(self_task);
1431 }
1432 else {
1433 task_lock(self_task);
1434 task_lock(task);
1435 }
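	/*
	 * (Ordering note, added for clarity: when both task locks must be
	 * held, they are taken in ascending address order, so two threads
	 * terminating each other's tasks acquire the locks in the same
	 * order and cannot deadlock.)
	 */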
1436
1437 if (!task->active) {
1438 /*
1439 * Task is already being terminated.
1440 * Just return an error. If we are dying, this will
1441 * just get us to our AST special handler and that
1442 * will get us to finalize the termination of ourselves.
1443 */
1444 task_unlock(task);
1445 if (self_task != task)
1446 task_unlock(self_task);
1447
1448 return (KERN_FAILURE);
1449 }
1450
1451 if (task_corpse_pending_report(task)) {
1452 /*
1453 * Task is marked for reporting as corpse.
1454 * Just return an error. This will
1455 * just get us to our AST special handler and that
1456 * will get us to finish the path to death
1457 */
1458 task_unlock(task);
1459 if (self_task != task)
1460 task_unlock(self_task);
1461
1462 return (KERN_FAILURE);
1463 }
1464
1465 if (self_task != task)
1466 task_unlock(self_task);
1467
1468 /*
1469 * Make sure the current thread does not get aborted out of
1470 * the waits inside these operations.
1471 */
1472 interrupt_save = thread_interrupt_level(THREAD_UNINT);
1473
1474 /*
1475 * Indicate that we want all the threads to stop executing
1476 * at user space by holding the task (we would have held
1477 * each thread independently in thread_terminate_internal -
1478 * but this way we may be more likely to already find it
1479 * held there). Mark the task inactive, and prevent
1480 * further task operations via the task port.
1481 */
1482 task_hold_locked(task);
1483 task->active = FALSE;
1484 ipc_task_disable(task);
1485
1486 #if CONFIG_TELEMETRY
1487 /*
1488 * Notify telemetry that this task is going away.
1489 */
1490 telemetry_task_ctl_locked(task, TF_TELEMETRY, 0);
1491 #endif
1492
1493 /*
1494 * Terminate each thread in the task.
1495 */
1496 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1497 thread_terminate_internal(thread);
1498 }
1499
1500 #ifdef MACH_BSD
1501 if (task->bsd_info != NULL) {
1502 pid = proc_pid(task->bsd_info);
1503 }
1504 #endif /* MACH_BSD */
1505
1506 task_unlock(task);
1507
1508 proc_set_task_policy(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE,
1509 TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);
1510
1511 /* Early object reap phase */
1512
1513 // PR-17045188: Revisit implementation
1514 // task_partial_reap(task, pid);
1515
1516
1517 /*
1518 * Destroy all synchronizers owned by the task.
1519 */
1520 task_synchronizer_destroy_all(task);
1521
1522 /*
1523 * Destroy the IPC space, leaving just a reference for it.
1524 */
1525 ipc_space_terminate(task->itk_space);
1526
1527 #if 00
1528 /* if some ledgers go negative on tear-down again... */
1529 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1530 task_ledgers.phys_footprint);
1531 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1532 task_ledgers.internal);
1533 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1534 task_ledgers.internal_compressed);
1535 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1536 task_ledgers.iokit_mapped);
1537 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1538 task_ledgers.alternate_accounting);
1539 ledger_disable_panic_on_negative(task->map->pmap->ledger,
1540 task_ledgers.alternate_accounting_compressed);
1541 #endif
1542
1543 /*
1544 * If the current thread is a member of the task
1545 * being terminated, then the last reference to
1546 * the task will not be dropped until the thread
1547 * is finally reaped. To avoid incurring the
1548 * expense of removing the address space regions
1549 * at reap time, we do it explicitly here.
1550 */
1551
1552 vm_map_lock(task->map);
1553 vm_map_disable_hole_optimization(task->map);
1554 vm_map_unlock(task->map);
1555
1556 vm_map_remove(task->map,
1557 task->map->min_offset,
1558 task->map->max_offset,
1559 /* no unnesting on final cleanup: */
1560 VM_MAP_REMOVE_NO_UNNESTING);
1561
1562 /* release our shared region */
1563 vm_shared_region_set(task, NULL);
1564
1565
1566 #if MACH_ASSERT
1567 /*
1568 * Identify the pmap's process, in case the pmap ledgers drift
1569 * and we have to report it.
1570 */
1571 char procname[17];
1572 if (task->bsd_info) {
1573 pid = proc_pid(task->bsd_info);
1574 proc_name_kdp(task, procname, sizeof (procname));
1575 } else {
1576 pid = 0;
1577 strlcpy(procname, "<unknown>", sizeof (procname));
1578 }
1579 pmap_set_process(task->map->pmap, pid, procname);
1580 #endif /* MACH_ASSERT */
1581
1582 lck_mtx_lock(&tasks_threads_lock);
1583 queue_remove(&tasks, task, task_t, tasks);
1584 queue_enter(&terminated_tasks, task, task_t, tasks);
1585 tasks_count--;
1586 terminated_tasks_count++;
1587 lck_mtx_unlock(&tasks_threads_lock);
1588
1589 /*
1590 * We no longer need to guard against being aborted, so restore
1591 * the previous interruptible state.
1592 */
1593 thread_interrupt_level(interrupt_save);
1594
1595 #if KPERF
1596 /* force the task to release all ctrs */
1597 if (task->t_chud & TASK_KPC_FORCED_ALL_CTRS)
1598 kpc_force_all_ctrs(task, 0);
1599 #endif
1600
1601 #if CONFIG_COALITIONS
1602 /*
1603 * Leave our coalitions. (drop activation but not reference)
1604 */
1605 coalitions_remove_task(task);
1606 #endif
1607
1608 /*
1609 * Get rid of the task active reference on itself.
1610 */
1611 task_deallocate(task);
1612
1613 return (KERN_SUCCESS);
1614 }
1615
1616 /*
1617 * task_start_halt:
1618 *
1619 * Shut the current task down (except for the current thread) in
1620 * preparation for dramatic changes to the task (probably exec).
1621 * We hold the task and mark all other threads in the task for
1622 * termination.
1623 */
1624 kern_return_t
1625 task_start_halt(task_t task)
1626 {
1627 kern_return_t kr = KERN_SUCCESS;
1628 task_lock(task);
1629 kr = task_start_halt_locked(task, FALSE);
1630 task_unlock(task);
1631 return kr;
1632 }
1633
1634 static kern_return_t
1635 task_start_halt_locked(task_t task, boolean_t should_mark_corpse)
1636 {
1637 thread_t thread, self;
1638 uint64_t dispatchqueue_offset;
1639
1640 assert(task != kernel_task);
1641
1642 self = current_thread();
1643
1644 if (task != self->task)
1645 return (KERN_INVALID_ARGUMENT);
1646
1647 if (task->halting || !task->active || !self->active) {
1648 /*
1649 * Task or current thread is already being terminated.
1650 * Hurry up and return out of the current kernel context
1651 * so that we run our AST special handler to terminate
1652 * ourselves.
1653 */
1654 return (KERN_FAILURE);
1655 }
1656
1657 task->halting = TRUE;
1658
1659 /*
1660 * Mark all the threads to keep them from starting any more
1661 * user-level execution. The thread_terminate_internal code
1662 * would do this on a thread by thread basis anyway, but this
1663 * gives us a better chance of not having to wait there.
1664 */
1665 task_hold_locked(task);
1666 dispatchqueue_offset = get_dispatchqueue_offset_from_proc(task->bsd_info);
1667
1668 /*
1669 * Terminate all the other threads in the task.
1670 */
1671 queue_iterate(&task->threads, thread, thread_t, task_threads)
1672 {
1673 if (should_mark_corpse) {
1674 thread_mtx_lock(thread);
1675 thread->inspection = TRUE;
1676 thread_mtx_unlock(thread);
1677 }
1678 if (thread != self)
1679 thread_terminate_internal(thread);
1680 }
1681 task->dispatchqueue_offset = dispatchqueue_offset;
1682
1683 task_release_locked(task);
1684
1685 return KERN_SUCCESS;
1686 }
1687
1688
1689 /*
1690 * task_complete_halt:
1691 *
1692 * Complete task halt by waiting for threads to terminate, then clean
1693 * up task resources (VM, port namespace, etc...) and then let the
1694 * current thread go in the (practically empty) task context.
1695 */
1696 void
1697 task_complete_halt(task_t task)
1698 {
1699 task_lock(task);
1700 assert(task->halting);
1701 assert(task == current_task());
1702
1703 /*
1704 * Wait for the other threads to get shut down.
1705 * When the last other thread is reaped, we'll be
1706 * woken up.
1707 */
1708 if (task->thread_count > 1) {
1709 assert_wait((event_t)&task->halting, THREAD_UNINT);
1710 task_unlock(task);
1711 thread_block(THREAD_CONTINUE_NULL);
1712 } else {
1713 task_unlock(task);
1714 }
1715
1716 /*
1717 * Give the machine dependent code a chance
1718 * to perform cleanup of task-level resources
1719 * associated with the current thread before
1720 * ripping apart the task.
1721 */
1722 machine_task_terminate(task);
1723
1724 /*
1725 * Destroy all synchronizers owned by the task.
1726 */
1727 task_synchronizer_destroy_all(task);
1728
1729 /*
1730 * Destroy the contents of the IPC space, leaving just
1731 * a reference for it.
1732 */
1733 ipc_space_clean(task->itk_space);
1734
1735 /*
1736 * Clean out the address space, as we are going to be
1737 * getting a new one.
1738 */
1739 vm_map_remove(task->map, task->map->min_offset,
1740 task->map->max_offset,
1741 /* no unnesting on final cleanup: */
1742 VM_MAP_REMOVE_NO_UNNESTING);
1743
1744 task->halting = FALSE;
1745 }
1746
1747 /*
1748 * task_hold_locked:
1749 *
1750 * Suspend execution of the specified task.
1751 * This is a recursive-style suspension of the task; a count of
1752 * suspends is maintained.
1753 *
1754 * CONDITIONS: the task is locked and active.
1755 */
1756 void
1757 task_hold_locked(
1758 register task_t task)
1759 {
1760 register thread_t thread;
1761
1762 assert(task->active);
1763
1764 if (task->suspend_count++ > 0)
1765 return;
1766
1767 /*
1768 * Iterate through all the threads and hold them.
1769 */
1770 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1771 thread_mtx_lock(thread);
1772 thread_hold(thread);
1773 thread_mtx_unlock(thread);
1774 }
1775 }
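/*
 * Pairing sketch (illustrative): task_hold_locked() and task_release_locked()
 * (below) nest, so only the outermost hold/release pair actually stops and
 * resumes the task's threads:
 *
 *     task_hold_locked(task);      suspend_count 0 -> 1, threads held
 *     task_hold_locked(task);      suspend_count 1 -> 2, threads untouched
 *     task_release_locked(task);   suspend_count 2 -> 1, threads untouched
 *     task_release_locked(task);   suspend_count 1 -> 0, threads released
 */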
1776
1777 /*
1778 * task_hold:
1779 *
1780 * Same as the internal routine above, except that it must lock
1781 * and verify that the task is active. This differs from task_suspend
1782 * in that it places a kernel hold on the task rather than just a
1783 * user-level hold. This keeps users from over-resuming and setting
1784 * it running out from under the kernel.
1785 *
1786 * CONDITIONS: the caller holds a reference on the task
1787 */
1788 kern_return_t
1789 task_hold(
1790 register task_t task)
1791 {
1792 if (task == TASK_NULL)
1793 return (KERN_INVALID_ARGUMENT);
1794
1795 task_lock(task);
1796
1797 if (!task->active) {
1798 task_unlock(task);
1799
1800 return (KERN_FAILURE);
1801 }
1802
1803 task_hold_locked(task);
1804 task_unlock(task);
1805
1806 return (KERN_SUCCESS);
1807 }
1808
1809 kern_return_t
1810 task_wait(
1811 task_t task,
1812 boolean_t until_not_runnable)
1813 {
1814 if (task == TASK_NULL)
1815 return (KERN_INVALID_ARGUMENT);
1816
1817 task_lock(task);
1818
1819 if (!task->active) {
1820 task_unlock(task);
1821
1822 return (KERN_FAILURE);
1823 }
1824
1825 task_wait_locked(task, until_not_runnable);
1826 task_unlock(task);
1827
1828 return (KERN_SUCCESS);
1829 }
1830
1831 /*
1832 * task_wait_locked:
1833 *
1834 * Wait for all threads in task to stop.
1835 *
1836 * Conditions:
1837 * Called with task locked, active, and held.
1838 */
1839 void
1840 task_wait_locked(
1841 register task_t task,
1842 boolean_t until_not_runnable)
1843 {
1844 register thread_t thread, self;
1845
1846 assert(task->active);
1847 assert(task->suspend_count > 0);
1848
1849 self = current_thread();
1850
1851 /*
1852 * Iterate through all the threads and wait for them to
1853 * stop. Do not wait for the current thread if it is within
1854 * the task.
1855 */
1856 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1857 if (thread != self)
1858 thread_wait(thread, until_not_runnable);
1859 }
1860 }
1861
1862 /*
1863 * task_release_locked:
1864 *
1865 * Release a kernel hold on a task.
1866 *
1867 * CONDITIONS: the task is locked and active
1868 */
1869 void
1870 task_release_locked(
1871 register task_t task)
1872 {
1873 register thread_t thread;
1874
1875 assert(task->active);
1876 assert(task->suspend_count > 0);
1877
1878 if (--task->suspend_count > 0)
1879 return;
1880
1881 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1882 thread_mtx_lock(thread);
1883 thread_release(thread);
1884 thread_mtx_unlock(thread);
1885 }
1886 }
1887
1888 /*
1889 * task_release:
1890 *
1891 * Same as the internal routine above, except that it must lock
1892 * and verify that the task is active.
1893 *
1894 * CONDITIONS: The caller holds a reference to the task
1895 */
1896 kern_return_t
1897 task_release(
1898 task_t task)
1899 {
1900 if (task == TASK_NULL)
1901 return (KERN_INVALID_ARGUMENT);
1902
1903 task_lock(task);
1904
1905 if (!task->active) {
1906 task_unlock(task);
1907
1908 return (KERN_FAILURE);
1909 }
1910
1911 task_release_locked(task);
1912 task_unlock(task);
1913
1914 return (KERN_SUCCESS);
1915 }
1916
1917 kern_return_t
1918 task_threads(
1919 task_t task,
1920 thread_act_array_t *threads_out,
1921 mach_msg_type_number_t *count)
1922 {
1923 mach_msg_type_number_t actual;
1924 thread_t *thread_list;
1925 thread_t thread;
1926 vm_size_t size, size_needed;
1927 void *addr;
1928 unsigned int i, j;
1929
1930 if (task == TASK_NULL)
1931 return (KERN_INVALID_ARGUMENT);
1932
1933 size = 0; addr = NULL;
1934
1935 for (;;) {
1936 task_lock(task);
1937 if (!task->active) {
1938 task_unlock(task);
1939
1940 if (size != 0)
1941 kfree(addr, size);
1942
1943 return (KERN_FAILURE);
1944 }
1945
1946 actual = task->thread_count;
1947
1948 /* do we have the memory we need? */
1949 size_needed = actual * sizeof (mach_port_t);
1950 if (size_needed <= size)
1951 break;
1952
1953 /* unlock the task and allocate more memory */
1954 task_unlock(task);
1955
1956 if (size != 0)
1957 kfree(addr, size);
1958
1959 assert(size_needed > 0);
1960 size = size_needed;
1961
1962 addr = kalloc(size);
1963 if (addr == 0)
1964 return (KERN_RESOURCE_SHORTAGE);
1965 }
1966
1967 /* OK, have memory and the task is locked & active */
1968 thread_list = (thread_t *)addr;
1969
1970 i = j = 0;
1971
1972 for (thread = (thread_t)queue_first(&task->threads); i < actual;
1973 ++i, thread = (thread_t)queue_next(&thread->task_threads)) {
1974 thread_reference_internal(thread);
1975 thread_list[j++] = thread;
1976 }
1977
1978 assert(queue_end(&task->threads, (queue_entry_t)thread));
1979
1980 actual = j;
1981 size_needed = actual * sizeof (mach_port_t);
1982
1983 /* can unlock task now that we've got the thread refs */
1984 task_unlock(task);
1985
1986 if (actual == 0) {
1987 /* no threads, so return null pointer and deallocate memory */
1988
1989 *threads_out = NULL;
1990 *count = 0;
1991
1992 if (size != 0)
1993 kfree(addr, size);
1994 }
1995 else {
1996 /* if we allocated too much, must copy */
1997
1998 if (size_needed < size) {
1999 void *newaddr;
2000
2001 newaddr = kalloc(size_needed);
2002 if (newaddr == 0) {
2003 for (i = 0; i < actual; ++i)
2004 thread_deallocate(thread_list[i]);
2005 kfree(addr, size);
2006 return (KERN_RESOURCE_SHORTAGE);
2007 }
2008
2009 bcopy(addr, newaddr, size_needed);
2010 kfree(addr, size);
2011 thread_list = (thread_t *)newaddr;
2012 }
2013
2014 *threads_out = thread_list;
2015 *count = actual;
2016
2017 /* do the conversion that Mig should handle */
2018
2019 for (i = 0; i < actual; ++i)
2020 ((ipc_port_t *) thread_list)[i] = convert_thread_to_port(thread_list[i]);
2021 }
2022
2023 return (KERN_SUCCESS);
2024 }
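/*
 * Illustrative user-space sketch of the interface above, assuming only
 * the public <mach/mach.h> declarations: task_threads() hands back one
 * send right per thread plus an out-of-line array, and the caller is
 * responsible for releasing both.
 */
#if 0 /* example only; not compiled as part of the kernel */
#include <mach/mach.h>
#include <stdio.h>

static void
list_my_threads(void)
{
	thread_act_array_t threads;
	mach_msg_type_number_t count, i;
	kern_return_t kr;

	kr = task_threads(mach_task_self(), &threads, &count);
	if (kr != KERN_SUCCESS) {
		fprintf(stderr, "task_threads: %s\n", mach_error_string(kr));
		return;
	}
	printf("%u threads\n", count);
	for (i = 0; i < count; i++) {
		/* drop the send right task_threads() gave us */
		mach_port_deallocate(mach_task_self(), threads[i]);
	}
	/* free the out-of-line array backing the port list */
	vm_deallocate(mach_task_self(), (vm_address_t)threads,
	    count * sizeof(thread_act_t));
}
#endif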
2025
2026 #define TASK_HOLD_NORMAL 0
2027 #define TASK_HOLD_PIDSUSPEND 1
2028 #define TASK_HOLD_LEGACY 2
2029 #define TASK_HOLD_LEGACY_ALL 3
2030
2031 static kern_return_t
2032 place_task_hold (
2033 register task_t task,
2034 int mode)
2035 {
2036 if (!task->active) {
2037 return (KERN_FAILURE);
2038 }
2039
2040 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2041 MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_SUSPEND) | DBG_FUNC_NONE,
2042 task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
2043 task->user_stop_count, task->user_stop_count + 1, 0);
2044
2045 #if MACH_ASSERT
2046 current_task()->suspends_outstanding++;
2047 #endif
2048
2049 if (mode == TASK_HOLD_LEGACY)
2050 task->legacy_stop_count++;
2051
2052 if (task->user_stop_count++ > 0) {
2053 /*
2054 * If the stop count was positive, the task is
2055 * already stopped and we can exit.
2056 */
2057 return (KERN_SUCCESS);
2058 }
2059
2060 /*
2061 * Put a kernel-level hold on the threads in the task (all
2062 * user-level task suspensions added together represent a
2063 * single kernel-level hold). We then wait for the threads
2064 * to stop executing user code.
2065 */
2066 task_hold_locked(task);
2067 task_wait_locked(task, FALSE);
2068
2069 return (KERN_SUCCESS);
2070 }
2071
2072 static kern_return_t
2073 release_task_hold (
2074 register task_t task,
2075 int mode)
2076 {
2077 register boolean_t release = FALSE;
2078
2079 if (!task->active) {
2080 return (KERN_FAILURE);
2081 }
2082
2083 if (mode == TASK_HOLD_PIDSUSPEND) {
2084 if (task->pidsuspended == FALSE) {
2085 return (KERN_FAILURE);
2086 }
2087 task->pidsuspended = FALSE;
2088 }
2089
2090 if (task->user_stop_count > (task->pidsuspended ? 1 : 0)) {
2091
2092 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2093 MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_RESUME) | DBG_FUNC_NONE,
2094 task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
2095 task->user_stop_count, mode, task->legacy_stop_count);
2096
2097 #if MACH_ASSERT
2098 /*
2099 * This is obviously not robust; if we suspend one task and then resume a different one,
2100 * we'll fly under the radar. This is only meant to catch the common case of a crashed
2101 * or buggy suspender.
2102 */
2103 current_task()->suspends_outstanding--;
2104 #endif
2105
2106 if (mode == TASK_HOLD_LEGACY_ALL) {
2107 if (task->legacy_stop_count >= task->user_stop_count) {
2108 task->user_stop_count = 0;
2109 release = TRUE;
2110 } else {
2111 task->user_stop_count -= task->legacy_stop_count;
2112 }
2113 task->legacy_stop_count = 0;
2114 } else {
2115 if (mode == TASK_HOLD_LEGACY && task->legacy_stop_count > 0)
2116 task->legacy_stop_count--;
2117 if (--task->user_stop_count == 0)
2118 release = TRUE;
2119 }
2120 }
2121 else {
2122 return (KERN_FAILURE);
2123 }
2124
2125 /*
2126 * Release the task if necessary.
2127 */
2128 if (release)
2129 task_release_locked(task);
2130
2131 return (KERN_SUCCESS);
2132 }
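/*
 * A minimal model (illustrative only, simplified to the TASK_HOLD_NORMAL
 * and TASK_HOLD_LEGACY paths) of the counting done by place_task_hold()
 * and release_task_hold() above: all user-level suspensions share a
 * single kernel-level hold, taken when user_stop_count goes 0 -> 1 and
 * dropped when the count falls back to its floor (1 while pidsuspended,
 * otherwise 0).  The names below are stand-ins, not kernel code.
 */
#if 0 /* illustrative model, not kernel code */
#include <stdbool.h>

struct hold_model {
	int	user_stop_count;
	int	legacy_stop_count;
	bool	pidsuspended;
	bool	kernel_hold;	/* stands in for task_hold_locked()/task_release_locked() */
};

static void
model_place_hold(struct hold_model *m, bool legacy)
{
	if (legacy)
		m->legacy_stop_count++;
	if (m->user_stop_count++ == 0)
		m->kernel_hold = true;	/* first suspension takes the real hold */
}

static void
model_release_hold(struct hold_model *m)
{
	int floor = m->pidsuspended ? 1 : 0;

	if (m->user_stop_count > floor && --m->user_stop_count == 0)
		m->kernel_hold = false;	/* last resume drops the real hold */
}
#endif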
2133
2134
2135 /*
2136 * task_suspend:
2137 *
2138 * Implement an (old-fashioned) user-level suspension on a task.
2139 *
2140 * Because the user isn't expecting to have to manage a suspension
2141 * token, we'll track it for him in the kernel in the form of a naked
2142 * send right to the task's resume port. All such send rights
2143 * account for a single suspension against the task (unlike task_suspend2()
2144 * where each caller gets a unique suspension count represented by a
2145 * unique send-once right).
2146 *
2147 * Conditions:
2148 * The caller holds a reference to the task
2149 */
2150 kern_return_t
2151 task_suspend(
2152 register task_t task)
2153 {
2154 kern_return_t kr;
2155 mach_port_t port, send, old_notify;
2156 mach_port_name_t name;
2157
2158 if (task == TASK_NULL || task == kernel_task)
2159 return (KERN_INVALID_ARGUMENT);
2160
2161 task_lock(task);
2162
2163 /*
2164 * Claim a send right on the task resume port, and request a no-senders
2165 * notification on that port (if none outstanding).
2166 */
2167 if (task->itk_resume == IP_NULL) {
2168 task->itk_resume = ipc_port_alloc_kernel();
2169 if (!IP_VALID(task->itk_resume))
2170 panic("failed to create resume port");
2171 ipc_kobject_set(task->itk_resume, (ipc_kobject_t)task, IKOT_TASK_RESUME);
2172 }
2173
2174 port = task->itk_resume;
2175 ip_lock(port);
2176 assert(ip_active(port));
2177
2178 send = ipc_port_make_send_locked(port);
2179 assert(IP_VALID(send));
2180
2181 if (port->ip_nsrequest == IP_NULL) {
2182 ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
2183 assert(old_notify == IP_NULL);
2184 /* port unlocked */
2185 } else {
2186 ip_unlock(port);
2187 }
2188
2189 /*
2190 * place a legacy hold on the task.
2191 */
2192 kr = place_task_hold(task, TASK_HOLD_LEGACY);
2193 if (kr != KERN_SUCCESS) {
2194 task_unlock(task);
2195 ipc_port_release_send(send);
2196 return kr;
2197 }
2198
2199 task_unlock(task);
2200
2201 /*
2202 * Copyout the send right into the calling task's IPC space. It won't know it is there,
2203 * but we'll look it up when calling a traditional resume. Any IPC operations that
2204 * deallocate the send right will auto-release the suspension.
2205 */
2206 if ((kr = ipc_kmsg_copyout_object(current_task()->itk_space, (ipc_object_t)send,
2207 MACH_MSG_TYPE_MOVE_SEND, &name)) != KERN_SUCCESS) {
2208 printf("warning: %s(%d) failed to copyout suspension token for pid %d with error: %d\n",
2209 proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
2210 task_pid(task), kr);
2211 return (kr);
2212 }
2213
2214 return (kr);
2215 }
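/*
 * Illustrative user-space sketch of the legacy pair above (assumes the
 * caller can obtain the task port, e.g. via task_for_pid() with the
 * required privileges): each successful task_suspend() hides a send
 * right to the resume port in the caller's IPC space, and task_resume()
 * (or any operation that deallocates that right) drops the hold.
 */
#if 0 /* example only; not compiled as part of the kernel */
#include <sys/types.h>
#include <mach/mach.h>
#include <mach/mach_traps.h>

static kern_return_t
pause_briefly(pid_t pid)
{
	mach_port_t target = MACH_PORT_NULL;
	kern_return_t kr;

	kr = task_for_pid(mach_task_self(), pid, &target);
	if (kr != KERN_SUCCESS)
		return kr;

	kr = task_suspend(target);		/* places one legacy hold */
	if (kr == KERN_SUCCESS) {
		/* ... inspect the stopped task ... */
		kr = task_resume(target);	/* releases that hold */
	}
	mach_port_deallocate(mach_task_self(), target);
	return kr;
}
#endif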
2216
2217 /*
2218 * task_resume:
2219 * Release a user hold on a task.
2220 *
2221 * Conditions:
2222 * The caller holds a reference to the task
2223 */
2224 kern_return_t
2225 task_resume(
2226 register task_t task)
2227 {
2228 kern_return_t kr;
2229 mach_port_name_t resume_port_name;
2230 ipc_entry_t resume_port_entry;
2231 ipc_space_t space = current_task()->itk_space;
2232
2233 if (task == TASK_NULL || task == kernel_task )
2234 return (KERN_INVALID_ARGUMENT);
2235
2236 /* release a legacy task hold */
2237 task_lock(task);
2238 kr = release_task_hold(task, TASK_HOLD_LEGACY);
2239 task_unlock(task);
2240
2241 is_write_lock(space);
2242 if (is_active(space) && IP_VALID(task->itk_resume) &&
2243 ipc_hash_lookup(space, (ipc_object_t)task->itk_resume, &resume_port_name, &resume_port_entry) == TRUE) {
2244 /*
2245 * We found a suspension token in the caller's IPC space. Release a send right to indicate that
2246 * we are holding one less legacy hold on the task from this caller. If the release failed,
2247 * go ahead and drop all the rights, as someone either already released our holds or the task
2248 * is gone.
2249 */
2250 if (kr == KERN_SUCCESS)
2251 ipc_right_dealloc(space, resume_port_name, resume_port_entry);
2252 else
2253 ipc_right_destroy(space, resume_port_name, resume_port_entry, FALSE, 0);
2254 /* space unlocked */
2255 } else {
2256 is_write_unlock(space);
2257 if (kr == KERN_SUCCESS)
2258 printf("warning: %s(%d) performed out-of-band resume on pid %d\n",
2259 proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
2260 task_pid(task));
2261 }
2262
2263 return kr;
2264 }
2265
2266 /*
2267 * Suspend the target task.
2268 * Making/holding a token/reference/port is the caller's responsibility.
2269 */
2270 kern_return_t
2271 task_suspend_internal(task_t task)
2272 {
2273 kern_return_t kr;
2274
2275 if (task == TASK_NULL || task == kernel_task)
2276 return (KERN_INVALID_ARGUMENT);
2277
2278 task_lock(task);
2279 kr = place_task_hold(task, TASK_HOLD_NORMAL);
2280 task_unlock(task);
2281 return (kr);
2282 }
2283
2284 /*
2285 * Suspend the target task, and return a suspension token. The token
2286 * represents a reference on the suspended task.
2287 */
2288 kern_return_t
2289 task_suspend2(
2290 register task_t task,
2291 task_suspension_token_t *suspend_token)
2292 {
2293 kern_return_t kr;
2294
2295 kr = task_suspend_internal(task);
2296 if (kr != KERN_SUCCESS) {
2297 *suspend_token = TASK_NULL;
2298 return (kr);
2299 }
2300
2301 /*
2302 * Take a reference on the target task and return that to the caller
2303 * as a "suspension token," which can be converted into an SO right to
2304 * the now-suspended task's resume port.
2305 */
2306 task_reference_internal(task);
2307 *suspend_token = task;
2308
2309 return (KERN_SUCCESS);
2310 }
2311
2312 /*
2313 * Resume the task
2314 * (reference/token/port management is caller's responsibility).
2315 */
2316 kern_return_t
2317 task_resume_internal(
2318 register task_suspension_token_t task)
2319 {
2320 kern_return_t kr;
2321
2322 if (task == TASK_NULL || task == kernel_task)
2323 return (KERN_INVALID_ARGUMENT);
2324
2325 task_lock(task);
2326 kr = release_task_hold(task, TASK_HOLD_NORMAL);
2327 task_unlock(task);
2328 return (kr);
2329 }
2330
2331 /*
2332 * Resume the task using a suspension token. Consumes the token's ref.
2333 */
2334 kern_return_t
2335 task_resume2(
2336 register task_suspension_token_t task)
2337 {
2338 kern_return_t kr;
2339
2340 kr = task_resume_internal(task);
2341 task_suspension_token_deallocate(task);
2342
2343 return (kr);
2344 }
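/*
 * Illustrative user-space sketch of the token interface above: each
 * task_suspend2() call yields its own suspension token, and only
 * task_resume2() on that token (which consumes its reference) releases
 * the corresponding hold, so unrelated suspenders cannot resume the
 * task out from under each other.
 */
#if 0 /* example only; not compiled as part of the kernel */
#include <mach/mach.h>

static kern_return_t
with_task_suspended(task_t target)
{
	task_suspension_token_t token = TASK_NULL;
	kern_return_t kr;

	kr = task_suspend2(target, &token);
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... target stays suspended while we hold the token ... */

	return task_resume2(token);	/* consumes the token's reference */
}
#endif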
2345
2346 boolean_t
2347 task_suspension_notify(mach_msg_header_t *request_header)
2348 {
2349 ipc_port_t port = (ipc_port_t) request_header->msgh_remote_port;
2350 task_t task = convert_port_to_task_suspension_token(port);
2351 mach_msg_type_number_t not_count;
2352
2353 if (task == TASK_NULL || task == kernel_task)
2354 return TRUE; /* nothing to do */
2355
2356 switch (request_header->msgh_id) {
2357
2358 case MACH_NOTIFY_SEND_ONCE:
2359 /* release the hold held by this specific send-once right */
2360 task_lock(task);
2361 release_task_hold(task, TASK_HOLD_NORMAL);
2362 task_unlock(task);
2363 break;
2364
2365 case MACH_NOTIFY_NO_SENDERS:
2366 not_count = ((mach_no_senders_notification_t *)request_header)->not_count;
2367
2368 task_lock(task);
2369 ip_lock(port);
2370 if (port->ip_mscount == not_count) {
2371
2372 /* release all the [remaining] outstanding legacy holds */
2373 assert(port->ip_nsrequest == IP_NULL);
2374 ip_unlock(port);
2375 release_task_hold(task, TASK_HOLD_LEGACY_ALL);
2376 task_unlock(task);
2377
2378 } else if (port->ip_nsrequest == IP_NULL) {
2379 ipc_port_t old_notify;
2380
2381 task_unlock(task);
2382 /* new send rights, re-arm notification at current make-send count */
2383 ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
2384 assert(old_notify == IP_NULL);
2385 /* port unlocked */
2386 } else {
2387 ip_unlock(port);
2388 task_unlock(task);
2389 }
2390 break;
2391
2392 default:
2393 break;
2394 }
2395
2396 task_suspension_token_deallocate(task); /* drop token reference */
2397 return TRUE;
2398 }
2399
2400 kern_return_t
2401 task_pidsuspend_locked(task_t task)
2402 {
2403 kern_return_t kr;
2404
2405 if (task->pidsuspended) {
2406 kr = KERN_FAILURE;
2407 goto out;
2408 }
2409
2410 task->pidsuspended = TRUE;
2411
2412 kr = place_task_hold(task, TASK_HOLD_PIDSUSPEND);
2413 if (kr != KERN_SUCCESS) {
2414 task->pidsuspended = FALSE;
2415 }
2416 out:
2417 return(kr);
2418 }
2419
2420
2421 /*
2422 * task_pidsuspend:
2423 *
2424 * Suspends a task by placing a hold on its threads.
2425 *
2426 * Conditions:
2427 * The caller holds a reference to the task
2428 */
2429 kern_return_t
2430 task_pidsuspend(
2431 register task_t task)
2432 {
2433 kern_return_t kr;
2434
2435 if (task == TASK_NULL || task == kernel_task)
2436 return (KERN_INVALID_ARGUMENT);
2437
2438 task_lock(task);
2439
2440 kr = task_pidsuspend_locked(task);
2441
2442 task_unlock(task);
2443
2444 return (kr);
2445 }
2446
2447 /* If enabled, we bring all the frozen pages back in prior to resumption; otherwise, they're faulted back in on demand */
2448 #define THAW_ON_RESUME 1
2449
2450 /*
2451 * task_pidresume:
2452 * Resumes a previously suspended task.
2453 *
2454 * Conditions:
2455 * The caller holds a reference to the task
2456 */
2457 kern_return_t
2458 task_pidresume(
2459 register task_t task)
2460 {
2461 kern_return_t kr;
2462
2463 if (task == TASK_NULL || task == kernel_task)
2464 return (KERN_INVALID_ARGUMENT);
2465
2466 task_lock(task);
2467
2468 #if (CONFIG_FREEZE && THAW_ON_RESUME)
2469
2470 while (task->changing_freeze_state) {
2471
2472 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
2473 task_unlock(task);
2474 thread_block(THREAD_CONTINUE_NULL);
2475
2476 task_lock(task);
2477 }
2478 task->changing_freeze_state = TRUE;
2479 #endif
2480
2481 kr = release_task_hold(task, TASK_HOLD_PIDSUSPEND);
2482
2483 task_unlock(task);
2484
2485 #if (CONFIG_FREEZE && THAW_ON_RESUME)
2486 if ((kr == KERN_SUCCESS) && (task->frozen == TRUE)) {
2487
2488 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
2489
2490 kr = KERN_SUCCESS;
2491 } else {
2492
2493 kr = vm_map_thaw(task->map);
2494 }
2495 }
2496 task_lock(task);
2497
2498 if (kr == KERN_SUCCESS)
2499 task->frozen = FALSE;
2500 task->changing_freeze_state = FALSE;
2501 thread_wakeup(&task->changing_freeze_state);
2502
2503 task_unlock(task);
2504 #endif
2505
2506 return (kr);
2507 }
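/*
 * task_pidsuspend()/task_pidresume() are reached through the private
 * pid_suspend()/pid_resume() system calls rather than a public Mach
 * interface.  A minimal sketch, assuming those private wrappers are
 * available to a suitably entitled caller (illustrative only):
 */
#if 0 /* example only; relies on private SPI */
#include <sys/types.h>

extern int pid_suspend(pid_t pid);	/* private wrapper over task_pidsuspend() */
extern int pid_resume(pid_t pid);	/* private wrapper over task_pidresume() */

static int
suspend_then_resume(pid_t pid)
{
	if (pid_suspend(pid) != 0)
		return -1;
	/* ... the process stays suspended (and may be frozen) here ... */
	return pid_resume(pid);
}
#endif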
2508
2509 #if CONFIG_FREEZE
2510
2511 /*
2512 * task_freeze:
2513 *
2514 * Freeze a task.
2515 *
2516 * Conditions:
2517 * The caller holds a reference to the task
2518 */
2519 extern void vm_wake_compactor_swapper();
2520 extern queue_head_t c_swapout_list_head;
2521
2522 kern_return_t
2523 task_freeze(
2524 register task_t task,
2525 uint32_t *purgeable_count,
2526 uint32_t *wired_count,
2527 uint32_t *clean_count,
2528 uint32_t *dirty_count,
2529 uint32_t dirty_budget,
2530 boolean_t *shared,
2531 boolean_t walk_only)
2532 {
2533 kern_return_t kr;
2534
2535 if (task == TASK_NULL || task == kernel_task)
2536 return (KERN_INVALID_ARGUMENT);
2537
2538 task_lock(task);
2539
2540 while (task->changing_freeze_state) {
2541
2542 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
2543 task_unlock(task);
2544 thread_block(THREAD_CONTINUE_NULL);
2545
2546 task_lock(task);
2547 }
2548 if (task->frozen) {
2549 task_unlock(task);
2550 return (KERN_FAILURE);
2551 }
2552 task->changing_freeze_state = TRUE;
2553
2554 task_unlock(task);
2555
2556 if (walk_only) {
2557 kr = vm_map_freeze_walk(task->map, purgeable_count, wired_count, clean_count, dirty_count, dirty_budget, shared);
2558 } else {
2559 kr = vm_map_freeze(task->map, purgeable_count, wired_count, clean_count, dirty_count, dirty_budget, shared);
2560 }
2561
2562 task_lock(task);
2563
2564 if (walk_only == FALSE && kr == KERN_SUCCESS)
2565 task->frozen = TRUE;
2566 task->changing_freeze_state = FALSE;
2567 thread_wakeup(&task->changing_freeze_state);
2568
2569 task_unlock(task);
2570
2571 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
2572 vm_wake_compactor_swapper();
2573 /*
2574 * We do an explicit wakeup of the swapout thread here
2575 * because the compact_and_swap routines don't have
2576 * knowledge about these kinds of "per-task packed c_segs"
2577 * and so will not be evaluating whether we need to do
2578 * a wakeup there.
2579 */
2580 thread_wakeup((event_t)&c_swapout_list_head);
2581 }
2582
2583 return (kr);
2584 }
2585
2586 /*
2587 * task_thaw:
2588 *
2589 * Thaw a currently frozen task.
2590 *
2591 * Conditions:
2592 * The caller holds a reference to the task
2593 */
2594 kern_return_t
2595 task_thaw(
2596 register task_t task)
2597 {
2598 kern_return_t kr;
2599
2600 if (task == TASK_NULL || task == kernel_task)
2601 return (KERN_INVALID_ARGUMENT);
2602
2603 task_lock(task);
2604
2605 while (task->changing_freeze_state) {
2606
2607 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
2608 task_unlock(task);
2609 thread_block(THREAD_CONTINUE_NULL);
2610
2611 task_lock(task);
2612 }
2613 if (!task->frozen) {
2614 task_unlock(task);
2615 return (KERN_FAILURE);
2616 }
2617 task->changing_freeze_state = TRUE;
2618
2619 if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE) {
2620 task_unlock(task);
2621
2622 kr = vm_map_thaw(task->map);
2623
2624 task_lock(task);
2625
2626 if (kr == KERN_SUCCESS)
2627 task->frozen = FALSE;
2628 } else {
2629 task->frozen = FALSE;
2630 kr = KERN_SUCCESS;
2631 }
2632
2633 task->changing_freeze_state = FALSE;
2634 thread_wakeup(&task->changing_freeze_state);
2635
2636 task_unlock(task);
2637
2638 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
2639 vm_wake_compactor_swapper();
2640 }
2641
2642 return (kr);
2643 }
2644
2645 #endif /* CONFIG_FREEZE */
2646
2647 kern_return_t
2648 host_security_set_task_token(
2649 host_security_t host_security,
2650 task_t task,
2651 security_token_t sec_token,
2652 audit_token_t audit_token,
2653 host_priv_t host_priv)
2654 {
2655 ipc_port_t host_port;
2656 kern_return_t kr;
2657
2658 if (task == TASK_NULL)
2659 return(KERN_INVALID_ARGUMENT);
2660
2661 if (host_security == HOST_NULL)
2662 return(KERN_INVALID_SECURITY);
2663
2664 task_lock(task);
2665 task->sec_token = sec_token;
2666 task->audit_token = audit_token;
2667
2668 task_unlock(task);
2669
2670 if (host_priv != HOST_PRIV_NULL) {
2671 kr = host_get_host_priv_port(host_priv, &host_port);
2672 } else {
2673 kr = host_get_host_port(host_priv_self(), &host_port);
2674 }
2675 assert(kr == KERN_SUCCESS);
2676 kr = task_set_special_port(task, TASK_HOST_PORT, host_port);
2677 return(kr);
2678 }
2679
2680 kern_return_t
2681 task_send_trace_memory(
2682 task_t target_task,
2683 __unused uint32_t pid,
2684 __unused uint64_t uniqueid)
2685 {
2686 kern_return_t kr = KERN_INVALID_ARGUMENT;
2687 if (target_task == TASK_NULL)
2688 return (KERN_INVALID_ARGUMENT);
2689
2690 #if CONFIG_ATM
2691 kr = atm_send_proc_inspect_notification(target_task,
2692 pid,
2693 uniqueid);
2694
2695 #endif
2696 return (kr);
2697 }
2698 /*
2699 * This routine was added, pretty much exclusively, for registering the
2700 * RPC glue vector for in-kernel short circuited tasks. Rather than
2701 * removing it completely, I have only disabled that feature (which was
2702 * the only feature at the time). It just appears that we are going to
2703 * want to add some user data to tasks in the future (e.g. bsd info,
2704 * task names, etc...), so I left it in the formal task interface.
2705 */
2706 kern_return_t
2707 task_set_info(
2708 task_t task,
2709 task_flavor_t flavor,
2710 __unused task_info_t task_info_in, /* pointer to IN array */
2711 __unused mach_msg_type_number_t task_info_count)
2712 {
2713 if (task == TASK_NULL)
2714 return(KERN_INVALID_ARGUMENT);
2715
2716 switch (flavor) {
2717
2718 #if CONFIG_ATM
2719 case TASK_TRACE_MEMORY_INFO:
2720 {
2721 if (task_info_count != TASK_TRACE_MEMORY_INFO_COUNT)
2722 return (KERN_INVALID_ARGUMENT);
2723
2724 assert(task_info_in != NULL);
2725 task_trace_memory_info_t mem_info;
2726 mem_info = (task_trace_memory_info_t) task_info_in;
2727 kern_return_t kr = atm_register_trace_memory(task,
2728 mem_info->user_memory_address,
2729 mem_info->buffer_size);
2730 return kr;
2731 break;
2732 }
2733
2734 #endif
2735 default:
2736 return (KERN_INVALID_ARGUMENT);
2737 }
2738 return (KERN_SUCCESS);
2739 }
2740
2741 int radar_20146450 = 1;
2742 kern_return_t
2743 task_info(
2744 task_t task,
2745 task_flavor_t flavor,
2746 task_info_t task_info_out,
2747 mach_msg_type_number_t *task_info_count)
2748 {
2749 kern_return_t error = KERN_SUCCESS;
2750
2751 if (task == TASK_NULL)
2752 return (KERN_INVALID_ARGUMENT);
2753
2754 task_lock(task);
2755
2756 if ((task != current_task()) && (!task->active)) {
2757 task_unlock(task);
2758 return (KERN_INVALID_ARGUMENT);
2759 }
2760
2761 switch (flavor) {
2762
2763 case TASK_BASIC_INFO_32:
2764 case TASK_BASIC2_INFO_32:
2765 {
2766 task_basic_info_32_t basic_info;
2767 vm_map_t map;
2768 clock_sec_t secs;
2769 clock_usec_t usecs;
2770
2771 if (*task_info_count < TASK_BASIC_INFO_32_COUNT) {
2772 error = KERN_INVALID_ARGUMENT;
2773 break;
2774 }
2775
2776 basic_info = (task_basic_info_32_t)task_info_out;
2777
2778 map = (task == kernel_task)? kernel_map: task->map;
2779 basic_info->virtual_size = (typeof(basic_info->virtual_size))map->size;
2780 if (flavor == TASK_BASIC2_INFO_32) {
2781 /*
2782 * The "BASIC2" flavor gets the maximum resident
2783 * size instead of the current resident size...
2784 */
2785 basic_info->resident_size = pmap_resident_max(map->pmap);
2786 } else {
2787 basic_info->resident_size = pmap_resident_count(map->pmap);
2788 }
2789 basic_info->resident_size *= PAGE_SIZE;
2790
2791 basic_info->policy = ((task != kernel_task)?
2792 POLICY_TIMESHARE: POLICY_RR);
2793 basic_info->suspend_count = task->user_stop_count;
2794
2795 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
2796 basic_info->user_time.seconds =
2797 (typeof(basic_info->user_time.seconds))secs;
2798 basic_info->user_time.microseconds = usecs;
2799
2800 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
2801 basic_info->system_time.seconds =
2802 (typeof(basic_info->system_time.seconds))secs;
2803 basic_info->system_time.microseconds = usecs;
2804
2805 *task_info_count = TASK_BASIC_INFO_32_COUNT;
2806 break;
2807 }
2808
2809 case TASK_BASIC_INFO_64:
2810 {
2811 task_basic_info_64_t basic_info;
2812 vm_map_t map;
2813 clock_sec_t secs;
2814 clock_usec_t usecs;
2815
2816 if (*task_info_count < TASK_BASIC_INFO_64_COUNT) {
2817 error = KERN_INVALID_ARGUMENT;
2818 break;
2819 }
2820
2821 basic_info = (task_basic_info_64_t)task_info_out;
2822
2823 map = (task == kernel_task)? kernel_map: task->map;
2824 basic_info->virtual_size = map->size;
2825 basic_info->resident_size =
2826 (mach_vm_size_t)(pmap_resident_count(map->pmap))
2827 * PAGE_SIZE_64;
2828
2829 basic_info->policy = ((task != kernel_task)?
2830 POLICY_TIMESHARE: POLICY_RR);
2831 basic_info->suspend_count = task->user_stop_count;
2832
2833 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
2834 basic_info->user_time.seconds =
2835 (typeof(basic_info->user_time.seconds))secs;
2836 basic_info->user_time.microseconds = usecs;
2837
2838 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
2839 basic_info->system_time.seconds =
2840 (typeof(basic_info->system_time.seconds))secs;
2841 basic_info->system_time.microseconds = usecs;
2842
2843 *task_info_count = TASK_BASIC_INFO_64_COUNT;
2844 break;
2845 }
2846
2847 case MACH_TASK_BASIC_INFO:
2848 {
2849 mach_task_basic_info_t basic_info;
2850 vm_map_t map;
2851 clock_sec_t secs;
2852 clock_usec_t usecs;
2853
2854 if (*task_info_count < MACH_TASK_BASIC_INFO_COUNT) {
2855 error = KERN_INVALID_ARGUMENT;
2856 break;
2857 }
2858
2859 basic_info = (mach_task_basic_info_t)task_info_out;
2860
2861 map = (task == kernel_task) ? kernel_map : task->map;
2862
2863 basic_info->virtual_size = map->size;
2864
2865 basic_info->resident_size =
2866 (mach_vm_size_t)(pmap_resident_count(map->pmap));
2867 basic_info->resident_size *= PAGE_SIZE_64;
2868
2869 basic_info->resident_size_max =
2870 (mach_vm_size_t)(pmap_resident_max(map->pmap));
2871 basic_info->resident_size_max *= PAGE_SIZE_64;
2872
2873 basic_info->policy = ((task != kernel_task) ?
2874 POLICY_TIMESHARE : POLICY_RR);
2875
2876 basic_info->suspend_count = task->user_stop_count;
2877
2878 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
2879 basic_info->user_time.seconds =
2880 (typeof(basic_info->user_time.seconds))secs;
2881 basic_info->user_time.microseconds = usecs;
2882
2883 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
2884 basic_info->system_time.seconds =
2885 (typeof(basic_info->system_time.seconds))secs;
2886 basic_info->system_time.microseconds = usecs;
2887
2888 *task_info_count = MACH_TASK_BASIC_INFO_COUNT;
2889 break;
2890 }
2891
2892 case TASK_THREAD_TIMES_INFO:
2893 {
2894 register task_thread_times_info_t times_info;
2895 register thread_t thread;
2896
2897 if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) {
2898 error = KERN_INVALID_ARGUMENT;
2899 break;
2900 }
2901
2902 times_info = (task_thread_times_info_t) task_info_out;
2903 times_info->user_time.seconds = 0;
2904 times_info->user_time.microseconds = 0;
2905 times_info->system_time.seconds = 0;
2906 times_info->system_time.microseconds = 0;
2907
2908
2909 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2910 time_value_t user_time, system_time;
2911
2912 if (thread->options & TH_OPT_IDLE_THREAD)
2913 continue;
2914
2915 thread_read_times(thread, &user_time, &system_time);
2916
2917 time_value_add(&times_info->user_time, &user_time);
2918 time_value_add(&times_info->system_time, &system_time);
2919 }
2920
2921 *task_info_count = TASK_THREAD_TIMES_INFO_COUNT;
2922 break;
2923 }
2924
2925 case TASK_ABSOLUTETIME_INFO:
2926 {
2927 task_absolutetime_info_t info;
2928 register thread_t thread;
2929
2930 if (*task_info_count < TASK_ABSOLUTETIME_INFO_COUNT) {
2931 error = KERN_INVALID_ARGUMENT;
2932 break;
2933 }
2934
2935 info = (task_absolutetime_info_t)task_info_out;
2936 info->threads_user = info->threads_system = 0;
2937
2938
2939 info->total_user = task->total_user_time;
2940 info->total_system = task->total_system_time;
2941
2942 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2943 uint64_t tval;
2944 spl_t x;
2945
2946 if (thread->options & TH_OPT_IDLE_THREAD)
2947 continue;
2948
2949 x = splsched();
2950 thread_lock(thread);
2951
2952 tval = timer_grab(&thread->user_timer);
2953 info->threads_user += tval;
2954 info->total_user += tval;
2955
2956 tval = timer_grab(&thread->system_timer);
2957 if (thread->precise_user_kernel_time) {
2958 info->threads_system += tval;
2959 info->total_system += tval;
2960 } else {
2961 /* system_timer may represent either sys or user */
2962 info->threads_user += tval;
2963 info->total_user += tval;
2964 }
2965
2966 thread_unlock(thread);
2967 splx(x);
2968 }
2969
2970
2971 *task_info_count = TASK_ABSOLUTETIME_INFO_COUNT;
2972 break;
2973 }
2974
2975 case TASK_DYLD_INFO:
2976 {
2977 task_dyld_info_t info;
2978
2979 /*
2980 * We added the format field to TASK_DYLD_INFO output. For
2981 * temporary backward compatibility, accept the fact that
2982 * clients may ask for the old version - distinguished by the
2983 * size of the expected result structure.
2984 */
2985 #define TASK_LEGACY_DYLD_INFO_COUNT \
2986 offsetof(struct task_dyld_info, all_image_info_format)/sizeof(natural_t)
2987
2988 if (*task_info_count < TASK_LEGACY_DYLD_INFO_COUNT) {
2989 error = KERN_INVALID_ARGUMENT;
2990 break;
2991 }
2992
2993 info = (task_dyld_info_t)task_info_out;
2994 info->all_image_info_addr = task->all_image_info_addr;
2995 info->all_image_info_size = task->all_image_info_size;
2996
2997 /* only set format on output for those expecting it */
2998 if (*task_info_count >= TASK_DYLD_INFO_COUNT) {
2999 info->all_image_info_format = task_has_64BitAddr(task) ?
3000 TASK_DYLD_ALL_IMAGE_INFO_64 :
3001 TASK_DYLD_ALL_IMAGE_INFO_32 ;
3002 *task_info_count = TASK_DYLD_INFO_COUNT;
3003 } else {
3004 *task_info_count = TASK_LEGACY_DYLD_INFO_COUNT;
3005 }
3006 break;
3007 }
3008
3009 case TASK_EXTMOD_INFO:
3010 {
3011 task_extmod_info_t info;
3012 void *p;
3013
3014 if (*task_info_count < TASK_EXTMOD_INFO_COUNT) {
3015 error = KERN_INVALID_ARGUMENT;
3016 break;
3017 }
3018
3019 info = (task_extmod_info_t)task_info_out;
3020
3021 p = get_bsdtask_info(task);
3022 if (p) {
3023 proc_getexecutableuuid(p, info->task_uuid, sizeof(info->task_uuid));
3024 } else {
3025 bzero(info->task_uuid, sizeof(info->task_uuid));
3026 }
3027 info->extmod_statistics = task->extmod_statistics;
3028 *task_info_count = TASK_EXTMOD_INFO_COUNT;
3029
3030 break;
3031 }
3032
3033 case TASK_KERNELMEMORY_INFO:
3034 {
3035 task_kernelmemory_info_t tkm_info;
3036 ledger_amount_t credit, debit;
3037
3038 if (*task_info_count < TASK_KERNELMEMORY_INFO_COUNT) {
3039 error = KERN_INVALID_ARGUMENT;
3040 break;
3041 }
3042
3043 tkm_info = (task_kernelmemory_info_t) task_info_out;
3044 tkm_info->total_palloc = 0;
3045 tkm_info->total_pfree = 0;
3046 tkm_info->total_salloc = 0;
3047 tkm_info->total_sfree = 0;
3048
3049 if (task == kernel_task) {
3050 /*
3051 * All shared allocs/frees from other tasks count against
3052 * the kernel private memory usage. If we are looking up
3053 * info for the kernel task, gather from everywhere.
3054 */
3055 task_unlock(task);
3056
3057 /* start by accounting for all the terminated tasks against the kernel */
3058 tkm_info->total_palloc = tasks_tkm_private.alloc + tasks_tkm_shared.alloc;
3059 tkm_info->total_pfree = tasks_tkm_private.free + tasks_tkm_shared.free;
3060
3061 /* count all other task/thread shared alloc/free against the kernel */
3062 lck_mtx_lock(&tasks_threads_lock);
3063
3064 /* XXX this really shouldn't be using the function parameter 'task' as a local var! */
3065 queue_iterate(&tasks, task, task_t, tasks) {
3066 if (task == kernel_task) {
3067 if (ledger_get_entries(task->ledger,
3068 task_ledgers.tkm_private, &credit,
3069 &debit) == KERN_SUCCESS) {
3070 tkm_info->total_palloc += credit;
3071 tkm_info->total_pfree += debit;
3072 }
3073 }
3074 if (!ledger_get_entries(task->ledger,
3075 task_ledgers.tkm_shared, &credit, &debit)) {
3076 tkm_info->total_palloc += credit;
3077 tkm_info->total_pfree += debit;
3078 }
3079 }
3080 lck_mtx_unlock(&tasks_threads_lock);
3081 } else {
3082 if (!ledger_get_entries(task->ledger,
3083 task_ledgers.tkm_private, &credit, &debit)) {
3084 tkm_info->total_palloc = credit;
3085 tkm_info->total_pfree = debit;
3086 }
3087 if (!ledger_get_entries(task->ledger,
3088 task_ledgers.tkm_shared, &credit, &debit)) {
3089 tkm_info->total_salloc = credit;
3090 tkm_info->total_sfree = debit;
3091 }
3092 task_unlock(task);
3093 }
3094
3095 *task_info_count = TASK_KERNELMEMORY_INFO_COUNT;
3096 return KERN_SUCCESS;
3097 }
3098
3099 /* OBSOLETE */
3100 case TASK_SCHED_FIFO_INFO:
3101 {
3102
3103 if (*task_info_count < POLICY_FIFO_BASE_COUNT) {
3104 error = KERN_INVALID_ARGUMENT;
3105 break;
3106 }
3107
3108 error = KERN_INVALID_POLICY;
3109 break;
3110 }
3111
3112 /* OBSOLETE */
3113 case TASK_SCHED_RR_INFO:
3114 {
3115 register policy_rr_base_t rr_base;
3116 uint32_t quantum_time;
3117 uint64_t quantum_ns;
3118
3119 if (*task_info_count < POLICY_RR_BASE_COUNT) {
3120 error = KERN_INVALID_ARGUMENT;
3121 break;
3122 }
3123
3124 rr_base = (policy_rr_base_t) task_info_out;
3125
3126 if (task != kernel_task) {
3127 error = KERN_INVALID_POLICY;
3128 break;
3129 }
3130
3131 rr_base->base_priority = task->priority;
3132
3133 quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
3134 absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
3135
3136 rr_base->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
3137
3138 *task_info_count = POLICY_RR_BASE_COUNT;
3139 break;
3140 }
3141
3142 /* OBSOLETE */
3143 case TASK_SCHED_TIMESHARE_INFO:
3144 {
3145 register policy_timeshare_base_t ts_base;
3146
3147 if (*task_info_count < POLICY_TIMESHARE_BASE_COUNT) {
3148 error = KERN_INVALID_ARGUMENT;
3149 break;
3150 }
3151
3152 ts_base = (policy_timeshare_base_t) task_info_out;
3153
3154 if (task == kernel_task) {
3155 error = KERN_INVALID_POLICY;
3156 break;
3157 }
3158
3159 ts_base->base_priority = task->priority;
3160
3161 *task_info_count = POLICY_TIMESHARE_BASE_COUNT;
3162 break;
3163 }
3164
3165 case TASK_SECURITY_TOKEN:
3166 {
3167 register security_token_t *sec_token_p;
3168
3169 if (*task_info_count < TASK_SECURITY_TOKEN_COUNT) {
3170 error = KERN_INVALID_ARGUMENT;
3171 break;
3172 }
3173
3174 sec_token_p = (security_token_t *) task_info_out;
3175
3176 *sec_token_p = task->sec_token;
3177
3178 *task_info_count = TASK_SECURITY_TOKEN_COUNT;
3179 break;
3180 }
3181
3182 case TASK_AUDIT_TOKEN:
3183 {
3184 register audit_token_t *audit_token_p;
3185
3186 if (*task_info_count < TASK_AUDIT_TOKEN_COUNT) {
3187 error = KERN_INVALID_ARGUMENT;
3188 break;
3189 }
3190
3191 audit_token_p = (audit_token_t *) task_info_out;
3192
3193 *audit_token_p = task->audit_token;
3194
3195 *task_info_count = TASK_AUDIT_TOKEN_COUNT;
3196 break;
3197 }
3198
3199 case TASK_SCHED_INFO:
3200 error = KERN_INVALID_ARGUMENT;
3201 break;
3202
3203 case TASK_EVENTS_INFO:
3204 {
3205 register task_events_info_t events_info;
3206 register thread_t thread;
3207
3208 if (*task_info_count < TASK_EVENTS_INFO_COUNT) {
3209 error = KERN_INVALID_ARGUMENT;
3210 break;
3211 }
3212
3213 events_info = (task_events_info_t) task_info_out;
3214
3215
3216 events_info->faults = task->faults;
3217 events_info->pageins = task->pageins;
3218 events_info->cow_faults = task->cow_faults;
3219 events_info->messages_sent = task->messages_sent;
3220 events_info->messages_received = task->messages_received;
3221 events_info->syscalls_mach = task->syscalls_mach;
3222 events_info->syscalls_unix = task->syscalls_unix;
3223
3224 events_info->csw = task->c_switch;
3225
3226 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3227 events_info->csw += thread->c_switch;
3228 events_info->syscalls_mach += thread->syscalls_mach;
3229 events_info->syscalls_unix += thread->syscalls_unix;
3230 }
3231
3232
3233 *task_info_count = TASK_EVENTS_INFO_COUNT;
3234 break;
3235 }
3236 case TASK_AFFINITY_TAG_INFO:
3237 {
3238 if (*task_info_count < TASK_AFFINITY_TAG_INFO_COUNT) {
3239 error = KERN_INVALID_ARGUMENT;
3240 break;
3241 }
3242
3243 error = task_affinity_info(task, task_info_out, task_info_count);
3244 break;
3245 }
3246 case TASK_POWER_INFO:
3247 {
3248 if (*task_info_count < TASK_POWER_INFO_COUNT) {
3249 error = KERN_INVALID_ARGUMENT;
3250 break;
3251 }
3252
3253 task_power_info_locked(task, (task_power_info_t)task_info_out, NULL);
3254 break;
3255 }
3256
3257 case TASK_POWER_INFO_V2:
3258 {
3259 if (*task_info_count < TASK_POWER_INFO_V2_COUNT) {
3260 error = KERN_INVALID_ARGUMENT;
3261 break;
3262 }
3263 task_power_info_v2_t tpiv2 = (task_power_info_v2_t) task_info_out;
3264 task_power_info_locked(task, &tpiv2->cpu_energy, &tpiv2->gpu_energy);
3265 break;
3266 }
3267
3268 case TASK_VM_INFO:
3269 case TASK_VM_INFO_PURGEABLE:
3270 {
3271 task_vm_info_t vm_info;
3272 vm_map_t map;
3273
3274 if (*task_info_count < TASK_VM_INFO_REV0_COUNT) {
3275 error = KERN_INVALID_ARGUMENT;
3276 break;
3277 }
3278
3279 vm_info = (task_vm_info_t)task_info_out;
3280
3281 if (task == kernel_task) {
3282 map = kernel_map;
3283 /* no lock */
3284 } else {
3285 map = task->map;
3286 vm_map_lock_read(map);
3287 }
3288
3289 vm_info->virtual_size = (typeof(vm_info->virtual_size))map->size;
3290 vm_info->region_count = map->hdr.nentries;
3291 vm_info->page_size = vm_map_page_size(map);
3292
3293 vm_info->resident_size = pmap_resident_count(map->pmap);
3294 vm_info->resident_size *= PAGE_SIZE;
3295 vm_info->resident_size_peak = pmap_resident_max(map->pmap);
3296 vm_info->resident_size_peak *= PAGE_SIZE;
3297
3298 #define _VM_INFO(_name) \
3299 vm_info->_name = ((mach_vm_size_t) map->pmap->stats._name) * PAGE_SIZE
3300
3301 _VM_INFO(device);
3302 _VM_INFO(device_peak);
3303 _VM_INFO(external);
3304 _VM_INFO(external_peak);
3305 _VM_INFO(internal);
3306 _VM_INFO(internal_peak);
3307 _VM_INFO(reusable);
3308 _VM_INFO(reusable_peak);
3309 _VM_INFO(compressed);
3310 _VM_INFO(compressed_peak);
3311 _VM_INFO(compressed_lifetime);
3312
3313 vm_info->purgeable_volatile_pmap = 0;
3314 vm_info->purgeable_volatile_resident = 0;
3315 vm_info->purgeable_volatile_virtual = 0;
3316 if (task == kernel_task) {
3317 /*
3318 * We do not maintain the detailed stats for the
3319 * kernel_pmap, so just count everything as
3320 * "internal"...
3321 */
3322 vm_info->internal = vm_info->resident_size;
3323 /*
3324 * ... but since the memory held by the VM compressor
3325 * in the kernel address space ought to be attributed
3326 * to user-space tasks, we subtract it from "internal"
3327 * to give memory reporting tools a more accurate idea
3328 * of what the kernel itself is actually using, instead
3329 * of making it look like the kernel is leaking memory
3330 * when the system is under memory pressure.
3331 */
3332 vm_info->internal -= (VM_PAGE_COMPRESSOR_COUNT *
3333 PAGE_SIZE);
3334 } else {
3335 mach_vm_size_t volatile_virtual_size;
3336 mach_vm_size_t volatile_resident_size;
3337 mach_vm_size_t volatile_compressed_size;
3338 mach_vm_size_t volatile_pmap_size;
3339 mach_vm_size_t volatile_compressed_pmap_size;
3340 kern_return_t kr;
3341
3342 if (flavor == TASK_VM_INFO_PURGEABLE) {
3343 kr = vm_map_query_volatile(
3344 map,
3345 &volatile_virtual_size,
3346 &volatile_resident_size,
3347 &volatile_compressed_size,
3348 &volatile_pmap_size,
3349 &volatile_compressed_pmap_size);
3350 if (kr == KERN_SUCCESS) {
3351 vm_info->purgeable_volatile_pmap =
3352 volatile_pmap_size;
3353 if (radar_20146450) {
3354 vm_info->compressed -=
3355 volatile_compressed_pmap_size;
3356 }
3357 vm_info->purgeable_volatile_resident =
3358 volatile_resident_size;
3359 vm_info->purgeable_volatile_virtual =
3360 volatile_virtual_size;
3361 }
3362 }
3363 vm_map_unlock_read(map);
3364 }
3365
3366 if (*task_info_count >= TASK_VM_INFO_COUNT) {
3367 vm_info->phys_footprint = 0;
3368 *task_info_count = TASK_VM_INFO_COUNT;
3369 } else {
3370 *task_info_count = TASK_VM_INFO_REV0_COUNT;
3371 }
3372
3373 break;
3374 }
3375
3376 case TASK_WAIT_STATE_INFO:
3377 {
3378 /*
3379 * Deprecated flavor. Currently allowing some results until all users
3380 * stop calling it. The results may not be accurate.
3381 */
3382 task_wait_state_info_t wait_state_info;
3383 uint64_t total_sfi_ledger_val = 0;
3384
3385 if (*task_info_count < TASK_WAIT_STATE_INFO_COUNT) {
3386 error = KERN_INVALID_ARGUMENT;
3387 break;
3388 }
3389
3390 wait_state_info = (task_wait_state_info_t) task_info_out;
3391
3392 wait_state_info->total_wait_state_time = 0;
3393 bzero(wait_state_info->_reserved, sizeof(wait_state_info->_reserved));
3394
3395 #if CONFIG_SCHED_SFI
3396 int i, prev_lentry = -1;
3397 int64_t val_credit, val_debit;
3398
3399 for (i = 0; i < MAX_SFI_CLASS_ID; i++){
3400 val_credit = 0;
3401 /*
3402 * checking with prev_lentry != entry ensures adjacent classes
3403 * which share the same ledger do not add wait times twice.
3404 * Note: Use ledger() call to get data for each individual sfi class.
3405 */
3406 if (prev_lentry != task_ledgers.sfi_wait_times[i] &&
3407 KERN_SUCCESS == ledger_get_entries(task->ledger,
3408 task_ledgers.sfi_wait_times[i], &val_credit, &val_debit)) {
3409 total_sfi_ledger_val += val_credit;
3410 }
3411 prev_lentry = task_ledgers.sfi_wait_times[i];
3412 }
3413
3414 #endif /* CONFIG_SCHED_SFI */
3415 wait_state_info->total_wait_sfi_state_time = total_sfi_ledger_val;
3416 *task_info_count = TASK_WAIT_STATE_INFO_COUNT;
3417
3418 break;
3419 }
3420 case TASK_VM_INFO_PURGEABLE_ACCOUNT:
3421 {
3422 #if DEVELOPMENT || DEBUG
3423 pvm_account_info_t acnt_info;
3424
3425 if (*task_info_count < PVM_ACCOUNT_INFO_COUNT) {
3426 error = KERN_INVALID_ARGUMENT;
3427 break;
3428 }
3429
3430 if (task_info_out == NULL) {
3431 error = KERN_INVALID_ARGUMENT;
3432 break;
3433 }
3434
3435 acnt_info = (pvm_account_info_t) task_info_out;
3436
3437 error = vm_purgeable_account(task, acnt_info);
3438
3439 *task_info_count = PVM_ACCOUNT_INFO_COUNT;
3440
3441 break;
3442 #else /* DEVELOPMENT || DEBUG */
3443 error = KERN_NOT_SUPPORTED;
3444 break;
3445 #endif /* DEVELOPMENT || DEBUG */
3446 }
3447 case TASK_FLAGS_INFO:
3448 {
3449 task_flags_info_t flags_info;
3450
3451 if (*task_info_count < TASK_FLAGS_INFO_COUNT) {
3452 error = KERN_INVALID_ARGUMENT;
3453 break;
3454 }
3455
3456 flags_info = (task_flags_info_t)task_info_out;
3457
3458 /* only publish the 64-bit flag of the task */
3459 flags_info->flags = task->t_flags & TF_64B_ADDR;
3460
3461 *task_info_count = TASK_FLAGS_INFO_COUNT;
3462 break;
3463 }
3464
3465 case TASK_DEBUG_INFO_INTERNAL:
3466 {
3467 #if DEVELOPMENT || DEBUG
3468 task_debug_info_internal_t dbg_info;
3469 if (*task_info_count < TASK_DEBUG_INFO_INTERNAL_COUNT) {
3470 error = KERN_NOT_SUPPORTED;
3471 break;
3472 }
3473
3474 if (task_info_out == NULL) {
3475 error = KERN_INVALID_ARGUMENT;
3476 break;
3477 }
3478 dbg_info = (task_debug_info_internal_t) task_info_out;
3479 dbg_info->ipc_space_size = 0;
3480 if (task->itk_space){
3481 dbg_info->ipc_space_size = task->itk_space->is_table_size;
3482 }
3483
3484 error = KERN_SUCCESS;
3485 *task_info_count = TASK_DEBUG_INFO_INTERNAL_COUNT;
3486 break;
3487 #else /* DEVELOPMENT || DEBUG */
3488 error = KERN_NOT_SUPPORTED;
3489 break;
3490 #endif /* DEVELOPMENT || DEBUG */
3491 }
3492 default:
3493 error = KERN_INVALID_ARGUMENT;
3494 }
3495
3496 task_unlock(task);
3497 return (error);
3498 }
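/*
 * Illustrative user-space sketch of one common flavor handled above,
 * assuming only the public <mach/mach.h> declarations: the count is
 * in/out, so the caller passes its buffer capacity and the kernel
 * reports how much it filled in.
 */
#if 0 /* example only; not compiled as part of the kernel */
#include <mach/mach.h>
#include <stdio.h>

static void
print_my_memory_usage(void)
{
	mach_task_basic_info_data_t info;
	mach_msg_type_number_t count = MACH_TASK_BASIC_INFO_COUNT;

	if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO,
	    (task_info_t)&info, &count) != KERN_SUCCESS)
		return;

	printf("resident %llu bytes (peak %llu), virtual %llu bytes\n",
	    (unsigned long long)info.resident_size,
	    (unsigned long long)info.resident_size_max,
	    (unsigned long long)info.virtual_size);
}
#endif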
3499
3500 /*
3501 * task_power_info
3502 *
3503 * Returns power stats for the task.
3504 * Note: Called with task locked.
3505 */
3506 void
3507 task_power_info_locked(
3508 task_t task,
3509 task_power_info_t info,
3510 gpu_energy_data_t ginfo)
3511 {
3512 thread_t thread;
3513 ledger_amount_t tmp;
3514
3515 task_lock_assert_owned(task);
3516
3517 ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
3518 (ledger_amount_t *)&info->task_interrupt_wakeups, &tmp);
3519 ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
3520 (ledger_amount_t *)&info->task_platform_idle_wakeups, &tmp);
3521
3522 info->task_timer_wakeups_bin_1 = task->task_timer_wakeups_bin_1;
3523 info->task_timer_wakeups_bin_2 = task->task_timer_wakeups_bin_2;
3524
3525 info->total_user = task->total_user_time;
3526 info->total_system = task->total_system_time;
3527
3528 if (ginfo) {
3529 ginfo->task_gpu_utilisation = task->task_gpu_ns;
3530 }
3531
3532 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3533 uint64_t tval;
3534 spl_t x;
3535
3536 if (thread->options & TH_OPT_IDLE_THREAD)
3537 continue;
3538
3539 x = splsched();
3540 thread_lock(thread);
3541
3542 info->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
3543 info->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
3544
3545 tval = timer_grab(&thread->user_timer);
3546 info->total_user += tval;
3547
3548 tval = timer_grab(&thread->system_timer);
3549 if (thread->precise_user_kernel_time) {
3550 info->total_system += tval;
3551 } else {
3552 /* system_timer may represent either sys or user */
3553 info->total_user += tval;
3554 }
3555
3556 if (ginfo) {
3557 ginfo->task_gpu_utilisation += ml_gpu_stat(thread);
3558 }
3559 thread_unlock(thread);
3560 splx(x);
3561 }
3562 }
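/*
 * The totals gathered above reach user space through the
 * TASK_POWER_INFO (and TASK_POWER_INFO_V2) flavors of task_info().
 * A minimal sketch of the former (illustrative only):
 */
#if 0 /* example only; not compiled as part of the kernel */
#include <mach/mach.h>
#include <stdio.h>

static void
print_my_wakeups(void)
{
	task_power_info_data_t power;
	mach_msg_type_number_t count = TASK_POWER_INFO_COUNT;

	if (task_info(mach_task_self(), TASK_POWER_INFO,
	    (task_info_t)&power, &count) != KERN_SUCCESS)
		return;

	printf("interrupt wakeups %llu, platform idle wakeups %llu\n",
	    (unsigned long long)power.task_interrupt_wakeups,
	    (unsigned long long)power.task_platform_idle_wakeups);
}
#endif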
3563
3564 /*
3565 * task_gpu_utilisation
3566 *
3567 * Returns the total gpu time used by all the threads of the task
3568 * (both dead and alive)
3569 */
3570 uint64_t
3571 task_gpu_utilisation(
3572 task_t task)
3573 {
3574 uint64_t gpu_time = 0;
3575 thread_t thread;
3576
3577 task_lock(task);
3578 gpu_time += task->task_gpu_ns;
3579
3580 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3581 spl_t x;
3582 x = splsched();
3583 thread_lock(thread);
3584 gpu_time += ml_gpu_stat(thread);
3585 thread_unlock(thread);
3586 splx(x);
3587 }
3588
3589 task_unlock(task);
3590 return gpu_time;
3591 }
3592
3593 kern_return_t
3594 task_purgable_info(
3595 task_t task,
3596 task_purgable_info_t *stats)
3597 {
3598 if (task == TASK_NULL || stats == NULL)
3599 return KERN_INVALID_ARGUMENT;
3600 /* Take task reference */
3601 task_reference(task);
3602 vm_purgeable_stats((vm_purgeable_info_t)stats, task);
3603 /* Drop task reference */
3604 task_deallocate(task);
3605 return KERN_SUCCESS;
3606 }
3607
3608 void
3609 task_vtimer_set(
3610 task_t task,
3611 integer_t which)
3612 {
3613 thread_t thread;
3614 spl_t x;
3615
3616 /* assert(task == current_task()); */ /* bogus assert 4803227 4807483 */
3617
3618 task_lock(task);
3619
3620 task->vtimers |= which;
3621
3622 switch (which) {
3623
3624 case TASK_VTIMER_USER:
3625 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3626 x = splsched();
3627 thread_lock(thread);
3628 if (thread->precise_user_kernel_time)
3629 thread->vtimer_user_save = timer_grab(&thread->user_timer);
3630 else
3631 thread->vtimer_user_save = timer_grab(&thread->system_timer);
3632 thread_unlock(thread);
3633 splx(x);
3634 }
3635 break;
3636
3637 case TASK_VTIMER_PROF:
3638 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3639 x = splsched();
3640 thread_lock(thread);
3641 thread->vtimer_prof_save = timer_grab(&thread->user_timer);
3642 thread->vtimer_prof_save += timer_grab(&thread->system_timer);
3643 thread_unlock(thread);
3644 splx(x);
3645 }
3646 break;
3647
3648 case TASK_VTIMER_RLIM:
3649 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3650 x = splsched();
3651 thread_lock(thread);
3652 thread->vtimer_rlim_save = timer_grab(&thread->user_timer);
3653 thread->vtimer_rlim_save += timer_grab(&thread->system_timer);
3654 thread_unlock(thread);
3655 splx(x);
3656 }
3657 break;
3658 }
3659
3660 task_unlock(task);
3661 }
3662
3663 void
3664 task_vtimer_clear(
3665 task_t task,
3666 integer_t which)
3667 {
3668 assert(task == current_task());
3669
3670 task_lock(task);
3671
3672 task->vtimers &= ~which;
3673
3674 task_unlock(task);
3675 }
3676
3677 void
3678 task_vtimer_update(
3679 __unused
3680 task_t task,
3681 integer_t which,
3682 uint32_t *microsecs)
3683 {
3684 thread_t thread = current_thread();
3685 uint32_t tdelt;
3686 clock_sec_t secs;
3687 uint64_t tsum;
3688
3689 assert(task == current_task());
3690
3691 assert(task->vtimers & which);
3692
3693 secs = tdelt = 0;
3694
3695 switch (which) {
3696
3697 case TASK_VTIMER_USER:
3698 if (thread->precise_user_kernel_time) {
3699 tdelt = (uint32_t)timer_delta(&thread->user_timer,
3700 &thread->vtimer_user_save);
3701 } else {
3702 tdelt = (uint32_t)timer_delta(&thread->system_timer,
3703 &thread->vtimer_user_save);
3704 }
3705 absolutetime_to_microtime(tdelt, &secs, microsecs);
3706 break;
3707
3708 case TASK_VTIMER_PROF:
3709 tsum = timer_grab(&thread->user_timer);
3710 tsum += timer_grab(&thread->system_timer);
3711 tdelt = (uint32_t)(tsum - thread->vtimer_prof_save);
3712 absolutetime_to_microtime(tdelt, &secs, microsecs);
3713 /* if the time delta is smaller than a usec, ignore */
3714 if (*microsecs != 0)
3715 thread->vtimer_prof_save = tsum;
3716 break;
3717
3718 case TASK_VTIMER_RLIM:
3719 tsum = timer_grab(&thread->user_timer);
3720 tsum += timer_grab(&thread->system_timer);
3721 tdelt = (uint32_t)(tsum - thread->vtimer_rlim_save);
3722 thread->vtimer_rlim_save = tsum;
3723 absolutetime_to_microtime(tdelt, &secs, microsecs);
3724 break;
3725 }
3726
3727 }
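/*
 * These vtimer bits are driven from the BSD layer: TASK_VTIMER_USER and
 * TASK_VTIMER_PROF back setitimer(2)'s ITIMER_VIRTUAL and ITIMER_PROF,
 * and TASK_VTIMER_RLIM backs RLIMIT_CPU accounting.  A standard
 * user-space use of the virtual timer (illustrative only):
 */
#if 0 /* example only; not compiled as part of the kernel */
#include <signal.h>
#include <sys/time.h>

static volatile sig_atomic_t vtalrm_fired;

static void
on_vtalrm(int sig)
{
	(void)sig;
	vtalrm_fired = 1;	/* delivered after ~100ms of user-mode CPU time */
}

static void
arm_virtual_timer(void)
{
	struct itimerval it = {
		.it_interval = { 0, 0 },
		.it_value    = { 0, 100000 },	/* 100000us of virtual (user) time */
	};

	signal(SIGVTALRM, on_vtalrm);
	setitimer(ITIMER_VIRTUAL, &it, NULL);
}
#endif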
3728
3729 /*
3730 * task_assign:
3731 *
3732 * Change the assigned processor set for the task
3733 */
3734 kern_return_t
3735 task_assign(
3736 __unused task_t task,
3737 __unused processor_set_t new_pset,
3738 __unused boolean_t assign_threads)
3739 {
3740 return(KERN_FAILURE);
3741 }
3742
3743 /*
3744 * task_assign_default:
3745 *
3746 * Version of task_assign to assign to default processor set.
3747 */
3748 kern_return_t
3749 task_assign_default(
3750 task_t task,
3751 boolean_t assign_threads)
3752 {
3753 return (task_assign(task, &pset0, assign_threads));
3754 }
3755
3756 /*
3757 * task_get_assignment
3758 *
3759 * Return name of processor set that task is assigned to.
3760 */
3761 kern_return_t
3762 task_get_assignment(
3763 task_t task,
3764 processor_set_t *pset)
3765 {
3766 if (!task->active)
3767 return(KERN_FAILURE);
3768
3769 *pset = &pset0;
3770
3771 return (KERN_SUCCESS);
3772 }
3773
3774 uint64_t
3775 get_task_dispatchqueue_offset(
3776 task_t task)
3777 {
3778 return task->dispatchqueue_offset;
3779 }
3780
3781 /*
3782 * task_policy
3783 *
3784 * Set scheduling policy and parameters, both base and limit, for
3785 * the given task. Policy must be a policy which is enabled for the
3786 * processor set. Change contained threads if requested.
3787 */
3788 kern_return_t
3789 task_policy(
3790 __unused task_t task,
3791 __unused policy_t policy_id,
3792 __unused policy_base_t base,
3793 __unused mach_msg_type_number_t count,
3794 __unused boolean_t set_limit,
3795 __unused boolean_t change)
3796 {
3797 return(KERN_FAILURE);
3798 }
3799
3800 /*
3801 * task_set_policy
3802 *
3803 * Set scheduling policy and parameters, both base and limit, for
3804 * the given task. Policy can be any policy implemented by the
3805 * processor set, whether enabled or not. Change contained threads
3806 * if requested.
3807 */
3808 kern_return_t
3809 task_set_policy(
3810 __unused task_t task,
3811 __unused processor_set_t pset,
3812 __unused policy_t policy_id,
3813 __unused policy_base_t base,
3814 __unused mach_msg_type_number_t base_count,
3815 __unused policy_limit_t limit,
3816 __unused mach_msg_type_number_t limit_count,
3817 __unused boolean_t change)
3818 {
3819 return(KERN_FAILURE);
3820 }
3821
3822 kern_return_t
3823 task_set_ras_pc(
3824 __unused task_t task,
3825 __unused vm_offset_t pc,
3826 __unused vm_offset_t endpc)
3827 {
3828 return KERN_FAILURE;
3829 }
3830
3831 void
3832 task_synchronizer_destroy_all(task_t task)
3833 {
3834 semaphore_t semaphore;
3835
3836 /*
3837 * Destroy owned semaphores
3838 */
3839
3840 while (!queue_empty(&task->semaphore_list)) {
3841 semaphore = (semaphore_t) queue_first(&task->semaphore_list);
3842 (void) semaphore_destroy_internal(task, semaphore);
3843 }
3844 }
3845
3846 /*
3847 * Install default (machine-dependent) initial thread state
3848 * on the task. Subsequent thread creation will have this initial
3849 * state set on the thread by machine_thread_inherit_taskwide().
3850 * Flavors and structures are exactly the same as those passed to thread_set_state().
3851 */
3852 kern_return_t
3853 task_set_state(
3854 task_t task,
3855 int flavor,
3856 thread_state_t state,
3857 mach_msg_type_number_t state_count)
3858 {
3859 kern_return_t ret;
3860
3861 if (task == TASK_NULL) {
3862 return (KERN_INVALID_ARGUMENT);
3863 }
3864
3865 task_lock(task);
3866
3867 if (!task->active) {
3868 task_unlock(task);
3869 return (KERN_FAILURE);
3870 }
3871
3872 ret = machine_task_set_state(task, flavor, state, state_count);
3873
3874 task_unlock(task);
3875 return ret;
3876 }
3877
3878 /*
3879 * Examine the default (machine-dependent) initial thread state
3880 * on the task, as set by task_set_state(). Flavors and structures
3881 * are exactly the same as those passed to thread_get_state().
3882 */
3883 kern_return_t
3884 task_get_state(
3885 task_t task,
3886 int flavor,
3887 thread_state_t state,
3888 mach_msg_type_number_t *state_count)
3889 {
3890 kern_return_t ret;
3891
3892 if (task == TASK_NULL) {
3893 return (KERN_INVALID_ARGUMENT);
3894 }
3895
3896 task_lock(task);
3897
3898 if (!task->active) {
3899 task_unlock(task);
3900 return (KERN_FAILURE);
3901 }
3902
3903 ret = machine_task_get_state(task, flavor, state, state_count);
3904
3905 task_unlock(task);
3906 return ret;
3907 }
3908
3909 #if CONFIG_JETSAM
3910 #define HWM_USERCORE_MINSPACE 250 // free space (in MB) required *after* core file creation
3911
3912 void __attribute__((noinline))
3913 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb)
3914 {
3915 task_t task = current_task();
3916 int pid = 0;
3917 const char *procname = "unknown";
3918 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
3919
3920 #ifdef MACH_BSD
3921 pid = proc_selfpid();
3922
3923 if (pid == 1) {
3924 /*
3925 * Cannot have ReportCrash analyzing
3926 * a suspended initproc.
3927 */
3928 return;
3929 }
3930
3931 if (task->bsd_info != NULL)
3932 procname = proc_name_address(current_task()->bsd_info);
3933 #endif
3934
3935 if (hwm_user_cores) {
3936 int error;
3937 uint64_t starttime, end;
3938 clock_sec_t secs = 0;
3939 uint32_t microsecs = 0;
3940
3941 starttime = mach_absolute_time();
3942 /*
3943 * Trigger a coredump of this process. Don't proceed unless we know we won't
3944 * be filling up the disk, and ignore the core size resource limit for this
3945 * core file.
3946 */
3947 if ((error = coredump(current_task()->bsd_info, HWM_USERCORE_MINSPACE, COREDUMP_IGNORE_ULIMIT)) != 0) {
3948 printf("couldn't take coredump of %s[%d]: %d\n", procname, pid, error);
3949 }
3950 /*
3951 * coredump() leaves the task suspended.
3952 */
3953 task_resume_internal(current_task());
3954
3955 end = mach_absolute_time();
3956 absolutetime_to_microtime(end - starttime, &secs, &microsecs);
3957 printf("coredump of %s[%d] taken in %d secs %d microsecs\n",
3958 proc_name_address(current_task()->bsd_info), pid, (int)secs, microsecs);
3959 }
3960
3961 if (disable_exc_resource) {
3962 printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
3963 "supressed by a boot-arg.\n", procname, pid, max_footprint_mb);
3964 return;
3965 }
3966
3967 /*
3968 * A task that has triggered an EXC_RESOURCE should not be
3969 * jetsammed when the device is under memory pressure. Here
3970 * we set the P_MEMSTAT_TERMINATED flag so that the process
3971 * will be skipped if the memorystatus_thread wakes up.
3972 */
3973 proc_memstat_terminated(current_task()->bsd_info, TRUE);
3974
3975 printf("process %s[%d] crossed memory high watermark (%d MB); sending "
3976 "EXC_RESOURCE.\n", procname, pid, max_footprint_mb);
3977
3978 code[0] = code[1] = 0;
3979 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY);
3980 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK);
3981 EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb);
3982
3983 /*
3984 * Use the _internal_ variant so that no user-space
3985 * process can resume our task from under us.
3986 */
3987 task_suspend_internal(task);
3988 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
3989 task_resume_internal(task);
3990
3991 /*
3992 * After the EXC_RESOURCE has been handled, we must clear the
3993 * P_MEMSTAT_TERMINATED flag so that the process can again be
3994 * considered for jetsam if the memorystatus_thread wakes up.
3995 */
3996 proc_memstat_terminated(current_task()->bsd_info, FALSE); /* clear the flag */
3997 }
3998
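/*
 * A minimal sketch of how an exception catcher could take the high
 * watermark code apart again, assuming the decode counterparts of the
 * encode macros used above (see <kern/exc_resource.h>):
 *
 *	if (EXC_RESOURCE_DECODE_RESOURCE_TYPE(code[0]) == RESOURCE_TYPE_MEMORY &&
 *	    EXC_RESOURCE_DECODE_FLAVOR(code[0]) == FLAVOR_HIGH_WATERMARK) {
 *		int limit_mb = EXC_RESOURCE_HWM_DECODE_LIMIT(code[0]);
 *		...
 *	}
 */
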
3999 /*
4000 * Callback invoked when a task exceeds its physical footprint limit.
4001 */
4002 void
4003 task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1)
4004 {
4005 ledger_amount_t max_footprint, max_footprint_mb;
4006 ledger_amount_t footprint_after_purge;
4007 task_t task;
4008
4009 if (warning == LEDGER_WARNING_DIPPED_BELOW) {
4010 /*
4011 * Task memory limits only provide a warning on the way up.
4012 */
4013 return;
4014 }
4015
4016 task = current_task();
4017
4018 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &max_footprint);
4019 max_footprint_mb = max_footprint >> 20;
4020
4021 /*
4022 * Try to purge all "volatile" memory in that task first.
4023 */
4024 (void) task_purge_volatile_memory(task);
4025 /* are we still over the limit? */
4026 ledger_get_balance(task->ledger,
4027 task_ledgers.phys_footprint,
4028 &footprint_after_purge);
4029 if ((!warning &&
4030 footprint_after_purge <= max_footprint) ||
4031 (warning &&
4032 footprint_after_purge <= ((max_footprint *
4033 PHYS_FOOTPRINT_WARNING_LEVEL) / 100))) {
4034 /* all better now */
4035 ledger_reset_callback_state(task->ledger,
4036 task_ledgers.phys_footprint);
4037 return;
4038 }
4039 /* still over the limit after purging... */
4040
4041 /*
4042 * If this is an actual violation (not a warning),
4043 * generate a non-fatal high watermark EXC_RESOURCE.
4044 */
4045 if ((warning == 0) && (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION)) {
4046 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND((int)max_footprint_mb);
4047 }
4048
4049 memorystatus_on_ledger_footprint_exceeded((warning == LEDGER_WARNING_ROSE_ABOVE) ? TRUE : FALSE,
4050 (int)max_footprint_mb);
4051 }
4052
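/*
 * The callback above only fires because the phys_footprint entry is
 * registered against the task ledger template during ledger setup,
 * roughly as follows (a sketch, assuming ledger_set_callback() from
 * <kern/ledger.h>, with 't' being the task ledger template):
 *
 *	ledger_set_callback(t, task_ledgers.phys_footprint,
 *	    task_footprint_exceeded, NULL, NULL);
 */
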
4053 extern int proc_check_footprint_priv(void);
4054
4055 kern_return_t
4056 task_set_phys_footprint_limit(
4057 task_t task,
4058 int new_limit_mb,
4059 int *old_limit_mb)
4060 {
4061 kern_return_t error;
4062
4063 if ((error = proc_check_footprint_priv())) {
4064 return (KERN_NO_ACCESS);
4065 }
4066
4067 return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, FALSE);
4068 }
4069
4070 kern_return_t
4071 task_convert_phys_footprint_limit(
4072 int limit_mb,
4073 int *converted_limit_mb)
4074 {
4075 if (limit_mb == -1) {
4076 /*
4077 * No limit
4078 */
4079 if (max_task_footprint != 0) {
4080 *converted_limit_mb = (int)(max_task_footprint / 1024 / 1024); /* bytes to MB */
4081 } else {
4082 *converted_limit_mb = (int)(LEDGER_LIMIT_INFINITY >> 20);
4083 }
4084 } else {
4085 /* nothing to convert */
4086 *converted_limit_mb = limit_mb;
4087 }
4088 return (KERN_SUCCESS);
4089 }
4090
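/*
 * Worked example for the conversion above: with no global
 * max_task_footprint configured, a limit_mb of -1 converts to
 * LEDGER_LIMIT_INFINITY >> 20; with an assumed max_task_footprint of
 * 500 MB it converts to 500; any non-negative limit_mb is returned
 * unchanged.
 */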
4091
4092 kern_return_t
4093 task_set_phys_footprint_limit_internal(
4094 task_t task,
4095 int new_limit_mb,
4096 int *old_limit_mb,
4097 boolean_t trigger_exception)
4098 {
4099 ledger_amount_t old;
4100
4101 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &old);
4102
4103 if (old_limit_mb) {
4104 /*
4105 * Check that limit >> 20 will not give an "unexpected" 32-bit
4106 * result. There is, however, an implicit assumption that a -1 MB limit
4107 * equates to LEDGER_LIMIT_INFINITY.
4108 */
4109 assert(((old & 0xFFF0000000000000LL) == 0) || (old == LEDGER_LIMIT_INFINITY));
4110 *old_limit_mb = (int)(old >> 20);
4111 }
4112
4113 if (new_limit_mb == -1) {
4114 /*
4115 * Caller wishes to remove the limit.
4116 */
4117 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
4118 max_task_footprint ? max_task_footprint : LEDGER_LIMIT_INFINITY,
4119 max_task_footprint ? PHYS_FOOTPRINT_WARNING_LEVEL : 0);
4120 return (KERN_SUCCESS);
4121 }
4122
4123 #ifdef CONFIG_NOMONITORS
4124 return (KERN_SUCCESS);
4125 #endif /* CONFIG_NOMONITORS */
4126
4127 task_lock(task);
4128
4129 if (trigger_exception) {
4130 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION;
4131 } else {
4132 task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION;
4133 }
4134
4135 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
4136 (ledger_amount_t)new_limit_mb << 20, PHYS_FOOTPRINT_WARNING_LEVEL);
4137
4138 if (task == current_task()) {
4139 ledger_check_new_balance(task->ledger, task_ledgers.phys_footprint);
4140 }
4141
4142 task_unlock(task);
4143
4144 return (KERN_SUCCESS);
4145 }
4146
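/*
 * The MB <-> bytes conversions above are plain shifts by 20 bits: an
 * assumed request of new_limit_mb = 512 becomes
 * (ledger_amount_t)512 << 20 == 536870912 bytes for the ledger, and on
 * the way back out, old >> 20 turns the ledger limit in bytes into
 * whole megabytes again.
 */
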
4147 kern_return_t
4148 task_get_phys_footprint_limit(
4149 task_t task,
4150 int *limit_mb)
4151 {
4152 ledger_amount_t limit;
4153
4154 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &limit);
4155 /*
4156 * Check that limit >> 20 will not give an "unexpected" signed, 32-bit
4157 * result. There is, however, an implicit assumption that a -1 MB limit
4158 * equates to LEDGER_LIMIT_INFINITY.
4159 */
4160 assert(((limit & 0xFFF0000000000000LL) == 0) || (limit == LEDGER_LIMIT_INFINITY));
4161 *limit_mb = (int)(limit >> 20);
4162
4163 return (KERN_SUCCESS);
4164 }
4165 #else /* CONFIG_JETSAM */
4166 kern_return_t
4167 task_set_phys_footprint_limit(
4168 __unused task_t task,
4169 __unused int new_limit_mb,
4170 __unused int *old_limit_mb)
4171 {
4172 return (KERN_FAILURE);
4173 }
4174
4175 kern_return_t
4176 task_get_phys_footprint_limit(
4177 __unused task_t task,
4178 __unused int *limit_mb)
4179 {
4180 return (KERN_FAILURE);
4181 }
4182 #endif /* CONFIG_JETSAM */
4183
4184 /*
4185 * Some functions needed by other components are currently
4186 * implemented as macros within the osfmk component. Export
4187 * them here as functions of the same name.
4188 */
4189 boolean_t is_kerneltask(task_t t)
4190 {
4191 if (t == kernel_task)
4192 return (TRUE);
4193
4194 return (FALSE);
4195 }
4196
4197 int
4198 check_for_tasksuspend(task_t task)
4199 {
4200
4201 if (task == TASK_NULL)
4202 return (0);
4203
4204 return (task->suspend_count > 0);
4205 }
4206
4207 #undef current_task
4208 task_t current_task(void);
4209 task_t current_task(void)
4210 {
4211 return (current_task_fast());
4212 }
4213
4214 #undef task_reference
4215 void task_reference(task_t task);
4216 void
4217 task_reference(
4218 task_t task)
4219 {
4220 if (task != TASK_NULL)
4221 task_reference_internal(task);
4222 }
4223
4224 /* defined in bsd/kern/kern_prot.c */
4225 extern int get_audit_token_pid(audit_token_t *audit_token);
4226
4227 int task_pid(task_t task)
4228 {
4229 if (task)
4230 return get_audit_token_pid(&task->audit_token);
4231 return -1;
4232 }
4233
4234
4235 /*
4236 * This routine is always called with the task lock held.
4237 * It returns a thread handle without taking a reference, since the
4238 * caller operates on it while still holding the task lock.
4239 */
4240 thread_t
4241 task_findtid(task_t task, uint64_t tid)
4242 {
4243 thread_t thread = THREAD_NULL;
4244
4245 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4246 if (thread->thread_id == tid)
4247 return(thread);
4248 }
4249 return(THREAD_NULL);
4250 }
4251
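/*
 * A minimal usage sketch (the caller is an assumption, not taken from
 * this file), honoring the locking contract described above:
 *
 *	task_lock(task);
 *	thread = task_findtid(task, tid);
 *	if (thread != THREAD_NULL) {
 *		... use the thread while the task lock is still held ...
 *	}
 *	task_unlock(task);
 */
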
4252 /*
4253 * Control the CPU usage monitor for a task.
4254 */
4255 kern_return_t
4256 task_cpu_usage_monitor_ctl(task_t task, uint32_t *flags)
4257 {
4258 int error = KERN_SUCCESS;
4259
4260 if (*flags & CPUMON_MAKE_FATAL) {
4261 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_CPUMON;
4262 } else {
4263 error = KERN_INVALID_ARGUMENT;
4264 }
4265
4266 return error;
4267 }
4268
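/*
 * A minimal usage sketch (the caller is an assumption, not taken from
 * this file): a component that wants CPU monitor violations to be
 * fatal for the task would do roughly
 *
 *	uint32_t flags = CPUMON_MAKE_FATAL;
 *	kern_return_t kr = task_cpu_usage_monitor_ctl(task, &flags);
 *
 * Any flags value without CPUMON_MAKE_FATAL set is rejected with
 * KERN_INVALID_ARGUMENT.
 */
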
4269 /*
4270 * Control the wakeups monitor for a task.
4271 */
4272 kern_return_t
4273 task_wakeups_monitor_ctl(task_t task, uint32_t *flags, int32_t *rate_hz)
4274 {
4275 ledger_t ledger = task->ledger;
4276
4277 task_lock(task);
4278 if (*flags & WAKEMON_GET_PARAMS) {
4279 ledger_amount_t limit;
4280 uint64_t period;
4281
4282 ledger_get_limit(ledger, task_ledgers.interrupt_wakeups, &limit);
4283 ledger_get_period(ledger, task_ledgers.interrupt_wakeups, &period);
4284
4285 if (limit != LEDGER_LIMIT_INFINITY) {
4286 /*
4287 * An active limit means the wakeups monitor is enabled.
4288 */
4289 *rate_hz = (int32_t)(limit / (int64_t)(period / NSEC_PER_SEC));
4290 *flags = WAKEMON_ENABLE;
4291 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
4292 *flags |= WAKEMON_MAKE_FATAL;
4293 }
4294 } else {
4295 *flags = WAKEMON_DISABLE;
4296 *rate_hz = -1;
4297 }
4298
4299 /*
4300 * If WAKEMON_GET_PARAMS is present in flags, all other flags are ignored.
4301 */
4302 task_unlock(task);
4303 return KERN_SUCCESS;
4304 }
4305
4306 if (*flags & WAKEMON_ENABLE) {
4307 if (*flags & WAKEMON_SET_DEFAULTS) {
4308 *rate_hz = task_wakeups_monitor_rate;
4309 }
4310
4311 #ifndef CONFIG_NOMONITORS
4312 if (*flags & WAKEMON_MAKE_FATAL) {
4313 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
4314 }
4315 #endif /* CONFIG_NOMONITORS */
4316
4317 if (*rate_hz < 0) {
4318 task_unlock(task);
4319 return KERN_INVALID_ARGUMENT;
4320 }
4321
4322 #ifndef CONFIG_NOMONITORS
4323 ledger_set_limit(ledger, task_ledgers.interrupt_wakeups, *rate_hz * task_wakeups_monitor_interval,
4324 task_wakeups_monitor_ustackshots_trigger_pct);
4325 ledger_set_period(ledger, task_ledgers.interrupt_wakeups, task_wakeups_monitor_interval * NSEC_PER_SEC);
4326 ledger_enable_callback(ledger, task_ledgers.interrupt_wakeups);
4327 #endif /* CONFIG_NOMONITORS */
4328 } else if (*flags & WAKEMON_DISABLE) {
4329 /*
4330 * Caller wishes to disable wakeups monitor on the task.
4331 *
4332 * Disable telemetry if it was triggered by the wakeups monitor, and
4333 * remove the limit & callback on the wakeups ledger entry.
4334 */
4335 #if CONFIG_TELEMETRY
4336 telemetry_task_ctl_locked(current_task(), TF_WAKEMON_WARNING, 0);
4337 #endif
4338 ledger_disable_refill(ledger, task_ledgers.interrupt_wakeups);
4339 ledger_disable_callback(ledger, task_ledgers.interrupt_wakeups);
4340 }
4341
4342 task_unlock(task);
4343 return KERN_SUCCESS;
4344 }
4345
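/*
 * Worked example for the enable path above, with assumed defaults of
 * 150 wakeups/sec for task_wakeups_monitor_rate and 300 seconds for
 * task_wakeups_monitor_interval: WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS
 * sets the interrupt_wakeups ledger limit to 150 * 300 = 45000 wakeups
 * over a 300 * NSEC_PER_SEC refill period, and the WAKEMON_GET_PARAMS
 * path later recovers the rate as 45000 / (period / NSEC_PER_SEC) =
 * 150 Hz.
 */
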
4346 void
4347 task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1)
4348 {
4349 if (warning == LEDGER_WARNING_ROSE_ABOVE) {
4350 #if CONFIG_TELEMETRY
4351 /*
4352 * This task is in danger of violating the wakeups monitor. Enable telemetry on this task
4353 * so there are micro-stackshots available if and when EXC_RESOURCE is triggered.
4354 */
4355 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 1);
4356 #endif
4357 return;
4358 }
4359
4360 #if CONFIG_TELEMETRY
4361 /*
4362 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
4363 * exceeded the limit, turn telemetry off for the task.
4364 */
4365 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 0);
4366 #endif
4367
4368 if (warning == 0) {
4369 THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE();
4370 }
4371 }
4372
4373 void __attribute__((noinline))
4374 THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE(void)
4375 {
4376 task_t task = current_task();
4377 int pid = 0;
4378 const char *procname = "unknown";
4379 uint64_t observed_wakeups_rate;
4380 uint64_t permitted_wakeups_rate;
4381 uint64_t observation_interval;
4382 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
4383 struct ledger_entry_info lei;
4384
4385 #ifdef MACH_BSD
4386 pid = proc_selfpid();
4387 if (task->bsd_info != NULL)
4388 procname = proc_name_address(current_task()->bsd_info);
4389 #endif
4390
4391 ledger_get_entry_info(task->ledger, task_ledgers.interrupt_wakeups, &lei);
4392
4393 /*
4394 * Disable the exception notification so we don't overwhelm
4395 * the listener with an endless stream of redundant exceptions.
4396 */
4397 uint32_t flags = WAKEMON_DISABLE;
4398 task_wakeups_monitor_ctl(task, &flags, NULL);
4399
4400 observed_wakeups_rate = (lei.lei_balance * (int64_t)NSEC_PER_SEC) / lei.lei_last_refill;
4401 permitted_wakeups_rate = lei.lei_limit / task_wakeups_monitor_interval;
4402 observation_interval = lei.lei_refill_period / NSEC_PER_SEC;
4403
4404 if (disable_exc_resource) {
4405 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
4406 "supressed by a boot-arg\n", procname, pid);
4407 return;
4408 }
4409 if (audio_active) {
4410 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
4411 "supressed due to audio playback\n", procname, pid);
4412 return;
4413 }
4414 printf("process %s[%d] caught causing excessive wakeups. Observed wakeups rate "
4415 "(per sec): %lld; Maximum permitted wakeups rate (per sec): %lld; Observation "
4416 "period: %lld seconds; Task lifetime number of wakeups: %lld\n",
4417 procname, pid, observed_wakeups_rate, permitted_wakeups_rate,
4418 observation_interval, lei.lei_credit);
4419
4420 code[0] = code[1] = 0;
4421 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_WAKEUPS);
4422 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_WAKEUPS_MONITOR);
4423 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_PERMITTED(code[0], task_wakeups_monitor_rate);
4424 EXC_RESOURCE_CPUMONITOR_ENCODE_OBSERVATION_INTERVAL(code[0], observation_interval);
4425 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_OBSERVED(code[1], lei.lei_balance * (int64_t)NSEC_PER_SEC / lei.lei_last_refill);
4426 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
4427
4428 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
4429 task_terminate_internal(task);
4430 }
4431 }
4432
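/*
 * Worked example for the rate computations above (the figures are
 * assumptions for illustration): a task whose ledger balance reached
 * 30000 wakeups over a last refill interval of 100 seconds
 * (100 * NSEC_PER_SEC nanoseconds) reports
 *
 *	observed_wakeups_rate = 30000 * NSEC_PER_SEC / (100 * NSEC_PER_SEC)
 *	                      = 300 wakeups per second,
 *
 * which is printed alongside the permitted rate,
 * lei_limit / task_wakeups_monitor_interval.
 */
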
4433 kern_return_t
4434 task_purge_volatile_memory(
4435 task_t task)
4436 {
4437 vm_map_t map;
4438 int num_object_purged;
4439
4440 if (task == TASK_NULL)
4441 return KERN_INVALID_TASK;
4442
4443 task_lock(task);
4444
4445 if (!task->active) {
4446 task_unlock(task);
4447 return KERN_INVALID_TASK;
4448 }
4449 map = task->map;
4450 if (map == VM_MAP_NULL) {
4451 task_unlock(task);
4452 return KERN_INVALID_TASK;
4453 }
4454 vm_map_reference(task->map);
4455
4456 task_unlock(task);
4457
4458 num_object_purged = vm_map_purge(map);
4459 vm_map_deallocate(map);
4460
4461 return KERN_SUCCESS;
4462 }
4463
4464 /* Placeholders for the task set/get voucher interfaces */
4465 kern_return_t
4466 task_get_mach_voucher(
4467 task_t task,
4468 mach_voucher_selector_t __unused which,
4469 ipc_voucher_t *voucher)
4470 {
4471 if (TASK_NULL == task)
4472 return KERN_INVALID_TASK;
4473
4474 *voucher = NULL;
4475 return KERN_SUCCESS;
4476 }
4477
4478 kern_return_t
4479 task_set_mach_voucher(
4480 task_t task,
4481 ipc_voucher_t __unused voucher)
4482 {
4483 if (TASK_NULL == task)
4484 return KERN_INVALID_TASK;
4485
4486 return KERN_SUCCESS;
4487 }
4488
4489 kern_return_t
4490 task_swap_mach_voucher(
4491 task_t task,
4492 ipc_voucher_t new_voucher,
4493 ipc_voucher_t *in_out_old_voucher)
4494 {
4495 if (TASK_NULL == task)
4496 return KERN_INVALID_TASK;
4497
4498 *in_out_old_voucher = new_voucher;
4499 return KERN_SUCCESS;
4500 }
4501
4502 void task_set_gpu_denied(task_t task, boolean_t denied)
4503 {
4504 task_lock(task);
4505
4506 if (denied) {
4507 task->t_flags |= TF_GPU_DENIED;
4508 } else {
4509 task->t_flags &= ~TF_GPU_DENIED;
4510 }
4511
4512 task_unlock(task);
4513 }
4514
4515 boolean_t task_is_gpu_denied(task_t task)
4516 {
4517 /* We don't need the lock to read this flag */
4518 return (task->t_flags & TF_GPU_DENIED) ? TRUE : FALSE;
4519 }