]> git.saurik.com Git - apple/xnu.git/blob - osfmk/kern/task.c
d58d29cdbc186852040e2a3fdc7f9973f67b9e10
[apple/xnu.git] / osfmk / kern / task.c
1 /*
2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_FREE_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 * File: kern/task.c
58 * Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub,
59 * David Black
60 *
61 * Task management primitives implementation.
62 */
63 /*
64 * Copyright (c) 1993 The University of Utah and
65 * the Computer Systems Laboratory (CSL). All rights reserved.
66 *
67 * Permission to use, copy, modify and distribute this software and its
68 * documentation is hereby granted, provided that both the copyright
69 * notice and this permission notice appear in all copies of the
70 * software, derivative works or modified versions, and any portions
71 * thereof, and that both notices appear in supporting documentation.
72 *
73 * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
74 * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
75 * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
76 *
77 * CSL requests users of this software to return to csl-dist@cs.utah.edu any
78 * improvements that they make and grant CSL redistribution rights.
79 *
80 */
81 /*
82 * NOTICE: This file was modified by McAfee Research in 2004 to introduce
83 * support for mandatory and extensible security protections. This notice
84 * is included in support of clause 2.2 (b) of the Apple Public License,
85 * Version 2.0.
86 * Copyright (c) 2005 SPARTA, Inc.
87 */
88
89 #include <mach/mach_types.h>
90 #include <mach/boolean.h>
91 #include <mach/host_priv.h>
92 #include <mach/machine/vm_types.h>
93 #include <mach/vm_param.h>
94 #include <mach/mach_vm.h>
95 #include <mach/semaphore.h>
96 #include <mach/task_info.h>
97 #include <mach/task_inspect.h>
98 #include <mach/task_special_ports.h>
99 #include <mach/sdt.h>
100
101 #include <ipc/ipc_importance.h>
102 #include <ipc/ipc_types.h>
103 #include <ipc/ipc_space.h>
104 #include <ipc/ipc_entry.h>
105 #include <ipc/ipc_hash.h>
106
107 #include <kern/kern_types.h>
108 #include <kern/mach_param.h>
109 #include <kern/misc_protos.h>
110 #include <kern/task.h>
111 #include <kern/thread.h>
112 #include <kern/coalition.h>
113 #include <kern/zalloc.h>
114 #include <kern/kalloc.h>
115 #include <kern/kern_cdata.h>
116 #include <kern/processor.h>
117 #include <kern/sched_prim.h> /* for thread_wakeup */
118 #include <kern/ipc_tt.h>
119 #include <kern/host.h>
120 #include <kern/clock.h>
121 #include <kern/timer.h>
122 #include <kern/assert.h>
123 #include <kern/sync_lock.h>
124 #include <kern/affinity.h>
125 #include <kern/exc_resource.h>
126 #include <kern/machine.h>
127 #include <kern/policy_internal.h>
128
129 #include <corpses/task_corpse.h>
130 #if CONFIG_TELEMETRY
131 #include <kern/telemetry.h>
132 #endif
133
134 #if MONOTONIC
135 #include <kern/monotonic.h>
136 #include <machine/monotonic.h>
137 #endif /* MONOTONIC */
138
139 #include <os/log.h>
140
141 #include <vm/pmap.h>
142 #include <vm/vm_map.h>
143 #include <vm/vm_kern.h> /* for kernel_map, ipc_kernel_map */
144 #include <vm/vm_pageout.h>
145 #include <vm/vm_protos.h>
146 #include <vm/vm_purgeable_internal.h>
147
148 #include <sys/resource.h>
149 #include <sys/signalvar.h> /* for coredump */
150
151 /*
152 * Exported interfaces
153 */
154
155 #include <mach/task_server.h>
156 #include <mach/mach_host_server.h>
157 #include <mach/host_security_server.h>
158 #include <mach/mach_port_server.h>
159
160 #include <vm/vm_shared_region.h>
161
162 #include <libkern/OSDebug.h>
163 #include <libkern/OSAtomic.h>
164
165 #if CONFIG_ATM
166 #include <atm/atm_internal.h>
167 #endif
168
169 #include <kern/sfi.h> /* picks up ledger.h */
170
171 #if CONFIG_MACF
172 #include <security/mac_mach_internal.h>
173 #endif
174
175 #if KPERF
176 extern int kpc_force_all_ctrs(task_t, int);
177 #endif
178
179 task_t kernel_task;
180 zone_t task_zone;
181 lck_attr_t task_lck_attr;
182 lck_grp_t task_lck_grp;
183 lck_grp_attr_t task_lck_grp_attr;
184
185 extern int exc_via_corpse_forking;
186 extern int corpse_for_fatal_memkill;
187
188 /* Flag set by core audio when audio is playing. Used to stifle EXC_RESOURCE generation when active. */
189 int audio_active = 0;
190
191 zinfo_usage_store_t tasks_tkm_private;
192 zinfo_usage_store_t tasks_tkm_shared;
193
194 /* A container to accumulate statistics for expired tasks */
195 expired_task_statistics_t dead_task_statistics;
196 lck_spin_t dead_task_statistics_lock;
197
198 ledger_template_t task_ledger_template = NULL;
199
200 struct _task_ledger_indices task_ledgers __attribute__((used)) =
201 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
202 #if !CONFIG_EMBEDDED
203 { 0 /* initialized at runtime */},
204 #endif /* !CONFIG_EMBEDDED */
205 -1, -1,
206 -1, -1,
207 -1, -1,
208 };
209
210 /* System sleep state */
211 boolean_t tasks_suspend_state;
212
213
214 void init_task_ledgers(void);
215 void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1);
216 void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1);
217 void task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1);
218 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void);
219 void __attribute__((noinline)) PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, boolean_t is_fatal);
220 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor);
221
222 kern_return_t task_suspend_internal(task_t);
223 kern_return_t task_resume_internal(task_t);
224 static kern_return_t task_start_halt_locked(task_t task, boolean_t should_mark_corpse);
225
226 extern kern_return_t iokit_task_terminate(task_t task);
227
228 extern kern_return_t exception_deliver(thread_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t, struct exception_action *, lck_mtx_t *);
229 extern void bsd_copythreadname(void *dst_uth, void *src_uth);
230 extern kern_return_t thread_resume(thread_t thread);
231
232 // Warn tasks when they hit 80% of their memory limit.
233 #define PHYS_FOOTPRINT_WARNING_LEVEL 80
234
235 #define TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT 150 /* wakeups per second */
236 #define TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL 300 /* in seconds. */
237
238 /*
239 * Level (in terms of percentage of the limit) at which the wakeups monitor triggers telemetry.
240 *
241 * (ie when the task's wakeups rate exceeds 70% of the limit, start taking user
242 * stacktraces, aka micro-stackshots)
243 */
244 #define TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER 70
245
246 int task_wakeups_monitor_interval; /* In seconds. Time period over which wakeups rate is observed */
247 int task_wakeups_monitor_rate; /* In hz. Maximum allowable wakeups per task before EXC_RESOURCE is sent */
248
249 int task_wakeups_monitor_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
250
251 int disable_exc_resource; /* Global override to supress EXC_RESOURCE for resource monitor violations. */
252
253 ledger_amount_t max_task_footprint = 0; /* Per-task limit on physical memory consumption in bytes */
254 int max_task_footprint_warning_level = 0; /* Per-task limit warning percentage */
255 int max_task_footprint_mb = 0; /* Per-task limit on physical memory consumption in megabytes */
256
257 /* I/O Monitor Limits */
258 #define IOMON_DEFAULT_LIMIT (20480ull) /* MB of logical/physical I/O */
259 #define IOMON_DEFAULT_INTERVAL (86400ull) /* in seconds */
260
261 uint64_t task_iomon_limit_mb; /* Per-task I/O monitor limit in MBs */
262 uint64_t task_iomon_interval_secs; /* Per-task I/O monitor interval in secs */
263
264 #define IO_TELEMETRY_DEFAULT_LIMIT (10ll * 1024ll * 1024ll)
265 int64_t io_telemetry_limit; /* Threshold to take a microstackshot (0 indicated I/O telemetry is turned off) */
266 int64_t global_logical_writes_count = 0; /* Global count for logical writes */
267 static boolean_t global_update_logical_writes(int64_t);
268
269 #if MACH_ASSERT
270 int pmap_ledgers_panic = 1;
271 #endif /* MACH_ASSERT */
272
273 int task_max = CONFIG_TASK_MAX; /* Max number of tasks */
274
275 #if CONFIG_COREDUMP
276 int hwm_user_cores = 0; /* high watermark violations generate user core files */
277 #endif
278
279 #ifdef MACH_BSD
280 extern void proc_getexecutableuuid(void *, unsigned char *, unsigned long);
281 extern int proc_pid(struct proc *p);
282 extern int proc_selfpid(void);
283 extern char *proc_name_address(struct proc *p);
284 extern uint64_t get_dispatchqueue_offset_from_proc(void *);
285 extern int kevent_proc_copy_uptrs(void *proc, uint64_t *buf, int bufsize);
286
287 #if CONFIG_MEMORYSTATUS
288 extern void proc_memstat_terminated(struct proc* p, boolean_t set);
289 extern void memorystatus_on_ledger_footprint_exceeded(int warning, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal);
290 extern void memorystatus_log_exception(const int max_footprint_mb, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal);
291 extern boolean_t memorystatus_allowed_vm_map_fork(__unused task_t task);
292 #endif /* CONFIG_MEMORYSTATUS */
293
294 #endif /* MACH_BSD */
295
296 /* Forwards */
297
298 static void task_hold_locked(task_t task);
299 static void task_wait_locked(task_t task, boolean_t until_not_runnable);
300 static void task_release_locked(task_t task);
301
302 static void task_synchronizer_destroy_all(task_t task);
303
304 void
305 task_backing_store_privileged(
306 task_t task)
307 {
308 task_lock(task);
309 task->priv_flags |= VM_BACKING_STORE_PRIV;
310 task_unlock(task);
311 return;
312 }
313
314
315 void
316 task_set_64bit(
317 task_t task,
318 boolean_t is64bit)
319 {
320 #if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
321 thread_t thread;
322 #endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */
323
324 task_lock(task);
325
326 if (is64bit) {
327 if (task_has_64BitAddr(task))
328 goto out;
329 task_set_64BitAddr(task);
330 } else {
331 if ( !task_has_64BitAddr(task))
332 goto out;
333 task_clear_64BitAddr(task);
334 }
335 /* FIXME: On x86, the thread save state flavor can diverge from the
336 * task's 64-bit feature flag due to the 32-bit/64-bit register save
337 * state dichotomy. Since we can be pre-empted in this interval,
338 * certain routines may observe the thread as being in an inconsistent
339 * state with respect to its task's 64-bitness.
340 */
341
342 #if defined(__x86_64__) || defined(__arm64__)
343 queue_iterate(&task->threads, thread, thread_t, task_threads) {
344 thread_mtx_lock(thread);
345 machine_thread_switch_addrmode(thread);
346 thread_mtx_unlock(thread);
347
348 #if defined(__arm64__)
349 /* specifically, if running on H9 */
350 if (thread == current_thread()) {
351 uint64_t arg1, arg2;
352 int urgency;
353 spl_t spl = splsched();
354 /*
355 * This call tell that the current thread changed it's 32bitness.
356 * Other thread were no more on core when 32bitness was changed,
357 * but current_thread() is on core and the previous call to
358 * machine_thread_going_on_core() gave 32bitness which is now wrong.
359 *
360 * This is needed for bring-up, a different callback should be used
361 * in the future.
362 *
363 * TODO: Remove this callout when we no longer support 32-bit code on H9
364 */
365 thread_lock(thread);
366 urgency = thread_get_urgency(thread, &arg1, &arg2);
367 machine_thread_going_on_core(thread, urgency, 0, 0, mach_approximate_time());
368 thread_unlock(thread);
369 splx(spl);
370 }
371 #endif /* defined(__arm64__) */
372 }
373 #endif /* defined(__x86_64__) || defined(__arm64__) */
374
375 out:
376 task_unlock(task);
377 }
378
379 void
380 task_set_platform_binary(
381 task_t task,
382 boolean_t is_platform)
383 {
384 task_lock(task);
385 if (is_platform) {
386 task->t_flags |= TF_PLATFORM;
387 } else {
388 task->t_flags &= ~(TF_PLATFORM);
389 }
390 task_unlock(task);
391 }
392
393 void
394 task_set_dyld_info(
395 task_t task,
396 mach_vm_address_t addr,
397 mach_vm_size_t size)
398 {
399 task_lock(task);
400 task->all_image_info_addr = addr;
401 task->all_image_info_size = size;
402 task_unlock(task);
403 }
404
405 void
406 task_atm_reset(__unused task_t task) {
407
408 #if CONFIG_ATM
409 if (task->atm_context != NULL) {
410 atm_task_descriptor_destroy(task->atm_context);
411 task->atm_context = NULL;
412 }
413 #endif
414
415 }
416
417 void
418 task_bank_reset(__unused task_t task) {
419
420 if (task->bank_context != NULL) {
421 bank_task_destroy(task);
422 }
423 }
424
425 /*
426 * NOTE: This should only be called when the P_LINTRANSIT
427 * flag is set (the proc_trans lock is held) on the
428 * proc associated with the task.
429 */
430 void
431 task_bank_init(__unused task_t task) {
432
433 if (task->bank_context != NULL) {
434 panic("Task bank init called with non null bank context for task: %p and bank_context: %p", task, task->bank_context);
435 }
436 bank_task_initialize(task);
437 }
438
439 void
440 task_set_did_exec_flag(task_t task)
441 {
442 task->t_procflags |= TPF_DID_EXEC;
443 }
444
445 void
446 task_clear_exec_copy_flag(task_t task)
447 {
448 task->t_procflags &= ~TPF_EXEC_COPY;
449 }
450
451 /*
452 * This wait event is t_procflags instead of t_flags because t_flags is volatile
453 *
454 * TODO: store the flags in the same place as the event
455 * rdar://problem/28501994
456 */
457 event_t
458 task_get_return_wait_event(task_t task)
459 {
460 return (event_t)&task->t_procflags;
461 }
462
463 void
464 task_clear_return_wait(task_t task)
465 {
466 task_lock(task);
467
468 task->t_flags &= ~TF_LRETURNWAIT;
469
470 if (task->t_flags & TF_LRETURNWAITER) {
471 thread_wakeup(task_get_return_wait_event(task));
472 task->t_flags &= ~TF_LRETURNWAITER;
473 }
474
475 task_unlock(task);
476 }
477
478 void
479 task_wait_to_return(void)
480 {
481 task_t task;
482
483 task = current_task();
484 task_lock(task);
485
486 if (task->t_flags & TF_LRETURNWAIT) {
487 do {
488 task->t_flags |= TF_LRETURNWAITER;
489 assert_wait(task_get_return_wait_event(task), THREAD_UNINT);
490 task_unlock(task);
491
492 thread_block(THREAD_CONTINUE_NULL);
493
494 task_lock(task);
495 } while (task->t_flags & TF_LRETURNWAIT);
496 }
497
498 task_unlock(task);
499
500 thread_bootstrap_return();
501 }
502
503 boolean_t
504 task_is_exec_copy(task_t task)
505 {
506 return task_is_exec_copy_internal(task);
507 }
508
509 boolean_t
510 task_did_exec(task_t task)
511 {
512 return task_did_exec_internal(task);
513 }
514
515 boolean_t
516 task_is_active(task_t task)
517 {
518 return task->active;
519 }
520
521 boolean_t
522 task_is_halting(task_t task)
523 {
524 return task->halting;
525 }
526
#if TASK_REFERENCE_LEAK_DEBUG
#include <kern/btlog.h>

/* Backtrace log of task reference operations; created in task_init(). */
static btlog_t *task_ref_btlog;
#define TASK_REF_OP_INCR	0x1
#define TASK_REF_OP_DECR	0x2

#define TASK_REF_NUM_RECORDS	100000
#define TASK_REF_BTDEPTH	7

/*
 * Take a reference on the task and log the caller's backtrace as an
 * INCR operation.
 */
void
task_reference_internal(task_t task)
{
	void *frames[TASK_REF_BTDEPTH];
	int nframes = OSBacktrace(frames, TASK_REF_BTDEPTH);

	(void)hw_atomic_add(&(task)->ref_count, 1);
	btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_INCR,
			frames, nframes);
}

/*
 * Log the caller's backtrace as a DECR operation, then drop one reference
 * on the task. Returns the value produced by hw_atomic_sub().
 */
uint32_t
task_deallocate_internal(task_t task)
{
	void *frames[TASK_REF_BTDEPTH];
	int nframes = OSBacktrace(frames, TASK_REF_BTDEPTH);

	btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_DECR,
			frames, nframes);
	return hw_atomic_sub(&(task)->ref_count, 1);
}

#endif /* TASK_REFERENCE_LEAK_DEBUG */
564
565 void
566 task_init(void)
567 {
568
569 lck_grp_attr_setdefault(&task_lck_grp_attr);
570 lck_grp_init(&task_lck_grp, "task", &task_lck_grp_attr);
571 lck_attr_setdefault(&task_lck_attr);
572 lck_mtx_init(&tasks_threads_lock, &task_lck_grp, &task_lck_attr);
573 lck_mtx_init(&tasks_corpse_lock, &task_lck_grp, &task_lck_attr);
574
575 task_zone = zinit(
576 sizeof(struct task),
577 task_max * sizeof(struct task),
578 TASK_CHUNK * sizeof(struct task),
579 "tasks");
580
581 zone_change(task_zone, Z_NOENCRYPT, TRUE);
582
583 #if CONFIG_EMBEDDED
584 task_watch_init();
585 #endif /* CONFIG_EMBEDDED */
586
587 /*
588 * Configure per-task memory limit.
589 * The boot-arg is interpreted as Megabytes,
590 * and takes precedence over the device tree.
591 * Setting the boot-arg to 0 disables task limits.
592 */
593 if (!PE_parse_boot_argn("max_task_pmem", &max_task_footprint_mb,
594 sizeof (max_task_footprint_mb))) {
595 /*
596 * No limit was found in boot-args, so go look in the device tree.
597 */
598 if (!PE_get_default("kern.max_task_pmem", &max_task_footprint_mb,
599 sizeof(max_task_footprint_mb))) {
600 /*
601 * No limit was found in device tree.
602 */
603 max_task_footprint_mb = 0;
604 }
605 }
606
607 if (max_task_footprint_mb != 0) {
608 #if CONFIG_MEMORYSTATUS
609 if (max_task_footprint_mb < 50) {
610 printf("Warning: max_task_pmem %d below minimum.\n",
611 max_task_footprint_mb);
612 max_task_footprint_mb = 50;
613 }
614 printf("Limiting task physical memory footprint to %d MB\n",
615 max_task_footprint_mb);
616
617 max_task_footprint = (ledger_amount_t)max_task_footprint_mb * 1024 * 1024; // Convert MB to bytes
618
619 /*
620 * Configure the per-task memory limit warning level.
621 * This is computed as a percentage.
622 */
623 max_task_footprint_warning_level = 0;
624
625 if (max_mem < 0x40000000) {
626 /*
627 * On devices with < 1GB of memory:
628 * -- set warnings to 50MB below the per-task limit.
629 */
630 if (max_task_footprint_mb > 50) {
631 max_task_footprint_warning_level = ((max_task_footprint_mb - 50) * 100) / max_task_footprint_mb;
632 }
633 } else {
634 /*
635 * On devices with >= 1GB of memory:
636 * -- set warnings to 100MB below the per-task limit.
637 */
638 if (max_task_footprint_mb > 100) {
639 max_task_footprint_warning_level = ((max_task_footprint_mb - 100) * 100) / max_task_footprint_mb;
640 }
641 }
642
643 /*
644 * Never allow warning level to land below the default.
645 */
646 if (max_task_footprint_warning_level < PHYS_FOOTPRINT_WARNING_LEVEL) {
647 max_task_footprint_warning_level = PHYS_FOOTPRINT_WARNING_LEVEL;
648 }
649
650 printf("Limiting task physical memory warning to %d%%\n", max_task_footprint_warning_level);
651
652 #else
653 printf("Warning: max_task_pmem specified, but jetsam not configured; ignoring.\n");
654 #endif /* CONFIG_MEMORYSTATUS */
655 }
656
657 #if MACH_ASSERT
658 PE_parse_boot_argn("pmap_ledgers_panic", &pmap_ledgers_panic,
659 sizeof (pmap_ledgers_panic));
660 #endif /* MACH_ASSERT */
661
662 #if CONFIG_COREDUMP
663 if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores,
664 sizeof (hwm_user_cores))) {
665 hwm_user_cores = 0;
666 }
667 #endif
668
669 proc_init_cpumon_params();
670
671 if (!PE_parse_boot_argn("task_wakeups_monitor_rate", &task_wakeups_monitor_rate, sizeof (task_wakeups_monitor_rate))) {
672 task_wakeups_monitor_rate = TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT;
673 }
674
675 if (!PE_parse_boot_argn("task_wakeups_monitor_interval", &task_wakeups_monitor_interval, sizeof (task_wakeups_monitor_interval))) {
676 task_wakeups_monitor_interval = TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL;
677 }
678
679 if (!PE_parse_boot_argn("task_wakeups_monitor_ustackshots_trigger_pct", &task_wakeups_monitor_ustackshots_trigger_pct,
680 sizeof (task_wakeups_monitor_ustackshots_trigger_pct))) {
681 task_wakeups_monitor_ustackshots_trigger_pct = TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER;
682 }
683
684 if (!PE_parse_boot_argn("disable_exc_resource", &disable_exc_resource,
685 sizeof (disable_exc_resource))) {
686 disable_exc_resource = 0;
687 }
688
689 if (!PE_parse_boot_argn("task_iomon_limit_mb", &task_iomon_limit_mb, sizeof (task_iomon_limit_mb))) {
690 task_iomon_limit_mb = IOMON_DEFAULT_LIMIT;
691 }
692
693 if (!PE_parse_boot_argn("task_iomon_interval_secs", &task_iomon_interval_secs, sizeof (task_iomon_interval_secs))) {
694 task_iomon_interval_secs = IOMON_DEFAULT_INTERVAL;
695 }
696
697 if (!PE_parse_boot_argn("io_telemetry_limit", &io_telemetry_limit, sizeof (io_telemetry_limit))) {
698 io_telemetry_limit = IO_TELEMETRY_DEFAULT_LIMIT;
699 }
700
701 /*
702 * If we have coalitions, coalition_init() will call init_task_ledgers() as it
703 * sets up the ledgers for the default coalition. If we don't have coalitions,
704 * then we have to call it now.
705 */
706 #if CONFIG_COALITIONS
707 assert(task_ledger_template);
708 #else /* CONFIG_COALITIONS */
709 init_task_ledgers();
710 #endif /* CONFIG_COALITIONS */
711
712 #if TASK_REFERENCE_LEAK_DEBUG
713 task_ref_btlog = btlog_create(TASK_REF_NUM_RECORDS, TASK_REF_BTDEPTH, TRUE /* caller_will_remove_entries_for_element? */);
714 assert(task_ref_btlog);
715 #endif
716
717 /*
718 * Create the kernel task as the first task.
719 */
720 #ifdef __LP64__
721 if (task_create_internal(TASK_NULL, NULL, FALSE, TRUE, TF_NONE, TPF_NONE, &kernel_task) != KERN_SUCCESS)
722 #else
723 if (task_create_internal(TASK_NULL, NULL, FALSE, FALSE, TF_NONE, TPF_NONE, &kernel_task) != KERN_SUCCESS)
724 #endif
725 panic("task_init\n");
726
727
728 vm_map_deallocate(kernel_task->map);
729 kernel_task->map = kernel_map;
730 lck_spin_init(&dead_task_statistics_lock, &task_lck_grp, &task_lck_attr);
731 }
732
733 /*
734 * Create a task running in the kernel address space. It may
735 * have its own map of size mem_size and may have ipc privileges.
736 */
737 kern_return_t
738 kernel_task_create(
739 __unused task_t parent_task,
740 __unused vm_offset_t map_base,
741 __unused vm_size_t map_size,
742 __unused task_t *child_task)
743 {
744 return (KERN_INVALID_ARGUMENT);
745 }
746
747 kern_return_t
748 task_create(
749 task_t parent_task,
750 __unused ledger_port_array_t ledger_ports,
751 __unused mach_msg_type_number_t num_ledger_ports,
752 __unused boolean_t inherit_memory,
753 __unused task_t *child_task) /* OUT */
754 {
755 if (parent_task == TASK_NULL)
756 return(KERN_INVALID_ARGUMENT);
757
758 /*
759 * No longer supported: too many calls assume that a task has a valid
760 * process attached.
761 */
762 return(KERN_FAILURE);
763 }
764
765 kern_return_t
766 host_security_create_task_token(
767 host_security_t host_security,
768 task_t parent_task,
769 __unused security_token_t sec_token,
770 __unused audit_token_t audit_token,
771 __unused host_priv_t host_priv,
772 __unused ledger_port_array_t ledger_ports,
773 __unused mach_msg_type_number_t num_ledger_ports,
774 __unused boolean_t inherit_memory,
775 __unused task_t *child_task) /* OUT */
776 {
777 if (parent_task == TASK_NULL)
778 return(KERN_INVALID_ARGUMENT);
779
780 if (host_security == HOST_NULL)
781 return(KERN_INVALID_SECURITY);
782
783 /*
784 * No longer supported.
785 */
786 return(KERN_FAILURE);
787 }
788
789 /*
790 * Task ledgers
791 * ------------
792 *
793 * phys_footprint
794 * Physical footprint: This is the sum of:
795 * + (internal - alternate_accounting)
796 * + (internal_compressed - alternate_accounting_compressed)
797 * + iokit_mapped
798 * + purgeable_nonvolatile
799 * + purgeable_nonvolatile_compressed
800 * + page_table
801 *
802 * internal
803 * The task's anonymous memory, which on iOS is always resident.
804 *
805 * internal_compressed
806 * Amount of this task's internal memory which is held by the compressor.
807 * Such memory is no longer actually resident for the task [i.e., resident in its pmap],
808 * and could be either decompressed back into memory, or paged out to storage, depending
809 * on our implementation.
810 *
811 * iokit_mapped
812 * IOKit mappings: The total size of all IOKit mappings in this task, regardless of
813 clean/dirty or internal/external state].
814 *
815 * alternate_accounting
816 * The number of internal dirty pages which are part of IOKit mappings. By definition, these pages
817 * are counted in both internal *and* iokit_mapped, so we must subtract them from the total to avoid
818 * double counting.
819 */
820 void
821 init_task_ledgers(void)
822 {
823 ledger_template_t t;
824
825 assert(task_ledger_template == NULL);
826 assert(kernel_task == TASK_NULL);
827
828 #if MACH_ASSERT
829 PE_parse_boot_argn("pmap_ledgers_panic", &pmap_ledgers_panic,
830 sizeof (pmap_ledgers_panic));
831 #endif /* MACH_ASSERT */
832
833 if ((t = ledger_template_create("Per-task ledger")) == NULL)
834 panic("couldn't create task ledger template");
835
836 task_ledgers.cpu_time = ledger_entry_add(t, "cpu_time", "sched", "ns");
837 task_ledgers.tkm_private = ledger_entry_add(t, "tkm_private",
838 "physmem", "bytes");
839 task_ledgers.tkm_shared = ledger_entry_add(t, "tkm_shared", "physmem",
840 "bytes");
841 task_ledgers.phys_mem = ledger_entry_add(t, "phys_mem", "physmem",
842 "bytes");
843 task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem",
844 "bytes");
845 task_ledgers.internal = ledger_entry_add(t, "internal", "physmem",
846 "bytes");
847 task_ledgers.iokit_mapped = ledger_entry_add(t, "iokit_mapped", "mappings",
848 "bytes");
849 task_ledgers.alternate_accounting = ledger_entry_add(t, "alternate_accounting", "physmem",
850 "bytes");
851 task_ledgers.alternate_accounting_compressed = ledger_entry_add(t, "alternate_accounting_compressed", "physmem",
852 "bytes");
853 task_ledgers.page_table = ledger_entry_add(t, "page_table", "physmem",
854 "bytes");
855 task_ledgers.phys_footprint = ledger_entry_add(t, "phys_footprint", "physmem",
856 "bytes");
857 task_ledgers.internal_compressed = ledger_entry_add(t, "internal_compressed", "physmem",
858 "bytes");
859 task_ledgers.purgeable_volatile = ledger_entry_add(t, "purgeable_volatile", "physmem", "bytes");
860 task_ledgers.purgeable_nonvolatile = ledger_entry_add(t, "purgeable_nonvolatile", "physmem", "bytes");
861 task_ledgers.purgeable_volatile_compressed = ledger_entry_add(t, "purgeable_volatile_compress", "physmem", "bytes");
862 task_ledgers.purgeable_nonvolatile_compressed = ledger_entry_add(t, "purgeable_nonvolatile_compress", "physmem", "bytes");
863 task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power",
864 "count");
865 task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power",
866 "count");
867
868 #if CONFIG_SCHED_SFI
869 sfi_class_id_t class_id, ledger_alias;
870 for (class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
871 task_ledgers.sfi_wait_times[class_id] = -1;
872 }
873
874 /* don't account for UNSPECIFIED */
875 for (class_id = SFI_CLASS_UNSPECIFIED + 1; class_id < MAX_SFI_CLASS_ID; class_id++) {
876 ledger_alias = sfi_get_ledger_alias_for_class(class_id);
877 if (ledger_alias != SFI_CLASS_UNSPECIFIED) {
878 /* Check to see if alias has been registered yet */
879 if (task_ledgers.sfi_wait_times[ledger_alias] != -1) {
880 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias];
881 } else {
882 /* Otherwise, initialize it first */
883 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias] = sfi_ledger_entry_add(t, ledger_alias);
884 }
885 } else {
886 task_ledgers.sfi_wait_times[class_id] = sfi_ledger_entry_add(t, class_id);
887 }
888
889 if (task_ledgers.sfi_wait_times[class_id] < 0) {
890 panic("couldn't create entries for task ledger template for SFI class 0x%x", class_id);
891 }
892 }
893
894 assert(task_ledgers.sfi_wait_times[MAX_SFI_CLASS_ID -1] != -1);
895 #endif /* CONFIG_SCHED_SFI */
896
897 task_ledgers.cpu_time_billed_to_me = ledger_entry_add(t, "cpu_time_billed_to_me", "sched", "ns");
898 task_ledgers.cpu_time_billed_to_others = ledger_entry_add(t, "cpu_time_billed_to_others", "sched", "ns");
899 task_ledgers.physical_writes = ledger_entry_add(t, "physical_writes", "res", "bytes");
900 task_ledgers.logical_writes = ledger_entry_add(t, "logical_writes", "res", "bytes");
901 task_ledgers.energy_billed_to_me = ledger_entry_add(t, "energy_billed_to_me", "power", "nj");
902 task_ledgers.energy_billed_to_others = ledger_entry_add(t, "energy_billed_to_others", "power", "nj");
903
904 if ((task_ledgers.cpu_time < 0) ||
905 (task_ledgers.tkm_private < 0) ||
906 (task_ledgers.tkm_shared < 0) ||
907 (task_ledgers.phys_mem < 0) ||
908 (task_ledgers.wired_mem < 0) ||
909 (task_ledgers.internal < 0) ||
910 (task_ledgers.iokit_mapped < 0) ||
911 (task_ledgers.alternate_accounting < 0) ||
912 (task_ledgers.alternate_accounting_compressed < 0) ||
913 (task_ledgers.page_table < 0) ||
914 (task_ledgers.phys_footprint < 0) ||
915 (task_ledgers.internal_compressed < 0) ||
916 (task_ledgers.purgeable_volatile < 0) ||
917 (task_ledgers.purgeable_nonvolatile < 0) ||
918 (task_ledgers.purgeable_volatile_compressed < 0) ||
919 (task_ledgers.purgeable_nonvolatile_compressed < 0) ||
920 (task_ledgers.platform_idle_wakeups < 0) ||
921 (task_ledgers.interrupt_wakeups < 0) ||
922 (task_ledgers.cpu_time_billed_to_me < 0) || (task_ledgers.cpu_time_billed_to_others < 0) ||
923 (task_ledgers.physical_writes < 0) ||
924 (task_ledgers.logical_writes < 0) ||
925 (task_ledgers.energy_billed_to_me < 0) ||
926 (task_ledgers.energy_billed_to_others < 0)
927 ) {
928 panic("couldn't create entries for task ledger template");
929 }
930
931 ledger_track_credit_only(t, task_ledgers.phys_footprint);
932 ledger_track_credit_only(t, task_ledgers.page_table);
933 ledger_track_credit_only(t, task_ledgers.internal);
934 ledger_track_credit_only(t, task_ledgers.internal_compressed);
935 ledger_track_credit_only(t, task_ledgers.iokit_mapped);
936 ledger_track_credit_only(t, task_ledgers.alternate_accounting);
937 ledger_track_credit_only(t, task_ledgers.alternate_accounting_compressed);
938 ledger_track_credit_only(t, task_ledgers.purgeable_volatile);
939 ledger_track_credit_only(t, task_ledgers.purgeable_nonvolatile);
940 ledger_track_credit_only(t, task_ledgers.purgeable_volatile_compressed);
941 ledger_track_credit_only(t, task_ledgers.purgeable_nonvolatile_compressed);
942
943 ledger_track_maximum(t, task_ledgers.phys_footprint, 60);
944 #if MACH_ASSERT
945 if (pmap_ledgers_panic) {
946 ledger_panic_on_negative(t, task_ledgers.phys_footprint);
947 ledger_panic_on_negative(t, task_ledgers.page_table);
948 ledger_panic_on_negative(t, task_ledgers.internal);
949 ledger_panic_on_negative(t, task_ledgers.internal_compressed);
950 ledger_panic_on_negative(t, task_ledgers.iokit_mapped);
951 ledger_panic_on_negative(t, task_ledgers.alternate_accounting);
952 ledger_panic_on_negative(t, task_ledgers.alternate_accounting_compressed);
953 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile);
954 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile);
955 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile_compressed);
956 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile_compressed);
957 }
958 #endif /* MACH_ASSERT */
959
960 #if CONFIG_MEMORYSTATUS
961 ledger_set_callback(t, task_ledgers.phys_footprint, task_footprint_exceeded, NULL, NULL);
962 #endif /* CONFIG_MEMORYSTATUS */
963
964 ledger_set_callback(t, task_ledgers.interrupt_wakeups,
965 task_wakeups_rate_exceeded, NULL, NULL);
966 ledger_set_callback(t, task_ledgers.physical_writes, task_io_rate_exceeded, (void *)FLAVOR_IO_PHYSICAL_WRITES, NULL);
967 ledger_set_callback(t, task_ledgers.logical_writes, task_io_rate_exceeded, (void *)FLAVOR_IO_LOGICAL_WRITES, NULL);
968
969 ledger_template_complete(t);
970 task_ledger_template = t;
971 }
972
973 kern_return_t
974 task_create_internal(
975 task_t parent_task,
976 coalition_t *parent_coalitions __unused,
977 boolean_t inherit_memory,
978 __unused boolean_t is_64bit,
979 uint32_t t_flags,
980 uint32_t t_procflags,
981 task_t *child_task) /* OUT */
982 {
983 task_t new_task;
984 vm_shared_region_t shared_region;
985 ledger_t ledger = NULL;
986
987 new_task = (task_t) zalloc(task_zone);
988
989 if (new_task == TASK_NULL)
990 return(KERN_RESOURCE_SHORTAGE);
991
992 /* one ref for just being alive; one for our caller */
993 new_task->ref_count = 2;
994
995 /* allocate with active entries */
996 assert(task_ledger_template != NULL);
997 if ((ledger = ledger_instantiate(task_ledger_template,
998 LEDGER_CREATE_ACTIVE_ENTRIES)) == NULL) {
999 zfree(task_zone, new_task);
1000 return(KERN_RESOURCE_SHORTAGE);
1001 }
1002
1003
1004 new_task->ledger = ledger;
1005
1006 #if defined(CONFIG_SCHED_MULTIQ)
1007 new_task->sched_group = sched_group_create();
1008 #endif
1009
1010 /* if inherit_memory is true, parent_task MUST not be NULL */
1011 if (!(t_flags & TF_CORPSE_FORK) && inherit_memory)
1012 new_task->map = vm_map_fork(ledger, parent_task->map, 0);
1013 else
1014 new_task->map = vm_map_create(pmap_create(ledger, 0, is_64bit),
1015 (vm_map_offset_t)(VM_MIN_ADDRESS),
1016 (vm_map_offset_t)(VM_MAX_ADDRESS), TRUE);
1017
1018 /* Inherit memlock limit from parent */
1019 if (parent_task)
1020 vm_map_set_user_wire_limit(new_task->map, (vm_size_t)parent_task->map->user_wire_limit);
1021
1022 lck_mtx_init(&new_task->lock, &task_lck_grp, &task_lck_attr);
1023 queue_init(&new_task->threads);
1024 new_task->suspend_count = 0;
1025 new_task->thread_count = 0;
1026 new_task->active_thread_count = 0;
1027 new_task->user_stop_count = 0;
1028 new_task->legacy_stop_count = 0;
1029 new_task->active = TRUE;
1030 new_task->halting = FALSE;
1031 new_task->user_data = NULL;
1032 new_task->priv_flags = 0;
1033 new_task->t_flags = t_flags;
1034 new_task->t_procflags = t_procflags;
1035 new_task->importance = 0;
1036 new_task->crashed_thread_id = 0;
1037 new_task->exec_token = 0;
1038
1039 #if CONFIG_ATM
1040 new_task->atm_context = NULL;
1041 #endif
1042 new_task->bank_context = NULL;
1043
1044 #ifdef MACH_BSD
1045 new_task->bsd_info = NULL;
1046 new_task->corpse_info = NULL;
1047 #endif /* MACH_BSD */
1048
1049 #if CONFIG_MACF
1050 new_task->crash_label = NULL;
1051 #endif
1052
1053 #if CONFIG_MEMORYSTATUS
1054 if (max_task_footprint != 0) {
1055 ledger_set_limit(ledger, task_ledgers.phys_footprint, max_task_footprint, PHYS_FOOTPRINT_WARNING_LEVEL);
1056 }
1057 #endif /* CONFIG_MEMORYSTATUS */
1058
1059 if (task_wakeups_monitor_rate != 0) {
1060 uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
1061 int32_t rate; // Ignored because of WAKEMON_SET_DEFAULTS
1062 task_wakeups_monitor_ctl(new_task, &flags, &rate);
1063 }
1064
1065 #if CONFIG_IO_ACCOUNTING
1066 uint32_t flags = IOMON_ENABLE;
1067 task_io_monitor_ctl(new_task, &flags);
1068 #endif /* CONFIG_IO_ACCOUNTING */
1069
1070 machine_task_init(new_task, parent_task, inherit_memory);
1071
1072 new_task->task_debug = NULL;
1073
1074 #if DEVELOPMENT || DEBUG
1075 new_task->task_unnested = FALSE;
1076 new_task->task_disconnected_count = 0;
1077 #endif
1078 queue_init(&new_task->semaphore_list);
1079 new_task->semaphores_owned = 0;
1080
1081 ipc_task_init(new_task, parent_task);
1082
1083 new_task->vtimers = 0;
1084
1085 new_task->shared_region = NULL;
1086
1087 new_task->affinity_space = NULL;
1088
1089 new_task->t_chud = 0;
1090
1091 new_task->pidsuspended = FALSE;
1092 new_task->frozen = FALSE;
1093 new_task->changing_freeze_state = FALSE;
1094 new_task->rusage_cpu_flags = 0;
1095 new_task->rusage_cpu_percentage = 0;
1096 new_task->rusage_cpu_interval = 0;
1097 new_task->rusage_cpu_deadline = 0;
1098 new_task->rusage_cpu_callt = NULL;
1099 #if MACH_ASSERT
1100 new_task->suspends_outstanding = 0;
1101 #endif
1102
1103 #if HYPERVISOR
1104 new_task->hv_task_target = NULL;
1105 #endif /* HYPERVISOR */
1106
1107 #if CONFIG_EMBEDDED
1108 queue_init(&new_task->task_watchers);
1109 new_task->num_taskwatchers = 0;
1110 new_task->watchapplying = 0;
1111 #endif /* CONFIG_EMBEDDED */
1112
1113 new_task->mem_notify_reserved = 0;
1114 new_task->memlimit_attrs_reserved = 0;
1115 #if IMPORTANCE_INHERITANCE
1116 new_task->task_imp_base = NULL;
1117 #endif /* IMPORTANCE_INHERITANCE */
1118
1119 new_task->requested_policy = default_task_requested_policy;
1120 new_task->effective_policy = default_task_effective_policy;
1121
1122 if (parent_task != TASK_NULL) {
1123 new_task->sec_token = parent_task->sec_token;
1124 new_task->audit_token = parent_task->audit_token;
1125
1126 /* inherit the parent's shared region */
1127 shared_region = vm_shared_region_get(parent_task);
1128 vm_shared_region_set(new_task, shared_region);
1129
1130 if(task_has_64BitAddr(parent_task))
1131 task_set_64BitAddr(new_task);
1132 new_task->all_image_info_addr = parent_task->all_image_info_addr;
1133 new_task->all_image_info_size = parent_task->all_image_info_size;
1134
1135 if (inherit_memory && parent_task->affinity_space)
1136 task_affinity_create(parent_task, new_task);
1137
1138 new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task);
1139
1140 #if IMPORTANCE_INHERITANCE
1141 ipc_importance_task_t new_task_imp = IIT_NULL;
1142 boolean_t inherit_receive = TRUE;
1143
1144 if (task_is_marked_importance_donor(parent_task)) {
1145 new_task_imp = ipc_importance_for_task(new_task, FALSE);
1146 assert(IIT_NULL != new_task_imp);
1147 ipc_importance_task_mark_donor(new_task_imp, TRUE);
1148 }
1149 #if CONFIG_EMBEDDED
1150 /* Embedded only wants to inherit for exec copy task */
1151 if ((t_procflags & TPF_EXEC_COPY) == 0) {
1152 inherit_receive = FALSE;
1153 }
1154 #endif /* CONFIG_EMBEDDED */
1155
1156 if (inherit_receive) {
1157 if (task_is_marked_importance_receiver(parent_task)) {
1158 if (IIT_NULL == new_task_imp)
1159 new_task_imp = ipc_importance_for_task(new_task, FALSE);
1160 assert(IIT_NULL != new_task_imp);
1161 ipc_importance_task_mark_receiver(new_task_imp, TRUE);
1162 }
1163 if (task_is_marked_importance_denap_receiver(parent_task)) {
1164 if (IIT_NULL == new_task_imp)
1165 new_task_imp = ipc_importance_for_task(new_task, FALSE);
1166 assert(IIT_NULL != new_task_imp);
1167 ipc_importance_task_mark_denap_receiver(new_task_imp, TRUE);
1168 }
1169 }
1170
1171 if (IIT_NULL != new_task_imp) {
1172 assert(new_task->task_imp_base == new_task_imp);
1173 ipc_importance_task_release(new_task_imp);
1174 }
1175 #endif /* IMPORTANCE_INHERITANCE */
1176
1177 new_task->priority = BASEPRI_DEFAULT;
1178 new_task->max_priority = MAXPRI_USER;
1179
1180 task_policy_create(new_task, parent_task);
1181 } else {
1182 new_task->sec_token = KERNEL_SECURITY_TOKEN;
1183 new_task->audit_token = KERNEL_AUDIT_TOKEN;
1184 #ifdef __LP64__
1185 if(is_64bit)
1186 task_set_64BitAddr(new_task);
1187 #endif
1188 new_task->all_image_info_addr = (mach_vm_address_t)0;
1189 new_task->all_image_info_size = (mach_vm_size_t)0;
1190
1191 new_task->pset_hint = PROCESSOR_SET_NULL;
1192
1193 if (kernel_task == TASK_NULL) {
1194 new_task->priority = BASEPRI_KERNEL;
1195 new_task->max_priority = MAXPRI_KERNEL;
1196 } else {
1197 new_task->priority = BASEPRI_DEFAULT;
1198 new_task->max_priority = MAXPRI_USER;
1199 }
1200 }
1201
1202 bzero(new_task->coalition, sizeof(new_task->coalition));
1203 for (int i = 0; i < COALITION_NUM_TYPES; i++)
1204 queue_chain_init(new_task->task_coalition[i]);
1205
1206 /* Allocate I/O Statistics */
1207 new_task->task_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info));
1208 assert(new_task->task_io_stats != NULL);
1209 bzero(new_task->task_io_stats, sizeof(struct io_stat_info));
1210
1211 bzero(&(new_task->cpu_time_qos_stats), sizeof(struct _cpu_time_qos_stats));
1212
1213 bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics));
1214
1215 /* Copy resource acc. info from Parent for Corpe Forked task. */
1216 if (parent_task != NULL && (t_flags & TF_CORPSE_FORK)) {
1217 task_rollup_accounting_info(new_task, parent_task);
1218 } else {
1219 /* Initialize to zero for standard fork/spawn case */
1220 new_task->total_user_time = 0;
1221 new_task->total_system_time = 0;
1222 new_task->total_ptime = 0;
1223 new_task->faults = 0;
1224 new_task->pageins = 0;
1225 new_task->cow_faults = 0;
1226 new_task->messages_sent = 0;
1227 new_task->messages_received = 0;
1228 new_task->syscalls_mach = 0;
1229 new_task->syscalls_unix = 0;
1230 new_task->c_switch = 0;
1231 new_task->p_switch = 0;
1232 new_task->ps_switch = 0;
1233 new_task->low_mem_notified_warn = 0;
1234 new_task->low_mem_notified_critical = 0;
1235 new_task->purged_memory_warn = 0;
1236 new_task->purged_memory_critical = 0;
1237 new_task->low_mem_privileged_listener = 0;
1238 new_task->memlimit_is_active = 0;
1239 new_task->memlimit_is_fatal = 0;
1240 new_task->memlimit_active_exc_resource = 0;
1241 new_task->memlimit_inactive_exc_resource = 0;
1242 new_task->task_timer_wakeups_bin_1 = 0;
1243 new_task->task_timer_wakeups_bin_2 = 0;
1244 new_task->task_gpu_ns = 0;
1245 new_task->task_immediate_writes = 0;
1246 new_task->task_deferred_writes = 0;
1247 new_task->task_invalidated_writes = 0;
1248 new_task->task_metadata_writes = 0;
1249 new_task->task_energy = 0;
1250 #if MONOTONIC
1251 memset(&new_task->task_monotonic, 0, sizeof(new_task->task_monotonic));
1252 #endif /* MONOTONIC */
1253 }
1254
1255
1256 #if CONFIG_COALITIONS
1257 if (!(t_flags & TF_CORPSE_FORK)) {
1258 /* TODO: there is no graceful failure path here... */
1259 if (parent_coalitions && parent_coalitions[COALITION_TYPE_RESOURCE]) {
1260 coalitions_adopt_task(parent_coalitions, new_task);
1261 } else if (parent_task && parent_task->coalition[COALITION_TYPE_RESOURCE]) {
1262 /*
1263 * all tasks at least have a resource coalition, so
1264 * if the parent has one then inherit all coalitions
1265 * the parent is a part of
1266 */
1267 coalitions_adopt_task(parent_task->coalition, new_task);
1268 } else {
1269 /* TODO: assert that new_task will be PID 1 (launchd) */
1270 coalitions_adopt_init_task(new_task);
1271 }
1272 /*
1273 * on exec, we need to transfer the coalition roles from the
1274 * parent task to the exec copy task.
1275 */
1276 if (parent_task && (t_procflags & TPF_EXEC_COPY)) {
1277 int coal_roles[COALITION_NUM_TYPES];
1278 task_coalition_roles(parent_task, coal_roles);
1279 (void)coalitions_set_roles(new_task->coalition, new_task, coal_roles);
1280 }
1281 } else {
1282 coalitions_adopt_corpse_task(new_task);
1283 }
1284
1285 if (new_task->coalition[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
1286 panic("created task is not a member of a resource coalition");
1287 }
1288 #endif /* CONFIG_COALITIONS */
1289
1290 new_task->dispatchqueue_offset = 0;
1291 if (parent_task != NULL) {
1292 new_task->dispatchqueue_offset = parent_task->dispatchqueue_offset;
1293 }
1294
1295 if (vm_backing_store_low && parent_task != NULL)
1296 new_task->priv_flags |= (parent_task->priv_flags&VM_BACKING_STORE_PRIV);
1297
1298 new_task->task_volatile_objects = 0;
1299 new_task->task_nonvolatile_objects = 0;
1300 new_task->task_purgeable_disowning = FALSE;
1301 new_task->task_purgeable_disowned = FALSE;
1302
1303 #if CONFIG_SECLUDED_MEMORY
1304 new_task->task_can_use_secluded_mem = FALSE;
1305 new_task->task_could_use_secluded_mem = FALSE;
1306 new_task->task_could_also_use_secluded_mem = FALSE;
1307 #endif /* CONFIG_SECLUDED_MEMORY */
1308
1309 queue_init(&new_task->io_user_clients);
1310
1311 ipc_task_enable(new_task);
1312
1313 lck_mtx_lock(&tasks_threads_lock);
1314 queue_enter(&tasks, new_task, task_t, tasks);
1315 tasks_count++;
1316 if (tasks_suspend_state) {
1317 task_suspend_internal(new_task);
1318 }
1319 lck_mtx_unlock(&tasks_threads_lock);
1320
1321 *child_task = new_task;
1322 return(KERN_SUCCESS);
1323 }
1324
1325 /*
1326 * task_rollup_accounting_info
1327 *
1328 * Roll up accounting stats. Used to rollup stats
1329 * for exec copy task and corpse fork.
1330 */
1331 void
1332 task_rollup_accounting_info(task_t to_task, task_t from_task)
1333 {
1334 assert(from_task != to_task);
1335
1336 to_task->total_user_time = from_task->total_user_time;
1337 to_task->total_system_time = from_task->total_system_time;
1338 to_task->total_ptime = from_task->total_ptime;
1339 to_task->faults = from_task->faults;
1340 to_task->pageins = from_task->pageins;
1341 to_task->cow_faults = from_task->cow_faults;
1342 to_task->messages_sent = from_task->messages_sent;
1343 to_task->messages_received = from_task->messages_received;
1344 to_task->syscalls_mach = from_task->syscalls_mach;
1345 to_task->syscalls_unix = from_task->syscalls_unix;
1346 to_task->c_switch = from_task->c_switch;
1347 to_task->p_switch = from_task->p_switch;
1348 to_task->ps_switch = from_task->ps_switch;
1349 to_task->extmod_statistics = from_task->extmod_statistics;
1350 to_task->low_mem_notified_warn = from_task->low_mem_notified_warn;
1351 to_task->low_mem_notified_critical = from_task->low_mem_notified_critical;
1352 to_task->purged_memory_warn = from_task->purged_memory_warn;
1353 to_task->purged_memory_critical = from_task->purged_memory_critical;
1354 to_task->low_mem_privileged_listener = from_task->low_mem_privileged_listener;
1355 *to_task->task_io_stats = *from_task->task_io_stats;
1356 to_task->cpu_time_qos_stats = from_task->cpu_time_qos_stats;
1357 to_task->task_timer_wakeups_bin_1 = from_task->task_timer_wakeups_bin_1;
1358 to_task->task_timer_wakeups_bin_2 = from_task->task_timer_wakeups_bin_2;
1359 to_task->task_gpu_ns = from_task->task_gpu_ns;
1360 to_task->task_immediate_writes = from_task->task_immediate_writes;
1361 to_task->task_deferred_writes = from_task->task_deferred_writes;
1362 to_task->task_invalidated_writes = from_task->task_invalidated_writes;
1363 to_task->task_metadata_writes = from_task->task_metadata_writes;
1364 to_task->task_energy = from_task->task_energy;
1365
1366 /* Skip ledger roll up for memory accounting entries */
1367 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time);
1368 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.platform_idle_wakeups);
1369 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.interrupt_wakeups);
1370 #if CONFIG_SCHED_SFI
1371 for (sfi_class_id_t class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
1372 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.sfi_wait_times[class_id]);
1373 }
1374 #endif
1375 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_me);
1376 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_others);
1377 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.physical_writes);
1378 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.logical_writes);
1379 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_me);
1380 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_others);
1381 }
1382
1383 int task_dropped_imp_count = 0;
1384
1385 /*
1386 * task_deallocate:
1387 *
1388 * Drop a reference on a task.
1389 */
1390 void
1391 task_deallocate(
1392 task_t task)
1393 {
1394 ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups;
1395 uint32_t refs;
1396
1397 if (task == TASK_NULL)
1398 return;
1399
1400 refs = task_deallocate_internal(task);
1401
1402 #if IMPORTANCE_INHERITANCE
1403 if (refs > 1)
1404 return;
1405
1406 atomic_load_explicit(&task->ref_count, memory_order_acquire);
1407
1408 if (refs == 1) {
1409 /*
1410 * If last ref potentially comes from the task's importance,
1411 * disconnect it. But more task refs may be added before
1412 * that completes, so wait for the reference to go to zero
1413 * naturually (it may happen on a recursive task_deallocate()
1414 * from the ipc_importance_disconnect_task() call).
1415 */
1416 if (IIT_NULL != task->task_imp_base)
1417 ipc_importance_disconnect_task(task);
1418 return;
1419 }
1420 #else
1421 if (refs > 0)
1422 return;
1423
1424 atomic_load_explicit(&task->ref_count, memory_order_acquire);
1425
1426 #endif /* IMPORTANCE_INHERITANCE */
1427
1428 lck_mtx_lock(&tasks_threads_lock);
1429 queue_remove(&terminated_tasks, task, task_t, tasks);
1430 terminated_tasks_count--;
1431 lck_mtx_unlock(&tasks_threads_lock);
1432
1433 /*
1434 * remove the reference on atm descriptor
1435 */
1436 task_atm_reset(task);
1437
1438 /*
1439 * remove the reference on bank context
1440 */
1441 task_bank_reset(task);
1442
1443 if (task->task_io_stats)
1444 kfree(task->task_io_stats, sizeof(struct io_stat_info));
1445
1446 /*
1447 * Give the machine dependent code a chance
1448 * to perform cleanup before ripping apart
1449 * the task.
1450 */
1451 machine_task_terminate(task);
1452
1453 ipc_task_terminate(task);
1454
1455 /* let iokit know */
1456 iokit_task_terminate(task);
1457
1458 if (task->affinity_space)
1459 task_affinity_deallocate(task);
1460
1461 #if MACH_ASSERT
1462 if (task->ledger != NULL &&
1463 task->map != NULL &&
1464 task->map->pmap != NULL &&
1465 task->map->pmap->ledger != NULL) {
1466 assert(task->ledger == task->map->pmap->ledger);
1467 }
1468 #endif /* MACH_ASSERT */
1469
1470 vm_purgeable_disown(task);
1471 assert(task->task_purgeable_disowned);
1472 if (task->task_volatile_objects != 0 ||
1473 task->task_nonvolatile_objects != 0) {
1474 panic("task_deallocate(%p): "
1475 "volatile_objects=%d nonvolatile_objects=%d\n",
1476 task,
1477 task->task_volatile_objects,
1478 task->task_nonvolatile_objects);
1479 }
1480
1481 vm_map_deallocate(task->map);
1482 is_release(task->itk_space);
1483
1484 ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
1485 &interrupt_wakeups, &debit);
1486 ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
1487 &platform_idle_wakeups, &debit);
1488
1489 #if defined(CONFIG_SCHED_MULTIQ)
1490 sched_group_destroy(task->sched_group);
1491 #endif
1492
1493 /* Accumulate statistics for dead tasks */
1494 lck_spin_lock(&dead_task_statistics_lock);
1495 dead_task_statistics.total_user_time += task->total_user_time;
1496 dead_task_statistics.total_system_time += task->total_system_time;
1497
1498 dead_task_statistics.task_interrupt_wakeups += interrupt_wakeups;
1499 dead_task_statistics.task_platform_idle_wakeups += platform_idle_wakeups;
1500
1501 dead_task_statistics.task_timer_wakeups_bin_1 += task->task_timer_wakeups_bin_1;
1502 dead_task_statistics.task_timer_wakeups_bin_2 += task->task_timer_wakeups_bin_2;
1503 dead_task_statistics.total_ptime += task->total_ptime;
1504 dead_task_statistics.total_pset_switches += task->ps_switch;
1505 dead_task_statistics.task_gpu_ns += task->task_gpu_ns;
1506 dead_task_statistics.task_energy += task->task_energy;
1507
1508 lck_spin_unlock(&dead_task_statistics_lock);
1509 lck_mtx_destroy(&task->lock, &task_lck_grp);
1510
1511 if (!ledger_get_entries(task->ledger, task_ledgers.tkm_private, &credit,
1512 &debit)) {
1513 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc);
1514 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_private.free);
1515 }
1516 if (!ledger_get_entries(task->ledger, task_ledgers.tkm_shared, &credit,
1517 &debit)) {
1518 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_shared.alloc);
1519 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_shared.free);
1520 }
1521 ledger_dereference(task->ledger);
1522
1523 #if TASK_REFERENCE_LEAK_DEBUG
1524 btlog_remove_entries_for_element(task_ref_btlog, task);
1525 #endif
1526
1527 #if CONFIG_COALITIONS
1528 task_release_coalitions(task);
1529 #endif /* CONFIG_COALITIONS */
1530
1531 bzero(task->coalition, sizeof(task->coalition));
1532
1533 #if MACH_BSD
1534 /* clean up collected information since last reference to task is gone */
1535 if (task->corpse_info) {
1536 void *corpse_info_kernel = kcdata_memory_get_begin_addr(task->corpse_info);
1537 task_crashinfo_destroy(task->corpse_info);
1538 task->corpse_info = NULL;
1539 if (corpse_info_kernel) {
1540 kfree(corpse_info_kernel, CORPSEINFO_ALLOCATION_SIZE);
1541 }
1542 }
1543 #endif
1544
1545 #if CONFIG_MACF
1546 if (task->crash_label) {
1547 mac_exc_free_label(task->crash_label);
1548 task->crash_label = NULL;
1549 }
1550 #endif
1551
1552 zfree(task_zone, task);
1553 }
1554
1555 /*
1556 * task_name_deallocate:
1557 *
1558 * Drop a reference on a task name.
1559 */
1560 void
1561 task_name_deallocate(
1562 task_name_t task_name)
1563 {
1564 return(task_deallocate((task_t)task_name));
1565 }
1566
1567 /*
1568 * task_inspect_deallocate:
1569 *
1570 * Drop a task inspection reference.
1571 */
1572 void
1573 task_inspect_deallocate(
1574 task_inspect_t task_inspect)
1575 {
1576 return(task_deallocate((task_t)task_inspect));
1577 }
1578
1579 /*
1580 * task_suspension_token_deallocate:
1581 *
1582 * Drop a reference on a task suspension token.
1583 */
1584 void
1585 task_suspension_token_deallocate(
1586 task_suspension_token_t token)
1587 {
1588 return(task_deallocate((task_t)token));
1589 }
1590
1591
1592 /*
1593 * task_collect_crash_info:
1594 *
1595 * collect crash info from bsd and mach based data
1596 */
1597 kern_return_t
1598 task_collect_crash_info(
1599 task_t task,
1600 #ifdef CONFIG_MACF
1601 struct label *crash_label,
1602 #endif
1603 int is_corpse_fork)
1604 {
1605 kern_return_t kr = KERN_SUCCESS;
1606
1607 kcdata_descriptor_t crash_data = NULL;
1608 kcdata_descriptor_t crash_data_release = NULL;
1609 mach_msg_type_number_t size = CORPSEINFO_ALLOCATION_SIZE;
1610 mach_vm_offset_t crash_data_ptr = 0;
1611 void *crash_data_kernel = NULL;
1612 void *crash_data_kernel_release = NULL;
1613 #if CONFIG_MACF
1614 struct label *label, *free_label;
1615 #endif
1616
1617 if (!corpses_enabled()) {
1618 return KERN_NOT_SUPPORTED;
1619 }
1620
1621 #if CONFIG_MACF
1622 free_label = label = mac_exc_create_label();
1623 #endif
1624
1625 task_lock(task);
1626
1627 assert(is_corpse_fork || task->bsd_info != NULL);
1628 if (task->corpse_info == NULL && (is_corpse_fork || task->bsd_info != NULL)) {
1629 #if CONFIG_MACF
1630 /* Set the crash label, used by the exception delivery mac hook */
1631 free_label = task->crash_label; // Most likely NULL.
1632 task->crash_label = label;
1633 mac_exc_update_task_crash_label(task, crash_label);
1634 #endif
1635 task_unlock(task);
1636
1637 crash_data_kernel = (void *) kalloc(CORPSEINFO_ALLOCATION_SIZE);
1638 if (crash_data_kernel == NULL) {
1639 kr = KERN_RESOURCE_SHORTAGE;
1640 goto out_no_lock;
1641 }
1642 bzero(crash_data_kernel, CORPSEINFO_ALLOCATION_SIZE);
1643 crash_data_ptr = (mach_vm_offset_t) crash_data_kernel;
1644
1645 /* Do not get a corpse ref for corpse fork */
1646 crash_data = task_crashinfo_alloc_init((mach_vm_address_t)crash_data_ptr, size,
1647 is_corpse_fork ? 0 : CORPSE_CRASHINFO_HAS_REF,
1648 KCFLAG_USE_MEMCOPY);
1649 if (crash_data) {
1650 task_lock(task);
1651 crash_data_release = task->corpse_info;
1652 crash_data_kernel_release = kcdata_memory_get_begin_addr(crash_data_release);
1653 task->corpse_info = crash_data;
1654
1655 task_unlock(task);
1656 kr = KERN_SUCCESS;
1657 } else {
1658 kfree(crash_data_kernel, CORPSEINFO_ALLOCATION_SIZE);
1659 kr = KERN_FAILURE;
1660 }
1661
1662 if (crash_data_release != NULL) {
1663 task_crashinfo_destroy(crash_data_release);
1664 }
1665 if (crash_data_kernel_release != NULL) {
1666 kfree(crash_data_kernel_release, CORPSEINFO_ALLOCATION_SIZE);
1667 }
1668 } else {
1669 task_unlock(task);
1670 }
1671
1672 out_no_lock:
1673 #if CONFIG_MACF
1674 if (free_label != NULL) {
1675 mac_exc_free_label(free_label);
1676 }
1677 #endif
1678 return kr;
1679 }
1680
1681 /*
1682 * task_deliver_crash_notification:
1683 *
1684 * Makes outcall to registered host port for a corpse.
1685 */
1686 kern_return_t
1687 task_deliver_crash_notification(
1688 task_t task,
1689 thread_t thread,
1690 exception_type_t etype,
1691 mach_exception_subcode_t subcode)
1692 {
1693 kcdata_descriptor_t crash_info = task->corpse_info;
1694 thread_t th_iter = NULL;
1695 kern_return_t kr = KERN_SUCCESS;
1696 wait_interrupt_t wsave;
1697 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
1698 ipc_port_t task_port, old_notify;
1699
1700 if (crash_info == NULL)
1701 return KERN_FAILURE;
1702
1703 task_lock(task);
1704 if (task_is_a_corpse_fork(task)) {
1705 /* Populate code with EXC_{RESOURCE,GUARD} for corpse fork */
1706 code[0] = etype;
1707 code[1] = subcode;
1708 } else {
1709 /* Populate code with EXC_CRASH for corpses */
1710 code[0] = EXC_CRASH;
1711 code[1] = 0;
1712 /* Update the code[1] if the boot-arg corpse_for_fatal_memkill is set */
1713 if (corpse_for_fatal_memkill) {
1714 code[1] = subcode;
1715 }
1716 }
1717
1718 queue_iterate(&task->threads, th_iter, thread_t, task_threads)
1719 {
1720 if (th_iter->corpse_dup == FALSE) {
1721 ipc_thread_reset(th_iter);
1722 }
1723 }
1724 task_unlock(task);
1725
1726 /* Arm the no-sender notification for taskport */
1727 task_reference(task);
1728 task_port = convert_task_to_port(task);
1729 ip_lock(task_port);
1730 assert(ip_active(task_port));
1731 ipc_port_nsrequest(task_port, task_port->ip_mscount, ipc_port_make_sonce_locked(task_port), &old_notify);
1732 /* port unlocked */
1733 assert(IP_NULL == old_notify);
1734
1735 wsave = thread_interrupt_level(THREAD_UNINT);
1736 kr = exception_triage_thread(EXC_CORPSE_NOTIFY, code, EXCEPTION_CODE_MAX, thread);
1737 if (kr != KERN_SUCCESS) {
1738 printf("Failed to send exception EXC_CORPSE_NOTIFY. error code: %d for pid %d\n", kr, task_pid(task));
1739 }
1740
1741 (void)thread_interrupt_level(wsave);
1742
1743 /*
1744 * Drop the send right on task port, will fire the
1745 * no-sender notification if exception deliver failed.
1746 */
1747 ipc_port_release_send(task_port);
1748 return kr;
1749 }
1750
1751 /*
1752 * task_terminate:
1753 *
1754 * Terminate the specified task. See comments on thread_terminate
1755 * (kern/thread.c) about problems with terminating the "current task."
1756 */
1757
1758 kern_return_t
1759 task_terminate(
1760 task_t task)
1761 {
1762 if (task == TASK_NULL)
1763 return (KERN_INVALID_ARGUMENT);
1764
1765 if (task->bsd_info)
1766 return (KERN_FAILURE);
1767
1768 return (task_terminate_internal(task));
1769 }
1770
1771 #if MACH_ASSERT
1772 extern int proc_pid(struct proc *);
1773 extern void proc_name_kdp(task_t t, char *buf, int size);
1774 #endif /* MACH_ASSERT */
1775
1776 #define VM_MAP_PARTIAL_REAP 0x54 /* 0x150 */
1777 static void
1778 __unused task_partial_reap(task_t task, __unused int pid)
1779 {
1780 unsigned int reclaimed_resident = 0;
1781 unsigned int reclaimed_compressed = 0;
1782 uint64_t task_page_count;
1783
1784 task_page_count = (get_task_phys_footprint(task) / PAGE_SIZE_64);
1785
1786 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_START),
1787 pid, task_page_count, 0, 0, 0);
1788
1789 vm_map_partial_reap(task->map, &reclaimed_resident, &reclaimed_compressed);
1790
1791 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_END),
1792 pid, reclaimed_resident, reclaimed_compressed, 0, 0);
1793 }
1794
1795 kern_return_t
1796 task_mark_corpse(task_t task)
1797 {
1798 kern_return_t kr = KERN_SUCCESS;
1799 thread_t self_thread;
1800 (void) self_thread;
1801 wait_interrupt_t wsave;
1802 #if CONFIG_MACF
1803 struct label *crash_label = NULL;
1804 #endif
1805
1806 assert(task != kernel_task);
1807 assert(task == current_task());
1808 assert(!task_is_a_corpse(task));
1809
1810 #if CONFIG_MACF
1811 crash_label = mac_exc_create_label_for_proc((struct proc*)task->bsd_info);
1812 #endif
1813
1814 kr = task_collect_crash_info(task,
1815 #if CONFIG_MACF
1816 crash_label,
1817 #endif
1818 FALSE);
1819 if (kr != KERN_SUCCESS) {
1820 goto out;
1821 }
1822
1823 self_thread = current_thread();
1824
1825 wsave = thread_interrupt_level(THREAD_UNINT);
1826 task_lock(task);
1827
1828 task_set_corpse_pending_report(task);
1829 task_set_corpse(task);
1830 task->crashed_thread_id = thread_tid(self_thread);
1831
1832 kr = task_start_halt_locked(task, TRUE);
1833 assert(kr == KERN_SUCCESS);
1834
1835 ipc_task_reset(task);
1836 /* Remove the naked send right for task port, needed to arm no sender notification */
1837 task_set_special_port(task, TASK_KERNEL_PORT, IPC_PORT_NULL);
1838 ipc_task_enable(task);
1839
1840 task_unlock(task);
1841 /* terminate the ipc space */
1842 ipc_space_terminate(task->itk_space);
1843
1844 /* Add it to global corpse task list */
1845 task_add_to_corpse_task_list(task);
1846
1847 task_start_halt(task);
1848 thread_terminate_internal(self_thread);
1849
1850 (void) thread_interrupt_level(wsave);
1851 assert(task->halting == TRUE);
1852
1853 out:
1854 #if CONFIG_MACF
1855 mac_exc_free_label(crash_label);
1856 #endif
1857 return kr;
1858 }
1859
1860 /*
1861 * task_clear_corpse
1862 *
1863 * Clears the corpse pending bit on task.
1864 * Removes inspection bit on the threads.
1865 */
1866 void
1867 task_clear_corpse(task_t task)
1868 {
1869 thread_t th_iter = NULL;
1870
1871 task_lock(task);
1872 queue_iterate(&task->threads, th_iter, thread_t, task_threads)
1873 {
1874 thread_mtx_lock(th_iter);
1875 th_iter->inspection = FALSE;
1876 thread_mtx_unlock(th_iter);
1877 }
1878
1879 thread_terminate_crashed_threads();
1880 /* remove the pending corpse report flag */
1881 task_clear_corpse_pending_report(task);
1882
1883 task_unlock(task);
1884 }
1885
1886 /*
1887 * task_port_notify
1888 *
1889 * Called whenever the Mach port system detects no-senders on
1890 * the task port of a corpse.
1891 * Each notification that comes in should terminate the task (corpse).
1892 */
1893 void
1894 task_port_notify(mach_msg_header_t *msg)
1895 {
1896 mach_no_senders_notification_t *notification = (void *)msg;
1897 ipc_port_t port = notification->not_header.msgh_remote_port;
1898 task_t task;
1899
1900 assert(ip_active(port));
1901 assert(IKOT_TASK == ip_kotype(port));
1902 task = (task_t) port->ip_kobject;
1903
1904 assert(task_is_a_corpse(task));
1905
1906 /* Remove the task from global corpse task list */
1907 task_remove_from_corpse_task_list(task);
1908
1909 task_clear_corpse(task);
1910 task_terminate_internal(task);
1911 }
1912
1913 /*
1914 * task_wait_till_threads_terminate_locked
1915 *
1916 * Wait till all the threads in the task are terminated.
1917 * Might release the task lock and re-acquire it.
1918 */
1919 void
1920 task_wait_till_threads_terminate_locked(task_t task)
1921 {
1922 /* wait for all the threads in the task to terminate */
1923 while (task->active_thread_count != 0) {
1924 assert_wait((event_t)&task->active_thread_count, THREAD_UNINT);
1925 task_unlock(task);
1926 thread_block(THREAD_CONTINUE_NULL);
1927
1928 task_lock(task);
1929 }
1930 }
1931
1932 /*
1933 * task_duplicate_map_and_threads
1934 *
1935 * Copy vmmap of source task.
1936 * Copy active threads from source task to destination task.
1937 * Source task would be suspended during the copy.
1938 */
1939 kern_return_t
1940 task_duplicate_map_and_threads(
1941 task_t task,
1942 void *p,
1943 task_t new_task,
1944 thread_t *thread_ret,
1945 uint64_t **udata_buffer,
1946 int *size,
1947 int *num_udata)
1948 {
1949 kern_return_t kr = KERN_SUCCESS;
1950 int active;
1951 thread_t thread, self, thread_return = THREAD_NULL;
1952 thread_t new_thread = THREAD_NULL;
1953 thread_t *thread_array;
1954 uint32_t active_thread_count = 0, array_count = 0, i;
1955 vm_map_t oldmap;
1956 uint64_t *buffer = NULL;
1957 int buf_size = 0;
1958 int est_knotes = 0, num_knotes = 0;
1959
1960 self = current_thread();
1961
1962 /*
1963 * Suspend the task to copy thread state, use the internal
1964 * variant so that no user-space process can resume
1965 * the task from under us
1966 */
1967 kr = task_suspend_internal(task);
1968 if (kr != KERN_SUCCESS) {
1969 return kr;
1970 }
1971
1972 if (task->map->disable_vmentry_reuse == TRUE) {
1973 /*
1974 * Quite likely GuardMalloc (or some debugging tool)
1975 * is being used on this task. And it has gone through
1976 * its limit. Making a corpse will likely encounter
1977 * a lot of VM entries that will need COW.
1978 *
1979 * Skip it.
1980 */
1981 task_resume_internal(task);
1982 return KERN_FAILURE;
1983 }
1984
1985 /* Check with VM if vm_map_fork is allowed for this task */
1986 if (task_allowed_vm_map_fork(task)) {
1987
1988 /* Setup new task's vmmap, switch from parent task's map to it COW map */
1989 oldmap = new_task->map;
1990 new_task->map = vm_map_fork(new_task->ledger,
1991 task->map,
1992 (VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
1993 VM_MAP_FORK_PRESERVE_PURGEABLE));
1994 vm_map_deallocate(oldmap);
1995
1996 /* Get all the udata pointers from kqueue */
1997 est_knotes = kevent_proc_copy_uptrs(p, NULL, 0);
1998 if (est_knotes > 0) {
1999 buf_size = (est_knotes + 32) * sizeof(uint64_t);
2000 buffer = (uint64_t *) kalloc(buf_size);
2001 num_knotes = kevent_proc_copy_uptrs(p, buffer, buf_size);
2002 if (num_knotes > est_knotes + 32) {
2003 num_knotes = est_knotes + 32;
2004 }
2005 }
2006 }
2007
2008 active_thread_count = task->active_thread_count;
2009 if (active_thread_count == 0) {
2010 if (buffer != NULL) {
2011 kfree(buffer, buf_size);
2012 }
2013 task_resume_internal(task);
2014 return KERN_FAILURE;
2015 }
2016
2017 thread_array = (thread_t *) kalloc(sizeof(thread_t) * active_thread_count);
2018
2019 /* Iterate all the threads and drop the task lock before calling thread_create_with_continuation */
2020 task_lock(task);
2021 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2022 /* Skip inactive threads */
2023 active = thread->active;
2024 if (!active) {
2025 continue;
2026 }
2027
2028 if (array_count >= active_thread_count) {
2029 break;
2030 }
2031
2032 thread_array[array_count++] = thread;
2033 thread_reference(thread);
2034 }
2035 task_unlock(task);
2036
2037 for (i = 0; i < array_count; i++) {
2038
2039 kr = thread_create_with_continuation(new_task, &new_thread, (thread_continue_t)thread_corpse_continue);
2040 if (kr != KERN_SUCCESS) {
2041 break;
2042 }
2043
2044 /* Equivalent of current thread in corpse */
2045 if (thread_array[i] == self) {
2046 thread_return = new_thread;
2047 new_task->crashed_thread_id = thread_tid(new_thread);
2048 } else {
2049 /* drop the extra ref returned by thread_create_with_continuation */
2050 thread_deallocate(new_thread);
2051 }
2052
2053 kr = thread_dup2(thread_array[i], new_thread);
2054 if (kr != KERN_SUCCESS) {
2055 thread_mtx_lock(new_thread);
2056 new_thread->corpse_dup = TRUE;
2057 thread_mtx_unlock(new_thread);
2058 continue;
2059 }
2060
2061 /* Copy thread name */
2062 bsd_copythreadname(new_thread->uthread, thread_array[i]->uthread);
2063 thread_copy_resource_info(new_thread, thread_array[i]);
2064 }
2065
2066 task_resume_internal(task);
2067
2068 for (i = 0; i < array_count; i++) {
2069 thread_deallocate(thread_array[i]);
2070 }
2071 kfree(thread_array, sizeof(thread_t) * active_thread_count);
2072
2073 if (kr == KERN_SUCCESS) {
2074 *thread_ret = thread_return;
2075 *udata_buffer = buffer;
2076 *size = buf_size;
2077 *num_udata = num_knotes;
2078 } else {
2079 if (thread_return != THREAD_NULL) {
2080 thread_deallocate(thread_return);
2081 }
2082 if (buffer != NULL) {
2083 kfree(buffer, buf_size);
2084 }
2085 }
2086
2087 return kr;
2088 }
2089
2090 /*
2091 * Place holder function to be filled by VM to return
2092 * TRUE if vm_map_fork is allowed on the given task.
2093 */
2094 boolean_t
2095 task_allowed_vm_map_fork(task_t task __unused)
2096 {
2097 return memorystatus_allowed_vm_map_fork(task);
2098 }
2099
2100 #if CONFIG_SECLUDED_MEMORY
2101 extern void task_set_can_use_secluded_mem_locked(
2102 task_t task,
2103 boolean_t can_use_secluded_mem);
2104 #endif /* CONFIG_SECLUDED_MEMORY */
2105
2106 kern_return_t
2107 task_terminate_internal(
2108 task_t task)
2109 {
2110 thread_t thread, self;
2111 task_t self_task;
2112 boolean_t interrupt_save;
2113 int pid = 0;
2114
2115 assert(task != kernel_task);
2116
2117 self = current_thread();
2118 self_task = self->task;
2119
2120 /*
2121 * Get the task locked and make sure that we are not racing
2122 * with someone else trying to terminate us.
2123 */
2124 if (task == self_task)
2125 task_lock(task);
2126 else
2127 if (task < self_task) {
2128 task_lock(task);
2129 task_lock(self_task);
2130 }
2131 else {
2132 task_lock(self_task);
2133 task_lock(task);
2134 }
2135
2136 #if CONFIG_SECLUDED_MEMORY
2137 if (task->task_can_use_secluded_mem) {
2138 task_set_can_use_secluded_mem_locked(task, FALSE);
2139 }
2140 task->task_could_use_secluded_mem = FALSE;
2141 task->task_could_also_use_secluded_mem = FALSE;
2142 #endif /* CONFIG_SECLUDED_MEMORY */
2143
2144 if (!task->active) {
2145 /*
2146 * Task is already being terminated.
2147 * Just return an error. If we are dying, this will
2148 * just get us to our AST special handler and that
2149 * will get us to finalize the termination of ourselves.
2150 */
2151 task_unlock(task);
2152 if (self_task != task)
2153 task_unlock(self_task);
2154
2155 return (KERN_FAILURE);
2156 }
2157
2158 if (task_corpse_pending_report(task)) {
2159 /*
2160 * Task is marked for reporting as corpse.
2161 * Just return an error. This will
2162 * just get us to our AST special handler and that
2163 * will get us to finish the path to death
2164 */
2165 task_unlock(task);
2166 if (self_task != task)
2167 task_unlock(self_task);
2168
2169 return (KERN_FAILURE);
2170 }
2171
2172 if (self_task != task)
2173 task_unlock(self_task);
2174
2175 /*
2176 * Make sure the current thread does not get aborted out of
2177 * the waits inside these operations.
2178 */
2179 interrupt_save = thread_interrupt_level(THREAD_UNINT);
2180
2181 /*
2182 * Indicate that we want all the threads to stop executing
2183 * at user space by holding the task (we would have held
2184 * each thread independently in thread_terminate_internal -
2185 * but this way we may be more likely to already find it
2186 * held there). Mark the task inactive, and prevent
2187 * further task operations via the task port.
2188 */
2189 task_hold_locked(task);
2190 task->active = FALSE;
2191 ipc_task_disable(task);
2192
2193 #if CONFIG_TELEMETRY
2194 /*
2195 * Notify telemetry that this task is going away.
2196 */
2197 telemetry_task_ctl_locked(task, TF_TELEMETRY, 0);
2198 #endif
2199
2200 /*
2201 * Terminate each thread in the task.
2202 */
2203 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2204 thread_terminate_internal(thread);
2205 }
2206
2207 #ifdef MACH_BSD
2208 if (task->bsd_info != NULL && !task_is_exec_copy(task)) {
2209 pid = proc_pid(task->bsd_info);
2210 }
2211 #endif /* MACH_BSD */
2212
2213 task_unlock(task);
2214
2215 proc_set_task_policy(task, TASK_POLICY_ATTRIBUTE,
2216 TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);
2217
2218 /* Early object reap phase */
2219
2220 // PR-17045188: Revisit implementation
2221 // task_partial_reap(task, pid);
2222
2223 #if CONFIG_EMBEDDED
2224 /*
2225 * remove all task watchers
2226 */
2227 task_removewatchers(task);
2228
2229 #endif /* CONFIG_EMBEDDED */
2230
2231 /*
2232 * Destroy all synchronizers owned by the task.
2233 */
2234 task_synchronizer_destroy_all(task);
2235
2236 /*
2237 * Destroy the IPC space, leaving just a reference for it.
2238 */
2239 ipc_space_terminate(task->itk_space);
2240
2241 #if 00
2242 /* if some ledgers go negative on tear-down again... */
2243 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2244 task_ledgers.phys_footprint);
2245 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2246 task_ledgers.internal);
2247 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2248 task_ledgers.internal_compressed);
2249 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2250 task_ledgers.iokit_mapped);
2251 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2252 task_ledgers.alternate_accounting);
2253 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2254 task_ledgers.alternate_accounting_compressed);
2255 #endif
2256
2257 /*
2258 * If the current thread is a member of the task
2259 * being terminated, then the last reference to
2260 * the task will not be dropped until the thread
2261 * is finally reaped. To avoid incurring the
2262 * expense of removing the address space regions
2263 * at reap time, we do it explictly here.
2264 */
2265
2266 vm_map_lock(task->map);
2267 vm_map_disable_hole_optimization(task->map);
2268 vm_map_unlock(task->map);
2269
2270 #if MACH_ASSERT
2271 /*
2272 * Identify the pmap's process, in case the pmap ledgers drift
2273 * and we have to report it.
2274 */
2275 char procname[17];
2276 if (task->bsd_info && !task_is_exec_copy(task)) {
2277 pid = proc_pid(task->bsd_info);
2278 proc_name_kdp(task, procname, sizeof (procname));
2279 } else {
2280 pid = 0;
2281 strlcpy(procname, "<unknown>", sizeof (procname));
2282 }
2283 pmap_set_process(task->map->pmap, pid, procname);
2284 #endif /* MACH_ASSERT */
2285
2286 vm_map_remove(task->map,
2287 task->map->min_offset,
2288 task->map->max_offset,
2289 /*
2290 * Final cleanup:
2291 * + no unnesting
2292 * + remove immutable mappings
2293 */
2294 (VM_MAP_REMOVE_NO_UNNESTING |
2295 VM_MAP_REMOVE_IMMUTABLE));
2296
2297 /* release our shared region */
2298 vm_shared_region_set(task, NULL);
2299
2300
2301 lck_mtx_lock(&tasks_threads_lock);
2302 queue_remove(&tasks, task, task_t, tasks);
2303 queue_enter(&terminated_tasks, task, task_t, tasks);
2304 tasks_count--;
2305 terminated_tasks_count++;
2306 lck_mtx_unlock(&tasks_threads_lock);
2307
2308 /*
2309 * We no longer need to guard against being aborted, so restore
2310 * the previous interruptible state.
2311 */
2312 thread_interrupt_level(interrupt_save);
2313
2314 #if KPERF
2315 /* force the task to release all ctrs */
2316 if (task->t_chud & TASK_KPC_FORCED_ALL_CTRS)
2317 kpc_force_all_ctrs(task, 0);
2318 #endif
2319
2320 #if CONFIG_COALITIONS
2321 /*
2322 * Leave our coalitions. (drop activation but not reference)
2323 */
2324 coalitions_remove_task(task);
2325 #endif
2326
2327 /*
2328 * Get rid of the task active reference on itself.
2329 */
2330 task_deallocate(task);
2331
2332 return (KERN_SUCCESS);
2333 }
2334
2335 void
2336 tasks_system_suspend(boolean_t suspend)
2337 {
2338 task_t task;
2339
2340 lck_mtx_lock(&tasks_threads_lock);
2341 assert(tasks_suspend_state != suspend);
2342 tasks_suspend_state = suspend;
2343 queue_iterate(&tasks, task, task_t, tasks) {
2344 if (task == kernel_task) {
2345 continue;
2346 }
2347 suspend ? task_suspend_internal(task) : task_resume_internal(task);
2348 }
2349 lck_mtx_unlock(&tasks_threads_lock);
2350 }
2351
2352 /*
2353 * task_start_halt:
2354 *
2355 * Shut the current task down (except for the current thread) in
2356 * preparation for dramatic changes to the task (probably exec).
2357 * We hold the task and mark all other threads in the task for
2358 * termination.
2359 */
2360 kern_return_t
2361 task_start_halt(task_t task)
2362 {
2363 kern_return_t kr = KERN_SUCCESS;
2364 task_lock(task);
2365 kr = task_start_halt_locked(task, FALSE);
2366 task_unlock(task);
2367 return kr;
2368 }
2369
2370 static kern_return_t
2371 task_start_halt_locked(task_t task, boolean_t should_mark_corpse)
2372 {
2373 thread_t thread, self;
2374 uint64_t dispatchqueue_offset;
2375
2376 assert(task != kernel_task);
2377
2378 self = current_thread();
2379
2380 if (task != self->task && !task_is_a_corpse_fork(task))
2381 return (KERN_INVALID_ARGUMENT);
2382
2383 if (task->halting || !task->active || !self->active) {
2384 /*
2385 * Task or current thread is already being terminated.
2386 * Hurry up and return out of the current kernel context
2387 * so that we run our AST special handler to terminate
2388 * ourselves.
2389 */
2390 return (KERN_FAILURE);
2391 }
2392
2393 task->halting = TRUE;
2394
2395 /*
2396 * Mark all the threads to keep them from starting any more
2397 * user-level execution. The thread_terminate_internal code
2398 * would do this on a thread by thread basis anyway, but this
2399 * gives us a better chance of not having to wait there.
2400 */
2401 task_hold_locked(task);
2402 dispatchqueue_offset = get_dispatchqueue_offset_from_proc(task->bsd_info);
2403
2404 /*
2405 * Terminate all the other threads in the task.
2406 */
2407 queue_iterate(&task->threads, thread, thread_t, task_threads)
2408 {
2409 if (should_mark_corpse) {
2410 thread_mtx_lock(thread);
2411 thread->inspection = TRUE;
2412 thread_mtx_unlock(thread);
2413 }
2414 if (thread != self)
2415 thread_terminate_internal(thread);
2416 }
2417 task->dispatchqueue_offset = dispatchqueue_offset;
2418
2419 task_release_locked(task);
2420
2421 return KERN_SUCCESS;
2422 }
2423
2424
2425 /*
2426 * task_complete_halt:
2427 *
2428 * Complete task halt by waiting for threads to terminate, then clean
2429 * up task resources (VM, port namespace, etc...) and then let the
2430 * current thread go in the (practically empty) task context.
2431 *
2432 * Note: task->halting flag is not cleared in order to avoid creation
2433 * of new thread in old exec'ed task.
2434 */
2435 void
2436 task_complete_halt(task_t task)
2437 {
2438 task_lock(task);
2439 assert(task->halting);
2440 assert(task == current_task());
2441
2442 /*
2443 * Wait for the other threads to get shut down.
2444 * When the last other thread is reaped, we'll be
2445 * woken up.
2446 */
2447 if (task->thread_count > 1) {
2448 assert_wait((event_t)&task->halting, THREAD_UNINT);
2449 task_unlock(task);
2450 thread_block(THREAD_CONTINUE_NULL);
2451 } else {
2452 task_unlock(task);
2453 }
2454
2455 /*
2456 * Give the machine dependent code a chance
2457 * to perform cleanup of task-level resources
2458 * associated with the current thread before
2459 * ripping apart the task.
2460 */
2461 machine_task_terminate(task);
2462
2463 /*
2464 * Destroy all synchronizers owned by the task.
2465 */
2466 task_synchronizer_destroy_all(task);
2467
2468 /*
2469 * Destroy the contents of the IPC space, leaving just
2470 * a reference for it.
2471 */
2472 ipc_space_clean(task->itk_space);
2473
2474 /*
2475 * Clean out the address space, as we are going to be
2476 * getting a new one.
2477 */
2478 vm_map_remove(task->map, task->map->min_offset,
2479 task->map->max_offset,
2480 /*
2481 * Final cleanup:
2482 * + no unnesting
2483 * + remove immutable mappings
2484 */
2485 (VM_MAP_REMOVE_NO_UNNESTING |
2486 VM_MAP_REMOVE_IMMUTABLE));
2487
2488 /*
2489 * Kick out any IOKitUser handles to the task. At best they're stale,
2490 * at worst someone is racing a SUID exec.
2491 */
2492 iokit_task_terminate(task);
2493 }
2494
2495 /*
2496 * task_hold_locked:
2497 *
2498 * Suspend execution of the specified task.
2499 * This is a recursive-style suspension of the task, a count of
2500 * suspends is maintained.
2501 *
2502 * CONDITIONS: the task is locked and active.
2503 */
2504 void
2505 task_hold_locked(
2506 task_t task)
2507 {
2508 thread_t thread;
2509
2510 assert(task->active);
2511
2512 if (task->suspend_count++ > 0)
2513 return;
2514
2515 /*
2516 * Iterate through all the threads and hold them.
2517 */
2518 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2519 thread_mtx_lock(thread);
2520 thread_hold(thread);
2521 thread_mtx_unlock(thread);
2522 }
2523 }
2524
2525 /*
2526 * task_hold:
2527 *
2528 * Same as the internal routine above, except that is must lock
2529 * and verify that the task is active. This differs from task_suspend
2530 * in that it places a kernel hold on the task rather than just a
2531 * user-level hold. This keeps users from over resuming and setting
2532 * it running out from under the kernel.
2533 *
2534 * CONDITIONS: the caller holds a reference on the task
2535 */
2536 kern_return_t
2537 task_hold(
2538 task_t task)
2539 {
2540 if (task == TASK_NULL)
2541 return (KERN_INVALID_ARGUMENT);
2542
2543 task_lock(task);
2544
2545 if (!task->active) {
2546 task_unlock(task);
2547
2548 return (KERN_FAILURE);
2549 }
2550
2551 task_hold_locked(task);
2552 task_unlock(task);
2553
2554 return (KERN_SUCCESS);
2555 }
2556
2557 kern_return_t
2558 task_wait(
2559 task_t task,
2560 boolean_t until_not_runnable)
2561 {
2562 if (task == TASK_NULL)
2563 return (KERN_INVALID_ARGUMENT);
2564
2565 task_lock(task);
2566
2567 if (!task->active) {
2568 task_unlock(task);
2569
2570 return (KERN_FAILURE);
2571 }
2572
2573 task_wait_locked(task, until_not_runnable);
2574 task_unlock(task);
2575
2576 return (KERN_SUCCESS);
2577 }
2578
2579 /*
2580 * task_wait_locked:
2581 *
2582 * Wait for all threads in task to stop.
2583 *
2584 * Conditions:
2585 * Called with task locked, active, and held.
2586 */
2587 void
2588 task_wait_locked(
2589 task_t task,
2590 boolean_t until_not_runnable)
2591 {
2592 thread_t thread, self;
2593
2594 assert(task->active);
2595 assert(task->suspend_count > 0);
2596
2597 self = current_thread();
2598
2599 /*
2600 * Iterate through all the threads and wait for them to
2601 * stop. Do not wait for the current thread if it is within
2602 * the task.
2603 */
2604 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2605 if (thread != self)
2606 thread_wait(thread, until_not_runnable);
2607 }
2608 }
2609
2610 /*
2611 * task_release_locked:
2612 *
2613 * Release a kernel hold on a task.
2614 *
2615 * CONDITIONS: the task is locked and active
2616 */
2617 void
2618 task_release_locked(
2619 task_t task)
2620 {
2621 thread_t thread;
2622
2623 assert(task->active);
2624 assert(task->suspend_count > 0);
2625
2626 if (--task->suspend_count > 0)
2627 return;
2628
2629 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2630 thread_mtx_lock(thread);
2631 thread_release(thread);
2632 thread_mtx_unlock(thread);
2633 }
2634 }
2635
2636 /*
2637 * task_release:
2638 *
2639 * Same as the internal routine above, except that it must lock
2640 * and verify that the task is active.
2641 *
2642 * CONDITIONS: The caller holds a reference to the task
2643 */
2644 kern_return_t
2645 task_release(
2646 task_t task)
2647 {
2648 if (task == TASK_NULL)
2649 return (KERN_INVALID_ARGUMENT);
2650
2651 task_lock(task);
2652
2653 if (!task->active) {
2654 task_unlock(task);
2655
2656 return (KERN_FAILURE);
2657 }
2658
2659 task_release_locked(task);
2660 task_unlock(task);
2661
2662 return (KERN_SUCCESS);
2663 }
2664
2665 kern_return_t
2666 task_threads(
2667 task_t task,
2668 thread_act_array_t *threads_out,
2669 mach_msg_type_number_t *count)
2670 {
2671 mach_msg_type_number_t actual;
2672 thread_t *thread_list;
2673 thread_t thread;
2674 vm_size_t size, size_needed;
2675 void *addr;
2676 unsigned int i, j;
2677
2678 if (task == TASK_NULL)
2679 return (KERN_INVALID_ARGUMENT);
2680
2681 size = 0; addr = NULL;
2682
2683 for (;;) {
2684 task_lock(task);
2685 if (!task->active) {
2686 task_unlock(task);
2687
2688 if (size != 0)
2689 kfree(addr, size);
2690
2691 return (KERN_FAILURE);
2692 }
2693
2694 actual = task->thread_count;
2695
2696 /* do we have the memory we need? */
2697 size_needed = actual * sizeof (mach_port_t);
2698 if (size_needed <= size)
2699 break;
2700
2701 /* unlock the task and allocate more memory */
2702 task_unlock(task);
2703
2704 if (size != 0)
2705 kfree(addr, size);
2706
2707 assert(size_needed > 0);
2708 size = size_needed;
2709
2710 addr = kalloc(size);
2711 if (addr == 0)
2712 return (KERN_RESOURCE_SHORTAGE);
2713 }
2714
2715 /* OK, have memory and the task is locked & active */
2716 thread_list = (thread_t *)addr;
2717
2718 i = j = 0;
2719
2720 for (thread = (thread_t)queue_first(&task->threads); i < actual;
2721 ++i, thread = (thread_t)queue_next(&thread->task_threads)) {
2722 thread_reference_internal(thread);
2723 thread_list[j++] = thread;
2724 }
2725
2726 assert(queue_end(&task->threads, (queue_entry_t)thread));
2727
2728 actual = j;
2729 size_needed = actual * sizeof (mach_port_t);
2730
2731 /* can unlock task now that we've got the thread refs */
2732 task_unlock(task);
2733
2734 if (actual == 0) {
2735 /* no threads, so return null pointer and deallocate memory */
2736
2737 *threads_out = NULL;
2738 *count = 0;
2739
2740 if (size != 0)
2741 kfree(addr, size);
2742 }
2743 else {
2744 /* if we allocated too much, must copy */
2745
2746 if (size_needed < size) {
2747 void *newaddr;
2748
2749 newaddr = kalloc(size_needed);
2750 if (newaddr == 0) {
2751 for (i = 0; i < actual; ++i)
2752 thread_deallocate(thread_list[i]);
2753 kfree(addr, size);
2754 return (KERN_RESOURCE_SHORTAGE);
2755 }
2756
2757 bcopy(addr, newaddr, size_needed);
2758 kfree(addr, size);
2759 thread_list = (thread_t *)newaddr;
2760 }
2761
2762 *threads_out = thread_list;
2763 *count = actual;
2764
2765 /* do the conversion that Mig should handle */
2766
2767 for (i = 0; i < actual; ++i)
2768 ((ipc_port_t *) thread_list)[i] = convert_thread_to_port(thread_list[i]);
2769 }
2770
2771 return (KERN_SUCCESS);
2772 }
2773
2774 #define TASK_HOLD_NORMAL 0
2775 #define TASK_HOLD_PIDSUSPEND 1
2776 #define TASK_HOLD_LEGACY 2
2777 #define TASK_HOLD_LEGACY_ALL 3
2778
2779 static kern_return_t
2780 place_task_hold (
2781 task_t task,
2782 int mode)
2783 {
2784 if (!task->active && !task_is_a_corpse(task)) {
2785 return (KERN_FAILURE);
2786 }
2787
2788 /* Return success for corpse task */
2789 if (task_is_a_corpse(task)) {
2790 return KERN_SUCCESS;
2791 }
2792
2793 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2794 MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_SUSPEND) | DBG_FUNC_NONE,
2795 task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
2796 task->user_stop_count, task->user_stop_count + 1, 0);
2797
2798 #if MACH_ASSERT
2799 current_task()->suspends_outstanding++;
2800 #endif
2801
2802 if (mode == TASK_HOLD_LEGACY)
2803 task->legacy_stop_count++;
2804
2805 if (task->user_stop_count++ > 0) {
2806 /*
2807 * If the stop count was positive, the task is
2808 * already stopped and we can exit.
2809 */
2810 return (KERN_SUCCESS);
2811 }
2812
2813 /*
2814 * Put a kernel-level hold on the threads in the task (all
2815 * user-level task suspensions added together represent a
2816 * single kernel-level hold). We then wait for the threads
2817 * to stop executing user code.
2818 */
2819 task_hold_locked(task);
2820 task_wait_locked(task, FALSE);
2821
2822 return (KERN_SUCCESS);
2823 }
2824
2825 static kern_return_t
2826 release_task_hold (
2827 task_t task,
2828 int mode)
2829 {
2830 boolean_t release = FALSE;
2831
2832 if (!task->active && !task_is_a_corpse(task)) {
2833 return (KERN_FAILURE);
2834 }
2835
2836 /* Return success for corpse task */
2837 if (task_is_a_corpse(task)) {
2838 return KERN_SUCCESS;
2839 }
2840
2841 if (mode == TASK_HOLD_PIDSUSPEND) {
2842 if (task->pidsuspended == FALSE) {
2843 return (KERN_FAILURE);
2844 }
2845 task->pidsuspended = FALSE;
2846 }
2847
2848 if (task->user_stop_count > (task->pidsuspended ? 1 : 0)) {
2849
2850 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2851 MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_RESUME) | DBG_FUNC_NONE,
2852 task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
2853 task->user_stop_count, mode, task->legacy_stop_count);
2854
2855 #if MACH_ASSERT
2856 /*
2857 * This is obviously not robust; if we suspend one task and then resume a different one,
2858 * we'll fly under the radar. This is only meant to catch the common case of a crashed
2859 * or buggy suspender.
2860 */
2861 current_task()->suspends_outstanding--;
2862 #endif
2863
2864 if (mode == TASK_HOLD_LEGACY_ALL) {
2865 if (task->legacy_stop_count >= task->user_stop_count) {
2866 task->user_stop_count = 0;
2867 release = TRUE;
2868 } else {
2869 task->user_stop_count -= task->legacy_stop_count;
2870 }
2871 task->legacy_stop_count = 0;
2872 } else {
2873 if (mode == TASK_HOLD_LEGACY && task->legacy_stop_count > 0)
2874 task->legacy_stop_count--;
2875 if (--task->user_stop_count == 0)
2876 release = TRUE;
2877 }
2878 }
2879 else {
2880 return (KERN_FAILURE);
2881 }
2882
2883 /*
2884 * Release the task if necessary.
2885 */
2886 if (release)
2887 task_release_locked(task);
2888
2889 return (KERN_SUCCESS);
2890 }
2891
2892
2893 /*
2894 * task_suspend:
2895 *
2896 * Implement an (old-fashioned) user-level suspension on a task.
2897 *
2898 * Because the user isn't expecting to have to manage a suspension
2899 * token, we'll track it for him in the kernel in the form of a naked
2900 * send right to the task's resume port. All such send rights
2901 * account for a single suspension against the task (unlike task_suspend2()
2902 * where each caller gets a unique suspension count represented by a
2903 * unique send-once right).
2904 *
2905 * Conditions:
2906 * The caller holds a reference to the task
2907 */
2908 kern_return_t
2909 task_suspend(
2910 task_t task)
2911 {
2912 kern_return_t kr;
2913 mach_port_t port, send, old_notify;
2914 mach_port_name_t name;
2915
2916 if (task == TASK_NULL || task == kernel_task)
2917 return (KERN_INVALID_ARGUMENT);
2918
2919 task_lock(task);
2920
2921 /*
2922 * Claim a send right on the task resume port, and request a no-senders
2923 * notification on that port (if none outstanding).
2924 */
2925 if (task->itk_resume == IP_NULL) {
2926 task->itk_resume = ipc_port_alloc_kernel();
2927 if (!IP_VALID(task->itk_resume))
2928 panic("failed to create resume port");
2929 ipc_kobject_set(task->itk_resume, (ipc_kobject_t)task, IKOT_TASK_RESUME);
2930 }
2931
2932 port = task->itk_resume;
2933 ip_lock(port);
2934 assert(ip_active(port));
2935
2936 send = ipc_port_make_send_locked(port);
2937 assert(IP_VALID(send));
2938
2939 if (port->ip_nsrequest == IP_NULL) {
2940 ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
2941 assert(old_notify == IP_NULL);
2942 /* port unlocked */
2943 } else {
2944 ip_unlock(port);
2945 }
2946
2947 /*
2948 * place a legacy hold on the task.
2949 */
2950 kr = place_task_hold(task, TASK_HOLD_LEGACY);
2951 if (kr != KERN_SUCCESS) {
2952 task_unlock(task);
2953 ipc_port_release_send(send);
2954 return kr;
2955 }
2956
2957 task_unlock(task);
2958
2959 /*
2960 * Copyout the send right into the calling task's IPC space. It won't know it is there,
2961 * but we'll look it up when calling a traditional resume. Any IPC operations that
2962 * deallocate the send right will auto-release the suspension.
2963 */
2964 if ((kr = ipc_kmsg_copyout_object(current_task()->itk_space, (ipc_object_t)send,
2965 MACH_MSG_TYPE_MOVE_SEND, &name)) != KERN_SUCCESS) {
2966 printf("warning: %s(%d) failed to copyout suspension token for pid %d with error: %d\n",
2967 proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
2968 task_pid(task), kr);
2969 return (kr);
2970 }
2971
2972 return (kr);
2973 }
2974
2975 /*
2976 * task_resume:
2977 * Release a user hold on a task.
2978 *
2979 * Conditions:
2980 * The caller holds a reference to the task
2981 */
2982 kern_return_t
2983 task_resume(
2984 task_t task)
2985 {
2986 kern_return_t kr;
2987 mach_port_name_t resume_port_name;
2988 ipc_entry_t resume_port_entry;
2989 ipc_space_t space = current_task()->itk_space;
2990
2991 if (task == TASK_NULL || task == kernel_task )
2992 return (KERN_INVALID_ARGUMENT);
2993
2994 /* release a legacy task hold */
2995 task_lock(task);
2996 kr = release_task_hold(task, TASK_HOLD_LEGACY);
2997 task_unlock(task);
2998
2999 is_write_lock(space);
3000 if (is_active(space) && IP_VALID(task->itk_resume) &&
3001 ipc_hash_lookup(space, (ipc_object_t)task->itk_resume, &resume_port_name, &resume_port_entry) == TRUE) {
3002 /*
3003 * We found a suspension token in the caller's IPC space. Release a send right to indicate that
3004 * we are holding one less legacy hold on the task from this caller. If the release failed,
3005 * go ahead and drop all the rights, as someone either already released our holds or the task
3006 * is gone.
3007 */
3008 if (kr == KERN_SUCCESS)
3009 ipc_right_dealloc(space, resume_port_name, resume_port_entry);
3010 else
3011 ipc_right_destroy(space, resume_port_name, resume_port_entry, FALSE, 0);
3012 /* space unlocked */
3013 } else {
3014 is_write_unlock(space);
3015 if (kr == KERN_SUCCESS)
3016 printf("warning: %s(%d) performed out-of-band resume on pid %d\n",
3017 proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
3018 task_pid(task));
3019 }
3020
3021 return kr;
3022 }
3023
3024 /*
3025 * Suspend the target task.
3026 * Making/holding a token/reference/port is the callers responsibility.
3027 */
3028 kern_return_t
3029 task_suspend_internal(task_t task)
3030 {
3031 kern_return_t kr;
3032
3033 if (task == TASK_NULL || task == kernel_task)
3034 return (KERN_INVALID_ARGUMENT);
3035
3036 task_lock(task);
3037 kr = place_task_hold(task, TASK_HOLD_NORMAL);
3038 task_unlock(task);
3039 return (kr);
3040 }
3041
3042 /*
3043 * Suspend the target task, and return a suspension token. The token
3044 * represents a reference on the suspended task.
3045 */
3046 kern_return_t
3047 task_suspend2(
3048 task_t task,
3049 task_suspension_token_t *suspend_token)
3050 {
3051 kern_return_t kr;
3052
3053 kr = task_suspend_internal(task);
3054 if (kr != KERN_SUCCESS) {
3055 *suspend_token = TASK_NULL;
3056 return (kr);
3057 }
3058
3059 /*
3060 * Take a reference on the target task and return that to the caller
3061 * as a "suspension token," which can be converted into an SO right to
3062 * the now-suspended task's resume port.
3063 */
3064 task_reference_internal(task);
3065 *suspend_token = task;
3066
3067 return (KERN_SUCCESS);
3068 }
3069
3070 /*
3071 * Resume the task
3072 * (reference/token/port management is caller's responsibility).
3073 */
3074 kern_return_t
3075 task_resume_internal(
3076 task_suspension_token_t task)
3077 {
3078 kern_return_t kr;
3079
3080 if (task == TASK_NULL || task == kernel_task)
3081 return (KERN_INVALID_ARGUMENT);
3082
3083 task_lock(task);
3084 kr = release_task_hold(task, TASK_HOLD_NORMAL);
3085 task_unlock(task);
3086 return (kr);
3087 }
3088
3089 /*
3090 * Resume the task using a suspension token. Consumes the token's ref.
3091 */
3092 kern_return_t
3093 task_resume2(
3094 task_suspension_token_t task)
3095 {
3096 kern_return_t kr;
3097
3098 kr = task_resume_internal(task);
3099 task_suspension_token_deallocate(task);
3100
3101 return (kr);
3102 }
3103
3104 boolean_t
3105 task_suspension_notify(mach_msg_header_t *request_header)
3106 {
3107 ipc_port_t port = (ipc_port_t) request_header->msgh_remote_port;
3108 task_t task = convert_port_to_task_suspension_token(port);
3109 mach_msg_type_number_t not_count;
3110
3111 if (task == TASK_NULL || task == kernel_task)
3112 return TRUE; /* nothing to do */
3113
3114 switch (request_header->msgh_id) {
3115
3116 case MACH_NOTIFY_SEND_ONCE:
3117 /* release the hold held by this specific send-once right */
3118 task_lock(task);
3119 release_task_hold(task, TASK_HOLD_NORMAL);
3120 task_unlock(task);
3121 break;
3122
3123 case MACH_NOTIFY_NO_SENDERS:
3124 not_count = ((mach_no_senders_notification_t *)request_header)->not_count;
3125
3126 task_lock(task);
3127 ip_lock(port);
3128 if (port->ip_mscount == not_count) {
3129
3130 /* release all the [remaining] outstanding legacy holds */
3131 assert(port->ip_nsrequest == IP_NULL);
3132 ip_unlock(port);
3133 release_task_hold(task, TASK_HOLD_LEGACY_ALL);
3134 task_unlock(task);
3135
3136 } else if (port->ip_nsrequest == IP_NULL) {
3137 ipc_port_t old_notify;
3138
3139 task_unlock(task);
3140 /* new send rights, re-arm notification at current make-send count */
3141 ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
3142 assert(old_notify == IP_NULL);
3143 /* port unlocked */
3144 } else {
3145 ip_unlock(port);
3146 task_unlock(task);
3147 }
3148 break;
3149
3150 default:
3151 break;
3152 }
3153
3154 task_suspension_token_deallocate(task); /* drop token reference */
3155 return TRUE;
3156 }
3157
3158 kern_return_t
3159 task_pidsuspend_locked(task_t task)
3160 {
3161 kern_return_t kr;
3162
3163 if (task->pidsuspended) {
3164 kr = KERN_FAILURE;
3165 goto out;
3166 }
3167
3168 task->pidsuspended = TRUE;
3169
3170 kr = place_task_hold(task, TASK_HOLD_PIDSUSPEND);
3171 if (kr != KERN_SUCCESS) {
3172 task->pidsuspended = FALSE;
3173 }
3174 out:
3175 return(kr);
3176 }
3177
3178
3179 /*
3180 * task_pidsuspend:
3181 *
3182 * Suspends a task by placing a hold on its threads.
3183 *
3184 * Conditions:
3185 * The caller holds a reference to the task
3186 */
3187 kern_return_t
3188 task_pidsuspend(
3189 task_t task)
3190 {
3191 kern_return_t kr;
3192
3193 if (task == TASK_NULL || task == kernel_task)
3194 return (KERN_INVALID_ARGUMENT);
3195
3196 task_lock(task);
3197
3198 kr = task_pidsuspend_locked(task);
3199
3200 task_unlock(task);
3201
3202 return (kr);
3203 }
3204
3205 /*
3206 * task_pidresume:
3207 * Resumes a previously suspended task.
3208 *
3209 * Conditions:
3210 * The caller holds a reference to the task
3211 */
3212 kern_return_t
3213 task_pidresume(
3214 task_t task)
3215 {
3216 kern_return_t kr;
3217
3218 if (task == TASK_NULL || task == kernel_task)
3219 return (KERN_INVALID_ARGUMENT);
3220
3221 task_lock(task);
3222
3223 #if CONFIG_FREEZE
3224
3225 while (task->changing_freeze_state) {
3226
3227 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
3228 task_unlock(task);
3229 thread_block(THREAD_CONTINUE_NULL);
3230
3231 task_lock(task);
3232 }
3233 task->changing_freeze_state = TRUE;
3234 #endif
3235
3236 kr = release_task_hold(task, TASK_HOLD_PIDSUSPEND);
3237
3238 task_unlock(task);
3239
3240 #if CONFIG_FREEZE
3241
3242 task_lock(task);
3243
3244 if (kr == KERN_SUCCESS)
3245 task->frozen = FALSE;
3246 task->changing_freeze_state = FALSE;
3247 thread_wakeup(&task->changing_freeze_state);
3248
3249 task_unlock(task);
3250 #endif
3251
3252 return (kr);
3253 }
3254
3255
3256 #if DEVELOPMENT || DEBUG
3257
3258 extern void IOSleep(int);
3259
3260 kern_return_t
3261 task_disconnect_page_mappings(task_t task)
3262 {
3263 int n;
3264
3265 if (task == TASK_NULL || task == kernel_task)
3266 return (KERN_INVALID_ARGUMENT);
3267
3268 /*
3269 * this function is used to strip all of the mappings from
3270 * the pmap for the specified task to force the task to
3271 * re-fault all of the pages it is actively using... this
3272 * allows us to approximate the true working set of the
3273 * specified task. We only engage if at least 1 of the
3274 * threads in the task is runnable, but we want to continuously
3275 * sweep (at least for a while - I've arbitrarily set the limit at
3276 * 100 sweeps to be re-looked at as we gain experience) to get a better
3277 * view into what areas within a page are being visited (as opposed to only
3278 * seeing the first fault of a page after the task becomes
3279 * runnable)... in the future I may
3280 * try to block until awakened by a thread in this task
3281 * being made runnable, but for now we'll periodically poll from the
3282 * user level debug tool driving the sysctl
3283 */
3284 for (n = 0; n < 100; n++) {
3285 thread_t thread;
3286 boolean_t runnable;
3287 boolean_t do_unnest;
3288 int page_count;
3289
3290 runnable = FALSE;
3291 do_unnest = FALSE;
3292
3293 task_lock(task);
3294
3295 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3296
3297 if (thread->state & TH_RUN) {
3298 runnable = TRUE;
3299 break;
3300 }
3301 }
3302 if (n == 0)
3303 task->task_disconnected_count++;
3304
3305 if (task->task_unnested == FALSE) {
3306 if (runnable == TRUE) {
3307 task->task_unnested = TRUE;
3308 do_unnest = TRUE;
3309 }
3310 }
3311 task_unlock(task);
3312
3313 if (runnable == FALSE)
3314 break;
3315
3316 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_START,
3317 task, do_unnest, task->task_disconnected_count, 0, 0);
3318
3319 page_count = vm_map_disconnect_page_mappings(task->map, do_unnest);
3320
3321 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_END,
3322 task, page_count, 0, 0, 0);
3323
3324 if ((n % 5) == 4)
3325 IOSleep(1);
3326 }
3327 return (KERN_SUCCESS);
3328 }
3329
3330 #endif
3331
3332
3333 #if CONFIG_FREEZE
3334
3335 /*
3336 * task_freeze:
3337 *
3338 * Freeze a task.
3339 *
3340 * Conditions:
3341 * The caller holds a reference to the task
3342 */
3343 extern void vm_wake_compactor_swapper(void);
3344 extern queue_head_t c_swapout_list_head;
3345
3346 kern_return_t
3347 task_freeze(
3348 task_t task,
3349 uint32_t *purgeable_count,
3350 uint32_t *wired_count,
3351 uint32_t *clean_count,
3352 uint32_t *dirty_count,
3353 uint32_t dirty_budget,
3354 boolean_t *shared,
3355 boolean_t walk_only)
3356 {
3357 kern_return_t kr = KERN_SUCCESS;
3358
3359 if (task == TASK_NULL || task == kernel_task)
3360 return (KERN_INVALID_ARGUMENT);
3361
3362 task_lock(task);
3363
3364 while (task->changing_freeze_state) {
3365
3366 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
3367 task_unlock(task);
3368 thread_block(THREAD_CONTINUE_NULL);
3369
3370 task_lock(task);
3371 }
3372 if (task->frozen) {
3373 task_unlock(task);
3374 return (KERN_FAILURE);
3375 }
3376 task->changing_freeze_state = TRUE;
3377
3378 task_unlock(task);
3379
3380 if (walk_only) {
3381 panic("task_freeze - walk_only == TRUE");
3382 } else {
3383 kr = vm_map_freeze(task->map, purgeable_count, wired_count, clean_count, dirty_count, dirty_budget, shared);
3384 }
3385
3386 task_lock(task);
3387
3388 if (walk_only == FALSE && kr == KERN_SUCCESS)
3389 task->frozen = TRUE;
3390 task->changing_freeze_state = FALSE;
3391 thread_wakeup(&task->changing_freeze_state);
3392
3393 task_unlock(task);
3394
3395 if (VM_CONFIG_COMPRESSOR_IS_PRESENT) {
3396 vm_wake_compactor_swapper();
3397 /*
3398 * We do an explicit wakeup of the swapout thread here
3399 * because the compact_and_swap routines don't have
3400 * knowledge about these kind of "per-task packed c_segs"
3401 * and so will not be evaluating whether we need to do
3402 * a wakeup there.
3403 */
3404 thread_wakeup((event_t)&c_swapout_list_head);
3405 }
3406
3407 return (kr);
3408 }
3409
3410 /*
3411 * task_thaw:
3412 *
3413 * Thaw a currently frozen task.
3414 *
3415 * Conditions:
3416 * The caller holds a reference to the task
3417 */
3418 kern_return_t
3419 task_thaw(
3420 task_t task)
3421 {
3422 if (task == TASK_NULL || task == kernel_task)
3423 return (KERN_INVALID_ARGUMENT);
3424
3425 task_lock(task);
3426
3427 while (task->changing_freeze_state) {
3428
3429 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
3430 task_unlock(task);
3431 thread_block(THREAD_CONTINUE_NULL);
3432
3433 task_lock(task);
3434 }
3435 if (!task->frozen) {
3436 task_unlock(task);
3437 return (KERN_FAILURE);
3438 }
3439 task->frozen = FALSE;
3440
3441 task_unlock(task);
3442
3443 return (KERN_SUCCESS);
3444 }
3445
3446 #endif /* CONFIG_FREEZE */
3447
3448 kern_return_t
3449 host_security_set_task_token(
3450 host_security_t host_security,
3451 task_t task,
3452 security_token_t sec_token,
3453 audit_token_t audit_token,
3454 host_priv_t host_priv)
3455 {
3456 ipc_port_t host_port;
3457 kern_return_t kr;
3458
3459 if (task == TASK_NULL)
3460 return(KERN_INVALID_ARGUMENT);
3461
3462 if (host_security == HOST_NULL)
3463 return(KERN_INVALID_SECURITY);
3464
3465 task_lock(task);
3466 task->sec_token = sec_token;
3467 task->audit_token = audit_token;
3468
3469 task_unlock(task);
3470
3471 if (host_priv != HOST_PRIV_NULL) {
3472 kr = host_get_host_priv_port(host_priv, &host_port);
3473 } else {
3474 kr = host_get_host_port(host_priv_self(), &host_port);
3475 }
3476 assert(kr == KERN_SUCCESS);
3477 kr = task_set_special_port(task, TASK_HOST_PORT, host_port);
3478 return(kr);
3479 }
3480
3481 kern_return_t
3482 task_send_trace_memory(
3483 task_t target_task,
3484 __unused uint32_t pid,
3485 __unused uint64_t uniqueid)
3486 {
3487 kern_return_t kr = KERN_INVALID_ARGUMENT;
3488 if (target_task == TASK_NULL)
3489 return (KERN_INVALID_ARGUMENT);
3490
3491 #if CONFIG_ATM
3492 kr = atm_send_proc_inspect_notification(target_task,
3493 pid,
3494 uniqueid);
3495
3496 #endif
3497 return (kr);
3498 }
3499 /*
3500 * This routine was added, pretty much exclusively, for registering the
3501 * RPC glue vector for in-kernel short circuited tasks. Rather than
3502 * removing it completely, I have only disabled that feature (which was
3503 * the only feature at the time). It just appears that we are going to
3504 * want to add some user data to tasks in the future (i.e. bsd info,
3505 * task names, etc...), so I left it in the formal task interface.
3506 */
3507 kern_return_t
3508 task_set_info(
3509 task_t task,
3510 task_flavor_t flavor,
3511 __unused task_info_t task_info_in, /* pointer to IN array */
3512 __unused mach_msg_type_number_t task_info_count)
3513 {
3514 if (task == TASK_NULL)
3515 return(KERN_INVALID_ARGUMENT);
3516
3517 switch (flavor) {
3518
3519 #if CONFIG_ATM
3520 case TASK_TRACE_MEMORY_INFO:
3521 {
3522 if (task_info_count != TASK_TRACE_MEMORY_INFO_COUNT)
3523 return (KERN_INVALID_ARGUMENT);
3524
3525 assert(task_info_in != NULL);
3526 task_trace_memory_info_t mem_info;
3527 mem_info = (task_trace_memory_info_t) task_info_in;
3528 kern_return_t kr = atm_register_trace_memory(task,
3529 mem_info->user_memory_address,
3530 mem_info->buffer_size);
3531 return kr;
3532 }
3533
3534 #endif
3535 default:
3536 return (KERN_INVALID_ARGUMENT);
3537 }
3538 return (KERN_SUCCESS);
3539 }
3540
/*
 * Switch read by the TASK_VM_INFO_PURGEABLE flavor of task_info(): when
 * non-zero, the volatile compressed pmap size returned by
 * vm_map_query_volatile() is subtracted from the reported "compressed"
 * value.
 */
int radar_20146450 = 1;
3542 kern_return_t
3543 task_info(
3544 task_t task,
3545 task_flavor_t flavor,
3546 task_info_t task_info_out,
3547 mach_msg_type_number_t *task_info_count)
3548 {
3549 kern_return_t error = KERN_SUCCESS;
3550 mach_msg_type_number_t original_task_info_count;
3551
3552 if (task == TASK_NULL)
3553 return (KERN_INVALID_ARGUMENT);
3554
3555 original_task_info_count = *task_info_count;
3556 task_lock(task);
3557
3558 if ((task != current_task()) && (!task->active)) {
3559 task_unlock(task);
3560 return (KERN_INVALID_ARGUMENT);
3561 }
3562
3563 switch (flavor) {
3564
3565 case TASK_BASIC_INFO_32:
3566 case TASK_BASIC2_INFO_32:
3567 #if defined(__arm__) || defined(__arm64__)
3568 case TASK_BASIC_INFO_64:
3569 #endif
3570 {
3571 task_basic_info_32_t basic_info;
3572 vm_map_t map;
3573 clock_sec_t secs;
3574 clock_usec_t usecs;
3575
3576 if (*task_info_count < TASK_BASIC_INFO_32_COUNT) {
3577 error = KERN_INVALID_ARGUMENT;
3578 break;
3579 }
3580
3581 basic_info = (task_basic_info_32_t)task_info_out;
3582
3583 map = (task == kernel_task)? kernel_map: task->map;
3584 basic_info->virtual_size = (typeof(basic_info->virtual_size))map->size;
3585 if (flavor == TASK_BASIC2_INFO_32) {
3586 /*
3587 * The "BASIC2" flavor gets the maximum resident
3588 * size instead of the current resident size...
3589 */
3590 basic_info->resident_size = pmap_resident_max(map->pmap);
3591 } else {
3592 basic_info->resident_size = pmap_resident_count(map->pmap);
3593 }
3594 basic_info->resident_size *= PAGE_SIZE;
3595
3596 basic_info->policy = ((task != kernel_task)?
3597 POLICY_TIMESHARE: POLICY_RR);
3598 basic_info->suspend_count = task->user_stop_count;
3599
3600 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
3601 basic_info->user_time.seconds =
3602 (typeof(basic_info->user_time.seconds))secs;
3603 basic_info->user_time.microseconds = usecs;
3604
3605 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
3606 basic_info->system_time.seconds =
3607 (typeof(basic_info->system_time.seconds))secs;
3608 basic_info->system_time.microseconds = usecs;
3609
3610 *task_info_count = TASK_BASIC_INFO_32_COUNT;
3611 break;
3612 }
3613
3614 #if defined(__arm__) || defined(__arm64__)
3615 case TASK_BASIC_INFO_64_2:
3616 {
3617 task_basic_info_64_2_t basic_info;
3618 vm_map_t map;
3619 clock_sec_t secs;
3620 clock_usec_t usecs;
3621
3622 if (*task_info_count < TASK_BASIC_INFO_64_2_COUNT) {
3623 error = KERN_INVALID_ARGUMENT;
3624 break;
3625 }
3626
3627 basic_info = (task_basic_info_64_2_t)task_info_out;
3628
3629 map = (task == kernel_task)? kernel_map: task->map;
3630 basic_info->virtual_size = map->size;
3631 basic_info->resident_size =
3632 (mach_vm_size_t)(pmap_resident_count(map->pmap))
3633 * PAGE_SIZE_64;
3634
3635 basic_info->policy = ((task != kernel_task)?
3636 POLICY_TIMESHARE: POLICY_RR);
3637 basic_info->suspend_count = task->user_stop_count;
3638
3639 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
3640 basic_info->user_time.seconds =
3641 (typeof(basic_info->user_time.seconds))secs;
3642 basic_info->user_time.microseconds = usecs;
3643
3644 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
3645 basic_info->system_time.seconds =
3646 (typeof(basic_info->system_time.seconds))secs;
3647 basic_info->system_time.microseconds = usecs;
3648
3649 *task_info_count = TASK_BASIC_INFO_64_2_COUNT;
3650 break;
3651 }
3652
3653 #else /* defined(__arm__) || defined(__arm64__) */
3654 case TASK_BASIC_INFO_64:
3655 {
3656 task_basic_info_64_t basic_info;
3657 vm_map_t map;
3658 clock_sec_t secs;
3659 clock_usec_t usecs;
3660
3661 if (*task_info_count < TASK_BASIC_INFO_64_COUNT) {
3662 error = KERN_INVALID_ARGUMENT;
3663 break;
3664 }
3665
3666 basic_info = (task_basic_info_64_t)task_info_out;
3667
3668 map = (task == kernel_task)? kernel_map: task->map;
3669 basic_info->virtual_size = map->size;
3670 basic_info->resident_size =
3671 (mach_vm_size_t)(pmap_resident_count(map->pmap))
3672 * PAGE_SIZE_64;
3673
3674 basic_info->policy = ((task != kernel_task)?
3675 POLICY_TIMESHARE: POLICY_RR);
3676 basic_info->suspend_count = task->user_stop_count;
3677
3678 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
3679 basic_info->user_time.seconds =
3680 (typeof(basic_info->user_time.seconds))secs;
3681 basic_info->user_time.microseconds = usecs;
3682
3683 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
3684 basic_info->system_time.seconds =
3685 (typeof(basic_info->system_time.seconds))secs;
3686 basic_info->system_time.microseconds = usecs;
3687
3688 *task_info_count = TASK_BASIC_INFO_64_COUNT;
3689 break;
3690 }
3691 #endif /* defined(__arm__) || defined(__arm64__) */
3692
3693 case MACH_TASK_BASIC_INFO:
3694 {
3695 mach_task_basic_info_t basic_info;
3696 vm_map_t map;
3697 clock_sec_t secs;
3698 clock_usec_t usecs;
3699
3700 if (*task_info_count < MACH_TASK_BASIC_INFO_COUNT) {
3701 error = KERN_INVALID_ARGUMENT;
3702 break;
3703 }
3704
3705 basic_info = (mach_task_basic_info_t)task_info_out;
3706
3707 map = (task == kernel_task) ? kernel_map : task->map;
3708
3709 basic_info->virtual_size = map->size;
3710
3711 basic_info->resident_size =
3712 (mach_vm_size_t)(pmap_resident_count(map->pmap));
3713 basic_info->resident_size *= PAGE_SIZE_64;
3714
3715 basic_info->resident_size_max =
3716 (mach_vm_size_t)(pmap_resident_max(map->pmap));
3717 basic_info->resident_size_max *= PAGE_SIZE_64;
3718
3719 basic_info->policy = ((task != kernel_task) ?
3720 POLICY_TIMESHARE : POLICY_RR);
3721
3722 basic_info->suspend_count = task->user_stop_count;
3723
3724 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
3725 basic_info->user_time.seconds =
3726 (typeof(basic_info->user_time.seconds))secs;
3727 basic_info->user_time.microseconds = usecs;
3728
3729 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
3730 basic_info->system_time.seconds =
3731 (typeof(basic_info->system_time.seconds))secs;
3732 basic_info->system_time.microseconds = usecs;
3733
3734 *task_info_count = MACH_TASK_BASIC_INFO_COUNT;
3735 break;
3736 }
3737
3738 case TASK_THREAD_TIMES_INFO:
3739 {
3740 task_thread_times_info_t times_info;
3741 thread_t thread;
3742
3743 if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) {
3744 error = KERN_INVALID_ARGUMENT;
3745 break;
3746 }
3747
3748 times_info = (task_thread_times_info_t) task_info_out;
3749 times_info->user_time.seconds = 0;
3750 times_info->user_time.microseconds = 0;
3751 times_info->system_time.seconds = 0;
3752 times_info->system_time.microseconds = 0;
3753
3754
3755 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3756 time_value_t user_time, system_time;
3757
3758 if (thread->options & TH_OPT_IDLE_THREAD)
3759 continue;
3760
3761 thread_read_times(thread, &user_time, &system_time);
3762
3763 time_value_add(&times_info->user_time, &user_time);
3764 time_value_add(&times_info->system_time, &system_time);
3765 }
3766
3767 *task_info_count = TASK_THREAD_TIMES_INFO_COUNT;
3768 break;
3769 }
3770
3771 case TASK_ABSOLUTETIME_INFO:
3772 {
3773 task_absolutetime_info_t info;
3774 thread_t thread;
3775
3776 if (*task_info_count < TASK_ABSOLUTETIME_INFO_COUNT) {
3777 error = KERN_INVALID_ARGUMENT;
3778 break;
3779 }
3780
3781 info = (task_absolutetime_info_t)task_info_out;
3782 info->threads_user = info->threads_system = 0;
3783
3784
3785 info->total_user = task->total_user_time;
3786 info->total_system = task->total_system_time;
3787
3788 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3789 uint64_t tval;
3790 spl_t x;
3791
3792 if (thread->options & TH_OPT_IDLE_THREAD)
3793 continue;
3794
3795 x = splsched();
3796 thread_lock(thread);
3797
3798 tval = timer_grab(&thread->user_timer);
3799 info->threads_user += tval;
3800 info->total_user += tval;
3801
3802 tval = timer_grab(&thread->system_timer);
3803 if (thread->precise_user_kernel_time) {
3804 info->threads_system += tval;
3805 info->total_system += tval;
3806 } else {
3807 /* system_timer may represent either sys or user */
3808 info->threads_user += tval;
3809 info->total_user += tval;
3810 }
3811
3812 thread_unlock(thread);
3813 splx(x);
3814 }
3815
3816
3817 *task_info_count = TASK_ABSOLUTETIME_INFO_COUNT;
3818 break;
3819 }
3820
3821 case TASK_DYLD_INFO:
3822 {
3823 task_dyld_info_t info;
3824
3825 /*
3826 * We added the format field to TASK_DYLD_INFO output. For
3827 * temporary backward compatibility, accept the fact that
3828 * clients may ask for the old version - distinguished by the
3829 * size of the expected result structure.
3830 */
3831 #define TASK_LEGACY_DYLD_INFO_COUNT \
3832 offsetof(struct task_dyld_info, all_image_info_format)/sizeof(natural_t)
3833
3834 if (*task_info_count < TASK_LEGACY_DYLD_INFO_COUNT) {
3835 error = KERN_INVALID_ARGUMENT;
3836 break;
3837 }
3838
3839 info = (task_dyld_info_t)task_info_out;
3840 info->all_image_info_addr = task->all_image_info_addr;
3841 info->all_image_info_size = task->all_image_info_size;
3842
3843 /* only set format on output for those expecting it */
3844 if (*task_info_count >= TASK_DYLD_INFO_COUNT) {
3845 info->all_image_info_format = task_has_64BitAddr(task) ?
3846 TASK_DYLD_ALL_IMAGE_INFO_64 :
3847 TASK_DYLD_ALL_IMAGE_INFO_32 ;
3848 *task_info_count = TASK_DYLD_INFO_COUNT;
3849 } else {
3850 *task_info_count = TASK_LEGACY_DYLD_INFO_COUNT;
3851 }
3852 break;
3853 }
3854
3855 case TASK_EXTMOD_INFO:
3856 {
3857 task_extmod_info_t info;
3858 void *p;
3859
3860 if (*task_info_count < TASK_EXTMOD_INFO_COUNT) {
3861 error = KERN_INVALID_ARGUMENT;
3862 break;
3863 }
3864
3865 info = (task_extmod_info_t)task_info_out;
3866
3867 p = get_bsdtask_info(task);
3868 if (p) {
3869 proc_getexecutableuuid(p, info->task_uuid, sizeof(info->task_uuid));
3870 } else {
3871 bzero(info->task_uuid, sizeof(info->task_uuid));
3872 }
3873 info->extmod_statistics = task->extmod_statistics;
3874 *task_info_count = TASK_EXTMOD_INFO_COUNT;
3875
3876 break;
3877 }
3878
3879 case TASK_KERNELMEMORY_INFO:
3880 {
3881 task_kernelmemory_info_t tkm_info;
3882 ledger_amount_t credit, debit;
3883
3884 if (*task_info_count < TASK_KERNELMEMORY_INFO_COUNT) {
3885 error = KERN_INVALID_ARGUMENT;
3886 break;
3887 }
3888
3889 tkm_info = (task_kernelmemory_info_t) task_info_out;
3890 tkm_info->total_palloc = 0;
3891 tkm_info->total_pfree = 0;
3892 tkm_info->total_salloc = 0;
3893 tkm_info->total_sfree = 0;
3894
3895 if (task == kernel_task) {
3896 /*
3897 * All shared allocs/frees from other tasks count against
3898 * the kernel private memory usage. If we are looking up
3899 * info for the kernel task, gather from everywhere.
3900 */
3901 task_unlock(task);
3902
3903 /* start by accounting for all the terminated tasks against the kernel */
3904 tkm_info->total_palloc = tasks_tkm_private.alloc + tasks_tkm_shared.alloc;
3905 tkm_info->total_pfree = tasks_tkm_private.free + tasks_tkm_shared.free;
3906
3907 /* count all other task/thread shared alloc/free against the kernel */
3908 lck_mtx_lock(&tasks_threads_lock);
3909
3910 /* XXX this really shouldn't be using the function parameter 'task' as a local var! */
3911 queue_iterate(&tasks, task, task_t, tasks) {
3912 if (task == kernel_task) {
3913 if (ledger_get_entries(task->ledger,
3914 task_ledgers.tkm_private, &credit,
3915 &debit) == KERN_SUCCESS) {
3916 tkm_info->total_palloc += credit;
3917 tkm_info->total_pfree += debit;
3918 }
3919 }
3920 if (!ledger_get_entries(task->ledger,
3921 task_ledgers.tkm_shared, &credit, &debit)) {
3922 tkm_info->total_palloc += credit;
3923 tkm_info->total_pfree += debit;
3924 }
3925 }
3926 lck_mtx_unlock(&tasks_threads_lock);
3927 } else {
3928 if (!ledger_get_entries(task->ledger,
3929 task_ledgers.tkm_private, &credit, &debit)) {
3930 tkm_info->total_palloc = credit;
3931 tkm_info->total_pfree = debit;
3932 }
3933 if (!ledger_get_entries(task->ledger,
3934 task_ledgers.tkm_shared, &credit, &debit)) {
3935 tkm_info->total_salloc = credit;
3936 tkm_info->total_sfree = debit;
3937 }
3938 task_unlock(task);
3939 }
3940
3941 *task_info_count = TASK_KERNELMEMORY_INFO_COUNT;
3942 return KERN_SUCCESS;
3943 }
3944
3945 /* OBSOLETE */
3946 case TASK_SCHED_FIFO_INFO:
3947 {
3948
3949 if (*task_info_count < POLICY_FIFO_BASE_COUNT) {
3950 error = KERN_INVALID_ARGUMENT;
3951 break;
3952 }
3953
3954 error = KERN_INVALID_POLICY;
3955 break;
3956 }
3957
3958 /* OBSOLETE */
3959 case TASK_SCHED_RR_INFO:
3960 {
3961 policy_rr_base_t rr_base;
3962 uint32_t quantum_time;
3963 uint64_t quantum_ns;
3964
3965 if (*task_info_count < POLICY_RR_BASE_COUNT) {
3966 error = KERN_INVALID_ARGUMENT;
3967 break;
3968 }
3969
3970 rr_base = (policy_rr_base_t) task_info_out;
3971
3972 if (task != kernel_task) {
3973 error = KERN_INVALID_POLICY;
3974 break;
3975 }
3976
3977 rr_base->base_priority = task->priority;
3978
3979 quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
3980 absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
3981
3982 rr_base->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
3983
3984 *task_info_count = POLICY_RR_BASE_COUNT;
3985 break;
3986 }
3987
3988 /* OBSOLETE */
3989 case TASK_SCHED_TIMESHARE_INFO:
3990 {
3991 policy_timeshare_base_t ts_base;
3992
3993 if (*task_info_count < POLICY_TIMESHARE_BASE_COUNT) {
3994 error = KERN_INVALID_ARGUMENT;
3995 break;
3996 }
3997
3998 ts_base = (policy_timeshare_base_t) task_info_out;
3999
4000 if (task == kernel_task) {
4001 error = KERN_INVALID_POLICY;
4002 break;
4003 }
4004
4005 ts_base->base_priority = task->priority;
4006
4007 *task_info_count = POLICY_TIMESHARE_BASE_COUNT;
4008 break;
4009 }
4010
4011 case TASK_SECURITY_TOKEN:
4012 {
4013 security_token_t *sec_token_p;
4014
4015 if (*task_info_count < TASK_SECURITY_TOKEN_COUNT) {
4016 error = KERN_INVALID_ARGUMENT;
4017 break;
4018 }
4019
4020 sec_token_p = (security_token_t *) task_info_out;
4021
4022 *sec_token_p = task->sec_token;
4023
4024 *task_info_count = TASK_SECURITY_TOKEN_COUNT;
4025 break;
4026 }
4027
4028 case TASK_AUDIT_TOKEN:
4029 {
4030 audit_token_t *audit_token_p;
4031
4032 if (*task_info_count < TASK_AUDIT_TOKEN_COUNT) {
4033 error = KERN_INVALID_ARGUMENT;
4034 break;
4035 }
4036
4037 audit_token_p = (audit_token_t *) task_info_out;
4038
4039 *audit_token_p = task->audit_token;
4040
4041 *task_info_count = TASK_AUDIT_TOKEN_COUNT;
4042 break;
4043 }
4044
4045 case TASK_SCHED_INFO:
4046 error = KERN_INVALID_ARGUMENT;
4047 break;
4048
4049 case TASK_EVENTS_INFO:
4050 {
4051 task_events_info_t events_info;
4052 thread_t thread;
4053
4054 if (*task_info_count < TASK_EVENTS_INFO_COUNT) {
4055 error = KERN_INVALID_ARGUMENT;
4056 break;
4057 }
4058
4059 events_info = (task_events_info_t) task_info_out;
4060
4061
4062 events_info->faults = task->faults;
4063 events_info->pageins = task->pageins;
4064 events_info->cow_faults = task->cow_faults;
4065 events_info->messages_sent = task->messages_sent;
4066 events_info->messages_received = task->messages_received;
4067 events_info->syscalls_mach = task->syscalls_mach;
4068 events_info->syscalls_unix = task->syscalls_unix;
4069
4070 events_info->csw = task->c_switch;
4071
4072 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4073 events_info->csw += thread->c_switch;
4074 events_info->syscalls_mach += thread->syscalls_mach;
4075 events_info->syscalls_unix += thread->syscalls_unix;
4076 }
4077
4078
4079 *task_info_count = TASK_EVENTS_INFO_COUNT;
4080 break;
4081 }
4082 case TASK_AFFINITY_TAG_INFO:
4083 {
4084 if (*task_info_count < TASK_AFFINITY_TAG_INFO_COUNT) {
4085 error = KERN_INVALID_ARGUMENT;
4086 break;
4087 }
4088
4089 error = task_affinity_info(task, task_info_out, task_info_count);
4090 break;
4091 }
4092 case TASK_POWER_INFO:
4093 {
4094 if (*task_info_count < TASK_POWER_INFO_COUNT) {
4095 error = KERN_INVALID_ARGUMENT;
4096 break;
4097 }
4098
4099 task_power_info_locked(task, (task_power_info_t)task_info_out, NULL, NULL);
4100 break;
4101 }
4102
4103 case TASK_POWER_INFO_V2:
4104 {
4105 if (*task_info_count < TASK_POWER_INFO_V2_COUNT_OLD) {
4106 error = KERN_INVALID_ARGUMENT;
4107 break;
4108 }
4109 task_power_info_v2_t tpiv2 = (task_power_info_v2_t) task_info_out;
4110 task_power_info_locked(task, &tpiv2->cpu_energy, &tpiv2->gpu_energy, tpiv2);
4111 break;
4112 }
4113
4114 case TASK_VM_INFO:
4115 case TASK_VM_INFO_PURGEABLE:
4116 {
4117 task_vm_info_t vm_info;
4118 vm_map_t map;
4119
4120 if (*task_info_count < TASK_VM_INFO_REV0_COUNT) {
4121 error = KERN_INVALID_ARGUMENT;
4122 break;
4123 }
4124
4125 vm_info = (task_vm_info_t)task_info_out;
4126
4127 if (task == kernel_task) {
4128 map = kernel_map;
4129 /* no lock */
4130 } else {
4131 map = task->map;
4132 vm_map_lock_read(map);
4133 }
4134
4135 vm_info->virtual_size = (typeof(vm_info->virtual_size))map->size;
4136 vm_info->region_count = map->hdr.nentries;
4137 vm_info->page_size = vm_map_page_size(map);
4138
4139 vm_info->resident_size = pmap_resident_count(map->pmap);
4140 vm_info->resident_size *= PAGE_SIZE;
4141 vm_info->resident_size_peak = pmap_resident_max(map->pmap);
4142 vm_info->resident_size_peak *= PAGE_SIZE;
4143
4144 #define _VM_INFO(_name) \
4145 vm_info->_name = ((mach_vm_size_t) map->pmap->stats._name) * PAGE_SIZE
4146
4147 _VM_INFO(device);
4148 _VM_INFO(device_peak);
4149 _VM_INFO(external);
4150 _VM_INFO(external_peak);
4151 _VM_INFO(internal);
4152 _VM_INFO(internal_peak);
4153 _VM_INFO(reusable);
4154 _VM_INFO(reusable_peak);
4155 _VM_INFO(compressed);
4156 _VM_INFO(compressed_peak);
4157 _VM_INFO(compressed_lifetime);
4158
4159 vm_info->purgeable_volatile_pmap = 0;
4160 vm_info->purgeable_volatile_resident = 0;
4161 vm_info->purgeable_volatile_virtual = 0;
4162 if (task == kernel_task) {
4163 /*
4164 * We do not maintain the detailed stats for the
4165 * kernel_pmap, so just count everything as
4166 * "internal"...
4167 */
4168 vm_info->internal = vm_info->resident_size;
4169 /*
4170 * ... but since the memory held by the VM compressor
4171 * in the kernel address space ought to be attributed
4172 * to user-space tasks, we subtract it from "internal"
4173 * to give memory reporting tools a more accurate idea
4174 * of what the kernel itself is actually using, instead
4175 * of making it look like the kernel is leaking memory
4176 * when the system is under memory pressure.
4177 */
4178 vm_info->internal -= (VM_PAGE_COMPRESSOR_COUNT *
4179 PAGE_SIZE);
4180 } else {
4181 mach_vm_size_t volatile_virtual_size;
4182 mach_vm_size_t volatile_resident_size;
4183 mach_vm_size_t volatile_compressed_size;
4184 mach_vm_size_t volatile_pmap_size;
4185 mach_vm_size_t volatile_compressed_pmap_size;
4186 kern_return_t kr;
4187
4188 if (flavor == TASK_VM_INFO_PURGEABLE) {
4189 kr = vm_map_query_volatile(
4190 map,
4191 &volatile_virtual_size,
4192 &volatile_resident_size,
4193 &volatile_compressed_size,
4194 &volatile_pmap_size,
4195 &volatile_compressed_pmap_size);
4196 if (kr == KERN_SUCCESS) {
4197 vm_info->purgeable_volatile_pmap =
4198 volatile_pmap_size;
4199 if (radar_20146450) {
4200 vm_info->compressed -=
4201 volatile_compressed_pmap_size;
4202 }
4203 vm_info->purgeable_volatile_resident =
4204 volatile_resident_size;
4205 vm_info->purgeable_volatile_virtual =
4206 volatile_virtual_size;
4207 }
4208 }
4209 }
4210 *task_info_count = TASK_VM_INFO_REV0_COUNT;
4211
4212 if (original_task_info_count >= TASK_VM_INFO_REV1_COUNT) {
4213 vm_info->phys_footprint =
4214 (mach_vm_size_t) get_task_phys_footprint(task);
4215 *task_info_count = TASK_VM_INFO_REV1_COUNT;
4216 }
4217 if (original_task_info_count >= TASK_VM_INFO_REV2_COUNT) {
4218 vm_info->min_address = map->min_offset;
4219 vm_info->max_address = map->max_offset;
4220 *task_info_count = TASK_VM_INFO_REV2_COUNT;
4221 }
4222
4223 if (task != kernel_task) {
4224 vm_map_unlock_read(map);
4225 }
4226
4227 break;
4228 }
4229
4230 case TASK_WAIT_STATE_INFO:
4231 {
4232 /*
4233 * Deprecated flavor. Currently allowing some results until all users
4234 * stop calling it. The results may not be accurate.
4235 */
4236 task_wait_state_info_t wait_state_info;
4237 uint64_t total_sfi_ledger_val = 0;
4238
4239 if (*task_info_count < TASK_WAIT_STATE_INFO_COUNT) {
4240 error = KERN_INVALID_ARGUMENT;
4241 break;
4242 }
4243
4244 wait_state_info = (task_wait_state_info_t) task_info_out;
4245
4246 wait_state_info->total_wait_state_time = 0;
4247 bzero(wait_state_info->_reserved, sizeof(wait_state_info->_reserved));
4248
4249 #if CONFIG_SCHED_SFI
4250 int i, prev_lentry = -1;
4251 int64_t val_credit, val_debit;
4252
4253 for (i = 0; i < MAX_SFI_CLASS_ID; i++){
4254 val_credit =0;
4255 /*
4256 * checking with prev_lentry != entry ensures adjacent classes
4257 * which share the same ledger do not add wait times twice.
4258 * Note: Use ledger() call to get data for each individual sfi class.
4259 */
4260 if (prev_lentry != task_ledgers.sfi_wait_times[i] &&
4261 KERN_SUCCESS == ledger_get_entries(task->ledger,
4262 task_ledgers.sfi_wait_times[i], &val_credit, &val_debit)) {
4263 total_sfi_ledger_val += val_credit;
4264 }
4265 prev_lentry = task_ledgers.sfi_wait_times[i];
4266 }
4267
4268 #endif /* CONFIG_SCHED_SFI */
4269 wait_state_info->total_wait_sfi_state_time = total_sfi_ledger_val;
4270 *task_info_count = TASK_WAIT_STATE_INFO_COUNT;
4271
4272 break;
4273 }
4274 case TASK_VM_INFO_PURGEABLE_ACCOUNT:
4275 {
4276 #if DEVELOPMENT || DEBUG
4277 pvm_account_info_t acnt_info;
4278
4279 if (*task_info_count < PVM_ACCOUNT_INFO_COUNT) {
4280 error = KERN_INVALID_ARGUMENT;
4281 break;
4282 }
4283
4284 if (task_info_out == NULL) {
4285 error = KERN_INVALID_ARGUMENT;
4286 break;
4287 }
4288
4289 acnt_info = (pvm_account_info_t) task_info_out;
4290
4291 error = vm_purgeable_account(task, acnt_info);
4292
4293 *task_info_count = PVM_ACCOUNT_INFO_COUNT;
4294
4295 break;
4296 #else /* DEVELOPMENT || DEBUG */
4297 error = KERN_NOT_SUPPORTED;
4298 break;
4299 #endif /* DEVELOPMENT || DEBUG */
4300 }
4301 case TASK_FLAGS_INFO:
4302 {
4303 task_flags_info_t flags_info;
4304
4305 if (*task_info_count < TASK_FLAGS_INFO_COUNT) {
4306 error = KERN_INVALID_ARGUMENT;
4307 break;
4308 }
4309
4310 flags_info = (task_flags_info_t)task_info_out;
4311
4312 /* only publish the 64-bit flag of the task */
4313 flags_info->flags = task->t_flags & TF_64B_ADDR;
4314
4315 *task_info_count = TASK_FLAGS_INFO_COUNT;
4316 break;
4317 }
4318
4319 case TASK_DEBUG_INFO_INTERNAL:
4320 {
4321 #if DEVELOPMENT || DEBUG
4322 task_debug_info_internal_t dbg_info;
4323 if (*task_info_count < TASK_DEBUG_INFO_INTERNAL_COUNT) {
4324 error = KERN_NOT_SUPPORTED;
4325 break;
4326 }
4327
4328 if (task_info_out == NULL) {
4329 error = KERN_INVALID_ARGUMENT;
4330 break;
4331 }
4332 dbg_info = (task_debug_info_internal_t) task_info_out;
4333 dbg_info->ipc_space_size = 0;
4334 if (task->itk_space){
4335 dbg_info->ipc_space_size = task->itk_space->is_table_size;
4336 }
4337
4338 error = KERN_SUCCESS;
4339 *task_info_count = TASK_DEBUG_INFO_INTERNAL_COUNT;
4340 break;
4341 #else /* DEVELOPMENT || DEBUG */
4342 error = KERN_NOT_SUPPORTED;
4343 break;
4344 #endif /* DEVELOPMENT || DEBUG */
4345 }
4346 default:
4347 error = KERN_INVALID_ARGUMENT;
4348 }
4349
4350 task_unlock(task);
4351 return (error);
4352 }
4353
4354 /*
4355 * task_info_from_user
4356 *
4357 * When calling task_info from user space,
4358 * this function will be executed as mig server side
4359 * instead of calling directly into task_info.
4360 * This gives the possibility to perform more security
4361 * checks on task_port.
4362 *
4363 * In the case of TASK_DYLD_INFO, we require the more
4364 * privileged task_port not the less-privileged task_name_port.
4365 *
4366 */
4367 kern_return_t
4368 task_info_from_user(
4369 mach_port_t task_port,
4370 task_flavor_t flavor,
4371 task_info_t task_info_out,
4372 mach_msg_type_number_t *task_info_count)
4373 {
4374 task_t task;
4375 kern_return_t ret;
4376
4377 if (flavor == TASK_DYLD_INFO)
4378 task = convert_port_to_task(task_port);
4379 else
4380 task = convert_port_to_task_name(task_port);
4381
4382 ret = task_info(task, flavor, task_info_out, task_info_count);
4383
4384 task_deallocate(task);
4385
4386 return ret;
4387 }
4388
4389 /*
4390 * task_power_info
4391 *
4392 * Returns power stats for the task.
4393 * Note: Called with task locked.
4394 */
4395 void
4396 task_power_info_locked(
4397 task_t task,
4398 task_power_info_t info,
4399 gpu_energy_data_t ginfo,
4400 task_power_info_v2_t infov2)
4401 {
4402 thread_t thread;
4403 ledger_amount_t tmp;
4404
4405 task_lock_assert_owned(task);
4406
4407 ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
4408 (ledger_amount_t *)&info->task_interrupt_wakeups, &tmp);
4409 ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
4410 (ledger_amount_t *)&info->task_platform_idle_wakeups, &tmp);
4411
4412 info->task_timer_wakeups_bin_1 = task->task_timer_wakeups_bin_1;
4413 info->task_timer_wakeups_bin_2 = task->task_timer_wakeups_bin_2;
4414
4415 info->total_user = task->total_user_time;
4416 info->total_system = task->total_system_time;
4417
4418 #if CONFIG_EMBEDDED
4419 if (infov2) {
4420 infov2->task_energy = task->task_energy;
4421 }
4422 #endif
4423
4424 if (ginfo) {
4425 ginfo->task_gpu_utilisation = task->task_gpu_ns;
4426 }
4427
4428 if (infov2) {
4429 infov2->task_ptime = task->total_ptime;
4430 infov2->task_pset_switches = task->ps_switch;
4431 }
4432
4433 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4434 uint64_t tval;
4435 spl_t x;
4436
4437 if (thread->options & TH_OPT_IDLE_THREAD)
4438 continue;
4439
4440 x = splsched();
4441 thread_lock(thread);
4442
4443 info->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
4444 info->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
4445
4446 #if CONFIG_EMBEDDED
4447 if (infov2) {
4448 infov2->task_energy += ml_energy_stat(thread);
4449 }
4450 #endif
4451
4452 tval = timer_grab(&thread->user_timer);
4453 info->total_user += tval;
4454
4455 if (infov2) {
4456 tval = timer_grab(&thread->ptime);
4457 infov2->task_ptime += tval;
4458 infov2->task_pset_switches += thread->ps_switch;
4459 }
4460
4461 tval = timer_grab(&thread->system_timer);
4462 if (thread->precise_user_kernel_time) {
4463 info->total_system += tval;
4464 } else {
4465 /* system_timer may represent either sys or user */
4466 info->total_user += tval;
4467 }
4468
4469 if (ginfo) {
4470 ginfo->task_gpu_utilisation += ml_gpu_stat(thread);
4471 }
4472 thread_unlock(thread);
4473 splx(x);
4474 }
4475 }
4476
4477 /*
4478 * task_gpu_utilisation
4479 *
4480 * Returns the total gpu time used by the all the threads of the task
4481 * (both dead and alive)
4482 */
4483 uint64_t
4484 task_gpu_utilisation(
4485 task_t task)
4486 {
4487 uint64_t gpu_time = 0;
4488 #if !CONFIG_EMBEDDED
4489 thread_t thread;
4490
4491 task_lock(task);
4492 gpu_time += task->task_gpu_ns;
4493
4494 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4495 spl_t x;
4496 x = splsched();
4497 thread_lock(thread);
4498 gpu_time += ml_gpu_stat(thread);
4499 thread_unlock(thread);
4500 splx(x);
4501 }
4502
4503 task_unlock(task);
4504 #else /* CONFIG_EMBEDDED */
4505 /* silence compiler warning */
4506 (void)task;
4507 #endif /* !CONFIG_EMBEDDED */
4508 return gpu_time;
4509 }
4510
4511 /*
4512 * task_energy
4513 *
4514 * Returns the total energy used by the all the threads of the task
4515 * (both dead and alive)
4516 */
4517 uint64_t
4518 task_energy(
4519 task_t task)
4520 {
4521 uint64_t energy = 0;
4522 thread_t thread;
4523
4524 task_lock(task);
4525 energy += task->task_energy;
4526
4527 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4528 spl_t x;
4529 x = splsched();
4530 thread_lock(thread);
4531 energy += ml_energy_stat(thread);
4532 thread_unlock(thread);
4533 splx(x);
4534 }
4535
4536 task_unlock(task);
4537 return energy;
4538 }
4539
4540
4541 uint64_t
4542 task_cpu_ptime(
4543 __unused task_t task)
4544 {
4545 return 0;
4546 }
4547
4548
4549 kern_return_t
4550 task_purgable_info(
4551 task_t task,
4552 task_purgable_info_t *stats)
4553 {
4554 if (task == TASK_NULL || stats == NULL)
4555 return KERN_INVALID_ARGUMENT;
4556 /* Take task reference */
4557 task_reference(task);
4558 vm_purgeable_stats((vm_purgeable_info_t)stats, task);
4559 /* Drop task reference */
4560 task_deallocate(task);
4561 return KERN_SUCCESS;
4562 }
4563
4564 void
4565 task_vtimer_set(
4566 task_t task,
4567 integer_t which)
4568 {
4569 thread_t thread;
4570 spl_t x;
4571
4572 task_lock(task);
4573
4574 task->vtimers |= which;
4575
4576 switch (which) {
4577
4578 case TASK_VTIMER_USER:
4579 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4580 x = splsched();
4581 thread_lock(thread);
4582 if (thread->precise_user_kernel_time)
4583 thread->vtimer_user_save = timer_grab(&thread->user_timer);
4584 else
4585 thread->vtimer_user_save = timer_grab(&thread->system_timer);
4586 thread_unlock(thread);
4587 splx(x);
4588 }
4589 break;
4590
4591 case TASK_VTIMER_PROF:
4592 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4593 x = splsched();
4594 thread_lock(thread);
4595 thread->vtimer_prof_save = timer_grab(&thread->user_timer);
4596 thread->vtimer_prof_save += timer_grab(&thread->system_timer);
4597 thread_unlock(thread);
4598 splx(x);
4599 }
4600 break;
4601
4602 case TASK_VTIMER_RLIM:
4603 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4604 x = splsched();
4605 thread_lock(thread);
4606 thread->vtimer_rlim_save = timer_grab(&thread->user_timer);
4607 thread->vtimer_rlim_save += timer_grab(&thread->system_timer);
4608 thread_unlock(thread);
4609 splx(x);
4610 }
4611 break;
4612 }
4613
4614 task_unlock(task);
4615 }
4616
4617 void
4618 task_vtimer_clear(
4619 task_t task,
4620 integer_t which)
4621 {
4622 assert(task == current_task());
4623
4624 task_lock(task);
4625
4626 task->vtimers &= ~which;
4627
4628 task_unlock(task);
4629 }
4630
4631 void
4632 task_vtimer_update(
4633 __unused
4634 task_t task,
4635 integer_t which,
4636 uint32_t *microsecs)
4637 {
4638 thread_t thread = current_thread();
4639 uint32_t tdelt = 0;
4640 clock_sec_t secs = 0;
4641 uint64_t tsum;
4642
4643 assert(task == current_task());
4644
4645 spl_t s = splsched();
4646 thread_lock(thread);
4647
4648 if ((task->vtimers & which) != (uint32_t)which) {
4649 thread_unlock(thread);
4650 splx(s);
4651 return;
4652 }
4653
4654 switch (which) {
4655
4656 case TASK_VTIMER_USER:
4657 if (thread->precise_user_kernel_time) {
4658 tdelt = (uint32_t)timer_delta(&thread->user_timer,
4659 &thread->vtimer_user_save);
4660 } else {
4661 tdelt = (uint32_t)timer_delta(&thread->system_timer,
4662 &thread->vtimer_user_save);
4663 }
4664 absolutetime_to_microtime(tdelt, &secs, microsecs);
4665 break;
4666
4667 case TASK_VTIMER_PROF:
4668 tsum = timer_grab(&thread->user_timer);
4669 tsum += timer_grab(&thread->system_timer);
4670 tdelt = (uint32_t)(tsum - thread->vtimer_prof_save);
4671 absolutetime_to_microtime(tdelt, &secs, microsecs);
4672 /* if the time delta is smaller than a usec, ignore */
4673 if (*microsecs != 0)
4674 thread->vtimer_prof_save = tsum;
4675 break;
4676
4677 case TASK_VTIMER_RLIM:
4678 tsum = timer_grab(&thread->user_timer);
4679 tsum += timer_grab(&thread->system_timer);
4680 tdelt = (uint32_t)(tsum - thread->vtimer_rlim_save);
4681 thread->vtimer_rlim_save = tsum;
4682 absolutetime_to_microtime(tdelt, &secs, microsecs);
4683 break;
4684 }
4685
4686 thread_unlock(thread);
4687 splx(s);
4688 }
4689
4690 /*
4691 * task_assign:
4692 *
4693 * Change the assigned processor set for the task
4694 */
4695 kern_return_t
4696 task_assign(
4697 __unused task_t task,
4698 __unused processor_set_t new_pset,
4699 __unused boolean_t assign_threads)
4700 {
4701 return(KERN_FAILURE);
4702 }
4703
4704 /*
4705 * task_assign_default:
4706 *
4707 * Version of task_assign to assign to default processor set.
4708 */
4709 kern_return_t
4710 task_assign_default(
4711 task_t task,
4712 boolean_t assign_threads)
4713 {
4714 return (task_assign(task, &pset0, assign_threads));
4715 }
4716
4717 /*
4718 * task_get_assignment
4719 *
4720 * Return name of processor set that task is assigned to.
4721 */
4722 kern_return_t
4723 task_get_assignment(
4724 task_t task,
4725 processor_set_t *pset)
4726 {
4727 if (!task || !task->active)
4728 return KERN_FAILURE;
4729
4730 *pset = &pset0;
4731
4732 return KERN_SUCCESS;
4733 }
4734
4735 uint64_t
4736 get_task_dispatchqueue_offset(
4737 task_t task)
4738 {
4739 return task->dispatchqueue_offset;
4740 }
4741
4742 /*
4743 * task_policy
4744 *
4745 * Set scheduling policy and parameters, both base and limit, for
4746 * the given task. Policy must be a policy which is enabled for the
4747 * processor set. Change contained threads if requested.
4748 */
4749 kern_return_t
4750 task_policy(
4751 __unused task_t task,
4752 __unused policy_t policy_id,
4753 __unused policy_base_t base,
4754 __unused mach_msg_type_number_t count,
4755 __unused boolean_t set_limit,
4756 __unused boolean_t change)
4757 {
4758 return(KERN_FAILURE);
4759 }
4760
4761 /*
4762 * task_set_policy
4763 *
4764 * Set scheduling policy and parameters, both base and limit, for
4765 * the given task. Policy can be any policy implemented by the
4766 * processor set, whether enabled or not. Change contained threads
4767 * if requested.
4768 */
4769 kern_return_t
4770 task_set_policy(
4771 __unused task_t task,
4772 __unused processor_set_t pset,
4773 __unused policy_t policy_id,
4774 __unused policy_base_t base,
4775 __unused mach_msg_type_number_t base_count,
4776 __unused policy_limit_t limit,
4777 __unused mach_msg_type_number_t limit_count,
4778 __unused boolean_t change)
4779 {
4780 return(KERN_FAILURE);
4781 }
4782
4783 kern_return_t
4784 task_set_ras_pc(
4785 __unused task_t task,
4786 __unused vm_offset_t pc,
4787 __unused vm_offset_t endpc)
4788 {
4789 return KERN_FAILURE;
4790 }
4791
4792 void
4793 task_synchronizer_destroy_all(task_t task)
4794 {
4795 /*
4796 * Destroy owned semaphores
4797 */
4798 semaphore_destroy_all(task);
4799 }
4800
4801 /*
4802 * Install default (machine-dependent) initial thread state
4803 * on the task. Subsequent thread creation will have this initial
4804 * state set on the thread by machine_thread_inherit_taskwide().
4805 * Flavors and structures are exactly the same as those to thread_set_state()
4806 */
4807 kern_return_t
4808 task_set_state(
4809 task_t task,
4810 int flavor,
4811 thread_state_t state,
4812 mach_msg_type_number_t state_count)
4813 {
4814 kern_return_t ret;
4815
4816 if (task == TASK_NULL) {
4817 return (KERN_INVALID_ARGUMENT);
4818 }
4819
4820 task_lock(task);
4821
4822 if (!task->active) {
4823 task_unlock(task);
4824 return (KERN_FAILURE);
4825 }
4826
4827 ret = machine_task_set_state(task, flavor, state, state_count);
4828
4829 task_unlock(task);
4830 return ret;
4831 }
4832
4833 /*
4834 * Examine the default (machine-dependent) initial thread state
4835 * on the task, as set by task_set_state(). Flavors and structures
4836 * are exactly the same as those passed to thread_get_state().
4837 */
4838 kern_return_t
4839 task_get_state(
4840 task_t task,
4841 int flavor,
4842 thread_state_t state,
4843 mach_msg_type_number_t *state_count)
4844 {
4845 kern_return_t ret;
4846
4847 if (task == TASK_NULL) {
4848 return (KERN_INVALID_ARGUMENT);
4849 }
4850
4851 task_lock(task);
4852
4853 if (!task->active) {
4854 task_unlock(task);
4855 return (KERN_FAILURE);
4856 }
4857
4858 ret = machine_task_get_state(task, flavor, state, state_count);
4859
4860 task_unlock(task);
4861 return ret;
4862 }
4863
4864
4865 static kern_return_t __attribute__((noinline,not_tail_called))
4866 PROC_VIOLATED_GUARD__SEND_EXC_GUARD_AND_SUSPEND(
4867 mach_exception_code_t code,
4868 mach_exception_subcode_t subcode,
4869 void *reason)
4870 {
4871 #ifdef MACH_BSD
4872 if (1 == proc_selfpid())
4873 return KERN_NOT_SUPPORTED; // initproc is immune
4874 #endif
4875 mach_exception_data_type_t codes[EXCEPTION_CODE_MAX] = {
4876 [0] = code,
4877 [1] = subcode,
4878 };
4879 task_t task = current_task();
4880 kern_return_t kr;
4881
4882 /* (See jetsam-related comments below) */
4883
4884 proc_memstat_terminated(task->bsd_info, TRUE);
4885 kr = task_enqueue_exception_with_corpse(task, EXC_GUARD, codes, 2, reason);
4886 proc_memstat_terminated(task->bsd_info, FALSE);
4887 return kr;
4888 }
4889
4890 extern kern_return_t
4891 task_violated_guard(mach_exception_code_t, mach_exception_subcode_t, void *);
4892
4893 kern_return_t
4894 task_violated_guard(
4895 mach_exception_code_t code,
4896 mach_exception_subcode_t subcode,
4897 void *reason)
4898 {
4899 return PROC_VIOLATED_GUARD__SEND_EXC_GUARD_AND_SUSPEND(code, subcode, reason);
4900 }
4901
4902
4903 #if CONFIG_MEMORYSTATUS
4904
4905 boolean_t
4906 task_get_memlimit_is_active(task_t task)
4907 {
4908 assert (task != NULL);
4909
4910 if (task->memlimit_is_active == 1) {
4911 return(TRUE);
4912 } else {
4913 return (FALSE);
4914 }
4915 }
4916
4917 void
4918 task_set_memlimit_is_active(task_t task, boolean_t memlimit_is_active)
4919 {
4920 assert (task != NULL);
4921
4922 if (memlimit_is_active) {
4923 task->memlimit_is_active = 1;
4924 } else {
4925 task->memlimit_is_active = 0;
4926 }
4927 }
4928
4929 boolean_t
4930 task_get_memlimit_is_fatal(task_t task)
4931 {
4932 assert(task != NULL);
4933
4934 if (task->memlimit_is_fatal == 1) {
4935 return(TRUE);
4936 } else {
4937 return(FALSE);
4938 }
4939 }
4940
4941 void
4942 task_set_memlimit_is_fatal(task_t task, boolean_t memlimit_is_fatal)
4943 {
4944 assert (task != NULL);
4945
4946 if (memlimit_is_fatal) {
4947 task->memlimit_is_fatal = 1;
4948 } else {
4949 task->memlimit_is_fatal = 0;
4950 }
4951 }
4952
4953 boolean_t
4954 task_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
4955 {
4956 boolean_t triggered = FALSE;
4957
4958 assert(task == current_task());
4959
4960 /*
4961 * Returns true, if task has already triggered an exc_resource exception.
4962 */
4963
4964 if (memlimit_is_active) {
4965 triggered = (task->memlimit_active_exc_resource ? TRUE : FALSE);
4966 } else {
4967 triggered = (task->memlimit_inactive_exc_resource ? TRUE : FALSE);
4968 }
4969
4970 return(triggered);
4971 }
4972
4973 void
4974 task_mark_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
4975 {
4976 assert(task == current_task());
4977
4978 /*
4979 * We allow one exc_resource per process per active/inactive limit.
4980 * The limit's fatal attribute does not come into play.
4981 */
4982
4983 if (memlimit_is_active) {
4984 task->memlimit_active_exc_resource = 1;
4985 } else {
4986 task->memlimit_inactive_exc_resource = 1;
4987 }
4988 }
4989
4990 #define HWM_USERCORE_MINSPACE 250 // free space (in MB) required *after* core file creation
4991
4992 void __attribute__((noinline))
4993 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, boolean_t is_fatal)
4994 {
4995 task_t task = current_task();
4996 int pid = 0;
4997 const char *procname = "unknown";
4998 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
4999
5000 #ifdef MACH_BSD
5001 pid = proc_selfpid();
5002
5003 if (pid == 1) {
5004 /*
5005 * Cannot have ReportCrash analyzing
5006 * a suspended initproc.
5007 */
5008 return;
5009 }
5010
5011 if (task->bsd_info != NULL)
5012 procname = proc_name_address(current_task()->bsd_info);
5013 #endif
5014 #if CONFIG_COREDUMP
5015 if (hwm_user_cores) {
5016 int error;
5017 uint64_t starttime, end;
5018 clock_sec_t secs = 0;
5019 uint32_t microsecs = 0;
5020
5021 starttime = mach_absolute_time();
5022 /*
5023 * Trigger a coredump of this process. Don't proceed unless we know we won't
5024 * be filling up the disk; and ignore the core size resource limit for this
5025 * core file.
5026 */
5027 if ((error = coredump(current_task()->bsd_info, HWM_USERCORE_MINSPACE, COREDUMP_IGNORE_ULIMIT)) != 0) {
5028 printf("couldn't take coredump of %s[%d]: %d\n", procname, pid, error);
5029 }
5030 /*
5031 * coredump() leaves the task suspended.
5032 */
5033 task_resume_internal(current_task());
5034
5035 end = mach_absolute_time();
5036 absolutetime_to_microtime(end - starttime, &secs, &microsecs);
5037 printf("coredump of %s[%d] taken in %d secs %d microsecs\n",
5038 proc_name_address(current_task()->bsd_info), pid, (int)secs, microsecs);
5039 }
5040 #endif /* CONFIG_COREDUMP */
5041
5042 if (disable_exc_resource) {
5043 printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
5044 "supressed by a boot-arg.\n", procname, pid, max_footprint_mb);
5045 return;
5046 }
5047
5048 /*
5049 * A task that has triggered an EXC_RESOURCE, should not be
5050 * jetsammed when the device is under memory pressure. Here
5051 * we set the P_MEMSTAT_TERMINATED flag so that the process
5052 * will be skipped if the memorystatus_thread wakes up.
5053 */
5054 proc_memstat_terminated(current_task()->bsd_info, TRUE);
5055
5056 code[0] = code[1] = 0;
5057 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY);
5058 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK);
5059 EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb);
5060
5061 /* Do not generate a corpse fork if the violation is a fatal one */
5062 if (is_fatal || exc_via_corpse_forking == 0) {
5063 /* Do not send a EXC_RESOURCE is corpse_for_fatal_memkill is set */
5064 if (corpse_for_fatal_memkill == 0) {
5065 /*
5066 * Use the _internal_ variant so that no user-space
5067 * process can resume our task from under us.
5068 */
5069 task_suspend_internal(task);
5070 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
5071 task_resume_internal(task);
5072 }
5073 } else {
5074 if (audio_active) {
5075 printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
5076 "supressed due to audio playback.\n", procname, pid, max_footprint_mb);
5077 } else {
5078 task_enqueue_exception_with_corpse(task, EXC_RESOURCE,
5079 code, EXCEPTION_CODE_MAX, NULL);
5080 }
5081 }
5082
5083 /*
5084 * After the EXC_RESOURCE has been handled, we must clear the
5085 * P_MEMSTAT_TERMINATED flag so that the process can again be
5086 * considered for jetsam if the memorystatus_thread wakes up.
5087 */
5088 proc_memstat_terminated(current_task()->bsd_info, FALSE); /* clear the flag */
5089 }
5090
5091 /*
5092 * Callback invoked when a task exceeds its physical footprint limit.
5093 */
5094 void
5095 task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1)
5096 {
5097 ledger_amount_t max_footprint, max_footprint_mb;
5098 task_t task;
5099 boolean_t is_warning;
5100 boolean_t memlimit_is_active;
5101 boolean_t memlimit_is_fatal;
5102
5103 if (warning == LEDGER_WARNING_DIPPED_BELOW) {
5104 /*
5105 * Task memory limits only provide a warning on the way up.
5106 */
5107 return;
5108 } else if (warning == LEDGER_WARNING_ROSE_ABOVE) {
5109 /*
5110 * This task is in danger of violating a memory limit,
5111 * It has exceeded a percentage level of the limit.
5112 */
5113 is_warning = TRUE;
5114 } else {
5115 /*
5116 * The task has exceeded the physical footprint limit.
5117 * This is not a warning but a true limit violation.
5118 */
5119 is_warning = FALSE;
5120 }
5121
5122 task = current_task();
5123
5124 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &max_footprint);
5125 max_footprint_mb = max_footprint >> 20;
5126
5127 memlimit_is_active = task_get_memlimit_is_active(task);
5128 memlimit_is_fatal = task_get_memlimit_is_fatal(task);
5129
5130 /*
5131 * If this is an actual violation (not a warning), then generate EXC_RESOURCE exception.
5132 * We only generate the exception once per process per memlimit (active/inactive limit).
5133 * To enforce this, we monitor state based on the memlimit's active/inactive attribute
5134 * and we disable it by marking that memlimit as exception triggered.
5135 */
5136 if ((is_warning == FALSE) && (!task_has_triggered_exc_resource(task, memlimit_is_active))) {
5137 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND((int)max_footprint_mb, memlimit_is_fatal);
5138 memorystatus_log_exception((int)max_footprint_mb, memlimit_is_active, memlimit_is_fatal);
5139 task_mark_has_triggered_exc_resource(task, memlimit_is_active);
5140 }
5141
5142 memorystatus_on_ledger_footprint_exceeded(is_warning, memlimit_is_active, memlimit_is_fatal);
5143 }
5144
5145 extern int proc_check_footprint_priv(void);
5146
5147 kern_return_t
5148 task_set_phys_footprint_limit(
5149 task_t task,
5150 int new_limit_mb,
5151 int *old_limit_mb)
5152 {
5153 kern_return_t error;
5154
5155 boolean_t memlimit_is_active;
5156 boolean_t memlimit_is_fatal;
5157
5158 if ((error = proc_check_footprint_priv())) {
5159 return (KERN_NO_ACCESS);
5160 }
5161
5162 /*
5163 * This call should probably be obsoleted.
5164 * But for now, we default to current state.
5165 */
5166 memlimit_is_active = task_get_memlimit_is_active(task);
5167 memlimit_is_fatal = task_get_memlimit_is_fatal(task);
5168
5169 return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, memlimit_is_active, memlimit_is_fatal);
5170 }
5171
5172 kern_return_t
5173 task_convert_phys_footprint_limit(
5174 int limit_mb,
5175 int *converted_limit_mb)
5176 {
5177 if (limit_mb == -1) {
5178 /*
5179 * No limit
5180 */
5181 if (max_task_footprint != 0) {
5182 *converted_limit_mb = (int)(max_task_footprint / 1024 / 1024); /* bytes to MB */
5183 } else {
5184 *converted_limit_mb = (int)(LEDGER_LIMIT_INFINITY >> 20);
5185 }
5186 } else {
5187 /* nothing to convert */
5188 *converted_limit_mb = limit_mb;
5189 }
5190 return (KERN_SUCCESS);
5191 }
5192
5193
5194 kern_return_t
5195 task_set_phys_footprint_limit_internal(
5196 task_t task,
5197 int new_limit_mb,
5198 int *old_limit_mb,
5199 boolean_t memlimit_is_active,
5200 boolean_t memlimit_is_fatal)
5201 {
5202 ledger_amount_t old;
5203
5204 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &old);
5205
5206 /*
5207 * Check that limit >> 20 will not give an "unexpected" 32-bit
5208 * result. There are, however, implicit assumptions that -1 mb limit
5209 * equates to LEDGER_LIMIT_INFINITY.
5210 */
5211 assert(((old & 0xFFF0000000000000LL) == 0) || (old == LEDGER_LIMIT_INFINITY));
5212
5213 if (old_limit_mb) {
5214 *old_limit_mb = (int)(old >> 20);
5215 }
5216
5217 if (new_limit_mb == -1) {
5218 /*
5219 * Caller wishes to remove the limit.
5220 */
5221 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
5222 max_task_footprint ? max_task_footprint : LEDGER_LIMIT_INFINITY,
5223 max_task_footprint ? max_task_footprint_warning_level : 0);
5224
5225 task_lock(task);
5226 task_set_memlimit_is_active(task, memlimit_is_active);
5227 task_set_memlimit_is_fatal(task, memlimit_is_fatal);
5228 task_unlock(task);
5229
5230 return (KERN_SUCCESS);
5231 }
5232
5233 #ifdef CONFIG_NOMONITORS
5234 return (KERN_SUCCESS);
5235 #endif /* CONFIG_NOMONITORS */
5236
5237 task_lock(task);
5238
5239 if ((memlimit_is_active == task_get_memlimit_is_active(task)) &&
5240 (memlimit_is_fatal == task_get_memlimit_is_fatal(task)) &&
5241 (((ledger_amount_t)new_limit_mb << 20) == old)) {
5242 /*
5243 * memlimit state is not changing
5244 */
5245 task_unlock(task);
5246 return(KERN_SUCCESS);
5247 }
5248
5249 task_set_memlimit_is_active(task, memlimit_is_active);
5250 task_set_memlimit_is_fatal(task, memlimit_is_fatal);
5251
5252 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
5253 (ledger_amount_t)new_limit_mb << 20, PHYS_FOOTPRINT_WARNING_LEVEL);
5254
5255 if (task == current_task()) {
5256 ledger_check_new_balance(current_thread(), task->ledger,
5257 task_ledgers.phys_footprint);
5258 }
5259
5260 task_unlock(task);
5261
5262 return (KERN_SUCCESS);
5263 }
5264
5265 kern_return_t
5266 task_get_phys_footprint_limit(
5267 task_t task,
5268 int *limit_mb)
5269 {
5270 ledger_amount_t limit;
5271
5272 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &limit);
5273 /*
5274 * Check that limit >> 20 will not give an "unexpected" signed, 32-bit
5275 * result. There are, however, implicit assumptions that -1 mb limit
5276 * equates to LEDGER_LIMIT_INFINITY.
5277 */
5278 assert(((limit & 0xFFF0000000000000LL) == 0) || (limit == LEDGER_LIMIT_INFINITY));
5279 *limit_mb = (int)(limit >> 20);
5280
5281 return (KERN_SUCCESS);
5282 }
5283 #else /* CONFIG_MEMORYSTATUS */
5284 kern_return_t
5285 task_set_phys_footprint_limit(
5286 __unused task_t task,
5287 __unused int new_limit_mb,
5288 __unused int *old_limit_mb)
5289 {
5290 return (KERN_FAILURE);
5291 }
5292
5293 kern_return_t
5294 task_get_phys_footprint_limit(
5295 __unused task_t task,
5296 __unused int *limit_mb)
5297 {
5298 return (KERN_FAILURE);
5299 }
5300 #endif /* CONFIG_MEMORYSTATUS */
5301
5302 /*
5303 * We need to export some functions to other components that
5304 * are currently implemented in macros within the osfmk
5305 * component. Just export them as functions of the same name.
5306 */
5307 boolean_t is_kerneltask(task_t t)
5308 {
5309 if (t == kernel_task)
5310 return (TRUE);
5311
5312 return (FALSE);
5313 }
5314
5315 boolean_t is_corpsetask(task_t t)
5316 {
5317 return (task_is_a_corpse(t));
5318 }
5319
5320 #undef current_task
5321 task_t current_task(void);
5322 task_t current_task(void)
5323 {
5324 return (current_task_fast());
5325 }
5326
5327 #undef task_reference
5328 void task_reference(task_t task);
5329 void
5330 task_reference(
5331 task_t task)
5332 {
5333 if (task != TASK_NULL)
5334 task_reference_internal(task);
5335 }
5336
5337 /* defined in bsd/kern/kern_prot.c */
5338 extern int get_audit_token_pid(audit_token_t *audit_token);
5339
5340 int task_pid(task_t task)
5341 {
5342 if (task)
5343 return get_audit_token_pid(&task->audit_token);
5344 return -1;
5345 }
5346
5347
5348 /*
5349 * This routine finds a thread in a task by its unique id
5350 * Returns a referenced thread or THREAD_NULL if the thread was not found
5351 *
5352 * TODO: This is super inefficient - it's an O(threads in task) list walk!
5353 * We should make a tid hash, or transition all tid clients to thread ports
5354 *
5355 * Precondition: No locks held (will take task lock)
5356 */
5357 thread_t
5358 task_findtid(task_t task, uint64_t tid)
5359 {
5360 thread_t self = current_thread();
5361 thread_t found_thread = THREAD_NULL;
5362 thread_t iter_thread = THREAD_NULL;
5363
5364 /* Short-circuit the lookup if we're looking up ourselves */
5365 if (tid == self->thread_id || tid == TID_NULL) {
5366 assert(self->task == task);
5367
5368 thread_reference(self);
5369
5370 return self;
5371 }
5372
5373 task_lock(task);
5374
5375 queue_iterate(&task->threads, iter_thread, thread_t, task_threads) {
5376 if (iter_thread->thread_id == tid) {
5377 found_thread = iter_thread;
5378 thread_reference(found_thread);
5379 break;
5380 }
5381 }
5382
5383 task_unlock(task);
5384
5385 return (found_thread);
5386 }
5387
5388 int pid_from_task(task_t task)
5389 {
5390 int pid = -1;
5391
5392 if (task->bsd_info) {
5393 pid = proc_pid(task->bsd_info);
5394 } else {
5395 pid = task_pid(task);
5396 }
5397
5398 return pid;
5399 }
5400
5401 /*
5402 * Control the CPU usage monitor for a task.
5403 */
5404 kern_return_t
5405 task_cpu_usage_monitor_ctl(task_t task, uint32_t *flags)
5406 {
5407 int error = KERN_SUCCESS;
5408
5409 if (*flags & CPUMON_MAKE_FATAL) {
5410 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_CPUMON;
5411 } else {
5412 error = KERN_INVALID_ARGUMENT;
5413 }
5414
5415 return error;
5416 }
5417
5418 /*
5419 * Control the wakeups monitor for a task.
5420 */
5421 kern_return_t
5422 task_wakeups_monitor_ctl(task_t task, uint32_t *flags, int32_t *rate_hz)
5423 {
5424 ledger_t ledger = task->ledger;
5425
5426 task_lock(task);
5427 if (*flags & WAKEMON_GET_PARAMS) {
5428 ledger_amount_t limit;
5429 uint64_t period;
5430
5431 ledger_get_limit(ledger, task_ledgers.interrupt_wakeups, &limit);
5432 ledger_get_period(ledger, task_ledgers.interrupt_wakeups, &period);
5433
5434 if (limit != LEDGER_LIMIT_INFINITY) {
5435 /*
5436 * An active limit means the wakeups monitor is enabled.
5437 */
5438 *rate_hz = (int32_t)(limit / (int64_t)(period / NSEC_PER_SEC));
5439 *flags = WAKEMON_ENABLE;
5440 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
5441 *flags |= WAKEMON_MAKE_FATAL;
5442 }
5443 } else {
5444 *flags = WAKEMON_DISABLE;
5445 *rate_hz = -1;
5446 }
5447
5448 /*
5449 * If WAKEMON_GET_PARAMS is present in flags, all other flags are ignored.
5450 */
5451 task_unlock(task);
5452 return KERN_SUCCESS;
5453 }
5454
5455 if (*flags & WAKEMON_ENABLE) {
5456 if (*flags & WAKEMON_SET_DEFAULTS) {
5457 *rate_hz = task_wakeups_monitor_rate;
5458 }
5459
5460 #ifndef CONFIG_NOMONITORS
5461 if (*flags & WAKEMON_MAKE_FATAL) {
5462 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
5463 }
5464 #endif /* CONFIG_NOMONITORS */
5465
5466 if (*rate_hz <= 0) {
5467 task_unlock(task);
5468 return KERN_INVALID_ARGUMENT;
5469 }
5470
5471 #ifndef CONFIG_NOMONITORS
5472 ledger_set_limit(ledger, task_ledgers.interrupt_wakeups, *rate_hz * task_wakeups_monitor_interval,
5473 task_wakeups_monitor_ustackshots_trigger_pct);
5474 ledger_set_period(ledger, task_ledgers.interrupt_wakeups, task_wakeups_monitor_interval * NSEC_PER_SEC);
5475 ledger_enable_callback(ledger, task_ledgers.interrupt_wakeups);
5476 #endif /* CONFIG_NOMONITORS */
5477 } else if (*flags & WAKEMON_DISABLE) {
5478 /*
5479 * Caller wishes to disable wakeups monitor on the task.
5480 *
5481 * Disable telemetry if it was triggered by the wakeups monitor, and
5482 * remove the limit & callback on the wakeups ledger entry.
5483 */
5484 #if CONFIG_TELEMETRY
5485 telemetry_task_ctl_locked(task, TF_WAKEMON_WARNING, 0);
5486 #endif
5487 ledger_disable_refill(ledger, task_ledgers.interrupt_wakeups);
5488 ledger_disable_callback(ledger, task_ledgers.interrupt_wakeups);
5489 }
5490
5491 task_unlock(task);
5492 return KERN_SUCCESS;
5493 }
5494
5495 void
5496 task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1)
5497 {
5498 if (warning == LEDGER_WARNING_ROSE_ABOVE) {
5499 #if CONFIG_TELEMETRY
5500 /*
5501 * This task is in danger of violating the wakeups monitor. Enable telemetry on this task
5502 * so there are micro-stackshots available if and when EXC_RESOURCE is triggered.
5503 */
5504 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 1);
5505 #endif
5506 return;
5507 }
5508
5509 #if CONFIG_TELEMETRY
5510 /*
5511 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
5512 * exceeded the limit, turn telemetry off for the task.
5513 */
5514 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 0);
5515 #endif
5516
5517 if (warning == 0) {
5518 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS();
5519 }
5520 }
5521
5522 void __attribute__((noinline))
5523 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void)
5524 {
5525 task_t task = current_task();
5526 int pid = 0;
5527 const char *procname = "unknown";
5528 boolean_t fatal;
5529 kern_return_t kr;
5530 #ifdef EXC_RESOURCE_MONITORS
5531 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
5532 #endif /* EXC_RESOURCE_MONITORS */
5533 struct ledger_entry_info lei;
5534
5535 #ifdef MACH_BSD
5536 pid = proc_selfpid();
5537 if (task->bsd_info != NULL)
5538 procname = proc_name_address(current_task()->bsd_info);
5539 #endif
5540
5541 ledger_get_entry_info(task->ledger, task_ledgers.interrupt_wakeups, &lei);
5542
5543 /*
5544 * Disable the exception notification so we don't overwhelm
5545 * the listener with an endless stream of redundant exceptions.
5546 * TODO: detect whether another thread is already reporting the violation.
5547 */
5548 uint32_t flags = WAKEMON_DISABLE;
5549 task_wakeups_monitor_ctl(task, &flags, NULL);
5550
5551 fatal = task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
5552 trace_resource_violation(RMON_CPUWAKES_VIOLATED, &lei);
5553 os_log(OS_LOG_DEFAULT, "process %s[%d] caught waking the CPU %llu times "
5554 "over ~%llu seconds, averaging %llu wakes / second and "
5555 "violating a %slimit of %llu wakes over %llu seconds.\n",
5556 procname, pid,
5557 lei.lei_balance, lei.lei_last_refill / NSEC_PER_SEC,
5558 lei.lei_last_refill == 0 ? 0 :
5559 (NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill),
5560 fatal ? "FATAL " : "",
5561 lei.lei_limit, lei.lei_refill_period / NSEC_PER_SEC);
5562
5563 kr = send_resource_violation(send_cpu_wakes_violation, task, &lei,
5564 fatal ? kRNFatalLimitFlag : 0);
5565 if (kr) {
5566 printf("send_resource_violation(CPU wakes, ...): error %#x\n", kr);
5567 }
5568
5569 #ifdef EXC_RESOURCE_MONITORS
5570 if (disable_exc_resource) {
5571 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
5572 "supressed by a boot-arg\n", procname, pid);
5573 return;
5574 }
5575 if (audio_active) {
5576 os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
5577 "supressed due to audio playback\n", procname, pid);
5578 return;
5579 }
5580 if (lei.lei_last_refill == 0) {
5581 os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
5582 "supressed due to lei.lei_last_refill = 0 \n", procname, pid);
5583 }
5584
5585 code[0] = code[1] = 0;
5586 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_WAKEUPS);
5587 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_WAKEUPS_MONITOR);
5588 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_PERMITTED(code[0],
5589 NSEC_PER_SEC * lei.lei_limit / lei.lei_refill_period);
5590 EXC_RESOURCE_CPUMONITOR_ENCODE_OBSERVATION_INTERVAL(code[0],
5591 lei.lei_last_refill);
5592 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_OBSERVED(code[1],
5593 NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill);
5594 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
5595 #endif /* EXC_RESOURCE_MONITORS */
5596
5597 if (fatal) {
5598 task_terminate_internal(task);
5599 }
5600 }
5601
5602 static boolean_t
5603 global_update_logical_writes(int64_t io_delta)
5604 {
5605 int64_t old_count, new_count;
5606 boolean_t needs_telemetry;
5607
5608 do {
5609 new_count = old_count = global_logical_writes_count;
5610 new_count += io_delta;
5611 if (new_count >= io_telemetry_limit) {
5612 new_count = 0;
5613 needs_telemetry = TRUE;
5614 } else {
5615 needs_telemetry = FALSE;
5616 }
5617 } while(!OSCompareAndSwap64(old_count, new_count, &global_logical_writes_count));
5618 return needs_telemetry;
5619 }
5620
5621 void task_update_logical_writes(task_t task, uint32_t io_size, int flags, void *vp)
5622 {
5623 int64_t io_delta = 0;
5624 boolean_t needs_telemetry = FALSE;
5625
5626 if ((!task) || (!io_size) || (!vp))
5627 return;
5628
5629 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_DATA_WRITE)) | DBG_FUNC_NONE,
5630 task_pid(task), io_size, flags, (uintptr_t)VM_KERNEL_ADDRPERM(vp), 0);
5631 DTRACE_IO4(logical_writes, struct task *, task, uint32_t, io_size, int, flags, vnode *, vp);
5632 switch(flags) {
5633 case TASK_WRITE_IMMEDIATE:
5634 OSAddAtomic64(io_size, (SInt64 *)&(task->task_immediate_writes));
5635 ledger_credit(task->ledger, task_ledgers.logical_writes, io_size);
5636 break;
5637 case TASK_WRITE_DEFERRED:
5638 OSAddAtomic64(io_size, (SInt64 *)&(task->task_deferred_writes));
5639 ledger_credit(task->ledger, task_ledgers.logical_writes, io_size);
5640 break;
5641 case TASK_WRITE_INVALIDATED:
5642 OSAddAtomic64(io_size, (SInt64 *)&(task->task_invalidated_writes));
5643 ledger_debit(task->ledger, task_ledgers.logical_writes, io_size);
5644 break;
5645 case TASK_WRITE_METADATA:
5646 OSAddAtomic64(io_size, (SInt64 *)&(task->task_metadata_writes));
5647 ledger_credit(task->ledger, task_ledgers.logical_writes, io_size);
5648 break;
5649 }
5650
5651 io_delta = (flags == TASK_WRITE_INVALIDATED) ? ((int64_t)io_size * -1ll) : ((int64_t)io_size);
5652 if (io_telemetry_limit != 0) {
5653 /* If io_telemetry_limit is 0, disable global updates and I/O telemetry */
5654 needs_telemetry = global_update_logical_writes(io_delta);
5655 if (needs_telemetry) {
5656 act_set_io_telemetry_ast(current_thread());
5657 }
5658 }
5659 }
5660
5661 /*
5662 * Control the I/O monitor for a task.
5663 */
5664 kern_return_t
5665 task_io_monitor_ctl(task_t task, uint32_t *flags)
5666 {
5667 ledger_t ledger = task->ledger;
5668
5669 task_lock(task);
5670 if (*flags & IOMON_ENABLE) {
5671 /* Configure the physical I/O ledger */
5672 ledger_set_limit(ledger, task_ledgers.physical_writes, (task_iomon_limit_mb * 1024 * 1024), 0);
5673 ledger_set_period(ledger, task_ledgers.physical_writes, (task_iomon_interval_secs * NSEC_PER_SEC));
5674
5675 /* Configure the logical I/O ledger */
5676 ledger_set_limit(ledger, task_ledgers.logical_writes, (task_iomon_limit_mb * 1024 * 1024), 0);
5677 ledger_set_period(ledger, task_ledgers.logical_writes, (task_iomon_interval_secs * NSEC_PER_SEC));
5678
5679 } else if (*flags & IOMON_DISABLE) {
5680 /*
5681 * Caller wishes to disable I/O monitor on the task.
5682 */
5683 ledger_disable_refill(ledger, task_ledgers.physical_writes);
5684 ledger_disable_callback(ledger, task_ledgers.physical_writes);
5685 ledger_disable_refill(ledger, task_ledgers.logical_writes);
5686 ledger_disable_callback(ledger, task_ledgers.logical_writes);
5687 }
5688
5689 task_unlock(task);
5690 return KERN_SUCCESS;
5691 }
5692
5693 void
5694 task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1)
5695 {
5696 if (warning == 0) {
5697 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO((int)param0);
5698 }
5699 }
5700
5701 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor)
5702 {
5703 int pid = 0;
5704 task_t task = current_task();
5705 #ifdef EXC_RESOURCE_MONITORS
5706 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
5707 #endif /* EXC_RESOURCE_MONITORS */
5708 struct ledger_entry_info lei;
5709 kern_return_t kr;
5710
5711 #ifdef MACH_BSD
5712 pid = proc_selfpid();
5713 #endif
5714 /*
5715 * Get the ledger entry info. We need to do this before disabling the exception
5716 * to get correct values for all fields.
5717 */
5718 switch(flavor) {
5719 case FLAVOR_IO_PHYSICAL_WRITES:
5720 ledger_get_entry_info(task->ledger, task_ledgers.physical_writes, &lei);
5721 break;
5722 case FLAVOR_IO_LOGICAL_WRITES:
5723 ledger_get_entry_info(task->ledger, task_ledgers.logical_writes, &lei);
5724 break;
5725 }
5726
5727
5728 /*
5729 * Disable the exception notification so we don't overwhelm
5730 * the listener with an endless stream of redundant exceptions.
5731 * TODO: detect whether another thread is already reporting the violation.
5732 */
5733 uint32_t flags = IOMON_DISABLE;
5734 task_io_monitor_ctl(task, &flags);
5735
5736 if (flavor == FLAVOR_IO_LOGICAL_WRITES) {
5737 trace_resource_violation(RMON_LOGWRITES_VIOLATED, &lei);
5738 }
5739 os_log(OS_LOG_DEFAULT, "process [%d] caught causing excessive I/O (flavor: %d). Task I/O: %lld MB. [Limit : %lld MB per %lld secs]\n",
5740 pid, flavor, (lei.lei_balance / (1024 * 1024)), (lei.lei_limit / (1024 * 1024)), (lei.lei_refill_period / NSEC_PER_SEC));
5741
5742 kr = send_resource_violation(send_disk_writes_violation, task, &lei, kRNFlagsNone);
5743 if (kr) {
5744 printf("send_resource_violation(disk_writes, ...): error %#x\n", kr);
5745 }
5746
5747 #ifdef EXC_RESOURCE_MONITORS
5748 code[0] = code[1] = 0;
5749 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_IO);
5750 EXC_RESOURCE_ENCODE_FLAVOR(code[0], flavor);
5751 EXC_RESOURCE_IO_ENCODE_INTERVAL(code[0], (lei.lei_refill_period / NSEC_PER_SEC));
5752 EXC_RESOURCE_IO_ENCODE_LIMIT(code[0], (lei.lei_limit / (1024 * 1024)));
5753 EXC_RESOURCE_IO_ENCODE_OBSERVED(code[1], (lei.lei_balance / (1024 * 1024)));
5754 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
5755 #endif /* EXC_RESOURCE_MONITORS */
5756 }
5757
5758 /* Placeholders for the task set/get voucher interfaces */
5759 kern_return_t
5760 task_get_mach_voucher(
5761 task_t task,
5762 mach_voucher_selector_t __unused which,
5763 ipc_voucher_t *voucher)
5764 {
5765 if (TASK_NULL == task)
5766 return KERN_INVALID_TASK;
5767
5768 *voucher = NULL;
5769 return KERN_SUCCESS;
5770 }
5771
5772 kern_return_t
5773 task_set_mach_voucher(
5774 task_t task,
5775 ipc_voucher_t __unused voucher)
5776 {
5777 if (TASK_NULL == task)
5778 return KERN_INVALID_TASK;
5779
5780 return KERN_SUCCESS;
5781 }
5782
5783 kern_return_t
5784 task_swap_mach_voucher(
5785 task_t task,
5786 ipc_voucher_t new_voucher,
5787 ipc_voucher_t *in_out_old_voucher)
5788 {
5789 if (TASK_NULL == task)
5790 return KERN_INVALID_TASK;
5791
5792 *in_out_old_voucher = new_voucher;
5793 return KERN_SUCCESS;
5794 }
5795
5796 void task_set_gpu_denied(task_t task, boolean_t denied)
5797 {
5798 task_lock(task);
5799
5800 if (denied) {
5801 task->t_flags |= TF_GPU_DENIED;
5802 } else {
5803 task->t_flags &= ~TF_GPU_DENIED;
5804 }
5805
5806 task_unlock(task);
5807 }
5808
5809 boolean_t task_is_gpu_denied(task_t task)
5810 {
5811 /* We don't need the lock to read this flag */
5812 return (task->t_flags & TF_GPU_DENIED) ? TRUE : FALSE;
5813 }
5814
5815
5816 uint64_t get_task_memory_region_count(task_t task)
5817 {
5818 vm_map_t map;
5819 map = (task == kernel_task) ? kernel_map: task->map;
5820 return((uint64_t)get_map_nentries(map));
5821 }
5822
5823 static void
5824 kdebug_trace_dyld_internal(uint32_t base_code,
5825 struct dyld_kernel_image_info *info)
5826 {
5827 static_assert(sizeof(info->uuid) >= 16);
5828
5829 #if defined(__LP64__)
5830 uint64_t *uuid = (uint64_t *)&(info->uuid);
5831
5832 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
5833 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code), uuid[0],
5834 uuid[1], info->load_addr,
5835 (uint64_t)info->fsid.val[0] | ((uint64_t)info->fsid.val[1] << 32),
5836 0);
5837 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
5838 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 1),
5839 (uint64_t)info->fsobjid.fid_objno |
5840 ((uint64_t)info->fsobjid.fid_generation << 32),
5841 0, 0, 0, 0);
5842 #else /* defined(__LP64__) */
5843 uint32_t *uuid = (uint32_t *)&(info->uuid);
5844
5845 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
5846 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 2), uuid[0],
5847 uuid[1], uuid[2], uuid[3], 0);
5848 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
5849 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 3),
5850 (uint32_t)info->load_addr, info->fsid.val[0], info->fsid.val[1],
5851 info->fsobjid.fid_objno, 0);
5852 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
5853 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 4),
5854 info->fsobjid.fid_generation, 0, 0, 0, 0);
5855 #endif /* !defined(__LP64__) */
5856 }
5857
5858 static kern_return_t
5859 kdebug_trace_dyld(task_t task, uint32_t base_code,
5860 vm_map_copy_t infos_copy, mach_msg_type_number_t infos_len)
5861 {
5862 kern_return_t kr;
5863 dyld_kernel_image_info_array_t infos;
5864 vm_map_offset_t map_data;
5865 vm_offset_t data;
5866
5867 if (!infos_copy) {
5868 return KERN_INVALID_ADDRESS;
5869 }
5870
5871 if (!kdebug_enable ||
5872 !kdebug_debugid_enabled(KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, 0)))
5873 {
5874 vm_map_copy_discard(infos_copy);
5875 return KERN_SUCCESS;
5876 }
5877
5878 if (task == NULL || task != current_task()) {
5879 return KERN_INVALID_TASK;
5880 }
5881
5882 kr = vm_map_copyout(ipc_kernel_map, &map_data, (vm_map_copy_t)infos_copy);
5883 if (kr != KERN_SUCCESS) {
5884 return kr;
5885 }
5886
5887 infos = CAST_DOWN(dyld_kernel_image_info_array_t, map_data);
5888
5889 for (mach_msg_type_number_t i = 0; i < infos_len; i++) {
5890 kdebug_trace_dyld_internal(base_code, &(infos[i]));
5891 }
5892
5893 data = CAST_DOWN(vm_offset_t, map_data);
5894 mach_vm_deallocate(ipc_kernel_map, data, infos_len * sizeof(infos[0]));
5895 return KERN_SUCCESS;
5896 }
5897
5898 kern_return_t
5899 task_register_dyld_image_infos(task_t task,
5900 dyld_kernel_image_info_array_t infos_copy,
5901 mach_msg_type_number_t infos_len)
5902 {
5903 return kdebug_trace_dyld(task, DBG_DYLD_UUID_MAP_A,
5904 (vm_map_copy_t)infos_copy, infos_len);
5905 }
5906
5907 kern_return_t
5908 task_unregister_dyld_image_infos(task_t task,
5909 dyld_kernel_image_info_array_t infos_copy,
5910 mach_msg_type_number_t infos_len)
5911 {
5912 return kdebug_trace_dyld(task, DBG_DYLD_UUID_UNMAP_A,
5913 (vm_map_copy_t)infos_copy, infos_len);
5914 }
5915
5916 kern_return_t
5917 task_get_dyld_image_infos(__unused task_t task,
5918 __unused dyld_kernel_image_info_array_t * dyld_images,
5919 __unused mach_msg_type_number_t * dyld_imagesCnt)
5920 {
5921 return KERN_NOT_SUPPORTED;
5922 }
5923
5924 kern_return_t
5925 task_register_dyld_shared_cache_image_info(task_t task,
5926 dyld_kernel_image_info_t cache_img,
5927 __unused boolean_t no_cache,
5928 __unused boolean_t private_cache)
5929 {
5930 if (task == NULL || task != current_task()) {
5931 return KERN_INVALID_TASK;
5932 }
5933
5934 kdebug_trace_dyld_internal(DBG_DYLD_UUID_SHARED_CACHE_A, &cache_img);
5935 return KERN_SUCCESS;
5936 }
5937
5938 kern_return_t
5939 task_register_dyld_set_dyld_state(__unused task_t task,
5940 __unused uint8_t dyld_state)
5941 {
5942 return KERN_NOT_SUPPORTED;
5943 }
5944
5945 kern_return_t
5946 task_register_dyld_get_process_state(__unused task_t task,
5947 __unused dyld_kernel_process_info_t * dyld_process_state)
5948 {
5949 return KERN_NOT_SUPPORTED;
5950 }
5951
5952 kern_return_t
5953 task_inspect(task_inspect_t task_insp, task_inspect_flavor_t flavor,
5954 task_inspect_info_t info_out, mach_msg_type_number_t *size_in_out)
5955 {
5956 #if MONOTONIC
5957 task_t task = (task_t)task_insp;
5958 kern_return_t kr = KERN_SUCCESS;
5959 mach_msg_type_number_t size;
5960
5961 if (task == TASK_NULL) {
5962 return KERN_INVALID_ARGUMENT;
5963 }
5964
5965 size = *size_in_out;
5966
5967 switch (flavor) {
5968 case TASK_INSPECT_BASIC_COUNTS: {
5969 struct task_inspect_basic_counts *bc;
5970 uint64_t task_counts[MT_CORE_NFIXED];
5971
5972 if (size < TASK_INSPECT_BASIC_COUNTS_COUNT) {
5973 kr = KERN_INVALID_ARGUMENT;
5974 break;
5975 }
5976
5977 mt_fixed_task_counts(task, task_counts);
5978 bc = (struct task_inspect_basic_counts *)info_out;
5979 #ifdef MT_CORE_INSTRS
5980 bc->instructions = task_counts[MT_CORE_INSTRS];
5981 #else /* defined(MT_CORE_INSTRS) */
5982 bc->instructions = 0;
5983 #endif /* !defined(MT_CORE_INSTRS) */
5984 bc->cycles = task_counts[MT_CORE_CYCLES];
5985 size = TASK_INSPECT_BASIC_COUNTS_COUNT;
5986 break;
5987 }
5988 default:
5989 kr = KERN_INVALID_ARGUMENT;
5990 break;
5991 }
5992
5993 if (kr == KERN_SUCCESS) {
5994 *size_in_out = size;
5995 }
5996 return kr;
5997 #else /* MONOTONIC */
5998 #pragma unused(task_insp, flavor, info_out, size_in_out)
5999 return KERN_NOT_SUPPORTED;
6000 #endif /* !MONOTONIC */
6001 }
6002
6003 #if CONFIG_SECLUDED_MEMORY
/*
 * Number of tasks whose task_can_use_secluded_mem flag is currently set.
 * Adjusted atomically whenever that flag changes on a task.
 */
int num_tasks_can_use_secluded_mem = 0;
6005
6006 void
6007 task_set_can_use_secluded_mem(
6008 task_t task,
6009 boolean_t can_use_secluded_mem)
6010 {
6011 if (!task->task_could_use_secluded_mem) {
6012 return;
6013 }
6014 task_lock(task);
6015 task_set_can_use_secluded_mem_locked(task, can_use_secluded_mem);
6016 task_unlock(task);
6017 }
6018
6019 void
6020 task_set_can_use_secluded_mem_locked(
6021 task_t task,
6022 boolean_t can_use_secluded_mem)
6023 {
6024 assert(task->task_could_use_secluded_mem);
6025 if (can_use_secluded_mem &&
6026 secluded_for_apps && /* global boot-arg */
6027 !task->task_can_use_secluded_mem) {
6028 assert(num_tasks_can_use_secluded_mem >= 0);
6029 OSAddAtomic(+1,
6030 (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
6031 task->task_can_use_secluded_mem = TRUE;
6032 } else if (!can_use_secluded_mem &&
6033 task->task_can_use_secluded_mem) {
6034 assert(num_tasks_can_use_secluded_mem > 0);
6035 OSAddAtomic(-1,
6036 (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
6037 task->task_can_use_secluded_mem = FALSE;
6038 }
6039 }
6040
6041 void
6042 task_set_could_use_secluded_mem(
6043 task_t task,
6044 boolean_t could_use_secluded_mem)
6045 {
6046 task->task_could_use_secluded_mem = could_use_secluded_mem;
6047 }
6048
6049 void
6050 task_set_could_also_use_secluded_mem(
6051 task_t task,
6052 boolean_t could_also_use_secluded_mem)
6053 {
6054 task->task_could_also_use_secluded_mem = could_also_use_secluded_mem;
6055 }
6056
6057 boolean_t
6058 task_can_use_secluded_mem(
6059 task_t task)
6060 {
6061 if (task->task_can_use_secluded_mem) {
6062 assert(task->task_could_use_secluded_mem);
6063 assert(num_tasks_can_use_secluded_mem > 0);
6064 return TRUE;
6065 }
6066 if (task->task_could_also_use_secluded_mem &&
6067 num_tasks_can_use_secluded_mem > 0) {
6068 assert(num_tasks_can_use_secluded_mem > 0);
6069 return TRUE;
6070 }
6071 return FALSE;
6072 }
6073
6074 boolean_t
6075 task_could_use_secluded_mem(
6076 task_t task)
6077 {
6078 return task->task_could_use_secluded_mem;
6079 }
6080 #endif /* CONFIG_SECLUDED_MEMORY */
6081
6082 queue_head_t *
6083 task_io_user_clients(task_t task)
6084 {
6085 return (&task->io_user_clients);
6086 }
6087
6088 void
6089 task_copy_fields_for_exec(task_t dst_task, task_t src_task)
6090 {
6091 dst_task->vtimers = src_task->vtimers;
6092 }