apple/xnu (xnu-4570.71.2): osfmk/kern/task.c
1 /*
2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_FREE_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 * File: kern/task.c
58 * Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub,
59 * David Black
60 *
61 * Task management primitives implementation.
62 */
63 /*
64 * Copyright (c) 1993 The University of Utah and
65 * the Computer Systems Laboratory (CSL). All rights reserved.
66 *
67 * Permission to use, copy, modify and distribute this software and its
68 * documentation is hereby granted, provided that both the copyright
69 * notice and this permission notice appear in all copies of the
70 * software, derivative works or modified versions, and any portions
71 * thereof, and that both notices appear in supporting documentation.
72 *
73 * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
74 * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
75 * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
76 *
77 * CSL requests users of this software to return to csl-dist@cs.utah.edu any
78 * improvements that they make and grant CSL redistribution rights.
79 *
80 */
81 /*
82 * NOTICE: This file was modified by McAfee Research in 2004 to introduce
83 * support for mandatory and extensible security protections. This notice
84 * is included in support of clause 2.2 (b) of the Apple Public License,
85 * Version 2.0.
86 * Copyright (c) 2005 SPARTA, Inc.
87 */
88
89 #include <mach/mach_types.h>
90 #include <mach/boolean.h>
91 #include <mach/host_priv.h>
92 #include <mach/machine/vm_types.h>
93 #include <mach/vm_param.h>
94 #include <mach/mach_vm.h>
95 #include <mach/semaphore.h>
96 #include <mach/task_info.h>
97 #include <mach/task_inspect.h>
98 #include <mach/task_special_ports.h>
99 #include <mach/sdt.h>
100
101 #include <ipc/ipc_importance.h>
102 #include <ipc/ipc_types.h>
103 #include <ipc/ipc_space.h>
104 #include <ipc/ipc_entry.h>
105 #include <ipc/ipc_hash.h>
106
107 #include <kern/kern_types.h>
108 #include <kern/mach_param.h>
109 #include <kern/misc_protos.h>
110 #include <kern/task.h>
111 #include <kern/thread.h>
112 #include <kern/coalition.h>
113 #include <kern/zalloc.h>
114 #include <kern/kalloc.h>
115 #include <kern/kern_cdata.h>
116 #include <kern/processor.h>
117 #include <kern/sched_prim.h> /* for thread_wakeup */
118 #include <kern/ipc_tt.h>
119 #include <kern/host.h>
120 #include <kern/clock.h>
121 #include <kern/timer.h>
122 #include <kern/assert.h>
123 #include <kern/sync_lock.h>
124 #include <kern/affinity.h>
125 #include <kern/exc_resource.h>
126 #include <kern/machine.h>
127 #include <kern/policy_internal.h>
128
129 #include <corpses/task_corpse.h>
130 #if CONFIG_TELEMETRY
131 #include <kern/telemetry.h>
132 #endif
133
134 #if MONOTONIC
135 #include <kern/monotonic.h>
136 #include <machine/monotonic.h>
137 #endif /* MONOTONIC */
138
139 #include <os/log.h>
140
141 #include <vm/pmap.h>
142 #include <vm/vm_map.h>
143 #include <vm/vm_kern.h> /* for kernel_map, ipc_kernel_map */
144 #include <vm/vm_pageout.h>
145 #include <vm/vm_protos.h>
146 #include <vm/vm_purgeable_internal.h>
147
148 #include <sys/resource.h>
149 #include <sys/signalvar.h> /* for coredump */
150
151 /*
152 * Exported interfaces
153 */
154
155 #include <mach/task_server.h>
156 #include <mach/mach_host_server.h>
157 #include <mach/host_security_server.h>
158 #include <mach/mach_port_server.h>
159
160 #include <vm/vm_shared_region.h>
161
162 #include <libkern/OSDebug.h>
163 #include <libkern/OSAtomic.h>
164
165 #if CONFIG_ATM
166 #include <atm/atm_internal.h>
167 #endif
168
169 #include <kern/sfi.h> /* picks up ledger.h */
170
171 #if CONFIG_MACF
172 #include <security/mac_mach_internal.h>
173 #endif
174
175 #if KPERF
176 extern int kpc_force_all_ctrs(task_t, int);
177 #endif
178
179 task_t kernel_task;
180 zone_t task_zone;
181 lck_attr_t task_lck_attr;
182 lck_grp_t task_lck_grp;
183 lck_grp_attr_t task_lck_grp_attr;
184
185 extern int exc_via_corpse_forking;
186 extern int corpse_for_fatal_memkill;
187
188 /* Flag set by core audio when audio is playing. Used to stifle EXC_RESOURCE generation when active. */
189 int audio_active = 0;
190
191 zinfo_usage_store_t tasks_tkm_private;
192 zinfo_usage_store_t tasks_tkm_shared;
193
194 /* A container to accumulate statistics for expired tasks */
195 expired_task_statistics_t dead_task_statistics;
196 lck_spin_t dead_task_statistics_lock;
197
198 ledger_template_t task_ledger_template = NULL;
199
200 struct _task_ledger_indices task_ledgers __attribute__((used)) =
201 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
202 #if !CONFIG_EMBEDDED
203 { 0 /* initialized at runtime */},
204 #endif /* !CONFIG_EMBEDDED */
205 -1, -1,
206 -1, -1,
207 -1, -1,
208 };
209
210 /* System sleep state */
211 boolean_t tasks_suspend_state;
212
213
214 void init_task_ledgers(void);
215 void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1);
216 void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1);
217 void task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1);
218 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void);
219 void __attribute__((noinline)) PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, boolean_t is_fatal);
220 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor);
221
222 kern_return_t task_suspend_internal(task_t);
223 kern_return_t task_resume_internal(task_t);
224 static kern_return_t task_start_halt_locked(task_t task, boolean_t should_mark_corpse);
225
226 extern kern_return_t iokit_task_terminate(task_t task);
227
228 extern kern_return_t exception_deliver(thread_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t, struct exception_action *, lck_mtx_t *);
229 extern void bsd_copythreadname(void *dst_uth, void *src_uth);
230 extern kern_return_t thread_resume(thread_t thread);
231
232 // Warn tasks when they hit 80% of their memory limit.
233 #define PHYS_FOOTPRINT_WARNING_LEVEL 80
234
235 #define TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT 150 /* wakeups per second */
236 #define TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL 300 /* in seconds. */
237
238 /*
239 * Level (in terms of percentage of the limit) at which the wakeups monitor triggers telemetry.
240 *
241 * (i.e., when the task's wakeup rate exceeds 70% of the limit, start taking user
242 * stack traces, aka micro-stackshots)
243 */
244 #define TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER 70
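/*
 * Worked example (illustrative, assuming the defaults above): with a limit of
 * 150 wakeups/sec and a 70% trigger, micro-stackshot telemetry would begin once
 * a task's observed wakeup rate crosses roughly 150 * 70 / 100 = 105 wakeups
 * per second over the monitored interval.
 */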
245
246 int task_wakeups_monitor_interval; /* In seconds. Time period over which wakeups rate is observed */
247 int task_wakeups_monitor_rate; /* In hz. Maximum allowable wakeups per task before EXC_RESOURCE is sent */
248
249 int task_wakeups_monitor_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
250
251 int disable_exc_resource; /* Global override to suppress EXC_RESOURCE for resource monitor violations. */
252
253 ledger_amount_t max_task_footprint = 0; /* Per-task limit on physical memory consumption in bytes */
254 int max_task_footprint_warning_level = 0; /* Per-task limit warning percentage */
255 int max_task_footprint_mb = 0; /* Per-task limit on physical memory consumption in megabytes */
256
257 /* I/O Monitor Limits */
258 #define IOMON_DEFAULT_LIMIT (20480ull) /* MB of logical/physical I/O */
259 #define IOMON_DEFAULT_INTERVAL (86400ull) /* in seconds */
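/*
 * For scale (derived from the defaults above): 20480 MB over an 86400-second
 * interval works out to roughly 20 GB of logical/physical I/O per 24-hour
 * window before the I/O monitor would fire.
 */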
260
261 uint64_t task_iomon_limit_mb; /* Per-task I/O monitor limit in MBs */
262 uint64_t task_iomon_interval_secs; /* Per-task I/O monitor interval in secs */
263
264 #define IO_TELEMETRY_DEFAULT_LIMIT (10ll * 1024ll * 1024ll)
265 int64_t io_telemetry_limit; /* Threshold to take a microstackshot (0 indicates I/O telemetry is turned off) */
266 int64_t global_logical_writes_count = 0; /* Global count for logical writes */
267 static boolean_t global_update_logical_writes(int64_t);
268
269 #if MACH_ASSERT
270 int pmap_ledgers_panic = 1;
271 #endif /* MACH_ASSERT */
272
273 int task_max = CONFIG_TASK_MAX; /* Max number of tasks */
274
275 #if CONFIG_COREDUMP
276 int hwm_user_cores = 0; /* high watermark violations generate user core files */
277 #endif
278
279 #ifdef MACH_BSD
280 extern void proc_getexecutableuuid(void *, unsigned char *, unsigned long);
281 extern int proc_pid(struct proc *p);
282 extern int proc_selfpid(void);
283 extern char *proc_name_address(struct proc *p);
284 extern uint64_t get_dispatchqueue_offset_from_proc(void *);
285 extern int kevent_proc_copy_uptrs(void *proc, uint64_t *buf, int bufsize);
286
287 #if CONFIG_MEMORYSTATUS
288 extern void proc_memstat_terminated(struct proc* p, boolean_t set);
289 extern void memorystatus_on_ledger_footprint_exceeded(int warning, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal);
290 extern void memorystatus_log_exception(const int max_footprint_mb, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal);
291 extern boolean_t memorystatus_allowed_vm_map_fork(task_t task);
292
293 #if DEVELOPMENT || DEBUG
294 extern void memorystatus_abort_vm_map_fork(task_t);
295 #endif
296
297 #endif /* CONFIG_MEMORYSTATUS */
298
299 #endif /* MACH_BSD */
300
301 /* Forwards */
302
303 static void task_hold_locked(task_t task);
304 static void task_wait_locked(task_t task, boolean_t until_not_runnable);
305 static void task_release_locked(task_t task);
306
307 static void task_synchronizer_destroy_all(task_t task);
308
309 void
310 task_backing_store_privileged(
311 task_t task)
312 {
313 task_lock(task);
314 task->priv_flags |= VM_BACKING_STORE_PRIV;
315 task_unlock(task);
316 return;
317 }
318
319
320 void
321 task_set_64bit(
322 task_t task,
323 boolean_t is64bit)
324 {
325 #if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
326 thread_t thread;
327 #endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */
328
329 task_lock(task);
330
331 if (is64bit) {
332 if (task_has_64BitAddr(task))
333 goto out;
334 task_set_64BitAddr(task);
335 } else {
336 if ( !task_has_64BitAddr(task))
337 goto out;
338 task_clear_64BitAddr(task);
339 }
340 /* FIXME: On x86, the thread save state flavor can diverge from the
341 * task's 64-bit feature flag due to the 32-bit/64-bit register save
342 * state dichotomy. Since we can be pre-empted in this interval,
343 * certain routines may observe the thread as being in an inconsistent
344 * state with respect to its task's 64-bitness.
345 */
346
347 #if defined(__x86_64__) || defined(__arm64__)
348 queue_iterate(&task->threads, thread, thread_t, task_threads) {
349 thread_mtx_lock(thread);
350 machine_thread_switch_addrmode(thread);
351 thread_mtx_unlock(thread);
352
353 #if defined(__arm64__)
354 /* specifically, if running on H9 */
355 if (thread == current_thread()) {
356 uint64_t arg1, arg2;
357 int urgency;
358 spl_t spl = splsched();
359 /*
360 * This call reports that the current thread changed its 32-bit-ness.
361 * No other threads were on core when the 32-bit-ness changed, but
362 * current_thread() is on core, so the 32-bit-ness reported by the previous
363 * call to machine_thread_going_on_core() is now stale.
364 *
365 * This is needed for bring-up; a different callback should be used
366 * in the future.
367 *
368 * TODO: Remove this callout when we no longer support 32-bit code on H9
369 */
370 thread_lock(thread);
371 urgency = thread_get_urgency(thread, &arg1, &arg2);
372 machine_thread_going_on_core(thread, urgency, 0, 0, mach_approximate_time());
373 thread_unlock(thread);
374 splx(spl);
375 }
376 #endif /* defined(__arm64__) */
377 }
378 #endif /* defined(__x86_64__) || defined(__arm64__) */
379
380 out:
381 task_unlock(task);
382 }
383
384 void
385 task_set_platform_binary(
386 task_t task,
387 boolean_t is_platform)
388 {
389 task_lock(task);
390 if (is_platform) {
391 task->t_flags |= TF_PLATFORM;
392 } else {
393 task->t_flags &= ~(TF_PLATFORM);
394 }
395 task_unlock(task);
396 }
397
398 /*
399 * Set or clear the per-task TF_CA_CLIENT_WI flag according to the specified argument.
400 * Returns "false" if the flag was already set when attempting to set it, and "true" otherwise.
401 */
402 bool
403 task_set_ca_client_wi(
404 task_t task,
405 boolean_t set_or_clear)
406 {
407 bool ret = true;
408 task_lock(task);
409 if (set_or_clear) {
410 /* Tasks can have only one CA_CLIENT work interval */
411 if (task->t_flags & TF_CA_CLIENT_WI)
412 ret = false;
413 else
414 task->t_flags |= TF_CA_CLIENT_WI;
415 } else {
416 task->t_flags &= ~TF_CA_CLIENT_WI;
417 }
418 task_unlock(task);
419 return ret;
420 }
421
422 void
423 task_set_dyld_info(
424 task_t task,
425 mach_vm_address_t addr,
426 mach_vm_size_t size)
427 {
428 task_lock(task);
429 task->all_image_info_addr = addr;
430 task->all_image_info_size = size;
431 task_unlock(task);
432 }
433
434 void
435 task_atm_reset(__unused task_t task) {
436
437 #if CONFIG_ATM
438 if (task->atm_context != NULL) {
439 atm_task_descriptor_destroy(task->atm_context);
440 task->atm_context = NULL;
441 }
442 #endif
443
444 }
445
446 void
447 task_bank_reset(__unused task_t task) {
448
449 if (task->bank_context != NULL) {
450 bank_task_destroy(task);
451 }
452 }
453
454 /*
455 * NOTE: This should only be called when the P_LINTRANSIT
456 * flag is set (the proc_trans lock is held) on the
457 * proc associated with the task.
458 */
459 void
460 task_bank_init(__unused task_t task) {
461
462 if (task->bank_context != NULL) {
463 panic("Task bank init called with non null bank context for task: %p and bank_context: %p", task, task->bank_context);
464 }
465 bank_task_initialize(task);
466 }
467
468 void
469 task_set_did_exec_flag(task_t task)
470 {
471 task->t_procflags |= TPF_DID_EXEC;
472 }
473
474 void
475 task_clear_exec_copy_flag(task_t task)
476 {
477 task->t_procflags &= ~TPF_EXEC_COPY;
478 }
479
480 /*
481 * This wait event is t_procflags instead of t_flags because t_flags is volatile
482 *
483 * TODO: store the flags in the same place as the event
484 * rdar://problem/28501994
485 */
486 event_t
487 task_get_return_wait_event(task_t task)
488 {
489 return (event_t)&task->t_procflags;
490 }
491
492 void
493 task_clear_return_wait(task_t task)
494 {
495 task_lock(task);
496
497 task->t_flags &= ~TF_LRETURNWAIT;
498
499 if (task->t_flags & TF_LRETURNWAITER) {
500 thread_wakeup(task_get_return_wait_event(task));
501 task->t_flags &= ~TF_LRETURNWAITER;
502 }
503
504 task_unlock(task);
505 }
506
507 void __attribute__((noreturn))
508 task_wait_to_return(void)
509 {
510 task_t task;
511
512 task = current_task();
513 task_lock(task);
514
515 if (task->t_flags & TF_LRETURNWAIT) {
516 do {
517 task->t_flags |= TF_LRETURNWAITER;
518 assert_wait(task_get_return_wait_event(task), THREAD_UNINT);
519 task_unlock(task);
520
521 thread_block(THREAD_CONTINUE_NULL);
522
523 task_lock(task);
524 } while (task->t_flags & TF_LRETURNWAIT);
525 }
526
527 task_unlock(task);
528
529 thread_bootstrap_return();
530 }
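/*
 * Sketch of the handshake implemented above (as read from the code): a task
 * created with TF_LRETURNWAIT parks in task_wait_to_return(), setting
 * TF_LRETURNWAITER and blocking on task_get_return_wait_event() until another
 * context calls task_clear_return_wait(), which clears TF_LRETURNWAIT and
 * wakes any waiter; the blocked thread then proceeds via
 * thread_bootstrap_return().
 */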
531
532 #ifdef CONFIG_32BIT_TELEMETRY
533 boolean_t
534 task_consume_32bit_log_flag(task_t task)
535 {
536 if ((task->t_procflags & TPF_LOG_32BIT_TELEMETRY) != 0) {
537 task->t_procflags &= ~TPF_LOG_32BIT_TELEMETRY;
538 return TRUE;
539 } else {
540 return FALSE;
541 }
542 }
543
544 void
545 task_set_32bit_log_flag(task_t task)
546 {
547 task->t_procflags |= TPF_LOG_32BIT_TELEMETRY;
548 }
549 #endif /* CONFIG_32BIT_TELEMETRY */
550
551 boolean_t
552 task_is_exec_copy(task_t task)
553 {
554 return task_is_exec_copy_internal(task);
555 }
556
557 boolean_t
558 task_did_exec(task_t task)
559 {
560 return task_did_exec_internal(task);
561 }
562
563 boolean_t
564 task_is_active(task_t task)
565 {
566 return task->active;
567 }
568
569 boolean_t
570 task_is_halting(task_t task)
571 {
572 return task->halting;
573 }
574
575 #if TASK_REFERENCE_LEAK_DEBUG
576 #include <kern/btlog.h>
577
578 static btlog_t *task_ref_btlog;
579 #define TASK_REF_OP_INCR 0x1
580 #define TASK_REF_OP_DECR 0x2
581
582 #define TASK_REF_NUM_RECORDS 100000
583 #define TASK_REF_BTDEPTH 7
584
585 void
586 task_reference_internal(task_t task)
587 {
588 void * bt[TASK_REF_BTDEPTH];
589 int numsaved = 0;
590
591 numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
592
593 (void)hw_atomic_add(&(task)->ref_count, 1);
594 btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_INCR,
595 bt, numsaved);
596 }
597
598 uint32_t
599 task_deallocate_internal(task_t task)
600 {
601 void * bt[TASK_REF_BTDEPTH];
602 int numsaved = 0;
603
604 numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
605
606 btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_DECR,
607 bt, numsaved);
608 return hw_atomic_sub(&(task)->ref_count, 1);
609 }
610
611 #endif /* TASK_REFERENCE_LEAK_DEBUG */
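/*
 * Reading of the debug machinery above: each task_reference_internal() /
 * task_deallocate_internal() call records a TASK_REF_OP_INCR / TASK_REF_OP_DECR
 * entry with up to TASK_REF_BTDEPTH backtrace frames in task_ref_btlog, so a
 * leaked task reference can be traced back to its call site; the entries for a
 * task are dropped in task_deallocate() via btlog_remove_entries_for_element().
 */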
612
613 void
614 task_init(void)
615 {
616
617 lck_grp_attr_setdefault(&task_lck_grp_attr);
618 lck_grp_init(&task_lck_grp, "task", &task_lck_grp_attr);
619 lck_attr_setdefault(&task_lck_attr);
620 lck_mtx_init(&tasks_threads_lock, &task_lck_grp, &task_lck_attr);
621 lck_mtx_init(&tasks_corpse_lock, &task_lck_grp, &task_lck_attr);
622
623 task_zone = zinit(
624 sizeof(struct task),
625 task_max * sizeof(struct task),
626 TASK_CHUNK * sizeof(struct task),
627 "tasks");
628
629 zone_change(task_zone, Z_NOENCRYPT, TRUE);
630
631 #if CONFIG_EMBEDDED
632 task_watch_init();
633 #endif /* CONFIG_EMBEDDED */
634
635 /*
636 * Configure per-task memory limit.
637 * The boot-arg is interpreted as Megabytes,
638 * and takes precedence over the device tree.
639 * Setting the boot-arg to 0 disables task limits.
640 */
641 if (!PE_parse_boot_argn("max_task_pmem", &max_task_footprint_mb,
642 sizeof (max_task_footprint_mb))) {
643 /*
644 * No limit was found in boot-args, so go look in the device tree.
645 */
646 if (!PE_get_default("kern.max_task_pmem", &max_task_footprint_mb,
647 sizeof(max_task_footprint_mb))) {
648 /*
649 * No limit was found in device tree.
650 */
651 max_task_footprint_mb = 0;
652 }
653 }
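/*
 * Example (illustrative only): booting with "max_task_pmem=1024" would cap each
 * task's physical footprint at 1024 MB, overriding any kern.max_task_pmem value
 * from the device tree; "max_task_pmem=0" disables the per-task limit entirely,
 * per the comment above.
 */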
654
655 if (max_task_footprint_mb != 0) {
656 #if CONFIG_MEMORYSTATUS
657 if (max_task_footprint_mb < 50) {
658 printf("Warning: max_task_pmem %d below minimum.\n",
659 max_task_footprint_mb);
660 max_task_footprint_mb = 50;
661 }
662 printf("Limiting task physical memory footprint to %d MB\n",
663 max_task_footprint_mb);
664
665 max_task_footprint = (ledger_amount_t)max_task_footprint_mb * 1024 * 1024; // Convert MB to bytes
666
667 /*
668 * Configure the per-task memory limit warning level.
669 * This is computed as a percentage.
670 */
671 max_task_footprint_warning_level = 0;
672
673 if (max_mem < 0x40000000) {
674 /*
675 * On devices with < 1GB of memory:
676 * -- set warnings to 50MB below the per-task limit.
677 */
678 if (max_task_footprint_mb > 50) {
679 max_task_footprint_warning_level = ((max_task_footprint_mb - 50) * 100) / max_task_footprint_mb;
680 }
681 } else {
682 /*
683 * On devices with >= 1GB of memory:
684 * -- set warnings to 100MB below the per-task limit.
685 */
686 if (max_task_footprint_mb > 100) {
687 max_task_footprint_warning_level = ((max_task_footprint_mb - 100) * 100) / max_task_footprint_mb;
688 }
689 }
690
691 /*
692 * Never allow warning level to land below the default.
693 */
694 if (max_task_footprint_warning_level < PHYS_FOOTPRINT_WARNING_LEVEL) {
695 max_task_footprint_warning_level = PHYS_FOOTPRINT_WARNING_LEVEL;
696 }
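/*
 * Worked example of the computation above (illustrative numbers): on a device
 * with >= 1GB of memory and max_task_footprint_mb = 500, the warning level is
 * ((500 - 100) * 100) / 500 = 80%, i.e. the warning fires 100MB below the
 * limit; anything that computes below PHYS_FOOTPRINT_WARNING_LEVEL (80) is
 * clamped up to that default.
 */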
697
698 printf("Limiting task physical memory warning to %d%%\n", max_task_footprint_warning_level);
699
700 #else
701 printf("Warning: max_task_pmem specified, but jetsam not configured; ignoring.\n");
702 #endif /* CONFIG_MEMORYSTATUS */
703 }
704
705 #if MACH_ASSERT
706 PE_parse_boot_argn("pmap_ledgers_panic", &pmap_ledgers_panic,
707 sizeof (pmap_ledgers_panic));
708 #endif /* MACH_ASSERT */
709
710 #if CONFIG_COREDUMP
711 if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores,
712 sizeof (hwm_user_cores))) {
713 hwm_user_cores = 0;
714 }
715 #endif
716
717 proc_init_cpumon_params();
718
719 if (!PE_parse_boot_argn("task_wakeups_monitor_rate", &task_wakeups_monitor_rate, sizeof (task_wakeups_monitor_rate))) {
720 task_wakeups_monitor_rate = TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT;
721 }
722
723 if (!PE_parse_boot_argn("task_wakeups_monitor_interval", &task_wakeups_monitor_interval, sizeof (task_wakeups_monitor_interval))) {
724 task_wakeups_monitor_interval = TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL;
725 }
726
727 if (!PE_parse_boot_argn("task_wakeups_monitor_ustackshots_trigger_pct", &task_wakeups_monitor_ustackshots_trigger_pct,
728 sizeof (task_wakeups_monitor_ustackshots_trigger_pct))) {
729 task_wakeups_monitor_ustackshots_trigger_pct = TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER;
730 }
731
732 if (!PE_parse_boot_argn("disable_exc_resource", &disable_exc_resource,
733 sizeof (disable_exc_resource))) {
734 disable_exc_resource = 0;
735 }
736
737 if (!PE_parse_boot_argn("task_iomon_limit_mb", &task_iomon_limit_mb, sizeof (task_iomon_limit_mb))) {
738 task_iomon_limit_mb = IOMON_DEFAULT_LIMIT;
739 }
740
741 if (!PE_parse_boot_argn("task_iomon_interval_secs", &task_iomon_interval_secs, sizeof (task_iomon_interval_secs))) {
742 task_iomon_interval_secs = IOMON_DEFAULT_INTERVAL;
743 }
744
745 if (!PE_parse_boot_argn("io_telemetry_limit", &io_telemetry_limit, sizeof (io_telemetry_limit))) {
746 io_telemetry_limit = IO_TELEMETRY_DEFAULT_LIMIT;
747 }
748
749 /*
750 * If we have coalitions, coalition_init() will call init_task_ledgers() as it
751 * sets up the ledgers for the default coalition. If we don't have coalitions,
752 * then we have to call it now.
753 */
754 #if CONFIG_COALITIONS
755 assert(task_ledger_template);
756 #else /* CONFIG_COALITIONS */
757 init_task_ledgers();
758 #endif /* CONFIG_COALITIONS */
759
760 #if TASK_REFERENCE_LEAK_DEBUG
761 task_ref_btlog = btlog_create(TASK_REF_NUM_RECORDS, TASK_REF_BTDEPTH, TRUE /* caller_will_remove_entries_for_element? */);
762 assert(task_ref_btlog);
763 #endif
764
765 /*
766 * Create the kernel task as the first task.
767 */
768 #ifdef __LP64__
769 if (task_create_internal(TASK_NULL, NULL, FALSE, TRUE, TF_NONE, TPF_NONE, &kernel_task) != KERN_SUCCESS)
770 #else
771 if (task_create_internal(TASK_NULL, NULL, FALSE, FALSE, TF_NONE, TPF_NONE, &kernel_task) != KERN_SUCCESS)
772 #endif
773 panic("task_init\n");
774
775
776 vm_map_deallocate(kernel_task->map);
777 kernel_task->map = kernel_map;
778 lck_spin_init(&dead_task_statistics_lock, &task_lck_grp, &task_lck_attr);
779 }
780
781 /*
782 * Create a task running in the kernel address space. It may
783 * have its own map of size mem_size and may have ipc privileges.
784 */
785 kern_return_t
786 kernel_task_create(
787 __unused task_t parent_task,
788 __unused vm_offset_t map_base,
789 __unused vm_size_t map_size,
790 __unused task_t *child_task)
791 {
792 return (KERN_INVALID_ARGUMENT);
793 }
794
795 kern_return_t
796 task_create(
797 task_t parent_task,
798 __unused ledger_port_array_t ledger_ports,
799 __unused mach_msg_type_number_t num_ledger_ports,
800 __unused boolean_t inherit_memory,
801 __unused task_t *child_task) /* OUT */
802 {
803 if (parent_task == TASK_NULL)
804 return(KERN_INVALID_ARGUMENT);
805
806 /*
807 * No longer supported: too many calls assume that a task has a valid
808 * process attached.
809 */
810 return(KERN_FAILURE);
811 }
812
813 kern_return_t
814 host_security_create_task_token(
815 host_security_t host_security,
816 task_t parent_task,
817 __unused security_token_t sec_token,
818 __unused audit_token_t audit_token,
819 __unused host_priv_t host_priv,
820 __unused ledger_port_array_t ledger_ports,
821 __unused mach_msg_type_number_t num_ledger_ports,
822 __unused boolean_t inherit_memory,
823 __unused task_t *child_task) /* OUT */
824 {
825 if (parent_task == TASK_NULL)
826 return(KERN_INVALID_ARGUMENT);
827
828 if (host_security == HOST_NULL)
829 return(KERN_INVALID_SECURITY);
830
831 /*
832 * No longer supported.
833 */
834 return(KERN_FAILURE);
835 }
836
837 /*
838 * Task ledgers
839 * ------------
840 *
841 * phys_footprint
842 * Physical footprint: This is the sum of:
843 * + (internal - alternate_accounting)
844 * + (internal_compressed - alternate_accounting_compressed)
845 * + iokit_mapped
846 * + purgeable_nonvolatile
847 * + purgeable_nonvolatile_compressed
848 * + page_table
849 *
850 * internal
851 * The task's anonymous memory, which on iOS is always resident.
852 *
853 * internal_compressed
854 * Amount of this task's internal memory which is held by the compressor.
855 * Such memory is no longer actually resident for the task [i.e., resident in its pmap],
856 * and could be either decompressed back into memory, or paged out to storage, depending
857 * on our implementation.
858 *
859 * iokit_mapped
860 * IOKit mappings: The total size of all IOKit mappings in this task, regardless of
861 * clean/dirty or internal/external state.
862 *
863 * alternate_accounting
864 * The number of internal dirty pages which are part of IOKit mappings. By definition, these pages
865 * are counted in both internal *and* iokit_mapped, so we must subtract them from the total to avoid
866 * double counting.
867 */
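/*
 * Worked example of the phys_footprint sum (illustrative numbers): a task with
 * internal = 100MB, internal_compressed = 20MB, iokit_mapped = 30MB, of which
 * alternate_accounting = 10MB and alternate_accounting_compressed = 5MB, plus
 * purgeable_nonvolatile = 15MB, purgeable_nonvolatile_compressed = 5MB and
 * page_table = 2MB, would report (100 - 10) + (20 - 5) + 30 + 15 + 5 + 2 = 157MB,
 * the subtraction preventing IOKit-backed internal pages from being counted twice.
 */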
868 void
869 init_task_ledgers(void)
870 {
871 ledger_template_t t;
872
873 assert(task_ledger_template == NULL);
874 assert(kernel_task == TASK_NULL);
875
876 #if MACH_ASSERT
877 PE_parse_boot_argn("pmap_ledgers_panic", &pmap_ledgers_panic,
878 sizeof (pmap_ledgers_panic));
879 #endif /* MACH_ASSERT */
880
881 if ((t = ledger_template_create("Per-task ledger")) == NULL)
882 panic("couldn't create task ledger template");
883
884 task_ledgers.cpu_time = ledger_entry_add(t, "cpu_time", "sched", "ns");
885 task_ledgers.tkm_private = ledger_entry_add(t, "tkm_private",
886 "physmem", "bytes");
887 task_ledgers.tkm_shared = ledger_entry_add(t, "tkm_shared", "physmem",
888 "bytes");
889 task_ledgers.phys_mem = ledger_entry_add(t, "phys_mem", "physmem",
890 "bytes");
891 task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem",
892 "bytes");
893 task_ledgers.internal = ledger_entry_add(t, "internal", "physmem",
894 "bytes");
895 task_ledgers.iokit_mapped = ledger_entry_add(t, "iokit_mapped", "mappings",
896 "bytes");
897 task_ledgers.alternate_accounting = ledger_entry_add(t, "alternate_accounting", "physmem",
898 "bytes");
899 task_ledgers.alternate_accounting_compressed = ledger_entry_add(t, "alternate_accounting_compressed", "physmem",
900 "bytes");
901 task_ledgers.page_table = ledger_entry_add(t, "page_table", "physmem",
902 "bytes");
903 task_ledgers.phys_footprint = ledger_entry_add(t, "phys_footprint", "physmem",
904 "bytes");
905 task_ledgers.internal_compressed = ledger_entry_add(t, "internal_compressed", "physmem",
906 "bytes");
907 task_ledgers.purgeable_volatile = ledger_entry_add(t, "purgeable_volatile", "physmem", "bytes");
908 task_ledgers.purgeable_nonvolatile = ledger_entry_add(t, "purgeable_nonvolatile", "physmem", "bytes");
909 task_ledgers.purgeable_volatile_compressed = ledger_entry_add(t, "purgeable_volatile_compress", "physmem", "bytes");
910 task_ledgers.purgeable_nonvolatile_compressed = ledger_entry_add(t, "purgeable_nonvolatile_compress", "physmem", "bytes");
911 task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power",
912 "count");
913 task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power",
914 "count");
915
916 #if CONFIG_SCHED_SFI
917 sfi_class_id_t class_id, ledger_alias;
918 for (class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
919 task_ledgers.sfi_wait_times[class_id] = -1;
920 }
921
922 /* don't account for UNSPECIFIED */
923 for (class_id = SFI_CLASS_UNSPECIFIED + 1; class_id < MAX_SFI_CLASS_ID; class_id++) {
924 ledger_alias = sfi_get_ledger_alias_for_class(class_id);
925 if (ledger_alias != SFI_CLASS_UNSPECIFIED) {
926 /* Check to see if alias has been registered yet */
927 if (task_ledgers.sfi_wait_times[ledger_alias] != -1) {
928 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias];
929 } else {
930 /* Otherwise, initialize it first */
931 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias] = sfi_ledger_entry_add(t, ledger_alias);
932 }
933 } else {
934 task_ledgers.sfi_wait_times[class_id] = sfi_ledger_entry_add(t, class_id);
935 }
936
937 if (task_ledgers.sfi_wait_times[class_id] < 0) {
938 panic("couldn't create entries for task ledger template for SFI class 0x%x", class_id);
939 }
940 }
941
942 assert(task_ledgers.sfi_wait_times[MAX_SFI_CLASS_ID -1] != -1);
943 #endif /* CONFIG_SCHED_SFI */
944
945 task_ledgers.cpu_time_billed_to_me = ledger_entry_add(t, "cpu_time_billed_to_me", "sched", "ns");
946 task_ledgers.cpu_time_billed_to_others = ledger_entry_add(t, "cpu_time_billed_to_others", "sched", "ns");
947 task_ledgers.physical_writes = ledger_entry_add(t, "physical_writes", "res", "bytes");
948 task_ledgers.logical_writes = ledger_entry_add(t, "logical_writes", "res", "bytes");
949 task_ledgers.energy_billed_to_me = ledger_entry_add(t, "energy_billed_to_me", "power", "nj");
950 task_ledgers.energy_billed_to_others = ledger_entry_add(t, "energy_billed_to_others", "power", "nj");
951
952 if ((task_ledgers.cpu_time < 0) ||
953 (task_ledgers.tkm_private < 0) ||
954 (task_ledgers.tkm_shared < 0) ||
955 (task_ledgers.phys_mem < 0) ||
956 (task_ledgers.wired_mem < 0) ||
957 (task_ledgers.internal < 0) ||
958 (task_ledgers.iokit_mapped < 0) ||
959 (task_ledgers.alternate_accounting < 0) ||
960 (task_ledgers.alternate_accounting_compressed < 0) ||
961 (task_ledgers.page_table < 0) ||
962 (task_ledgers.phys_footprint < 0) ||
963 (task_ledgers.internal_compressed < 0) ||
964 (task_ledgers.purgeable_volatile < 0) ||
965 (task_ledgers.purgeable_nonvolatile < 0) ||
966 (task_ledgers.purgeable_volatile_compressed < 0) ||
967 (task_ledgers.purgeable_nonvolatile_compressed < 0) ||
968 (task_ledgers.platform_idle_wakeups < 0) ||
969 (task_ledgers.interrupt_wakeups < 0) ||
970 (task_ledgers.cpu_time_billed_to_me < 0) || (task_ledgers.cpu_time_billed_to_others < 0) ||
971 (task_ledgers.physical_writes < 0) ||
972 (task_ledgers.logical_writes < 0) ||
973 (task_ledgers.energy_billed_to_me < 0) ||
974 (task_ledgers.energy_billed_to_others < 0)
975 ) {
976 panic("couldn't create entries for task ledger template");
977 }
978
979 ledger_track_credit_only(t, task_ledgers.phys_footprint);
980 ledger_track_credit_only(t, task_ledgers.page_table);
981 ledger_track_credit_only(t, task_ledgers.internal);
982 ledger_track_credit_only(t, task_ledgers.internal_compressed);
983 ledger_track_credit_only(t, task_ledgers.iokit_mapped);
984 ledger_track_credit_only(t, task_ledgers.alternate_accounting);
985 ledger_track_credit_only(t, task_ledgers.alternate_accounting_compressed);
986 ledger_track_credit_only(t, task_ledgers.purgeable_volatile);
987 ledger_track_credit_only(t, task_ledgers.purgeable_nonvolatile);
988 ledger_track_credit_only(t, task_ledgers.purgeable_volatile_compressed);
989 ledger_track_credit_only(t, task_ledgers.purgeable_nonvolatile_compressed);
990
991 ledger_track_maximum(t, task_ledgers.phys_footprint, 60);
992 #if MACH_ASSERT
993 if (pmap_ledgers_panic) {
994 ledger_panic_on_negative(t, task_ledgers.phys_footprint);
995 ledger_panic_on_negative(t, task_ledgers.page_table);
996 ledger_panic_on_negative(t, task_ledgers.internal);
997 ledger_panic_on_negative(t, task_ledgers.internal_compressed);
998 ledger_panic_on_negative(t, task_ledgers.iokit_mapped);
999 ledger_panic_on_negative(t, task_ledgers.alternate_accounting);
1000 ledger_panic_on_negative(t, task_ledgers.alternate_accounting_compressed);
1001 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile);
1002 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile);
1003 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile_compressed);
1004 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile_compressed);
1005 }
1006 #endif /* MACH_ASSERT */
1007
1008 #if CONFIG_MEMORYSTATUS
1009 ledger_set_callback(t, task_ledgers.phys_footprint, task_footprint_exceeded, NULL, NULL);
1010 #endif /* CONFIG_MEMORYSTATUS */
1011
1012 ledger_set_callback(t, task_ledgers.interrupt_wakeups,
1013 task_wakeups_rate_exceeded, NULL, NULL);
1014 ledger_set_callback(t, task_ledgers.physical_writes, task_io_rate_exceeded, (void *)FLAVOR_IO_PHYSICAL_WRITES, NULL);
1015 ledger_set_callback(t, task_ledgers.logical_writes, task_io_rate_exceeded, (void *)FLAVOR_IO_LOGICAL_WRITES, NULL);
1016
1017 ledger_template_complete(t);
1018 task_ledger_template = t;
1019 }
1020
1021 kern_return_t
1022 task_create_internal(
1023 task_t parent_task,
1024 coalition_t *parent_coalitions __unused,
1025 boolean_t inherit_memory,
1026 __unused boolean_t is_64bit,
1027 uint32_t t_flags,
1028 uint32_t t_procflags,
1029 task_t *child_task) /* OUT */
1030 {
1031 task_t new_task;
1032 vm_shared_region_t shared_region;
1033 ledger_t ledger = NULL;
1034
1035 new_task = (task_t) zalloc(task_zone);
1036
1037 if (new_task == TASK_NULL)
1038 return(KERN_RESOURCE_SHORTAGE);
1039
1040 /* one ref for just being alive; one for our caller */
1041 new_task->ref_count = 2;
1042
1043 /* allocate with active entries */
1044 assert(task_ledger_template != NULL);
1045 if ((ledger = ledger_instantiate(task_ledger_template,
1046 LEDGER_CREATE_ACTIVE_ENTRIES)) == NULL) {
1047 zfree(task_zone, new_task);
1048 return(KERN_RESOURCE_SHORTAGE);
1049 }
1050
1051
1052 new_task->ledger = ledger;
1053
1054 #if defined(CONFIG_SCHED_MULTIQ)
1055 new_task->sched_group = sched_group_create();
1056 #endif
1057
1058 /* if inherit_memory is true, parent_task MUST not be NULL */
1059 if (!(t_flags & TF_CORPSE_FORK) && inherit_memory)
1060 new_task->map = vm_map_fork(ledger, parent_task->map, 0);
1061 else
1062 new_task->map = vm_map_create(pmap_create(ledger, 0, is_64bit),
1063 (vm_map_offset_t)(VM_MIN_ADDRESS),
1064 (vm_map_offset_t)(VM_MAX_ADDRESS), TRUE);
1065
1066 /* Inherit memlock limit from parent */
1067 if (parent_task)
1068 vm_map_set_user_wire_limit(new_task->map, (vm_size_t)parent_task->map->user_wire_limit);
1069
1070 lck_mtx_init(&new_task->lock, &task_lck_grp, &task_lck_attr);
1071 queue_init(&new_task->threads);
1072 new_task->suspend_count = 0;
1073 new_task->thread_count = 0;
1074 new_task->active_thread_count = 0;
1075 new_task->user_stop_count = 0;
1076 new_task->legacy_stop_count = 0;
1077 new_task->active = TRUE;
1078 new_task->halting = FALSE;
1079 new_task->user_data = NULL;
1080 new_task->priv_flags = 0;
1081 new_task->t_flags = t_flags;
1082 new_task->t_procflags = t_procflags;
1083 new_task->importance = 0;
1084 new_task->crashed_thread_id = 0;
1085 new_task->exec_token = 0;
1086
1087 #if CONFIG_ATM
1088 new_task->atm_context = NULL;
1089 #endif
1090 new_task->bank_context = NULL;
1091
1092 #ifdef MACH_BSD
1093 new_task->bsd_info = NULL;
1094 new_task->corpse_info = NULL;
1095 #endif /* MACH_BSD */
1096
1097 #if CONFIG_MACF
1098 new_task->crash_label = NULL;
1099 #endif
1100
1101 #if CONFIG_MEMORYSTATUS
1102 if (max_task_footprint != 0) {
1103 ledger_set_limit(ledger, task_ledgers.phys_footprint, max_task_footprint, PHYS_FOOTPRINT_WARNING_LEVEL);
1104 }
1105 #endif /* CONFIG_MEMORYSTATUS */
1106
1107 if (task_wakeups_monitor_rate != 0) {
1108 uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
1109 int32_t rate; // Ignored because of WAKEMON_SET_DEFAULTS
1110 task_wakeups_monitor_ctl(new_task, &flags, &rate);
1111 }
1112
1113 #if CONFIG_IO_ACCOUNTING
1114 uint32_t flags = IOMON_ENABLE;
1115 task_io_monitor_ctl(new_task, &flags);
1116 #endif /* CONFIG_IO_ACCOUNTING */
1117
1118 machine_task_init(new_task, parent_task, inherit_memory);
1119
1120 new_task->task_debug = NULL;
1121
1122 #if DEVELOPMENT || DEBUG
1123 new_task->task_unnested = FALSE;
1124 new_task->task_disconnected_count = 0;
1125 #endif
1126 queue_init(&new_task->semaphore_list);
1127 new_task->semaphores_owned = 0;
1128
1129 ipc_task_init(new_task, parent_task);
1130
1131 new_task->vtimers = 0;
1132
1133 new_task->shared_region = NULL;
1134
1135 new_task->affinity_space = NULL;
1136
1137 new_task->t_kpc = 0;
1138
1139 new_task->pidsuspended = FALSE;
1140 new_task->frozen = FALSE;
1141 new_task->changing_freeze_state = FALSE;
1142 new_task->rusage_cpu_flags = 0;
1143 new_task->rusage_cpu_percentage = 0;
1144 new_task->rusage_cpu_interval = 0;
1145 new_task->rusage_cpu_deadline = 0;
1146 new_task->rusage_cpu_callt = NULL;
1147 #if MACH_ASSERT
1148 new_task->suspends_outstanding = 0;
1149 #endif
1150
1151 #if HYPERVISOR
1152 new_task->hv_task_target = NULL;
1153 #endif /* HYPERVISOR */
1154
1155 #if CONFIG_EMBEDDED
1156 queue_init(&new_task->task_watchers);
1157 new_task->num_taskwatchers = 0;
1158 new_task->watchapplying = 0;
1159 #endif /* CONFIG_EMBEDDED */
1160
1161 new_task->mem_notify_reserved = 0;
1162 new_task->memlimit_attrs_reserved = 0;
1163 #if IMPORTANCE_INHERITANCE
1164 new_task->task_imp_base = NULL;
1165 #endif /* IMPORTANCE_INHERITANCE */
1166
1167 new_task->requested_policy = default_task_requested_policy;
1168 new_task->effective_policy = default_task_effective_policy;
1169
1170 if (parent_task != TASK_NULL) {
1171 new_task->sec_token = parent_task->sec_token;
1172 new_task->audit_token = parent_task->audit_token;
1173
1174 /* inherit the parent's shared region */
1175 shared_region = vm_shared_region_get(parent_task);
1176 vm_shared_region_set(new_task, shared_region);
1177
1178 if(task_has_64BitAddr(parent_task))
1179 task_set_64BitAddr(new_task);
1180 new_task->all_image_info_addr = parent_task->all_image_info_addr;
1181 new_task->all_image_info_size = parent_task->all_image_info_size;
1182
1183 if (inherit_memory && parent_task->affinity_space)
1184 task_affinity_create(parent_task, new_task);
1185
1186 new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task);
1187
1188 #if IMPORTANCE_INHERITANCE
1189 ipc_importance_task_t new_task_imp = IIT_NULL;
1190 boolean_t inherit_receive = TRUE;
1191
1192 if (task_is_marked_importance_donor(parent_task)) {
1193 new_task_imp = ipc_importance_for_task(new_task, FALSE);
1194 assert(IIT_NULL != new_task_imp);
1195 ipc_importance_task_mark_donor(new_task_imp, TRUE);
1196 }
1197 #if CONFIG_EMBEDDED
1198 /* Embedded only wants to inherit for exec copy task */
1199 if ((t_procflags & TPF_EXEC_COPY) == 0) {
1200 inherit_receive = FALSE;
1201 }
1202 #endif /* CONFIG_EMBEDDED */
1203
1204 if (inherit_receive) {
1205 if (task_is_marked_importance_receiver(parent_task)) {
1206 if (IIT_NULL == new_task_imp)
1207 new_task_imp = ipc_importance_for_task(new_task, FALSE);
1208 assert(IIT_NULL != new_task_imp);
1209 ipc_importance_task_mark_receiver(new_task_imp, TRUE);
1210 }
1211 if (task_is_marked_importance_denap_receiver(parent_task)) {
1212 if (IIT_NULL == new_task_imp)
1213 new_task_imp = ipc_importance_for_task(new_task, FALSE);
1214 assert(IIT_NULL != new_task_imp);
1215 ipc_importance_task_mark_denap_receiver(new_task_imp, TRUE);
1216 }
1217 }
1218
1219 if (IIT_NULL != new_task_imp) {
1220 assert(new_task->task_imp_base == new_task_imp);
1221 ipc_importance_task_release(new_task_imp);
1222 }
1223 #endif /* IMPORTANCE_INHERITANCE */
1224
1225 new_task->priority = BASEPRI_DEFAULT;
1226 new_task->max_priority = MAXPRI_USER;
1227
1228 task_policy_create(new_task, parent_task);
1229 } else {
1230 new_task->sec_token = KERNEL_SECURITY_TOKEN;
1231 new_task->audit_token = KERNEL_AUDIT_TOKEN;
1232 #ifdef __LP64__
1233 if(is_64bit)
1234 task_set_64BitAddr(new_task);
1235 #endif
1236 new_task->all_image_info_addr = (mach_vm_address_t)0;
1237 new_task->all_image_info_size = (mach_vm_size_t)0;
1238
1239 new_task->pset_hint = PROCESSOR_SET_NULL;
1240
1241 if (kernel_task == TASK_NULL) {
1242 new_task->priority = BASEPRI_KERNEL;
1243 new_task->max_priority = MAXPRI_KERNEL;
1244 } else {
1245 new_task->priority = BASEPRI_DEFAULT;
1246 new_task->max_priority = MAXPRI_USER;
1247 }
1248 }
1249
1250 bzero(new_task->coalition, sizeof(new_task->coalition));
1251 for (int i = 0; i < COALITION_NUM_TYPES; i++)
1252 queue_chain_init(new_task->task_coalition[i]);
1253
1254 /* Allocate I/O Statistics */
1255 new_task->task_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info));
1256 assert(new_task->task_io_stats != NULL);
1257 bzero(new_task->task_io_stats, sizeof(struct io_stat_info));
1258
1259 bzero(&(new_task->cpu_time_eqos_stats), sizeof(new_task->cpu_time_eqos_stats));
1260 bzero(&(new_task->cpu_time_rqos_stats), sizeof(new_task->cpu_time_rqos_stats));
1261
1262 bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics));
1263
1264 /* Copy resource accounting info from the parent for a corpse-forked task. */
1265 if (parent_task != NULL && (t_flags & TF_CORPSE_FORK)) {
1266 task_rollup_accounting_info(new_task, parent_task);
1267 } else {
1268 /* Initialize to zero for standard fork/spawn case */
1269 new_task->total_user_time = 0;
1270 new_task->total_system_time = 0;
1271 new_task->total_ptime = 0;
1272 new_task->faults = 0;
1273 new_task->pageins = 0;
1274 new_task->cow_faults = 0;
1275 new_task->messages_sent = 0;
1276 new_task->messages_received = 0;
1277 new_task->syscalls_mach = 0;
1278 new_task->syscalls_unix = 0;
1279 new_task->c_switch = 0;
1280 new_task->p_switch = 0;
1281 new_task->ps_switch = 0;
1282 new_task->low_mem_notified_warn = 0;
1283 new_task->low_mem_notified_critical = 0;
1284 new_task->purged_memory_warn = 0;
1285 new_task->purged_memory_critical = 0;
1286 new_task->low_mem_privileged_listener = 0;
1287 new_task->memlimit_is_active = 0;
1288 new_task->memlimit_is_fatal = 0;
1289 new_task->memlimit_active_exc_resource = 0;
1290 new_task->memlimit_inactive_exc_resource = 0;
1291 new_task->task_timer_wakeups_bin_1 = 0;
1292 new_task->task_timer_wakeups_bin_2 = 0;
1293 new_task->task_gpu_ns = 0;
1294 new_task->task_immediate_writes = 0;
1295 new_task->task_deferred_writes = 0;
1296 new_task->task_invalidated_writes = 0;
1297 new_task->task_metadata_writes = 0;
1298 new_task->task_energy = 0;
1299 #if MONOTONIC
1300 memset(&new_task->task_monotonic, 0, sizeof(new_task->task_monotonic));
1301 #endif /* MONOTONIC */
1302 }
1303
1304
1305 #if CONFIG_COALITIONS
1306 if (!(t_flags & TF_CORPSE_FORK)) {
1307 /* TODO: there is no graceful failure path here... */
1308 if (parent_coalitions && parent_coalitions[COALITION_TYPE_RESOURCE]) {
1309 coalitions_adopt_task(parent_coalitions, new_task);
1310 } else if (parent_task && parent_task->coalition[COALITION_TYPE_RESOURCE]) {
1311 /*
1312 * all tasks at least have a resource coalition, so
1313 * if the parent has one then inherit all coalitions
1314 * the parent is a part of
1315 */
1316 coalitions_adopt_task(parent_task->coalition, new_task);
1317 } else {
1318 /* TODO: assert that new_task will be PID 1 (launchd) */
1319 coalitions_adopt_init_task(new_task);
1320 }
1321 /*
1322 * on exec, we need to transfer the coalition roles from the
1323 * parent task to the exec copy task.
1324 */
1325 if (parent_task && (t_procflags & TPF_EXEC_COPY)) {
1326 int coal_roles[COALITION_NUM_TYPES];
1327 task_coalition_roles(parent_task, coal_roles);
1328 (void)coalitions_set_roles(new_task->coalition, new_task, coal_roles);
1329 }
1330 } else {
1331 coalitions_adopt_corpse_task(new_task);
1332 }
1333
1334 if (new_task->coalition[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
1335 panic("created task is not a member of a resource coalition");
1336 }
1337 #endif /* CONFIG_COALITIONS */
1338
1339 new_task->dispatchqueue_offset = 0;
1340 if (parent_task != NULL) {
1341 new_task->dispatchqueue_offset = parent_task->dispatchqueue_offset;
1342 }
1343
1344 if (vm_backing_store_low && parent_task != NULL)
1345 new_task->priv_flags |= (parent_task->priv_flags&VM_BACKING_STORE_PRIV);
1346
1347 new_task->task_volatile_objects = 0;
1348 new_task->task_nonvolatile_objects = 0;
1349 new_task->task_purgeable_disowning = FALSE;
1350 new_task->task_purgeable_disowned = FALSE;
1351 queue_init(&new_task->task_objq);
1352 task_objq_lock_init(new_task);
1353
1354 new_task->task_region_footprint = FALSE;
1355
1356 #if CONFIG_SECLUDED_MEMORY
1357 new_task->task_can_use_secluded_mem = FALSE;
1358 new_task->task_could_use_secluded_mem = FALSE;
1359 new_task->task_could_also_use_secluded_mem = FALSE;
1360 #endif /* CONFIG_SECLUDED_MEMORY */
1361
1362 queue_init(&new_task->io_user_clients);
1363
1364 ipc_task_enable(new_task);
1365
1366 lck_mtx_lock(&tasks_threads_lock);
1367 queue_enter(&tasks, new_task, task_t, tasks);
1368 tasks_count++;
1369 if (tasks_suspend_state) {
1370 task_suspend_internal(new_task);
1371 }
1372 lck_mtx_unlock(&tasks_threads_lock);
1373
1374 *child_task = new_task;
1375 return(KERN_SUCCESS);
1376 }
1377
1378 /*
1379 * task_rollup_accounting_info
1380 *
1381 * Roll up accounting stats. Used to rollup stats
1382 * for exec copy task and corpse fork.
1383 */
1384 void
1385 task_rollup_accounting_info(task_t to_task, task_t from_task)
1386 {
1387 assert(from_task != to_task);
1388
1389 to_task->total_user_time = from_task->total_user_time;
1390 to_task->total_system_time = from_task->total_system_time;
1391 to_task->total_ptime = from_task->total_ptime;
1392 to_task->faults = from_task->faults;
1393 to_task->pageins = from_task->pageins;
1394 to_task->cow_faults = from_task->cow_faults;
1395 to_task->messages_sent = from_task->messages_sent;
1396 to_task->messages_received = from_task->messages_received;
1397 to_task->syscalls_mach = from_task->syscalls_mach;
1398 to_task->syscalls_unix = from_task->syscalls_unix;
1399 to_task->c_switch = from_task->c_switch;
1400 to_task->p_switch = from_task->p_switch;
1401 to_task->ps_switch = from_task->ps_switch;
1402 to_task->extmod_statistics = from_task->extmod_statistics;
1403 to_task->low_mem_notified_warn = from_task->low_mem_notified_warn;
1404 to_task->low_mem_notified_critical = from_task->low_mem_notified_critical;
1405 to_task->purged_memory_warn = from_task->purged_memory_warn;
1406 to_task->purged_memory_critical = from_task->purged_memory_critical;
1407 to_task->low_mem_privileged_listener = from_task->low_mem_privileged_listener;
1408 *to_task->task_io_stats = *from_task->task_io_stats;
1409 to_task->cpu_time_eqos_stats = from_task->cpu_time_eqos_stats;
1410 to_task->cpu_time_rqos_stats = from_task->cpu_time_rqos_stats;
1411 to_task->task_timer_wakeups_bin_1 = from_task->task_timer_wakeups_bin_1;
1412 to_task->task_timer_wakeups_bin_2 = from_task->task_timer_wakeups_bin_2;
1413 to_task->task_gpu_ns = from_task->task_gpu_ns;
1414 to_task->task_immediate_writes = from_task->task_immediate_writes;
1415 to_task->task_deferred_writes = from_task->task_deferred_writes;
1416 to_task->task_invalidated_writes = from_task->task_invalidated_writes;
1417 to_task->task_metadata_writes = from_task->task_metadata_writes;
1418 to_task->task_energy = from_task->task_energy;
1419
1420 /* Skip ledger roll up for memory accounting entries */
1421 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time);
1422 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.platform_idle_wakeups);
1423 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.interrupt_wakeups);
1424 #if CONFIG_SCHED_SFI
1425 for (sfi_class_id_t class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
1426 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.sfi_wait_times[class_id]);
1427 }
1428 #endif
1429 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_me);
1430 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_others);
1431 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.physical_writes);
1432 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.logical_writes);
1433 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_me);
1434 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_others);
1435 }
1436
1437 int task_dropped_imp_count = 0;
1438
1439 /*
1440 * task_deallocate:
1441 *
1442 * Drop a reference on a task.
1443 */
1444 void
1445 task_deallocate(
1446 task_t task)
1447 {
1448 ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups;
1449 uint32_t refs;
1450
1451 if (task == TASK_NULL)
1452 return;
1453
1454 refs = task_deallocate_internal(task);
1455
1456 #if IMPORTANCE_INHERITANCE
1457 if (refs > 1)
1458 return;
1459
1460 atomic_load_explicit(&task->ref_count, memory_order_acquire);
1461
1462 if (refs == 1) {
1463 /*
1464 * If the last ref potentially comes from the task's importance,
1465 * disconnect it. But more task refs may be added before
1466 * that completes, so wait for the reference to go to zero
1467 * naturally (it may happen on a recursive task_deallocate()
1468 * from the ipc_importance_disconnect_task() call).
1469 */
1470 if (IIT_NULL != task->task_imp_base)
1471 ipc_importance_disconnect_task(task);
1472 return;
1473 }
1474 #else
1475 if (refs > 0)
1476 return;
1477
1478 atomic_load_explicit(&task->ref_count, memory_order_acquire);
1479
1480 #endif /* IMPORTANCE_INHERITANCE */
1481
1482 lck_mtx_lock(&tasks_threads_lock);
1483 queue_remove(&terminated_tasks, task, task_t, tasks);
1484 terminated_tasks_count--;
1485 lck_mtx_unlock(&tasks_threads_lock);
1486
1487 /*
1488 * remove the reference on atm descriptor
1489 */
1490 task_atm_reset(task);
1491
1492 /*
1493 * remove the reference on bank context
1494 */
1495 task_bank_reset(task);
1496
1497 if (task->task_io_stats)
1498 kfree(task->task_io_stats, sizeof(struct io_stat_info));
1499
1500 /*
1501 * Give the machine dependent code a chance
1502 * to perform cleanup before ripping apart
1503 * the task.
1504 */
1505 machine_task_terminate(task);
1506
1507 ipc_task_terminate(task);
1508
1509 /* let iokit know */
1510 iokit_task_terminate(task);
1511
1512 if (task->affinity_space)
1513 task_affinity_deallocate(task);
1514
1515 #if MACH_ASSERT
1516 if (task->ledger != NULL &&
1517 task->map != NULL &&
1518 task->map->pmap != NULL &&
1519 task->map->pmap->ledger != NULL) {
1520 assert(task->ledger == task->map->pmap->ledger);
1521 }
1522 #endif /* MACH_ASSERT */
1523
1524 vm_purgeable_disown(task);
1525 assert(task->task_purgeable_disowned);
1526 if (task->task_volatile_objects != 0 ||
1527 task->task_nonvolatile_objects != 0) {
1528 panic("task_deallocate(%p): "
1529 "volatile_objects=%d nonvolatile_objects=%d\n",
1530 task,
1531 task->task_volatile_objects,
1532 task->task_nonvolatile_objects);
1533 }
1534
1535 vm_map_deallocate(task->map);
1536 is_release(task->itk_space);
1537
1538 ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
1539 &interrupt_wakeups, &debit);
1540 ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
1541 &platform_idle_wakeups, &debit);
1542
1543 #if defined(CONFIG_SCHED_MULTIQ)
1544 sched_group_destroy(task->sched_group);
1545 #endif
1546
1547 /* Accumulate statistics for dead tasks */
1548 lck_spin_lock(&dead_task_statistics_lock);
1549 dead_task_statistics.total_user_time += task->total_user_time;
1550 dead_task_statistics.total_system_time += task->total_system_time;
1551
1552 dead_task_statistics.task_interrupt_wakeups += interrupt_wakeups;
1553 dead_task_statistics.task_platform_idle_wakeups += platform_idle_wakeups;
1554
1555 dead_task_statistics.task_timer_wakeups_bin_1 += task->task_timer_wakeups_bin_1;
1556 dead_task_statistics.task_timer_wakeups_bin_2 += task->task_timer_wakeups_bin_2;
1557 dead_task_statistics.total_ptime += task->total_ptime;
1558 dead_task_statistics.total_pset_switches += task->ps_switch;
1559 dead_task_statistics.task_gpu_ns += task->task_gpu_ns;
1560 dead_task_statistics.task_energy += task->task_energy;
1561
1562 lck_spin_unlock(&dead_task_statistics_lock);
1563 lck_mtx_destroy(&task->lock, &task_lck_grp);
1564
1565 if (!ledger_get_entries(task->ledger, task_ledgers.tkm_private, &credit,
1566 &debit)) {
1567 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc);
1568 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_private.free);
1569 }
1570 if (!ledger_get_entries(task->ledger, task_ledgers.tkm_shared, &credit,
1571 &debit)) {
1572 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_shared.alloc);
1573 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_shared.free);
1574 }
1575 ledger_dereference(task->ledger);
1576
1577 #if TASK_REFERENCE_LEAK_DEBUG
1578 btlog_remove_entries_for_element(task_ref_btlog, task);
1579 #endif
1580
1581 #if CONFIG_COALITIONS
1582 task_release_coalitions(task);
1583 #endif /* CONFIG_COALITIONS */
1584
1585 bzero(task->coalition, sizeof(task->coalition));
1586
1587 #if MACH_BSD
1588 /* clean up collected information since last reference to task is gone */
1589 if (task->corpse_info) {
1590 void *corpse_info_kernel = kcdata_memory_get_begin_addr(task->corpse_info);
1591 task_crashinfo_destroy(task->corpse_info);
1592 task->corpse_info = NULL;
1593 if (corpse_info_kernel) {
1594 kfree(corpse_info_kernel, CORPSEINFO_ALLOCATION_SIZE);
1595 }
1596 }
1597 #endif
1598
1599 #if CONFIG_MACF
1600 if (task->crash_label) {
1601 mac_exc_free_label(task->crash_label);
1602 task->crash_label = NULL;
1603 }
1604 #endif
1605
1606 assert(queue_empty(&task->task_objq));
1607
1608 zfree(task_zone, task);
1609 }
1610
1611 /*
1612 * task_name_deallocate:
1613 *
1614 * Drop a reference on a task name.
1615 */
1616 void
1617 task_name_deallocate(
1618 task_name_t task_name)
1619 {
1620 return(task_deallocate((task_t)task_name));
1621 }
1622
1623 /*
1624 * task_inspect_deallocate:
1625 *
1626 * Drop a task inspection reference.
1627 */
1628 void
1629 task_inspect_deallocate(
1630 task_inspect_t task_inspect)
1631 {
1632 return(task_deallocate((task_t)task_inspect));
1633 }
1634
1635 /*
1636 * task_suspension_token_deallocate:
1637 *
1638 * Drop a reference on a task suspension token.
1639 */
1640 void
1641 task_suspension_token_deallocate(
1642 task_suspension_token_t token)
1643 {
1644 return(task_deallocate((task_t)token));
1645 }
1646
1647
1648 /*
1649 * task_collect_crash_info:
1650 *
1651 * collect crash info from bsd and mach based data
1652 */
1653 kern_return_t
1654 task_collect_crash_info(
1655 task_t task,
1656 #ifdef CONFIG_MACF
1657 struct label *crash_label,
1658 #endif
1659 int is_corpse_fork)
1660 {
1661 kern_return_t kr = KERN_SUCCESS;
1662
1663 kcdata_descriptor_t crash_data = NULL;
1664 kcdata_descriptor_t crash_data_release = NULL;
1665 mach_msg_type_number_t size = CORPSEINFO_ALLOCATION_SIZE;
1666 mach_vm_offset_t crash_data_ptr = 0;
1667 void *crash_data_kernel = NULL;
1668 void *crash_data_kernel_release = NULL;
1669 #if CONFIG_MACF
1670 struct label *label, *free_label;
1671 #endif
1672
1673 if (!corpses_enabled()) {
1674 return KERN_NOT_SUPPORTED;
1675 }
1676
1677 #if CONFIG_MACF
1678 free_label = label = mac_exc_create_label();
1679 #endif
1680
1681 task_lock(task);
1682
1683 assert(is_corpse_fork || task->bsd_info != NULL);
1684 if (task->corpse_info == NULL && (is_corpse_fork || task->bsd_info != NULL)) {
1685 #if CONFIG_MACF
1686 /* Set the crash label, used by the exception delivery mac hook */
1687 free_label = task->crash_label; // Most likely NULL.
1688 task->crash_label = label;
1689 mac_exc_update_task_crash_label(task, crash_label);
1690 #endif
1691 task_unlock(task);
1692
1693 crash_data_kernel = (void *) kalloc(CORPSEINFO_ALLOCATION_SIZE);
1694 if (crash_data_kernel == NULL) {
1695 kr = KERN_RESOURCE_SHORTAGE;
1696 goto out_no_lock;
1697 }
1698 bzero(crash_data_kernel, CORPSEINFO_ALLOCATION_SIZE);
1699 crash_data_ptr = (mach_vm_offset_t) crash_data_kernel;
1700
1701 /* Do not get a corpse ref for corpse fork */
1702 crash_data = task_crashinfo_alloc_init((mach_vm_address_t)crash_data_ptr, size,
1703 is_corpse_fork ? 0 : CORPSE_CRASHINFO_HAS_REF,
1704 KCFLAG_USE_MEMCOPY);
1705 if (crash_data) {
1706 task_lock(task);
1707 crash_data_release = task->corpse_info;
1708 crash_data_kernel_release = kcdata_memory_get_begin_addr(crash_data_release);
1709 task->corpse_info = crash_data;
1710
1711 task_unlock(task);
1712 kr = KERN_SUCCESS;
1713 } else {
1714 kfree(crash_data_kernel, CORPSEINFO_ALLOCATION_SIZE);
1715 kr = KERN_FAILURE;
1716 }
1717
1718 if (crash_data_release != NULL) {
1719 task_crashinfo_destroy(crash_data_release);
1720 }
1721 if (crash_data_kernel_release != NULL) {
1722 kfree(crash_data_kernel_release, CORPSEINFO_ALLOCATION_SIZE);
1723 }
1724 } else {
1725 task_unlock(task);
1726 }
1727
1728 out_no_lock:
1729 #if CONFIG_MACF
1730 if (free_label != NULL) {
1731 mac_exc_free_label(free_label);
1732 }
1733 #endif
1734 return kr;
1735 }
1736
1737 /*
1738 * task_deliver_crash_notification:
1739 *
1740 * Makes outcall to registered host port for a corpse.
1741 */
1742 kern_return_t
1743 task_deliver_crash_notification(
1744 task_t task,
1745 thread_t thread,
1746 exception_type_t etype,
1747 mach_exception_subcode_t subcode)
1748 {
1749 kcdata_descriptor_t crash_info = task->corpse_info;
1750 thread_t th_iter = NULL;
1751 kern_return_t kr = KERN_SUCCESS;
1752 wait_interrupt_t wsave;
1753 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
1754 ipc_port_t task_port, old_notify;
1755
1756 if (crash_info == NULL)
1757 return KERN_FAILURE;
1758
1759 task_lock(task);
1760 if (task_is_a_corpse_fork(task)) {
1761 /* Populate code with EXC_{RESOURCE,GUARD} for corpse fork */
1762 code[0] = etype;
1763 code[1] = subcode;
1764 } else {
1765 /* Populate code with EXC_CRASH for corpses */
1766 code[0] = EXC_CRASH;
1767 code[1] = 0;
1768 /* Update the code[1] if the boot-arg corpse_for_fatal_memkill is set */
1769 if (corpse_for_fatal_memkill) {
1770 code[1] = subcode;
1771 }
1772 }
1773
1774 queue_iterate(&task->threads, th_iter, thread_t, task_threads)
1775 {
1776 if (th_iter->corpse_dup == FALSE) {
1777 ipc_thread_reset(th_iter);
1778 }
1779 }
1780 task_unlock(task);
1781
1782 /* Arm the no-sender notification for taskport */
1783 task_reference(task);
1784 task_port = convert_task_to_port(task);
1785 ip_lock(task_port);
1786 assert(ip_active(task_port));
1787 ipc_port_nsrequest(task_port, task_port->ip_mscount, ipc_port_make_sonce_locked(task_port), &old_notify);
1788 /* port unlocked */
1789 assert(IP_NULL == old_notify);
1790
1791 wsave = thread_interrupt_level(THREAD_UNINT);
1792 kr = exception_triage_thread(EXC_CORPSE_NOTIFY, code, EXCEPTION_CODE_MAX, thread);
1793 if (kr != KERN_SUCCESS) {
1794 printf("Failed to send exception EXC_CORPSE_NOTIFY. error code: %d for pid %d\n", kr, task_pid(task));
1795 }
1796
1797 (void)thread_interrupt_level(wsave);
1798
1799 /*
1800 * Drop the send right on the task port; this will fire the
1801 * no-senders notification if exception delivery failed.
1802 */
1803 ipc_port_release_send(task_port);
1804 return kr;
1805 }
1806
1807 /*
1808 * task_terminate:
1809 *
1810 * Terminate the specified task. See comments on thread_terminate
1811 * (kern/thread.c) about problems with terminating the "current task."
1812 */
1813
1814 kern_return_t
1815 task_terminate(
1816 task_t task)
1817 {
1818 if (task == TASK_NULL)
1819 return (KERN_INVALID_ARGUMENT);
1820
1821 if (task->bsd_info)
1822 return (KERN_FAILURE);
1823
1824 return (task_terminate_internal(task));
1825 }
1826
1827 #if MACH_ASSERT
1828 extern int proc_pid(struct proc *);
1829 extern void proc_name_kdp(task_t t, char *buf, int size);
1830 #endif /* MACH_ASSERT */
1831
1832 #define VM_MAP_PARTIAL_REAP 0x54 /* 0x150 */
1833 static void
1834 __unused task_partial_reap(task_t task, __unused int pid)
1835 {
1836 unsigned int reclaimed_resident = 0;
1837 unsigned int reclaimed_compressed = 0;
1838 uint64_t task_page_count;
1839
1840 task_page_count = (get_task_phys_footprint(task) / PAGE_SIZE_64);
1841
1842 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_START),
1843 pid, task_page_count, 0, 0, 0);
1844
1845 vm_map_partial_reap(task->map, &reclaimed_resident, &reclaimed_compressed);
1846
1847 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_END),
1848 pid, reclaimed_resident, reclaimed_compressed, 0, 0);
1849 }
1850
1851 kern_return_t
1852 task_mark_corpse(task_t task)
1853 {
1854 kern_return_t kr = KERN_SUCCESS;
1855 thread_t self_thread;
1856 (void) self_thread;
1857 wait_interrupt_t wsave;
1858 #if CONFIG_MACF
1859 struct label *crash_label = NULL;
1860 #endif
1861
1862 assert(task != kernel_task);
1863 assert(task == current_task());
1864 assert(!task_is_a_corpse(task));
1865
1866 #if CONFIG_MACF
1867 crash_label = mac_exc_create_label_for_proc((struct proc*)task->bsd_info);
1868 #endif
1869
1870 kr = task_collect_crash_info(task,
1871 #if CONFIG_MACF
1872 crash_label,
1873 #endif
1874 FALSE);
1875 if (kr != KERN_SUCCESS) {
1876 goto out;
1877 }
1878
1879 self_thread = current_thread();
1880
1881 wsave = thread_interrupt_level(THREAD_UNINT);
1882 task_lock(task);
1883
1884 task_set_corpse_pending_report(task);
1885 task_set_corpse(task);
1886 task->crashed_thread_id = thread_tid(self_thread);
1887
1888 kr = task_start_halt_locked(task, TRUE);
1889 assert(kr == KERN_SUCCESS);
1890
1891 ipc_task_reset(task);
1892 /* Remove the naked send right for task port, needed to arm no sender notification */
1893 task_set_special_port(task, TASK_KERNEL_PORT, IPC_PORT_NULL);
1894 ipc_task_enable(task);
1895
1896 task_unlock(task);
1897 /* terminate the ipc space */
1898 ipc_space_terminate(task->itk_space);
1899
1900 /* Add it to global corpse task list */
1901 task_add_to_corpse_task_list(task);
1902
1903 task_start_halt(task);
1904 thread_terminate_internal(self_thread);
1905
1906 (void) thread_interrupt_level(wsave);
1907 assert(task->halting == TRUE);
1908
1909 out:
1910 #if CONFIG_MACF
1911 mac_exc_free_label(crash_label);
1912 #endif
1913 return kr;
1914 }
1915
1916 /*
1917 * task_clear_corpse
1918 *
1919 * Clears the corpse pending bit on task.
1920 * Removes inspection bit on the threads.
1921 */
1922 void
1923 task_clear_corpse(task_t task)
1924 {
1925 thread_t th_iter = NULL;
1926
1927 task_lock(task);
1928 queue_iterate(&task->threads, th_iter, thread_t, task_threads)
1929 {
1930 thread_mtx_lock(th_iter);
1931 th_iter->inspection = FALSE;
1932 thread_mtx_unlock(th_iter);
1933 }
1934
1935 thread_terminate_crashed_threads();
1936 /* remove the pending corpse report flag */
1937 task_clear_corpse_pending_report(task);
1938
1939 task_unlock(task);
1940 }
1941
1942 /*
1943 * task_port_notify
1944 *
1945 * Called whenever the Mach port system detects no-senders on
1946 * the task port of a corpse.
1947 * Each notification that comes in should terminate the task (corpse).
1948 */
1949 void
1950 task_port_notify(mach_msg_header_t *msg)
1951 {
1952 mach_no_senders_notification_t *notification = (void *)msg;
1953 ipc_port_t port = notification->not_header.msgh_remote_port;
1954 task_t task;
1955
1956 assert(ip_active(port));
1957 assert(IKOT_TASK == ip_kotype(port));
1958 task = (task_t) port->ip_kobject;
1959
1960 assert(task_is_a_corpse(task));
1961
1962 /* Remove the task from global corpse task list */
1963 task_remove_from_corpse_task_list(task);
1964
1965 task_clear_corpse(task);
1966 task_terminate_internal(task);
1967 }
1968
1969 /*
1970 * task_wait_till_threads_terminate_locked
1971 *
1972 * Wait till all the threads in the task are terminated.
1973 * Might release the task lock and re-acquire it.
1974 */
1975 void
1976 task_wait_till_threads_terminate_locked(task_t task)
1977 {
1978 /* wait for all the threads in the task to terminate */
1979 while (task->active_thread_count != 0) {
1980 assert_wait((event_t)&task->active_thread_count, THREAD_UNINT);
1981 task_unlock(task);
1982 thread_block(THREAD_CONTINUE_NULL);
1983
1984 task_lock(task);
1985 }
1986 }
1987
1988 /*
1989 * task_duplicate_map_and_threads
1990 *
1991 * Copy vmmap of source task.
1992 * Copy active threads from source task to destination task.
1993 * The source task is suspended during the copy.
1994 */
1995 kern_return_t
1996 task_duplicate_map_and_threads(
1997 task_t task,
1998 void *p,
1999 task_t new_task,
2000 thread_t *thread_ret,
2001 uint64_t **udata_buffer,
2002 int *size,
2003 int *num_udata)
2004 {
2005 kern_return_t kr = KERN_SUCCESS;
2006 int active;
2007 thread_t thread, self, thread_return = THREAD_NULL;
2008 thread_t new_thread = THREAD_NULL;
2009 thread_t *thread_array;
2010 uint32_t active_thread_count = 0, array_count = 0, i;
2011 vm_map_t oldmap;
2012 uint64_t *buffer = NULL;
2013 int buf_size = 0;
2014 int est_knotes = 0, num_knotes = 0;
2015
2016 self = current_thread();
2017
2018 /*
2019 * Suspend the task to copy thread state, use the internal
2020 * variant so that no user-space process can resume
2021 * the task from under us
2022 */
2023 kr = task_suspend_internal(task);
2024 if (kr != KERN_SUCCESS) {
2025 return kr;
2026 }
2027
2028 if (task->map->disable_vmentry_reuse == TRUE) {
2029 /*
2030 * Quite likely GuardMalloc (or some debugging tool)
2031 * is being used on this task and has gone through
2032 * its limit. Making a corpse will likely encounter
2033 * a lot of VM entries that will need COW.
2034 *
2035 * Skip it.
2036 */
2037 #if DEVELOPMENT || DEBUG
2038 memorystatus_abort_vm_map_fork(task);
2039 #endif
2040 task_resume_internal(task);
2041 return KERN_FAILURE;
2042 }
2043
2044 /* Check with VM if vm_map_fork is allowed for this task */
2045 if (memorystatus_allowed_vm_map_fork(task)) {
2046
2047 /* Set up the new task's vmmap, switching from the parent task's map to its COW map */
2048 oldmap = new_task->map;
2049 new_task->map = vm_map_fork(new_task->ledger,
2050 task->map,
2051 (VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
2052 VM_MAP_FORK_PRESERVE_PURGEABLE));
2053 vm_map_deallocate(oldmap);
2054
2055 /* Get all the udata pointers from kqueue */
2056 est_knotes = kevent_proc_copy_uptrs(p, NULL, 0);
2057 if (est_knotes > 0) {
2058 buf_size = (est_knotes + 32) * sizeof(uint64_t);
2059 buffer = (uint64_t *) kalloc(buf_size);
2060 num_knotes = kevent_proc_copy_uptrs(p, buffer, buf_size);
2061 if (num_knotes > est_knotes + 32) {
2062 num_knotes = est_knotes + 32;
2063 }
2064 }
2065 }
2066
2067 active_thread_count = task->active_thread_count;
2068 if (active_thread_count == 0) {
2069 if (buffer != NULL) {
2070 kfree(buffer, buf_size);
2071 }
2072 task_resume_internal(task);
2073 return KERN_FAILURE;
2074 }
2075
2076 thread_array = (thread_t *) kalloc(sizeof(thread_t) * active_thread_count);
2077
2078 /* Iterate all the threads and drop the task lock before calling thread_create_with_continuation */
2079 task_lock(task);
2080 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2081 /* Skip inactive threads */
2082 active = thread->active;
2083 if (!active) {
2084 continue;
2085 }
2086
2087 if (array_count >= active_thread_count) {
2088 break;
2089 }
2090
2091 thread_array[array_count++] = thread;
2092 thread_reference(thread);
2093 }
2094 task_unlock(task);
2095
2096 for (i = 0; i < array_count; i++) {
2097
2098 kr = thread_create_with_continuation(new_task, &new_thread, (thread_continue_t)thread_corpse_continue);
2099 if (kr != KERN_SUCCESS) {
2100 break;
2101 }
2102
2103 /* Equivalent of current thread in corpse */
2104 if (thread_array[i] == self) {
2105 thread_return = new_thread;
2106 new_task->crashed_thread_id = thread_tid(new_thread);
2107 } else {
2108 /* drop the extra ref returned by thread_create_with_continuation */
2109 thread_deallocate(new_thread);
2110 }
2111
2112 kr = thread_dup2(thread_array[i], new_thread);
2113 if (kr != KERN_SUCCESS) {
2114 thread_mtx_lock(new_thread);
2115 new_thread->corpse_dup = TRUE;
2116 thread_mtx_unlock(new_thread);
2117 continue;
2118 }
2119
2120 /* Copy thread name */
2121 bsd_copythreadname(new_thread->uthread, thread_array[i]->uthread);
2122 thread_copy_resource_info(new_thread, thread_array[i]);
2123 }
2124
2125 task_resume_internal(task);
2126
2127 for (i = 0; i < array_count; i++) {
2128 thread_deallocate(thread_array[i]);
2129 }
2130 kfree(thread_array, sizeof(thread_t) * active_thread_count);
2131
2132 if (kr == KERN_SUCCESS) {
2133 *thread_ret = thread_return;
2134 *udata_buffer = buffer;
2135 *size = buf_size;
2136 *num_udata = num_knotes;
2137 } else {
2138 if (thread_return != THREAD_NULL) {
2139 thread_deallocate(thread_return);
2140 }
2141 if (buffer != NULL) {
2142 kfree(buffer, buf_size);
2143 }
2144 }
2145
2146 return kr;
2147 }
2148
2149 #if CONFIG_SECLUDED_MEMORY
2150 extern void task_set_can_use_secluded_mem_locked(
2151 task_t task,
2152 boolean_t can_use_secluded_mem);
2153 #endif /* CONFIG_SECLUDED_MEMORY */
2154
2155 kern_return_t
2156 task_terminate_internal(
2157 task_t task)
2158 {
2159 thread_t thread, self;
2160 task_t self_task;
2161 boolean_t interrupt_save;
2162 int pid = 0;
2163
2164 assert(task != kernel_task);
2165
2166 self = current_thread();
2167 self_task = self->task;
2168
2169 /*
2170 * Get the task locked and make sure that we are not racing
2171 * with someone else trying to terminate us.
2172 */
2173 if (task == self_task)
2174 task_lock(task);
2175 else
2176 if (task < self_task) {
2177 task_lock(task);
2178 task_lock(self_task);
2179 }
2180 else {
2181 task_lock(self_task);
2182 task_lock(task);
2183 }
2184
2185 #if CONFIG_SECLUDED_MEMORY
2186 if (task->task_can_use_secluded_mem) {
2187 task_set_can_use_secluded_mem_locked(task, FALSE);
2188 }
2189 task->task_could_use_secluded_mem = FALSE;
2190 task->task_could_also_use_secluded_mem = FALSE;
2191 #endif /* CONFIG_SECLUDED_MEMORY */
2192
2193 if (!task->active) {
2194 /*
2195 * Task is already being terminated.
2196 * Just return an error. If we are dying, this will
2197 * just get us to our AST special handler and that
2198 * will get us to finalize the termination of ourselves.
2199 */
2200 task_unlock(task);
2201 if (self_task != task)
2202 task_unlock(self_task);
2203
2204 return (KERN_FAILURE);
2205 }
2206
2207 if (task_corpse_pending_report(task)) {
2208 /*
2209 * Task is marked for reporting as corpse.
2210 * Just return an error. This will
2211 * just get us to our AST special handler and that
2212 * will get us to finish the path to death
2213 */
2214 task_unlock(task);
2215 if (self_task != task)
2216 task_unlock(self_task);
2217
2218 return (KERN_FAILURE);
2219 }
2220
2221 if (self_task != task)
2222 task_unlock(self_task);
2223
2224 /*
2225 * Make sure the current thread does not get aborted out of
2226 * the waits inside these operations.
2227 */
2228 interrupt_save = thread_interrupt_level(THREAD_UNINT);
2229
2230 /*
2231 * Indicate that we want all the threads to stop executing
2232 * in user space by holding the task (we would have held
2233 * each thread independently in thread_terminate_internal -
2234 * but this way we may be more likely to already find it
2235 * held there). Mark the task inactive, and prevent
2236 * further task operations via the task port.
2237 */
2238 task_hold_locked(task);
2239 task->active = FALSE;
2240 ipc_task_disable(task);
2241
2242 #if CONFIG_TELEMETRY
2243 /*
2244 * Notify telemetry that this task is going away.
2245 */
2246 telemetry_task_ctl_locked(task, TF_TELEMETRY, 0);
2247 #endif
2248
2249 /*
2250 * Terminate each thread in the task.
2251 */
2252 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2253 thread_terminate_internal(thread);
2254 }
2255
2256 #ifdef MACH_BSD
2257 if (task->bsd_info != NULL && !task_is_exec_copy(task)) {
2258 pid = proc_pid(task->bsd_info);
2259 }
2260 #endif /* MACH_BSD */
2261
2262 task_unlock(task);
2263
2264 proc_set_task_policy(task, TASK_POLICY_ATTRIBUTE,
2265 TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);
2266
2267 /* Early object reap phase */
2268
2269 // PR-17045188: Revisit implementation
2270 // task_partial_reap(task, pid);
2271
2272 #if CONFIG_EMBEDDED
2273 /*
2274 * remove all task watchers
2275 */
2276 task_removewatchers(task);
2277
2278 #endif /* CONFIG_EMBEDDED */
2279
2280 /*
2281 * Destroy all synchronizers owned by the task.
2282 */
2283 task_synchronizer_destroy_all(task);
2284
2285 /*
2286 * Destroy the IPC space, leaving just a reference for it.
2287 */
2288 ipc_space_terminate(task->itk_space);
2289
2290 #if 00
2291 /* if some ledgers go negative on tear-down again... */
2292 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2293 task_ledgers.phys_footprint);
2294 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2295 task_ledgers.internal);
2296 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2297 task_ledgers.internal_compressed);
2298 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2299 task_ledgers.iokit_mapped);
2300 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2301 task_ledgers.alternate_accounting);
2302 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2303 task_ledgers.alternate_accounting_compressed);
2304 #endif
2305
2306 /*
2307 * If the current thread is a member of the task
2308 * being terminated, then the last reference to
2309 * the task will not be dropped until the thread
2310 * is finally reaped. To avoid incurring the
2311 * expense of removing the address space regions
2312 * at reap time, we do it explicitly here.
2313 */
2314
2315 vm_map_lock(task->map);
2316 vm_map_disable_hole_optimization(task->map);
2317 vm_map_unlock(task->map);
2318
2319 #if MACH_ASSERT
2320 /*
2321 * Identify the pmap's process, in case the pmap ledgers drift
2322 * and we have to report it.
2323 */
2324 char procname[17];
2325 if (task->bsd_info && !task_is_exec_copy(task)) {
2326 pid = proc_pid(task->bsd_info);
2327 proc_name_kdp(task, procname, sizeof (procname));
2328 } else {
2329 pid = 0;
2330 strlcpy(procname, "<unknown>", sizeof (procname));
2331 }
2332 pmap_set_process(task->map->pmap, pid, procname);
2333 #endif /* MACH_ASSERT */
2334
2335 vm_map_remove(task->map,
2336 task->map->min_offset,
2337 task->map->max_offset,
2338 /*
2339 * Final cleanup:
2340 * + no unnesting
2341 * + remove immutable mappings
2342 */
2343 (VM_MAP_REMOVE_NO_UNNESTING |
2344 VM_MAP_REMOVE_IMMUTABLE));
2345
2346 /* release our shared region */
2347 vm_shared_region_set(task, NULL);
2348
2349
2350 lck_mtx_lock(&tasks_threads_lock);
2351 queue_remove(&tasks, task, task_t, tasks);
2352 queue_enter(&terminated_tasks, task, task_t, tasks);
2353 tasks_count--;
2354 terminated_tasks_count++;
2355 lck_mtx_unlock(&tasks_threads_lock);
2356
2357 /*
2358 * We no longer need to guard against being aborted, so restore
2359 * the previous interruptible state.
2360 */
2361 thread_interrupt_level(interrupt_save);
2362
2363 #if KPC
2364 /* force the task to release all ctrs */
2365 if (task->t_kpc & TASK_KPC_FORCED_ALL_CTRS)
2366 kpc_force_all_ctrs(task, 0);
2367 #endif /* KPC */
2368
2369 #if CONFIG_COALITIONS
2370 /*
2371 * Leave our coalitions. (drop activation but not reference)
2372 */
2373 coalitions_remove_task(task);
2374 #endif
2375
2376 /*
2377 * Get rid of the task active reference on itself.
2378 */
2379 task_deallocate(task);
2380
2381 return (KERN_SUCCESS);
2382 }
2383
2384 void
2385 tasks_system_suspend(boolean_t suspend)
2386 {
2387 task_t task;
2388
2389 lck_mtx_lock(&tasks_threads_lock);
2390 assert(tasks_suspend_state != suspend);
2391 tasks_suspend_state = suspend;
2392 queue_iterate(&tasks, task, task_t, tasks) {
2393 if (task == kernel_task) {
2394 continue;
2395 }
2396 suspend ? task_suspend_internal(task) : task_resume_internal(task);
2397 }
2398 lck_mtx_unlock(&tasks_threads_lock);
2399 }
2400
2401 /*
2402 * task_start_halt:
2403 *
2404 * Shut the current task down (except for the current thread) in
2405 * preparation for dramatic changes to the task (probably exec).
2406 * We hold the task and mark all other threads in the task for
2407 * termination.
2408 */
2409 kern_return_t
2410 task_start_halt(task_t task)
2411 {
2412 kern_return_t kr = KERN_SUCCESS;
2413 task_lock(task);
2414 kr = task_start_halt_locked(task, FALSE);
2415 task_unlock(task);
2416 return kr;
2417 }
2418
2419 static kern_return_t
2420 task_start_halt_locked(task_t task, boolean_t should_mark_corpse)
2421 {
2422 thread_t thread, self;
2423 uint64_t dispatchqueue_offset;
2424
2425 assert(task != kernel_task);
2426
2427 self = current_thread();
2428
2429 if (task != self->task && !task_is_a_corpse_fork(task))
2430 return (KERN_INVALID_ARGUMENT);
2431
2432 if (task->halting || !task->active || !self->active) {
2433 /*
2434 * Task or current thread is already being terminated.
2435 * Hurry up and return out of the current kernel context
2436 * so that we run our AST special handler to terminate
2437 * ourselves.
2438 */
2439 return (KERN_FAILURE);
2440 }
2441
2442 task->halting = TRUE;
2443
2444 /*
2445 * Mark all the threads to keep them from starting any more
2446 * user-level execution. The thread_terminate_internal code
2447 * would do this on a thread by thread basis anyway, but this
2448 * gives us a better chance of not having to wait there.
2449 */
2450 task_hold_locked(task);
2451 dispatchqueue_offset = get_dispatchqueue_offset_from_proc(task->bsd_info);
2452
2453 /*
2454 * Terminate all the other threads in the task.
2455 */
2456 queue_iterate(&task->threads, thread, thread_t, task_threads)
2457 {
2458 if (should_mark_corpse) {
2459 thread_mtx_lock(thread);
2460 thread->inspection = TRUE;
2461 thread_mtx_unlock(thread);
2462 }
2463 if (thread != self)
2464 thread_terminate_internal(thread);
2465 }
2466 task->dispatchqueue_offset = dispatchqueue_offset;
2467
2468 task_release_locked(task);
2469
2470 return KERN_SUCCESS;
2471 }
2472
2473
2474 /*
2475 * task_complete_halt:
2476 *
2477 * Complete task halt by waiting for threads to terminate, then clean
2478 * up task resources (VM, port namespace, etc...) and then let the
2479 * current thread go in the (practically empty) task context.
2480 *
2481 * Note: task->halting flag is not cleared in order to avoid creation
2482 * of new threads in the old exec'ed task.
2483 */
2484 void
2485 task_complete_halt(task_t task)
2486 {
2487 task_lock(task);
2488 assert(task->halting);
2489 assert(task == current_task());
2490
2491 /*
2492 * Wait for the other threads to get shut down.
2493 * When the last other thread is reaped, we'll be
2494 * woken up.
2495 */
2496 if (task->thread_count > 1) {
2497 assert_wait((event_t)&task->halting, THREAD_UNINT);
2498 task_unlock(task);
2499 thread_block(THREAD_CONTINUE_NULL);
2500 } else {
2501 task_unlock(task);
2502 }
2503
2504 /*
2505 * Give the machine dependent code a chance
2506 * to perform cleanup of task-level resources
2507 * associated with the current thread before
2508 * ripping apart the task.
2509 */
2510 machine_task_terminate(task);
2511
2512 /*
2513 * Destroy all synchronizers owned by the task.
2514 */
2515 task_synchronizer_destroy_all(task);
2516
2517 /*
2518 * Destroy the contents of the IPC space, leaving just
2519 * a reference for it.
2520 */
2521 ipc_space_clean(task->itk_space);
2522
2523 /*
2524 * Clean out the address space, as we are going to be
2525 * getting a new one.
2526 */
2527 vm_map_remove(task->map, task->map->min_offset,
2528 task->map->max_offset,
2529 /*
2530 * Final cleanup:
2531 * + no unnesting
2532 * + remove immutable mappings
2533 */
2534 (VM_MAP_REMOVE_NO_UNNESTING |
2535 VM_MAP_REMOVE_IMMUTABLE));
2536
2537 /*
2538 * Kick out any IOKitUser handles to the task. At best they're stale,
2539 * at worst someone is racing a SUID exec.
2540 */
2541 iokit_task_terminate(task);
2542 }
2543
2544 /*
2545 * task_hold_locked:
2546 *
2547 * Suspend execution of the specified task.
2548 * This is a recursive-style suspension of the task, a count of
2549 * suspends is maintained.
2550 *
2551 * CONDITIONS: the task is locked and active.
2552 */
2553 void
2554 task_hold_locked(
2555 task_t task)
2556 {
2557 thread_t thread;
2558
2559 assert(task->active);
2560
2561 if (task->suspend_count++ > 0)
2562 return;
2563
2564 /*
2565 * Iterate through all the threads and hold them.
2566 */
2567 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2568 thread_mtx_lock(thread);
2569 thread_hold(thread);
2570 thread_mtx_unlock(thread);
2571 }
2572 }
2573
2574 /*
2575 * task_hold:
2576 *
2577 * Same as the internal routine above, except that it must lock
2578 * and verify that the task is active. This differs from task_suspend
2579 * in that it places a kernel hold on the task rather than just a
2580 * user-level hold. This keeps users from over-resuming and setting
2581 * it running out from under the kernel.
2582 *
2583 * CONDITIONS: the caller holds a reference on the task
2584 */
2585 kern_return_t
2586 task_hold(
2587 task_t task)
2588 {
2589 if (task == TASK_NULL)
2590 return (KERN_INVALID_ARGUMENT);
2591
2592 task_lock(task);
2593
2594 if (!task->active) {
2595 task_unlock(task);
2596
2597 return (KERN_FAILURE);
2598 }
2599
2600 task_hold_locked(task);
2601 task_unlock(task);
2602
2603 return (KERN_SUCCESS);
2604 }
2605
2606 kern_return_t
2607 task_wait(
2608 task_t task,
2609 boolean_t until_not_runnable)
2610 {
2611 if (task == TASK_NULL)
2612 return (KERN_INVALID_ARGUMENT);
2613
2614 task_lock(task);
2615
2616 if (!task->active) {
2617 task_unlock(task);
2618
2619 return (KERN_FAILURE);
2620 }
2621
2622 task_wait_locked(task, until_not_runnable);
2623 task_unlock(task);
2624
2625 return (KERN_SUCCESS);
2626 }
2627
2628 /*
2629 * task_wait_locked:
2630 *
2631 * Wait for all threads in task to stop.
2632 *
2633 * Conditions:
2634 * Called with task locked, active, and held.
2635 */
2636 void
2637 task_wait_locked(
2638 task_t task,
2639 boolean_t until_not_runnable)
2640 {
2641 thread_t thread, self;
2642
2643 assert(task->active);
2644 assert(task->suspend_count > 0);
2645
2646 self = current_thread();
2647
2648 /*
2649 * Iterate through all the threads and wait for them to
2650 * stop. Do not wait for the current thread if it is within
2651 * the task.
2652 */
2653 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2654 if (thread != self)
2655 thread_wait(thread, until_not_runnable);
2656 }
2657 }
2658
2659 /*
2660 * task_release_locked:
2661 *
2662 * Release a kernel hold on a task.
2663 *
2664 * CONDITIONS: the task is locked and active
2665 */
2666 void
2667 task_release_locked(
2668 task_t task)
2669 {
2670 thread_t thread;
2671
2672 assert(task->active);
2673 assert(task->suspend_count > 0);
2674
2675 if (--task->suspend_count > 0)
2676 return;
2677
2678 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2679 thread_mtx_lock(thread);
2680 thread_release(thread);
2681 thread_mtx_unlock(thread);
2682 }
2683 }
2684
2685 /*
2686 * task_release:
2687 *
2688 * Same as the internal routine above, except that it must lock
2689 * and verify that the task is active.
2690 *
2691 * CONDITIONS: The caller holds a reference to the task
2692 */
2693 kern_return_t
2694 task_release(
2695 task_t task)
2696 {
2697 if (task == TASK_NULL)
2698 return (KERN_INVALID_ARGUMENT);
2699
2700 task_lock(task);
2701
2702 if (!task->active) {
2703 task_unlock(task);
2704
2705 return (KERN_FAILURE);
2706 }
2707
2708 task_release_locked(task);
2709 task_unlock(task);
2710
2711 return (KERN_SUCCESS);
2712 }
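
/*
 * A minimal, compiled-out sketch of how an in-kernel caller holding a task
 * reference would pair task_hold(), task_wait() and task_release() to
 * quiesce a task, inspect it, and let it run again.  The helper name is
 * hypothetical and shown for illustration only.
 */
#if 0	/* illustrative in-kernel sketch -- never compiled */
static kern_return_t
example_quiesce_task(task_t task)
{
	kern_return_t kr;

	kr = task_hold(task);
	if (kr != KERN_SUCCESS)
		return kr;

	/* wait for every thread in the task to stop running user code */
	task_wait(task, FALSE);

	/* ... examine task/thread state here ... */

	task_release(task);
	return KERN_SUCCESS;
}
#endif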
2713
2714 kern_return_t
2715 task_threads(
2716 task_t task,
2717 thread_act_array_t *threads_out,
2718 mach_msg_type_number_t *count)
2719 {
2720 mach_msg_type_number_t actual;
2721 thread_t *thread_list;
2722 thread_t thread;
2723 vm_size_t size, size_needed;
2724 void *addr;
2725 unsigned int i, j;
2726
2727 if (task == TASK_NULL)
2728 return (KERN_INVALID_ARGUMENT);
2729
2730 size = 0; addr = NULL;
2731
2732 for (;;) {
2733 task_lock(task);
2734 if (!task->active) {
2735 task_unlock(task);
2736
2737 if (size != 0)
2738 kfree(addr, size);
2739
2740 return (KERN_FAILURE);
2741 }
2742
2743 actual = task->thread_count;
2744
2745 /* do we have the memory we need? */
2746 size_needed = actual * sizeof (mach_port_t);
2747 if (size_needed <= size)
2748 break;
2749
2750 /* unlock the task and allocate more memory */
2751 task_unlock(task);
2752
2753 if (size != 0)
2754 kfree(addr, size);
2755
2756 assert(size_needed > 0);
2757 size = size_needed;
2758
2759 addr = kalloc(size);
2760 if (addr == 0)
2761 return (KERN_RESOURCE_SHORTAGE);
2762 }
2763
2764 /* OK, have memory and the task is locked & active */
2765 thread_list = (thread_t *)addr;
2766
2767 i = j = 0;
2768
2769 for (thread = (thread_t)queue_first(&task->threads); i < actual;
2770 ++i, thread = (thread_t)queue_next(&thread->task_threads)) {
2771 thread_reference_internal(thread);
2772 thread_list[j++] = thread;
2773 }
2774
2775 assert(queue_end(&task->threads, (queue_entry_t)thread));
2776
2777 actual = j;
2778 size_needed = actual * sizeof (mach_port_t);
2779
2780 /* can unlock task now that we've got the thread refs */
2781 task_unlock(task);
2782
2783 if (actual == 0) {
2784 /* no threads, so return null pointer and deallocate memory */
2785
2786 *threads_out = NULL;
2787 *count = 0;
2788
2789 if (size != 0)
2790 kfree(addr, size);
2791 }
2792 else {
2793 /* if we allocated too much, must copy */
2794
2795 if (size_needed < size) {
2796 void *newaddr;
2797
2798 newaddr = kalloc(size_needed);
2799 if (newaddr == 0) {
2800 for (i = 0; i < actual; ++i)
2801 thread_deallocate(thread_list[i]);
2802 kfree(addr, size);
2803 return (KERN_RESOURCE_SHORTAGE);
2804 }
2805
2806 bcopy(addr, newaddr, size_needed);
2807 kfree(addr, size);
2808 thread_list = (thread_t *)newaddr;
2809 }
2810
2811 *threads_out = thread_list;
2812 *count = actual;
2813
2814 /* do the conversion that Mig should handle */
2815
2816 for (i = 0; i < actual; ++i)
2817 ((ipc_port_t *) thread_list)[i] = convert_thread_to_port(thread_list[i]);
2818 }
2819
2820 return (KERN_SUCCESS);
2821 }
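
/*
 * A compiled-out sketch of the user-space caller's side of task_threads():
 * the returned array is out-of-line memory owned by the caller, and each
 * element is a thread port right that must be deallocated separately.
 * The helper name is hypothetical and for illustration only.
 */
#if 0	/* illustrative user-space sketch -- never compiled here */
#include <mach/mach.h>

static void
example_list_threads(task_t target)	/* send right to the target task */
{
	thread_act_array_t threads;
	mach_msg_type_number_t count, i;

	if (task_threads(target, &threads, &count) != KERN_SUCCESS)
		return;

	/* drop each thread port right, then the out-of-line array itself */
	for (i = 0; i < count; i++)
		mach_port_deallocate(mach_task_self(), threads[i]);

	(void) vm_deallocate(mach_task_self(), (vm_address_t)threads,
	    count * sizeof(threads[0]));
}
#endif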
2822
2823 #define TASK_HOLD_NORMAL 0
2824 #define TASK_HOLD_PIDSUSPEND 1
2825 #define TASK_HOLD_LEGACY 2
2826 #define TASK_HOLD_LEGACY_ALL 3
2827
2828 static kern_return_t
2829 place_task_hold (
2830 task_t task,
2831 int mode)
2832 {
2833 if (!task->active && !task_is_a_corpse(task)) {
2834 return (KERN_FAILURE);
2835 }
2836
2837 /* Return success for corpse task */
2838 if (task_is_a_corpse(task)) {
2839 return KERN_SUCCESS;
2840 }
2841
2842 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2843 MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_SUSPEND) | DBG_FUNC_NONE,
2844 task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
2845 task->user_stop_count, task->user_stop_count + 1, 0);
2846
2847 #if MACH_ASSERT
2848 current_task()->suspends_outstanding++;
2849 #endif
2850
2851 if (mode == TASK_HOLD_LEGACY)
2852 task->legacy_stop_count++;
2853
2854 if (task->user_stop_count++ > 0) {
2855 /*
2856 * If the stop count was positive, the task is
2857 * already stopped and we can exit.
2858 */
2859 return (KERN_SUCCESS);
2860 }
2861
2862 /*
2863 * Put a kernel-level hold on the threads in the task (all
2864 * user-level task suspensions added together represent a
2865 * single kernel-level hold). We then wait for the threads
2866 * to stop executing user code.
2867 */
2868 task_hold_locked(task);
2869 task_wait_locked(task, FALSE);
2870
2871 return (KERN_SUCCESS);
2872 }
2873
2874 static kern_return_t
2875 release_task_hold (
2876 task_t task,
2877 int mode)
2878 {
2879 boolean_t release = FALSE;
2880
2881 if (!task->active && !task_is_a_corpse(task)) {
2882 return (KERN_FAILURE);
2883 }
2884
2885 /* Return success for corpse task */
2886 if (task_is_a_corpse(task)) {
2887 return KERN_SUCCESS;
2888 }
2889
2890 if (mode == TASK_HOLD_PIDSUSPEND) {
2891 if (task->pidsuspended == FALSE) {
2892 return (KERN_FAILURE);
2893 }
2894 task->pidsuspended = FALSE;
2895 }
2896
2897 if (task->user_stop_count > (task->pidsuspended ? 1 : 0)) {
2898
2899 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2900 MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_RESUME) | DBG_FUNC_NONE,
2901 task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
2902 task->user_stop_count, mode, task->legacy_stop_count);
2903
2904 #if MACH_ASSERT
2905 /*
2906 * This is obviously not robust; if we suspend one task and then resume a different one,
2907 * we'll fly under the radar. This is only meant to catch the common case of a crashed
2908 * or buggy suspender.
2909 */
2910 current_task()->suspends_outstanding--;
2911 #endif
2912
2913 if (mode == TASK_HOLD_LEGACY_ALL) {
2914 if (task->legacy_stop_count >= task->user_stop_count) {
2915 task->user_stop_count = 0;
2916 release = TRUE;
2917 } else {
2918 task->user_stop_count -= task->legacy_stop_count;
2919 }
2920 task->legacy_stop_count = 0;
2921 } else {
2922 if (mode == TASK_HOLD_LEGACY && task->legacy_stop_count > 0)
2923 task->legacy_stop_count--;
2924 if (--task->user_stop_count == 0)
2925 release = TRUE;
2926 }
2927 }
2928 else {
2929 return (KERN_FAILURE);
2930 }
2931
2932 /*
2933 * Release the task if necessary.
2934 */
2935 if (release)
2936 task_release_locked(task);
2937
2938 return (KERN_SUCCESS);
2939 }
2940
2941
2942 /*
2943 * task_suspend:
2944 *
2945 * Implement an (old-fashioned) user-level suspension on a task.
2946 *
2947 * Because the user isn't expecting to have to manage a suspension
2948 * token, we'll track it for him in the kernel in the form of a naked
2949 * send right to the task's resume port. All such send rights
2950 * account for a single suspension against the task (unlike task_suspend2()
2951 * where each caller gets a unique suspension count represented by a
2952 * unique send-once right).
2953 *
2954 * Conditions:
2955 * The caller holds a reference to the task
2956 */
2957 kern_return_t
2958 task_suspend(
2959 task_t task)
2960 {
2961 kern_return_t kr;
2962 mach_port_t port, send, old_notify;
2963 mach_port_name_t name;
2964
2965 if (task == TASK_NULL || task == kernel_task)
2966 return (KERN_INVALID_ARGUMENT);
2967
2968 task_lock(task);
2969
2970 /*
2971 * Claim a send right on the task resume port, and request a no-senders
2972 * notification on that port (if none outstanding).
2973 */
2974 if (task->itk_resume == IP_NULL) {
2975 task->itk_resume = ipc_port_alloc_kernel();
2976 if (!IP_VALID(task->itk_resume))
2977 panic("failed to create resume port");
2978 ipc_kobject_set(task->itk_resume, (ipc_kobject_t)task, IKOT_TASK_RESUME);
2979 }
2980
2981 port = task->itk_resume;
2982 ip_lock(port);
2983 assert(ip_active(port));
2984
2985 send = ipc_port_make_send_locked(port);
2986 assert(IP_VALID(send));
2987
2988 if (port->ip_nsrequest == IP_NULL) {
2989 ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
2990 assert(old_notify == IP_NULL);
2991 /* port unlocked */
2992 } else {
2993 ip_unlock(port);
2994 }
2995
2996 /*
2997 * place a legacy hold on the task.
2998 */
2999 kr = place_task_hold(task, TASK_HOLD_LEGACY);
3000 if (kr != KERN_SUCCESS) {
3001 task_unlock(task);
3002 ipc_port_release_send(send);
3003 return kr;
3004 }
3005
3006 task_unlock(task);
3007
3008 /*
3009 * Copyout the send right into the calling task's IPC space. It won't know it is there,
3010 * but we'll look it up when calling a traditional resume. Any IPC operations that
3011 * deallocate the send right will auto-release the suspension.
3012 */
3013 if ((kr = ipc_kmsg_copyout_object(current_task()->itk_space, (ipc_object_t)send,
3014 MACH_MSG_TYPE_MOVE_SEND, &name)) != KERN_SUCCESS) {
3015 printf("warning: %s(%d) failed to copyout suspension token for pid %d with error: %d\n",
3016 proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
3017 task_pid(task), kr);
3018 return (kr);
3019 }
3020
3021 return (kr);
3022 }
3023
3024 /*
3025 * task_resume:
3026 * Release a user hold on a task.
3027 *
3028 * Conditions:
3029 * The caller holds a reference to the task
3030 */
3031 kern_return_t
3032 task_resume(
3033 task_t task)
3034 {
3035 kern_return_t kr;
3036 mach_port_name_t resume_port_name;
3037 ipc_entry_t resume_port_entry;
3038 ipc_space_t space = current_task()->itk_space;
3039
3040 if (task == TASK_NULL || task == kernel_task )
3041 return (KERN_INVALID_ARGUMENT);
3042
3043 /* release a legacy task hold */
3044 task_lock(task);
3045 kr = release_task_hold(task, TASK_HOLD_LEGACY);
3046 task_unlock(task);
3047
3048 is_write_lock(space);
3049 if (is_active(space) && IP_VALID(task->itk_resume) &&
3050 ipc_hash_lookup(space, (ipc_object_t)task->itk_resume, &resume_port_name, &resume_port_entry) == TRUE) {
3051 /*
3052 * We found a suspension token in the caller's IPC space. Release a send right to indicate that
3053 * we are holding one less legacy hold on the task from this caller. If the release failed,
3054 * go ahead and drop all the rights, as someone either already released our holds or the task
3055 * is gone.
3056 */
3057 if (kr == KERN_SUCCESS)
3058 ipc_right_dealloc(space, resume_port_name, resume_port_entry);
3059 else
3060 ipc_right_destroy(space, resume_port_name, resume_port_entry, FALSE, 0);
3061 /* space unlocked */
3062 } else {
3063 is_write_unlock(space);
3064 if (kr == KERN_SUCCESS)
3065 printf("warning: %s(%d) performed out-of-band resume on pid %d\n",
3066 proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
3067 task_pid(task));
3068 }
3069
3070 return kr;
3071 }
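
/*
 * A compiled-out, user-space sketch of the legacy pairing described above:
 * each successful task_suspend() stashes a naked send right to the task's
 * resume port in the caller's IPC space, and a matching task_resume()
 * releases it.  The helper name is hypothetical and for illustration only.
 */
#if 0	/* illustrative user-space sketch -- never compiled here */
#include <mach/mach.h>

static kern_return_t
example_stop_and_go(task_t target)	/* send right to the target task */
{
	kern_return_t kr;

	kr = task_suspend(target);	/* kernel tracks the suspension for us */
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... the target is stopped; inspect or modify it here ... */

	return task_resume(target);	/* drops the matching legacy hold */
}
#endif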
3072
3073 /*
3074 * Suspend the target task.
3075 * Making/holding a token/reference/port is the caller's responsibility.
3076 */
3077 kern_return_t
3078 task_suspend_internal(task_t task)
3079 {
3080 kern_return_t kr;
3081
3082 if (task == TASK_NULL || task == kernel_task)
3083 return (KERN_INVALID_ARGUMENT);
3084
3085 task_lock(task);
3086 kr = place_task_hold(task, TASK_HOLD_NORMAL);
3087 task_unlock(task);
3088 return (kr);
3089 }
3090
3091 /*
3092 * Suspend the target task, and return a suspension token. The token
3093 * represents a reference on the suspended task.
3094 */
3095 kern_return_t
3096 task_suspend2(
3097 task_t task,
3098 task_suspension_token_t *suspend_token)
3099 {
3100 kern_return_t kr;
3101
3102 kr = task_suspend_internal(task);
3103 if (kr != KERN_SUCCESS) {
3104 *suspend_token = TASK_NULL;
3105 return (kr);
3106 }
3107
3108 /*
3109 * Take a reference on the target task and return that to the caller
3110 * as a "suspension token," which can be converted into an SO right to
3111 * the now-suspended task's resume port.
3112 */
3113 task_reference_internal(task);
3114 *suspend_token = task;
3115
3116 return (KERN_SUCCESS);
3117 }
3118
3119 /*
3120 * Resume the task
3121 * (reference/token/port management is caller's responsibility).
3122 */
3123 kern_return_t
3124 task_resume_internal(
3125 task_suspension_token_t task)
3126 {
3127 kern_return_t kr;
3128
3129 if (task == TASK_NULL || task == kernel_task)
3130 return (KERN_INVALID_ARGUMENT);
3131
3132 task_lock(task);
3133 kr = release_task_hold(task, TASK_HOLD_NORMAL);
3134 task_unlock(task);
3135 return (kr);
3136 }
3137
3138 /*
3139 * Resume the task using a suspension token. Consumes the token's ref.
3140 */
3141 kern_return_t
3142 task_resume2(
3143 task_suspension_token_t task)
3144 {
3145 kern_return_t kr;
3146
3147 kr = task_resume_internal(task);
3148 task_suspension_token_deallocate(task);
3149
3150 return (kr);
3151 }
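
/*
 * A compiled-out, in-kernel sketch of the token-based variant above:
 * task_suspend2() returns a suspension token carrying a task reference,
 * and task_resume2() both resumes the task and consumes that token.
 * The helper name is hypothetical and for illustration only.
 */
#if 0	/* illustrative in-kernel sketch -- never compiled */
static kern_return_t
example_suspend_with_token(task_t task)
{
	task_suspension_token_t token;
	kern_return_t kr;

	kr = task_suspend2(task, &token);
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... the task stays suspended while we hold the token ... */

	return task_resume2(token);	/* resumes and consumes the token's reference */
}
#endif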
3152
3153 boolean_t
3154 task_suspension_notify(mach_msg_header_t *request_header)
3155 {
3156 ipc_port_t port = (ipc_port_t) request_header->msgh_remote_port;
3157 task_t task = convert_port_to_task_suspension_token(port);
3158 mach_msg_type_number_t not_count;
3159
3160 if (task == TASK_NULL || task == kernel_task)
3161 return TRUE; /* nothing to do */
3162
3163 switch (request_header->msgh_id) {
3164
3165 case MACH_NOTIFY_SEND_ONCE:
3166 /* release the hold held by this specific send-once right */
3167 task_lock(task);
3168 release_task_hold(task, TASK_HOLD_NORMAL);
3169 task_unlock(task);
3170 break;
3171
3172 case MACH_NOTIFY_NO_SENDERS:
3173 not_count = ((mach_no_senders_notification_t *)request_header)->not_count;
3174
3175 task_lock(task);
3176 ip_lock(port);
3177 if (port->ip_mscount == not_count) {
3178
3179 /* release all the [remaining] outstanding legacy holds */
3180 assert(port->ip_nsrequest == IP_NULL);
3181 ip_unlock(port);
3182 release_task_hold(task, TASK_HOLD_LEGACY_ALL);
3183 task_unlock(task);
3184
3185 } else if (port->ip_nsrequest == IP_NULL) {
3186 ipc_port_t old_notify;
3187
3188 task_unlock(task);
3189 /* new send rights, re-arm notification at current make-send count */
3190 ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
3191 assert(old_notify == IP_NULL);
3192 /* port unlocked */
3193 } else {
3194 ip_unlock(port);
3195 task_unlock(task);
3196 }
3197 break;
3198
3199 default:
3200 break;
3201 }
3202
3203 task_suspension_token_deallocate(task); /* drop token reference */
3204 return TRUE;
3205 }
3206
3207 kern_return_t
3208 task_pidsuspend_locked(task_t task)
3209 {
3210 kern_return_t kr;
3211
3212 if (task->pidsuspended) {
3213 kr = KERN_FAILURE;
3214 goto out;
3215 }
3216
3217 task->pidsuspended = TRUE;
3218
3219 kr = place_task_hold(task, TASK_HOLD_PIDSUSPEND);
3220 if (kr != KERN_SUCCESS) {
3221 task->pidsuspended = FALSE;
3222 }
3223 out:
3224 return(kr);
3225 }
3226
3227
3228 /*
3229 * task_pidsuspend:
3230 *
3231 * Suspends a task by placing a hold on its threads.
3232 *
3233 * Conditions:
3234 * The caller holds a reference to the task
3235 */
3236 kern_return_t
3237 task_pidsuspend(
3238 task_t task)
3239 {
3240 kern_return_t kr;
3241
3242 if (task == TASK_NULL || task == kernel_task)
3243 return (KERN_INVALID_ARGUMENT);
3244
3245 task_lock(task);
3246
3247 kr = task_pidsuspend_locked(task);
3248
3249 task_unlock(task);
3250
3251 return (kr);
3252 }
3253
3254 /*
3255 * task_pidresume:
3256 * Resumes a previously suspended task.
3257 *
3258 * Conditions:
3259 * The caller holds a reference to the task
3260 */
3261 kern_return_t
3262 task_pidresume(
3263 task_t task)
3264 {
3265 kern_return_t kr;
3266
3267 if (task == TASK_NULL || task == kernel_task)
3268 return (KERN_INVALID_ARGUMENT);
3269
3270 task_lock(task);
3271
3272 #if CONFIG_FREEZE
3273
3274 while (task->changing_freeze_state) {
3275
3276 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
3277 task_unlock(task);
3278 thread_block(THREAD_CONTINUE_NULL);
3279
3280 task_lock(task);
3281 }
3282 task->changing_freeze_state = TRUE;
3283 #endif
3284
3285 kr = release_task_hold(task, TASK_HOLD_PIDSUSPEND);
3286
3287 task_unlock(task);
3288
3289 #if CONFIG_FREEZE
3290
3291 task_lock(task);
3292
3293 if (kr == KERN_SUCCESS)
3294 task->frozen = FALSE;
3295 task->changing_freeze_state = FALSE;
3296 thread_wakeup(&task->changing_freeze_state);
3297
3298 task_unlock(task);
3299 #endif
3300
3301 return (kr);
3302 }
3303
3304
3305 #if DEVELOPMENT || DEBUG
3306
3307 extern void IOSleep(int);
3308
3309 kern_return_t
3310 task_disconnect_page_mappings(task_t task)
3311 {
3312 int n;
3313
3314 if (task == TASK_NULL || task == kernel_task)
3315 return (KERN_INVALID_ARGUMENT);
3316
3317 /*
3318 * This function strips all of the mappings from the pmap for
3319 * the specified task, forcing the task to re-fault all of the
3320 * pages it is actively using; this lets us approximate the
3321 * true working set of the specified task. We only engage if at
3322 * least one of the threads in the task is runnable, but we keep
3323 * sweeping (at least for a while - the limit is arbitrarily set
3324 * at 100 sweeps, to be revisited as we gain experience) to get
3325 * a better view of which areas within a page are being visited,
3326 * as opposed to only seeing the first fault of a page after the
3327 * task becomes runnable. In the future this may block until
3328 * awakened by a thread in this task being made runnable, but
3329 * for now we are periodically polled from the user-level debug
3330 * tool driving the sysctl.
3332 */
3333 for (n = 0; n < 100; n++) {
3334 thread_t thread;
3335 boolean_t runnable;
3336 boolean_t do_unnest;
3337 int page_count;
3338
3339 runnable = FALSE;
3340 do_unnest = FALSE;
3341
3342 task_lock(task);
3343
3344 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3345
3346 if (thread->state & TH_RUN) {
3347 runnable = TRUE;
3348 break;
3349 }
3350 }
3351 if (n == 0)
3352 task->task_disconnected_count++;
3353
3354 if (task->task_unnested == FALSE) {
3355 if (runnable == TRUE) {
3356 task->task_unnested = TRUE;
3357 do_unnest = TRUE;
3358 }
3359 }
3360 task_unlock(task);
3361
3362 if (runnable == FALSE)
3363 break;
3364
3365 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_START,
3366 task, do_unnest, task->task_disconnected_count, 0, 0);
3367
3368 page_count = vm_map_disconnect_page_mappings(task->map, do_unnest);
3369
3370 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_END,
3371 task, page_count, 0, 0, 0);
3372
3373 if ((n % 5) == 4)
3374 IOSleep(1);
3375 }
3376 return (KERN_SUCCESS);
3377 }
3378
3379 #endif
3380
3381
3382 #if CONFIG_FREEZE
3383
3384 /*
3385 * task_freeze:
3386 *
3387 * Freeze a task.
3388 *
3389 * Conditions:
3390 * The caller holds a reference to the task
3391 */
3392 extern void vm_wake_compactor_swapper(void);
3393 extern queue_head_t c_swapout_list_head;
3394
3395 kern_return_t
3396 task_freeze(
3397 task_t task,
3398 uint32_t *purgeable_count,
3399 uint32_t *wired_count,
3400 uint32_t *clean_count,
3401 uint32_t *dirty_count,
3402 uint32_t dirty_budget,
3403 boolean_t *shared,
3404 boolean_t walk_only)
3405 {
3406 kern_return_t kr = KERN_SUCCESS;
3407
3408 if (task == TASK_NULL || task == kernel_task)
3409 return (KERN_INVALID_ARGUMENT);
3410
3411 task_lock(task);
3412
3413 while (task->changing_freeze_state) {
3414
3415 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
3416 task_unlock(task);
3417 thread_block(THREAD_CONTINUE_NULL);
3418
3419 task_lock(task);
3420 }
3421 if (task->frozen) {
3422 task_unlock(task);
3423 return (KERN_FAILURE);
3424 }
3425 task->changing_freeze_state = TRUE;
3426
3427 task_unlock(task);
3428
3429 if (walk_only) {
3430 panic("task_freeze - walk_only == TRUE");
3431 } else {
3432 kr = vm_map_freeze(task->map, purgeable_count, wired_count, clean_count, dirty_count, dirty_budget, shared);
3433 }
3434
3435 task_lock(task);
3436
3437 if (walk_only == FALSE && kr == KERN_SUCCESS)
3438 task->frozen = TRUE;
3439 task->changing_freeze_state = FALSE;
3440 thread_wakeup(&task->changing_freeze_state);
3441
3442 task_unlock(task);
3443
3444 if (VM_CONFIG_COMPRESSOR_IS_PRESENT) {
3445 vm_wake_compactor_swapper();
3446 /*
3447 * We do an explicit wakeup of the swapout thread here
3448 * because the compact_and_swap routines don't have
3449 * knowledge about these kinds of "per-task packed c_segs"
3450 * and so will not be evaluating whether we need to do
3451 * a wakeup there.
3452 */
3453 thread_wakeup((event_t)&c_swapout_list_head);
3454 }
3455
3456 return (kr);
3457 }
3458
3459 /*
3460 * task_thaw:
3461 *
3462 * Thaw a currently frozen task.
3463 *
3464 * Conditions:
3465 * The caller holds a reference to the task
3466 */
3467 kern_return_t
3468 task_thaw(
3469 task_t task)
3470 {
3471 if (task == TASK_NULL || task == kernel_task)
3472 return (KERN_INVALID_ARGUMENT);
3473
3474 task_lock(task);
3475
3476 while (task->changing_freeze_state) {
3477
3478 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
3479 task_unlock(task);
3480 thread_block(THREAD_CONTINUE_NULL);
3481
3482 task_lock(task);
3483 }
3484 if (!task->frozen) {
3485 task_unlock(task);
3486 return (KERN_FAILURE);
3487 }
3488 task->frozen = FALSE;
3489
3490 task_unlock(task);
3491
3492 return (KERN_SUCCESS);
3493 }
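
/*
 * A compiled-out, in-kernel sketch of pairing task_freeze() with
 * task_thaw().  The helper name and the dirty_budget parameter passed by
 * the caller are assumptions for illustration; real callers (e.g. the
 * memorystatus freezer) choose a budget appropriate to their policy.
 */
#if 0	/* illustrative in-kernel sketch -- never compiled */
static kern_return_t
example_freeze_then_thaw(task_t task, uint32_t dirty_budget)
{
	uint32_t purgeable, wired, clean, dirty;
	boolean_t shared;
	kern_return_t kr;

	kr = task_freeze(task, &purgeable, &wired, &clean, &dirty,
	    dirty_budget, &shared, FALSE);
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... later, when the task needs to run (or be inspected) again ... */

	return task_thaw(task);
}
#endif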
3494
3495 #endif /* CONFIG_FREEZE */
3496
3497 kern_return_t
3498 host_security_set_task_token(
3499 host_security_t host_security,
3500 task_t task,
3501 security_token_t sec_token,
3502 audit_token_t audit_token,
3503 host_priv_t host_priv)
3504 {
3505 ipc_port_t host_port;
3506 kern_return_t kr;
3507
3508 if (task == TASK_NULL)
3509 return(KERN_INVALID_ARGUMENT);
3510
3511 if (host_security == HOST_NULL)
3512 return(KERN_INVALID_SECURITY);
3513
3514 task_lock(task);
3515 task->sec_token = sec_token;
3516 task->audit_token = audit_token;
3517
3518 task_unlock(task);
3519
3520 if (host_priv != HOST_PRIV_NULL) {
3521 kr = host_get_host_priv_port(host_priv, &host_port);
3522 } else {
3523 kr = host_get_host_port(host_priv_self(), &host_port);
3524 }
3525 assert(kr == KERN_SUCCESS);
3526 kr = task_set_special_port(task, TASK_HOST_PORT, host_port);
3527 return(kr);
3528 }
3529
3530 kern_return_t
3531 task_send_trace_memory(
3532 task_t target_task,
3533 __unused uint32_t pid,
3534 __unused uint64_t uniqueid)
3535 {
3536 kern_return_t kr = KERN_INVALID_ARGUMENT;
3537 if (target_task == TASK_NULL)
3538 return (KERN_INVALID_ARGUMENT);
3539
3540 #if CONFIG_ATM
3541 kr = atm_send_proc_inspect_notification(target_task,
3542 pid,
3543 uniqueid);
3544
3545 #endif
3546 return (kr);
3547 }
3548 /*
3549 * This routine was added, pretty much exclusively, for registering the
3550 * RPC glue vector for in-kernel short circuited tasks. Rather than
3551 * removing it completely, I have only disabled that feature (which was
3552 * the only feature at the time). It just appears that we are going to
3553 * want to add some user data to tasks in the future (i.e. bsd info,
3554 * task names, etc...), so I left it in the formal task interface.
3555 */
3556 kern_return_t
3557 task_set_info(
3558 task_t task,
3559 task_flavor_t flavor,
3560 __unused task_info_t task_info_in, /* pointer to IN array */
3561 __unused mach_msg_type_number_t task_info_count)
3562 {
3563 if (task == TASK_NULL)
3564 return(KERN_INVALID_ARGUMENT);
3565
3566 switch (flavor) {
3567
3568 #if CONFIG_ATM
3569 case TASK_TRACE_MEMORY_INFO:
3570 {
3571 if (task_info_count != TASK_TRACE_MEMORY_INFO_COUNT)
3572 return (KERN_INVALID_ARGUMENT);
3573
3574 assert(task_info_in != NULL);
3575 task_trace_memory_info_t mem_info;
3576 mem_info = (task_trace_memory_info_t) task_info_in;
3577 kern_return_t kr = atm_register_trace_memory(task,
3578 mem_info->user_memory_address,
3579 mem_info->buffer_size);
3580 return kr;
3581 }
3582
3583 #endif
3584 default:
3585 return (KERN_INVALID_ARGUMENT);
3586 }
3587 return (KERN_SUCCESS);
3588 }
3589
3590 int radar_20146450 = 1;
3591 kern_return_t
3592 task_info(
3593 task_t task,
3594 task_flavor_t flavor,
3595 task_info_t task_info_out,
3596 mach_msg_type_number_t *task_info_count)
3597 {
3598 kern_return_t error = KERN_SUCCESS;
3599 mach_msg_type_number_t original_task_info_count;
3600
3601 if (task == TASK_NULL)
3602 return (KERN_INVALID_ARGUMENT);
3603
3604 original_task_info_count = *task_info_count;
3605 task_lock(task);
3606
3607 if ((task != current_task()) && (!task->active)) {
3608 task_unlock(task);
3609 return (KERN_INVALID_ARGUMENT);
3610 }
3611
3612 switch (flavor) {
3613
3614 case TASK_BASIC_INFO_32:
3615 case TASK_BASIC2_INFO_32:
3616 #if defined(__arm__) || defined(__arm64__)
3617 case TASK_BASIC_INFO_64:
3618 #endif
3619 {
3620 task_basic_info_32_t basic_info;
3621 vm_map_t map;
3622 clock_sec_t secs;
3623 clock_usec_t usecs;
3624
3625 if (*task_info_count < TASK_BASIC_INFO_32_COUNT) {
3626 error = KERN_INVALID_ARGUMENT;
3627 break;
3628 }
3629
3630 basic_info = (task_basic_info_32_t)task_info_out;
3631
3632 map = (task == kernel_task)? kernel_map: task->map;
3633 basic_info->virtual_size = (typeof(basic_info->virtual_size))map->size;
3634 if (flavor == TASK_BASIC2_INFO_32) {
3635 /*
3636 * The "BASIC2" flavor gets the maximum resident
3637 * size instead of the current resident size...
3638 */
3639 basic_info->resident_size = pmap_resident_max(map->pmap);
3640 } else {
3641 basic_info->resident_size = pmap_resident_count(map->pmap);
3642 }
3643 basic_info->resident_size *= PAGE_SIZE;
3644
3645 basic_info->policy = ((task != kernel_task)?
3646 POLICY_TIMESHARE: POLICY_RR);
3647 basic_info->suspend_count = task->user_stop_count;
3648
3649 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
3650 basic_info->user_time.seconds =
3651 (typeof(basic_info->user_time.seconds))secs;
3652 basic_info->user_time.microseconds = usecs;
3653
3654 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
3655 basic_info->system_time.seconds =
3656 (typeof(basic_info->system_time.seconds))secs;
3657 basic_info->system_time.microseconds = usecs;
3658
3659 *task_info_count = TASK_BASIC_INFO_32_COUNT;
3660 break;
3661 }
3662
3663 #if defined(__arm__) || defined(__arm64__)
3664 case TASK_BASIC_INFO_64_2:
3665 {
3666 task_basic_info_64_2_t basic_info;
3667 vm_map_t map;
3668 clock_sec_t secs;
3669 clock_usec_t usecs;
3670
3671 if (*task_info_count < TASK_BASIC_INFO_64_2_COUNT) {
3672 error = KERN_INVALID_ARGUMENT;
3673 break;
3674 }
3675
3676 basic_info = (task_basic_info_64_2_t)task_info_out;
3677
3678 map = (task == kernel_task)? kernel_map: task->map;
3679 basic_info->virtual_size = map->size;
3680 basic_info->resident_size =
3681 (mach_vm_size_t)(pmap_resident_count(map->pmap))
3682 * PAGE_SIZE_64;
3683
3684 basic_info->policy = ((task != kernel_task)?
3685 POLICY_TIMESHARE: POLICY_RR);
3686 basic_info->suspend_count = task->user_stop_count;
3687
3688 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
3689 basic_info->user_time.seconds =
3690 (typeof(basic_info->user_time.seconds))secs;
3691 basic_info->user_time.microseconds = usecs;
3692
3693 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
3694 basic_info->system_time.seconds =
3695 (typeof(basic_info->system_time.seconds))secs;
3696 basic_info->system_time.microseconds = usecs;
3697
3698 *task_info_count = TASK_BASIC_INFO_64_2_COUNT;
3699 break;
3700 }
3701
3702 #else /* defined(__arm__) || defined(__arm64__) */
3703 case TASK_BASIC_INFO_64:
3704 {
3705 task_basic_info_64_t basic_info;
3706 vm_map_t map;
3707 clock_sec_t secs;
3708 clock_usec_t usecs;
3709
3710 if (*task_info_count < TASK_BASIC_INFO_64_COUNT) {
3711 error = KERN_INVALID_ARGUMENT;
3712 break;
3713 }
3714
3715 basic_info = (task_basic_info_64_t)task_info_out;
3716
3717 map = (task == kernel_task)? kernel_map: task->map;
3718 basic_info->virtual_size = map->size;
3719 basic_info->resident_size =
3720 (mach_vm_size_t)(pmap_resident_count(map->pmap))
3721 * PAGE_SIZE_64;
3722
3723 basic_info->policy = ((task != kernel_task)?
3724 POLICY_TIMESHARE: POLICY_RR);
3725 basic_info->suspend_count = task->user_stop_count;
3726
3727 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
3728 basic_info->user_time.seconds =
3729 (typeof(basic_info->user_time.seconds))secs;
3730 basic_info->user_time.microseconds = usecs;
3731
3732 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
3733 basic_info->system_time.seconds =
3734 (typeof(basic_info->system_time.seconds))secs;
3735 basic_info->system_time.microseconds = usecs;
3736
3737 *task_info_count = TASK_BASIC_INFO_64_COUNT;
3738 break;
3739 }
3740 #endif /* defined(__arm__) || defined(__arm64__) */
3741
3742 case MACH_TASK_BASIC_INFO:
3743 {
3744 mach_task_basic_info_t basic_info;
3745 vm_map_t map;
3746 clock_sec_t secs;
3747 clock_usec_t usecs;
3748
3749 if (*task_info_count < MACH_TASK_BASIC_INFO_COUNT) {
3750 error = KERN_INVALID_ARGUMENT;
3751 break;
3752 }
3753
3754 basic_info = (mach_task_basic_info_t)task_info_out;
3755
3756 map = (task == kernel_task) ? kernel_map : task->map;
3757
3758 basic_info->virtual_size = map->size;
3759
3760 basic_info->resident_size =
3761 (mach_vm_size_t)(pmap_resident_count(map->pmap));
3762 basic_info->resident_size *= PAGE_SIZE_64;
3763
3764 basic_info->resident_size_max =
3765 (mach_vm_size_t)(pmap_resident_max(map->pmap));
3766 basic_info->resident_size_max *= PAGE_SIZE_64;
3767
3768 basic_info->policy = ((task != kernel_task) ?
3769 POLICY_TIMESHARE : POLICY_RR);
3770
3771 basic_info->suspend_count = task->user_stop_count;
3772
3773 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
3774 basic_info->user_time.seconds =
3775 (typeof(basic_info->user_time.seconds))secs;
3776 basic_info->user_time.microseconds = usecs;
3777
3778 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
3779 basic_info->system_time.seconds =
3780 (typeof(basic_info->system_time.seconds))secs;
3781 basic_info->system_time.microseconds = usecs;
3782
3783 *task_info_count = MACH_TASK_BASIC_INFO_COUNT;
3784 break;
3785 }
3786
3787 case TASK_THREAD_TIMES_INFO:
3788 {
3789 task_thread_times_info_t times_info;
3790 thread_t thread;
3791
3792 if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) {
3793 error = KERN_INVALID_ARGUMENT;
3794 break;
3795 }
3796
3797 times_info = (task_thread_times_info_t) task_info_out;
3798 times_info->user_time.seconds = 0;
3799 times_info->user_time.microseconds = 0;
3800 times_info->system_time.seconds = 0;
3801 times_info->system_time.microseconds = 0;
3802
3803
3804 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3805 time_value_t user_time, system_time;
3806
3807 if (thread->options & TH_OPT_IDLE_THREAD)
3808 continue;
3809
3810 thread_read_times(thread, &user_time, &system_time);
3811
3812 time_value_add(&times_info->user_time, &user_time);
3813 time_value_add(&times_info->system_time, &system_time);
3814 }
3815
3816 *task_info_count = TASK_THREAD_TIMES_INFO_COUNT;
3817 break;
3818 }
3819
3820 case TASK_ABSOLUTETIME_INFO:
3821 {
3822 task_absolutetime_info_t info;
3823 thread_t thread;
3824
3825 if (*task_info_count < TASK_ABSOLUTETIME_INFO_COUNT) {
3826 error = KERN_INVALID_ARGUMENT;
3827 break;
3828 }
3829
3830 info = (task_absolutetime_info_t)task_info_out;
3831 info->threads_user = info->threads_system = 0;
3832
3833
3834 info->total_user = task->total_user_time;
3835 info->total_system = task->total_system_time;
3836
3837 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3838 uint64_t tval;
3839 spl_t x;
3840
3841 if (thread->options & TH_OPT_IDLE_THREAD)
3842 continue;
3843
3844 x = splsched();
3845 thread_lock(thread);
3846
3847 tval = timer_grab(&thread->user_timer);
3848 info->threads_user += tval;
3849 info->total_user += tval;
3850
3851 tval = timer_grab(&thread->system_timer);
3852 if (thread->precise_user_kernel_time) {
3853 info->threads_system += tval;
3854 info->total_system += tval;
3855 } else {
3856 /* system_timer may represent either sys or user */
3857 info->threads_user += tval;
3858 info->total_user += tval;
3859 }
3860
3861 thread_unlock(thread);
3862 splx(x);
3863 }
3864
3865
3866 *task_info_count = TASK_ABSOLUTETIME_INFO_COUNT;
3867 break;
3868 }
3869
3870 case TASK_DYLD_INFO:
3871 {
3872 task_dyld_info_t info;
3873
3874 /*
3875 * We added the format field to TASK_DYLD_INFO output. For
3876 * temporary backward compatibility, accept the fact that
3877 * clients may ask for the old version - distinguished by the
3878 * size of the expected result structure.
3879 */
3880 #define TASK_LEGACY_DYLD_INFO_COUNT \
3881 offsetof(struct task_dyld_info, all_image_info_format)/sizeof(natural_t)
3882
3883 if (*task_info_count < TASK_LEGACY_DYLD_INFO_COUNT) {
3884 error = KERN_INVALID_ARGUMENT;
3885 break;
3886 }
3887
3888 info = (task_dyld_info_t)task_info_out;
3889 info->all_image_info_addr = task->all_image_info_addr;
3890 info->all_image_info_size = task->all_image_info_size;
3891
3892 /* only set format on output for those expecting it */
3893 if (*task_info_count >= TASK_DYLD_INFO_COUNT) {
3894 info->all_image_info_format = task_has_64BitAddr(task) ?
3895 TASK_DYLD_ALL_IMAGE_INFO_64 :
3896 TASK_DYLD_ALL_IMAGE_INFO_32 ;
3897 *task_info_count = TASK_DYLD_INFO_COUNT;
3898 } else {
3899 *task_info_count = TASK_LEGACY_DYLD_INFO_COUNT;
3900 }
3901 break;
3902 }
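
/*
 * A minimal user-space sketch of the compatibility rule above (assuming
 * <mach/mach.h>; names other than the Mach constants are illustrative):
 *
 *	task_dyld_info_data_t dyld_info;
 *	mach_msg_type_number_t count = TASK_DYLD_INFO_COUNT;
 *
 *	if (task_info(port, TASK_DYLD_INFO,
 *	    (task_info_t)&dyld_info, &count) == KERN_SUCCESS &&
 *	    count >= TASK_DYLD_INFO_COUNT &&
 *	    dyld_info.all_image_info_format == TASK_DYLD_ALL_IMAGE_INFO_64) {
 *		read_64bit_all_image_infos(dyld_info.all_image_info_addr,
 *		    dyld_info.all_image_info_size);
 *	}
 *
 * A legacy caller that passes only TASK_LEGACY_DYLD_INFO_COUNT still gets
 * the address and size back, but never sees all_image_info_format.
 */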
3903
3904 case TASK_EXTMOD_INFO:
3905 {
3906 task_extmod_info_t info;
3907 void *p;
3908
3909 if (*task_info_count < TASK_EXTMOD_INFO_COUNT) {
3910 error = KERN_INVALID_ARGUMENT;
3911 break;
3912 }
3913
3914 info = (task_extmod_info_t)task_info_out;
3915
3916 p = get_bsdtask_info(task);
3917 if (p) {
3918 proc_getexecutableuuid(p, info->task_uuid, sizeof(info->task_uuid));
3919 } else {
3920 bzero(info->task_uuid, sizeof(info->task_uuid));
3921 }
3922 info->extmod_statistics = task->extmod_statistics;
3923 *task_info_count = TASK_EXTMOD_INFO_COUNT;
3924
3925 break;
3926 }
3927
3928 case TASK_KERNELMEMORY_INFO:
3929 {
3930 task_kernelmemory_info_t tkm_info;
3931 ledger_amount_t credit, debit;
3932
3933 if (*task_info_count < TASK_KERNELMEMORY_INFO_COUNT) {
3934 error = KERN_INVALID_ARGUMENT;
3935 break;
3936 }
3937
3938 tkm_info = (task_kernelmemory_info_t) task_info_out;
3939 tkm_info->total_palloc = 0;
3940 tkm_info->total_pfree = 0;
3941 tkm_info->total_salloc = 0;
3942 tkm_info->total_sfree = 0;
3943
3944 if (task == kernel_task) {
3945 /*
3946 * All shared allocs/frees from other tasks count against
3947 * the kernel private memory usage. If we are looking up
3948 * info for the kernel task, gather from everywhere.
3949 */
3950 task_unlock(task);
3951
3952 /* start by accounting for all the terminated tasks against the kernel */
3953 tkm_info->total_palloc = tasks_tkm_private.alloc + tasks_tkm_shared.alloc;
3954 tkm_info->total_pfree = tasks_tkm_private.free + tasks_tkm_shared.free;
3955
3956 /* count all other task/thread shared alloc/free against the kernel */
3957 lck_mtx_lock(&tasks_threads_lock);
3958
3959 /* XXX this really shouldn't be using the function parameter 'task' as a local var! */
3960 queue_iterate(&tasks, task, task_t, tasks) {
3961 if (task == kernel_task) {
3962 if (ledger_get_entries(task->ledger,
3963 task_ledgers.tkm_private, &credit,
3964 &debit) == KERN_SUCCESS) {
3965 tkm_info->total_palloc += credit;
3966 tkm_info->total_pfree += debit;
3967 }
3968 }
3969 if (!ledger_get_entries(task->ledger,
3970 task_ledgers.tkm_shared, &credit, &debit)) {
3971 tkm_info->total_palloc += credit;
3972 tkm_info->total_pfree += debit;
3973 }
3974 }
3975 lck_mtx_unlock(&tasks_threads_lock);
3976 } else {
3977 if (!ledger_get_entries(task->ledger,
3978 task_ledgers.tkm_private, &credit, &debit)) {
3979 tkm_info->total_palloc = credit;
3980 tkm_info->total_pfree = debit;
3981 }
3982 if (!ledger_get_entries(task->ledger,
3983 task_ledgers.tkm_shared, &credit, &debit)) {
3984 tkm_info->total_salloc = credit;
3985 tkm_info->total_sfree = debit;
3986 }
3987 task_unlock(task);
3988 }
3989
3990 *task_info_count = TASK_KERNELMEMORY_INFO_COUNT;
3991 return KERN_SUCCESS;
3992 }
3993
3994 /* OBSOLETE */
3995 case TASK_SCHED_FIFO_INFO:
3996 {
3997
3998 if (*task_info_count < POLICY_FIFO_BASE_COUNT) {
3999 error = KERN_INVALID_ARGUMENT;
4000 break;
4001 }
4002
4003 error = KERN_INVALID_POLICY;
4004 break;
4005 }
4006
4007 /* OBSOLETE */
4008 case TASK_SCHED_RR_INFO:
4009 {
4010 policy_rr_base_t rr_base;
4011 uint32_t quantum_time;
4012 uint64_t quantum_ns;
4013
4014 if (*task_info_count < POLICY_RR_BASE_COUNT) {
4015 error = KERN_INVALID_ARGUMENT;
4016 break;
4017 }
4018
4019 rr_base = (policy_rr_base_t) task_info_out;
4020
4021 if (task != kernel_task) {
4022 error = KERN_INVALID_POLICY;
4023 break;
4024 }
4025
4026 rr_base->base_priority = task->priority;
4027
4028 quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
4029 absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
4030
4031 rr_base->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
4032
4033 *task_info_count = POLICY_RR_BASE_COUNT;
4034 break;
4035 }
4036
4037 /* OBSOLETE */
4038 case TASK_SCHED_TIMESHARE_INFO:
4039 {
4040 policy_timeshare_base_t ts_base;
4041
4042 if (*task_info_count < POLICY_TIMESHARE_BASE_COUNT) {
4043 error = KERN_INVALID_ARGUMENT;
4044 break;
4045 }
4046
4047 ts_base = (policy_timeshare_base_t) task_info_out;
4048
4049 if (task == kernel_task) {
4050 error = KERN_INVALID_POLICY;
4051 break;
4052 }
4053
4054 ts_base->base_priority = task->priority;
4055
4056 *task_info_count = POLICY_TIMESHARE_BASE_COUNT;
4057 break;
4058 }
4059
4060 case TASK_SECURITY_TOKEN:
4061 {
4062 security_token_t *sec_token_p;
4063
4064 if (*task_info_count < TASK_SECURITY_TOKEN_COUNT) {
4065 error = KERN_INVALID_ARGUMENT;
4066 break;
4067 }
4068
4069 sec_token_p = (security_token_t *) task_info_out;
4070
4071 *sec_token_p = task->sec_token;
4072
4073 *task_info_count = TASK_SECURITY_TOKEN_COUNT;
4074 break;
4075 }
4076
4077 case TASK_AUDIT_TOKEN:
4078 {
4079 audit_token_t *audit_token_p;
4080
4081 if (*task_info_count < TASK_AUDIT_TOKEN_COUNT) {
4082 error = KERN_INVALID_ARGUMENT;
4083 break;
4084 }
4085
4086 audit_token_p = (audit_token_t *) task_info_out;
4087
4088 *audit_token_p = task->audit_token;
4089
4090 *task_info_count = TASK_AUDIT_TOKEN_COUNT;
4091 break;
4092 }
4093
4094 case TASK_SCHED_INFO:
4095 error = KERN_INVALID_ARGUMENT;
4096 break;
4097
4098 case TASK_EVENTS_INFO:
4099 {
4100 task_events_info_t events_info;
4101 thread_t thread;
4102
4103 if (*task_info_count < TASK_EVENTS_INFO_COUNT) {
4104 error = KERN_INVALID_ARGUMENT;
4105 break;
4106 }
4107
4108 events_info = (task_events_info_t) task_info_out;
4109
4110
4111 events_info->faults = task->faults;
4112 events_info->pageins = task->pageins;
4113 events_info->cow_faults = task->cow_faults;
4114 events_info->messages_sent = task->messages_sent;
4115 events_info->messages_received = task->messages_received;
4116 events_info->syscalls_mach = task->syscalls_mach;
4117 events_info->syscalls_unix = task->syscalls_unix;
4118
4119 events_info->csw = task->c_switch;
4120
4121 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4122 events_info->csw += thread->c_switch;
4123 events_info->syscalls_mach += thread->syscalls_mach;
4124 events_info->syscalls_unix += thread->syscalls_unix;
4125 }
4126
4127
4128 *task_info_count = TASK_EVENTS_INFO_COUNT;
4129 break;
4130 }
4131 case TASK_AFFINITY_TAG_INFO:
4132 {
4133 if (*task_info_count < TASK_AFFINITY_TAG_INFO_COUNT) {
4134 error = KERN_INVALID_ARGUMENT;
4135 break;
4136 }
4137
4138 error = task_affinity_info(task, task_info_out, task_info_count);
4139 break;
4140 }
4141 case TASK_POWER_INFO:
4142 {
4143 if (*task_info_count < TASK_POWER_INFO_COUNT) {
4144 error = KERN_INVALID_ARGUMENT;
4145 break;
4146 }
4147
4148 task_power_info_locked(task, (task_power_info_t)task_info_out, NULL, NULL);
4149 break;
4150 }
4151
4152 case TASK_POWER_INFO_V2:
4153 {
4154 if (*task_info_count < TASK_POWER_INFO_V2_COUNT_OLD) {
4155 error = KERN_INVALID_ARGUMENT;
4156 break;
4157 }
4158 task_power_info_v2_t tpiv2 = (task_power_info_v2_t) task_info_out;
4159 task_power_info_locked(task, &tpiv2->cpu_energy, &tpiv2->gpu_energy, tpiv2);
4160 break;
4161 }
4162
4163 case TASK_VM_INFO:
4164 case TASK_VM_INFO_PURGEABLE:
4165 {
4166 task_vm_info_t vm_info;
4167 vm_map_t map;
4168
4169 if (*task_info_count < TASK_VM_INFO_REV0_COUNT) {
4170 error = KERN_INVALID_ARGUMENT;
4171 break;
4172 }
4173
4174 vm_info = (task_vm_info_t)task_info_out;
4175
4176 if (task == kernel_task) {
4177 map = kernel_map;
4178 /* no lock */
4179 } else {
4180 map = task->map;
4181 vm_map_lock_read(map);
4182 }
4183
4184 vm_info->virtual_size = (typeof(vm_info->virtual_size))map->size;
4185 vm_info->region_count = map->hdr.nentries;
4186 vm_info->page_size = vm_map_page_size(map);
4187
4188 vm_info->resident_size = pmap_resident_count(map->pmap);
4189 vm_info->resident_size *= PAGE_SIZE;
4190 vm_info->resident_size_peak = pmap_resident_max(map->pmap);
4191 vm_info->resident_size_peak *= PAGE_SIZE;
4192
4193 #define _VM_INFO(_name) \
4194 vm_info->_name = ((mach_vm_size_t) map->pmap->stats._name) * PAGE_SIZE
4195
4196 _VM_INFO(device);
4197 _VM_INFO(device_peak);
4198 _VM_INFO(external);
4199 _VM_INFO(external_peak);
4200 _VM_INFO(internal);
4201 _VM_INFO(internal_peak);
4202 _VM_INFO(reusable);
4203 _VM_INFO(reusable_peak);
4204 _VM_INFO(compressed);
4205 _VM_INFO(compressed_peak);
4206 _VM_INFO(compressed_lifetime);
4207
4208 vm_info->purgeable_volatile_pmap = 0;
4209 vm_info->purgeable_volatile_resident = 0;
4210 vm_info->purgeable_volatile_virtual = 0;
4211 if (task == kernel_task) {
4212 /*
4213 * We do not maintain the detailed stats for the
4214 * kernel_pmap, so just count everything as
4215 * "internal"...
4216 */
4217 vm_info->internal = vm_info->resident_size;
4218 /*
4219 * ... but since the memory held by the VM compressor
4220 * in the kernel address space ought to be attributed
4221 * to user-space tasks, we subtract it from "internal"
4222 * to give memory reporting tools a more accurate idea
4223 * of what the kernel itself is actually using, instead
4224 * of making it look like the kernel is leaking memory
4225 * when the system is under memory pressure.
4226 */
4227 vm_info->internal -= (VM_PAGE_COMPRESSOR_COUNT *
4228 PAGE_SIZE);
4229 } else {
4230 mach_vm_size_t volatile_virtual_size;
4231 mach_vm_size_t volatile_resident_size;
4232 mach_vm_size_t volatile_compressed_size;
4233 mach_vm_size_t volatile_pmap_size;
4234 mach_vm_size_t volatile_compressed_pmap_size;
4235 kern_return_t kr;
4236
4237 if (flavor == TASK_VM_INFO_PURGEABLE) {
4238 kr = vm_map_query_volatile(
4239 map,
4240 &volatile_virtual_size,
4241 &volatile_resident_size,
4242 &volatile_compressed_size,
4243 &volatile_pmap_size,
4244 &volatile_compressed_pmap_size);
4245 if (kr == KERN_SUCCESS) {
4246 vm_info->purgeable_volatile_pmap =
4247 volatile_pmap_size;
4248 if (radar_20146450) {
4249 vm_info->compressed -=
4250 volatile_compressed_pmap_size;
4251 }
4252 vm_info->purgeable_volatile_resident =
4253 volatile_resident_size;
4254 vm_info->purgeable_volatile_virtual =
4255 volatile_virtual_size;
4256 }
4257 }
4258 }
4259 *task_info_count = TASK_VM_INFO_REV0_COUNT;
4260
4261 if (original_task_info_count >= TASK_VM_INFO_REV1_COUNT) {
4262 vm_info->phys_footprint =
4263 (mach_vm_size_t) get_task_phys_footprint(task);
4264 *task_info_count = TASK_VM_INFO_REV1_COUNT;
4265 }
4266 if (original_task_info_count >= TASK_VM_INFO_REV2_COUNT) {
4267 vm_info->min_address = map->min_offset;
4268 vm_info->max_address = map->max_offset;
4269 *task_info_count = TASK_VM_INFO_REV2_COUNT;
4270 }
4271
4272 if (task != kernel_task) {
4273 vm_map_unlock_read(map);
4274 }
4275
4276 break;
4277 }
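
/*
 * A minimal user-space sketch of the revision handling above: the in/out
 * count controls how much of task_vm_info the caller gets back (assuming
 * <mach/mach.h>; error handling kept minimal):
 *
 *	task_vm_info_data_t vm_info;
 *	mach_msg_type_number_t count = TASK_VM_INFO_COUNT;
 *	mach_vm_size_t footprint = 0, span = 0;
 *
 *	if (task_info(mach_task_self(), TASK_VM_INFO,
 *	    (task_info_t)&vm_info, &count) == KERN_SUCCESS) {
 *		if (count >= TASK_VM_INFO_REV1_COUNT)
 *			footprint = vm_info.phys_footprint;
 *		if (count >= TASK_VM_INFO_REV2_COUNT)
 *			span = vm_info.max_address - vm_info.min_address;
 *	}
 *
 * Callers that only reserve TASK_VM_INFO_REV0_COUNT words get the original
 * fields and nothing more.
 */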
4278
4279 case TASK_WAIT_STATE_INFO:
4280 {
4281 /*
4282 * Deprecated flavor. Currently allowing some results until all users
4283 * stop calling it. The results may not be accurate.
4284 */
4285 task_wait_state_info_t wait_state_info;
4286 uint64_t total_sfi_ledger_val = 0;
4287
4288 if (*task_info_count < TASK_WAIT_STATE_INFO_COUNT) {
4289 error = KERN_INVALID_ARGUMENT;
4290 break;
4291 }
4292
4293 wait_state_info = (task_wait_state_info_t) task_info_out;
4294
4295 wait_state_info->total_wait_state_time = 0;
4296 bzero(wait_state_info->_reserved, sizeof(wait_state_info->_reserved));
4297
4298 #if CONFIG_SCHED_SFI
4299 int i, prev_lentry = -1;
4300 int64_t val_credit, val_debit;
4301
4302 for (i = 0; i < MAX_SFI_CLASS_ID; i++){
4303 val_credit = 0;
4304 /*
4305 * Checking prev_lentry != entry ensures that adjacent classes
4306 * which share the same ledger do not add their wait times twice.
4307 * Note: use the ledger call to get data for each individual SFI class.
4308 */
4309 if (prev_lentry != task_ledgers.sfi_wait_times[i] &&
4310 KERN_SUCCESS == ledger_get_entries(task->ledger,
4311 task_ledgers.sfi_wait_times[i], &val_credit, &val_debit)) {
4312 total_sfi_ledger_val += val_credit;
4313 }
4314 prev_lentry = task_ledgers.sfi_wait_times[i];
4315 }
4316
4317 #endif /* CONFIG_SCHED_SFI */
4318 wait_state_info->total_wait_sfi_state_time = total_sfi_ledger_val;
4319 *task_info_count = TASK_WAIT_STATE_INFO_COUNT;
4320
4321 break;
4322 }
4323 case TASK_VM_INFO_PURGEABLE_ACCOUNT:
4324 {
4325 #if DEVELOPMENT || DEBUG
4326 pvm_account_info_t acnt_info;
4327
4328 if (*task_info_count < PVM_ACCOUNT_INFO_COUNT) {
4329 error = KERN_INVALID_ARGUMENT;
4330 break;
4331 }
4332
4333 if (task_info_out == NULL) {
4334 error = KERN_INVALID_ARGUMENT;
4335 break;
4336 }
4337
4338 acnt_info = (pvm_account_info_t) task_info_out;
4339
4340 error = vm_purgeable_account(task, acnt_info);
4341
4342 *task_info_count = PVM_ACCOUNT_INFO_COUNT;
4343
4344 break;
4345 #else /* DEVELOPMENT || DEBUG */
4346 error = KERN_NOT_SUPPORTED;
4347 break;
4348 #endif /* DEVELOPMENT || DEBUG */
4349 }
4350 case TASK_FLAGS_INFO:
4351 {
4352 task_flags_info_t flags_info;
4353
4354 if (*task_info_count < TASK_FLAGS_INFO_COUNT) {
4355 error = KERN_INVALID_ARGUMENT;
4356 break;
4357 }
4358
4359 flags_info = (task_flags_info_t)task_info_out;
4360
4361 /* only publish the 64-bit flag of the task */
4362 flags_info->flags = task->t_flags & TF_64B_ADDR;
4363
4364 *task_info_count = TASK_FLAGS_INFO_COUNT;
4365 break;
4366 }
4367
4368 case TASK_DEBUG_INFO_INTERNAL:
4369 {
4370 #if DEVELOPMENT || DEBUG
4371 task_debug_info_internal_t dbg_info;
4372 if (*task_info_count < TASK_DEBUG_INFO_INTERNAL_COUNT) {
4373 error = KERN_NOT_SUPPORTED;
4374 break;
4375 }
4376
4377 if (task_info_out == NULL) {
4378 error = KERN_INVALID_ARGUMENT;
4379 break;
4380 }
4381 dbg_info = (task_debug_info_internal_t) task_info_out;
4382 dbg_info->ipc_space_size = 0;
4383 if (task->itk_space){
4384 dbg_info->ipc_space_size = task->itk_space->is_table_size;
4385 }
4386
4387 dbg_info->suspend_count = task->suspend_count;
4388
4389 error = KERN_SUCCESS;
4390 *task_info_count = TASK_DEBUG_INFO_INTERNAL_COUNT;
4391 break;
4392 #else /* DEVELOPMENT || DEBUG */
4393 error = KERN_NOT_SUPPORTED;
4394 break;
4395 #endif /* DEVELOPMENT || DEBUG */
4396 }
4397 default:
4398 error = KERN_INVALID_ARGUMENT;
4399 }
4400
4401 task_unlock(task);
4402 return (error);
4403 }
4404
4405 /*
4406 * task_info_from_user
4407 *
4408 * When task_info is called from user space, this function is
4409 * executed as the MIG server-side routine instead of calling
4410 * directly into task_info. This makes it possible to perform
4411 * additional security checks on the supplied task port before
4412 * task_info proper runs.
4413 *
4414 * In the case of TASK_DYLD_INFO, we require the more
4415 * privileged task_port, not the less-privileged task_name_port.
4416 *
4417 */
4418 kern_return_t
4419 task_info_from_user(
4420 mach_port_t task_port,
4421 task_flavor_t flavor,
4422 task_info_t task_info_out,
4423 mach_msg_type_number_t *task_info_count)
4424 {
4425 task_t task;
4426 kern_return_t ret;
4427
4428 if (flavor == TASK_DYLD_INFO)
4429 task = convert_port_to_task(task_port);
4430 else
4431 task = convert_port_to_task_name(task_port);
4432
4433 ret = task_info(task, flavor, task_info_out, task_info_count);
4434
4435 task_deallocate(task);
4436
4437 return ret;
4438 }
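
/*
 * A minimal user-space sketch of the calling side of the routine above
 * (assuming <mach/mach.h> and <stdio.h>; not compiled in the kernel).
 * mach_task_self() is a full task control port, so it satisfies both the
 * task_name_port path and the TASK_DYLD_INFO control-port requirement.
 */
#if 0
#include <mach/mach.h>
#include <stdio.h>

static void
print_basic_info(void)
{
	mach_task_basic_info_data_t info;
	mach_msg_type_number_t count = MACH_TASK_BASIC_INFO_COUNT;

	/* count is in/out: the kernel clips it to what was actually filled in */
	if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO,
	    (task_info_t)&info, &count) == KERN_SUCCESS) {
		printf("resident %llu bytes, virtual %llu bytes, suspend count %d\n",
		    (unsigned long long)info.resident_size,
		    (unsigned long long)info.virtual_size,
		    info.suspend_count);
	}
}
#endif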
4439
4440 /*
4441 * task_power_info
4442 *
4443 * Returns power stats for the task.
4444 * Note: Called with task locked.
4445 */
4446 void
4447 task_power_info_locked(
4448 task_t task,
4449 task_power_info_t info,
4450 gpu_energy_data_t ginfo,
4451 task_power_info_v2_t infov2)
4452 {
4453 thread_t thread;
4454 ledger_amount_t tmp;
4455
4456 task_lock_assert_owned(task);
4457
4458 ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
4459 (ledger_amount_t *)&info->task_interrupt_wakeups, &tmp);
4460 ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
4461 (ledger_amount_t *)&info->task_platform_idle_wakeups, &tmp);
4462
4463 info->task_timer_wakeups_bin_1 = task->task_timer_wakeups_bin_1;
4464 info->task_timer_wakeups_bin_2 = task->task_timer_wakeups_bin_2;
4465
4466 info->total_user = task->total_user_time;
4467 info->total_system = task->total_system_time;
4468
4469 #if CONFIG_EMBEDDED
4470 if (infov2) {
4471 infov2->task_energy = task->task_energy;
4472 }
4473 #endif
4474
4475 if (ginfo) {
4476 ginfo->task_gpu_utilisation = task->task_gpu_ns;
4477 }
4478
4479 if (infov2) {
4480 infov2->task_ptime = task->total_ptime;
4481 infov2->task_pset_switches = task->ps_switch;
4482 }
4483
4484 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4485 uint64_t tval;
4486 spl_t x;
4487
4488 if (thread->options & TH_OPT_IDLE_THREAD)
4489 continue;
4490
4491 x = splsched();
4492 thread_lock(thread);
4493
4494 info->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
4495 info->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
4496
4497 #if CONFIG_EMBEDDED
4498 if (infov2) {
4499 infov2->task_energy += ml_energy_stat(thread);
4500 }
4501 #endif
4502
4503 tval = timer_grab(&thread->user_timer);
4504 info->total_user += tval;
4505
4506 if (infov2) {
4507 tval = timer_grab(&thread->ptime);
4508 infov2->task_ptime += tval;
4509 infov2->task_pset_switches += thread->ps_switch;
4510 }
4511
4512 tval = timer_grab(&thread->system_timer);
4513 if (thread->precise_user_kernel_time) {
4514 info->total_system += tval;
4515 } else {
4516 /* system_timer may represent either sys or user */
4517 info->total_user += tval;
4518 }
4519
4520 if (ginfo) {
4521 ginfo->task_gpu_utilisation += ml_gpu_stat(thread);
4522 }
4523 thread_unlock(thread);
4524 splx(x);
4525 }
4526 }
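
/*
 * A minimal kernel-side sketch of the locking contract noted above: callers
 * hold the task lock across the call, and the GPU/v2 out-parameters may be
 * NULL when only the basic counters are wanted. The helper name is
 * illustrative.
 */
#if 0
static void
sample_power_info(task_t t, task_power_info_data_t *out)
{
	task_lock(t);
	task_power_info_locked(t, out, NULL, NULL);	/* no GPU or v2 data */
	task_unlock(t);
}
#endif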
4527
4528 /*
4529 * task_gpu_utilisation
4530 *
4531 * Returns the total gpu time used by all the threads of the task
4532 * (both dead and alive)
4533 */
4534 uint64_t
4535 task_gpu_utilisation(
4536 task_t task)
4537 {
4538 uint64_t gpu_time = 0;
4539 #if !CONFIG_EMBEDDED
4540 thread_t thread;
4541
4542 task_lock(task);
4543 gpu_time += task->task_gpu_ns;
4544
4545 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4546 spl_t x;
4547 x = splsched();
4548 thread_lock(thread);
4549 gpu_time += ml_gpu_stat(thread);
4550 thread_unlock(thread);
4551 splx(x);
4552 }
4553
4554 task_unlock(task);
4555 #else /* CONFIG_EMBEDDED */
4556 /* silence compiler warning */
4557 (void)task;
4558 #endif /* !CONFIG_EMBEDDED */
4559 return gpu_time;
4560 }
4561
4562 /*
4563 * task_energy
4564 *
4565 * Returns the total energy used by all the threads of the task
4566 * (both dead and alive)
4567 */
4568 uint64_t
4569 task_energy(
4570 task_t task)
4571 {
4572 uint64_t energy = 0;
4573 thread_t thread;
4574
4575 task_lock(task);
4576 energy += task->task_energy;
4577
4578 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4579 spl_t x;
4580 x = splsched();
4581 thread_lock(thread);
4582 energy += ml_energy_stat(thread);
4583 thread_unlock(thread);
4584 splx(x);
4585 }
4586
4587 task_unlock(task);
4588 return energy;
4589 }
4590
4591
4592 uint64_t
4593 task_cpu_ptime(
4594 __unused task_t task)
4595 {
4596 return 0;
4597 }
4598
4599
4600 /* This function updates the cpu time in the arrays for each
4601 * effective and requested QoS class
4602 */
4603 void
4604 task_update_cpu_time_qos_stats(
4605 task_t task,
4606 uint64_t *eqos_stats,
4607 uint64_t *rqos_stats)
4608 {
4609 if (!eqos_stats && !rqos_stats) {
4610 return;
4611 }
4612
4613 task_lock(task);
4614 thread_t thread;
4615 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4616 if (thread->options & TH_OPT_IDLE_THREAD) {
4617 continue;
4618 }
4619
4620 thread_update_qos_cpu_time(thread);
4621 }
4622
4623 if (eqos_stats) {
4624 eqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_eqos_stats.cpu_time_qos_default;
4625 eqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_eqos_stats.cpu_time_qos_maintenance;
4626 eqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_eqos_stats.cpu_time_qos_background;
4627 eqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_eqos_stats.cpu_time_qos_utility;
4628 eqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_eqos_stats.cpu_time_qos_legacy;
4629 eqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_eqos_stats.cpu_time_qos_user_initiated;
4630 eqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_eqos_stats.cpu_time_qos_user_interactive;
4631 }
4632
4633 if (rqos_stats) {
4634 rqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_rqos_stats.cpu_time_qos_default;
4635 rqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_rqos_stats.cpu_time_qos_maintenance;
4636 rqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_rqos_stats.cpu_time_qos_background;
4637 rqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_rqos_stats.cpu_time_qos_utility;
4638 rqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_rqos_stats.cpu_time_qos_legacy;
4639 rqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_rqos_stats.cpu_time_qos_user_initiated;
4640 rqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_rqos_stats.cpu_time_qos_user_interactive;
4641 }
4642
4643 task_unlock(task);
4644 }
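
/*
 * A minimal sketch of the expected calling convention: the optional arrays
 * are indexed by QoS class and accumulated into, so callers zero them first.
 * THREAD_QOS_LAST is assumed here to bound the indices used above; the
 * helper name is illustrative.
 */
#if 0
static void
snapshot_qos_cpu_time(task_t t)
{
	uint64_t eqos[THREAD_QOS_LAST] = { 0 };	/* effective QoS buckets */
	uint64_t rqos[THREAD_QOS_LAST] = { 0 };	/* requested QoS buckets */

	task_update_cpu_time_qos_stats(t, eqos, rqos);
	/* eqos[THREAD_QOS_UTILITY] etc. now hold accumulated cpu time */
}
#endif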
4645
4646 kern_return_t
4647 task_purgable_info(
4648 task_t task,
4649 task_purgable_info_t *stats)
4650 {
4651 if (task == TASK_NULL || stats == NULL)
4652 return KERN_INVALID_ARGUMENT;
4653 /* Take task reference */
4654 task_reference(task);
4655 vm_purgeable_stats((vm_purgeable_info_t)stats, task);
4656 /* Drop task reference */
4657 task_deallocate(task);
4658 return KERN_SUCCESS;
4659 }
4660
4661 void
4662 task_vtimer_set(
4663 task_t task,
4664 integer_t which)
4665 {
4666 thread_t thread;
4667 spl_t x;
4668
4669 task_lock(task);
4670
4671 task->vtimers |= which;
4672
4673 switch (which) {
4674
4675 case TASK_VTIMER_USER:
4676 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4677 x = splsched();
4678 thread_lock(thread);
4679 if (thread->precise_user_kernel_time)
4680 thread->vtimer_user_save = timer_grab(&thread->user_timer);
4681 else
4682 thread->vtimer_user_save = timer_grab(&thread->system_timer);
4683 thread_unlock(thread);
4684 splx(x);
4685 }
4686 break;
4687
4688 case TASK_VTIMER_PROF:
4689 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4690 x = splsched();
4691 thread_lock(thread);
4692 thread->vtimer_prof_save = timer_grab(&thread->user_timer);
4693 thread->vtimer_prof_save += timer_grab(&thread->system_timer);
4694 thread_unlock(thread);
4695 splx(x);
4696 }
4697 break;
4698
4699 case TASK_VTIMER_RLIM:
4700 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4701 x = splsched();
4702 thread_lock(thread);
4703 thread->vtimer_rlim_save = timer_grab(&thread->user_timer);
4704 thread->vtimer_rlim_save += timer_grab(&thread->system_timer);
4705 thread_unlock(thread);
4706 splx(x);
4707 }
4708 break;
4709 }
4710
4711 task_unlock(task);
4712 }
4713
4714 void
4715 task_vtimer_clear(
4716 task_t task,
4717 integer_t which)
4718 {
4719 assert(task == current_task());
4720
4721 task_lock(task);
4722
4723 task->vtimers &= ~which;
4724
4725 task_unlock(task);
4726 }
4727
4728 void
4729 task_vtimer_update(
4730 __unused
4731 task_t task,
4732 integer_t which,
4733 uint32_t *microsecs)
4734 {
4735 thread_t thread = current_thread();
4736 uint32_t tdelt = 0;
4737 clock_sec_t secs = 0;
4738 uint64_t tsum;
4739
4740 assert(task == current_task());
4741
4742 spl_t s = splsched();
4743 thread_lock(thread);
4744
4745 if ((task->vtimers & which) != (uint32_t)which) {
4746 thread_unlock(thread);
4747 splx(s);
4748 return;
4749 }
4750
4751 switch (which) {
4752
4753 case TASK_VTIMER_USER:
4754 if (thread->precise_user_kernel_time) {
4755 tdelt = (uint32_t)timer_delta(&thread->user_timer,
4756 &thread->vtimer_user_save);
4757 } else {
4758 tdelt = (uint32_t)timer_delta(&thread->system_timer,
4759 &thread->vtimer_user_save);
4760 }
4761 absolutetime_to_microtime(tdelt, &secs, microsecs);
4762 break;
4763
4764 case TASK_VTIMER_PROF:
4765 tsum = timer_grab(&thread->user_timer);
4766 tsum += timer_grab(&thread->system_timer);
4767 tdelt = (uint32_t)(tsum - thread->vtimer_prof_save);
4768 absolutetime_to_microtime(tdelt, &secs, microsecs);
4769 /* if the time delta is smaller than a usec, ignore */
4770 if (*microsecs != 0)
4771 thread->vtimer_prof_save = tsum;
4772 break;
4773
4774 case TASK_VTIMER_RLIM:
4775 tsum = timer_grab(&thread->user_timer);
4776 tsum += timer_grab(&thread->system_timer);
4777 tdelt = (uint32_t)(tsum - thread->vtimer_rlim_save);
4778 thread->vtimer_rlim_save = tsum;
4779 absolutetime_to_microtime(tdelt, &secs, microsecs);
4780 break;
4781 }
4782
4783 thread_unlock(thread);
4784 splx(s);
4785 }
4786
4787 /*
4788 * task_assign:
4789 *
4790 * Change the assigned processor set for the task
4791 */
4792 kern_return_t
4793 task_assign(
4794 __unused task_t task,
4795 __unused processor_set_t new_pset,
4796 __unused boolean_t assign_threads)
4797 {
4798 return(KERN_FAILURE);
4799 }
4800
4801 /*
4802 * task_assign_default:
4803 *
4804 * Version of task_assign to assign to default processor set.
4805 */
4806 kern_return_t
4807 task_assign_default(
4808 task_t task,
4809 boolean_t assign_threads)
4810 {
4811 return (task_assign(task, &pset0, assign_threads));
4812 }
4813
4814 /*
4815 * task_get_assignment
4816 *
4817 * Return name of processor set that task is assigned to.
4818 */
4819 kern_return_t
4820 task_get_assignment(
4821 task_t task,
4822 processor_set_t *pset)
4823 {
4824 if (!task || !task->active)
4825 return KERN_FAILURE;
4826
4827 *pset = &pset0;
4828
4829 return KERN_SUCCESS;
4830 }
4831
4832 uint64_t
4833 get_task_dispatchqueue_offset(
4834 task_t task)
4835 {
4836 return task->dispatchqueue_offset;
4837 }
4838
4839 /*
4840 * task_policy
4841 *
4842 * Set scheduling policy and parameters, both base and limit, for
4843 * the given task. Policy must be a policy which is enabled for the
4844 * processor set. Change contained threads if requested.
4845 */
4846 kern_return_t
4847 task_policy(
4848 __unused task_t task,
4849 __unused policy_t policy_id,
4850 __unused policy_base_t base,
4851 __unused mach_msg_type_number_t count,
4852 __unused boolean_t set_limit,
4853 __unused boolean_t change)
4854 {
4855 return(KERN_FAILURE);
4856 }
4857
4858 /*
4859 * task_set_policy
4860 *
4861 * Set scheduling policy and parameters, both base and limit, for
4862 * the given task. Policy can be any policy implemented by the
4863 * processor set, whether enabled or not. Change contained threads
4864 * if requested.
4865 */
4866 kern_return_t
4867 task_set_policy(
4868 __unused task_t task,
4869 __unused processor_set_t pset,
4870 __unused policy_t policy_id,
4871 __unused policy_base_t base,
4872 __unused mach_msg_type_number_t base_count,
4873 __unused policy_limit_t limit,
4874 __unused mach_msg_type_number_t limit_count,
4875 __unused boolean_t change)
4876 {
4877 return(KERN_FAILURE);
4878 }
4879
4880 kern_return_t
4881 task_set_ras_pc(
4882 __unused task_t task,
4883 __unused vm_offset_t pc,
4884 __unused vm_offset_t endpc)
4885 {
4886 return KERN_FAILURE;
4887 }
4888
4889 void
4890 task_synchronizer_destroy_all(task_t task)
4891 {
4892 /*
4893 * Destroy owned semaphores
4894 */
4895 semaphore_destroy_all(task);
4896 }
4897
4898 /*
4899 * Install default (machine-dependent) initial thread state
4900 * on the task. Subsequent thread creation will have this initial
4901 * state set on the thread by machine_thread_inherit_taskwide().
4902 * Flavors and structures are exactly the same as those passed to thread_set_state().
4903 */
4904 kern_return_t
4905 task_set_state(
4906 task_t task,
4907 int flavor,
4908 thread_state_t state,
4909 mach_msg_type_number_t state_count)
4910 {
4911 kern_return_t ret;
4912
4913 if (task == TASK_NULL) {
4914 return (KERN_INVALID_ARGUMENT);
4915 }
4916
4917 task_lock(task);
4918
4919 if (!task->active) {
4920 task_unlock(task);
4921 return (KERN_FAILURE);
4922 }
4923
4924 ret = machine_task_set_state(task, flavor, state, state_count);
4925
4926 task_unlock(task);
4927 return ret;
4928 }
4929
4930 /*
4931 * Examine the default (machine-dependent) initial thread state
4932 * on the task, as set by task_set_state(). Flavors and structures
4933 * are exactly the same as those passed to thread_get_state().
4934 */
4935 kern_return_t
4936 task_get_state(
4937 task_t task,
4938 int flavor,
4939 thread_state_t state,
4940 mach_msg_type_number_t *state_count)
4941 {
4942 kern_return_t ret;
4943
4944 if (task == TASK_NULL) {
4945 return (KERN_INVALID_ARGUMENT);
4946 }
4947
4948 task_lock(task);
4949
4950 if (!task->active) {
4951 task_unlock(task);
4952 return (KERN_FAILURE);
4953 }
4954
4955 ret = machine_task_get_state(task, flavor, state, state_count);
4956
4957 task_unlock(task);
4958 return ret;
4959 }
4960
4961
4962 static kern_return_t __attribute__((noinline,not_tail_called))
4963 PROC_VIOLATED_GUARD__SEND_EXC_GUARD_AND_SUSPEND(
4964 mach_exception_code_t code,
4965 mach_exception_subcode_t subcode,
4966 void *reason)
4967 {
4968 #ifdef MACH_BSD
4969 if (1 == proc_selfpid())
4970 return KERN_NOT_SUPPORTED; // initproc is immune
4971 #endif
4972 mach_exception_data_type_t codes[EXCEPTION_CODE_MAX] = {
4973 [0] = code,
4974 [1] = subcode,
4975 };
4976 task_t task = current_task();
4977 kern_return_t kr;
4978
4979 /* (See jetsam-related comments below) */
4980
4981 proc_memstat_terminated(task->bsd_info, TRUE);
4982 kr = task_enqueue_exception_with_corpse(task, EXC_GUARD, codes, 2, reason);
4983 proc_memstat_terminated(task->bsd_info, FALSE);
4984 return kr;
4985 }
4986
4987 extern kern_return_t
4988 task_violated_guard(mach_exception_code_t, mach_exception_subcode_t, void *);
4989
4990 kern_return_t
4991 task_violated_guard(
4992 mach_exception_code_t code,
4993 mach_exception_subcode_t subcode,
4994 void *reason)
4995 {
4996 return PROC_VIOLATED_GUARD__SEND_EXC_GUARD_AND_SUSPEND(code, subcode, reason);
4997 }
4998
4999
5000 #if CONFIG_MEMORYSTATUS
5001
5002 boolean_t
5003 task_get_memlimit_is_active(task_t task)
5004 {
5005 assert (task != NULL);
5006
5007 if (task->memlimit_is_active == 1) {
5008 return(TRUE);
5009 } else {
5010 return (FALSE);
5011 }
5012 }
5013
5014 void
5015 task_set_memlimit_is_active(task_t task, boolean_t memlimit_is_active)
5016 {
5017 assert (task != NULL);
5018
5019 if (memlimit_is_active) {
5020 task->memlimit_is_active = 1;
5021 } else {
5022 task->memlimit_is_active = 0;
5023 }
5024 }
5025
5026 boolean_t
5027 task_get_memlimit_is_fatal(task_t task)
5028 {
5029 assert(task != NULL);
5030
5031 if (task->memlimit_is_fatal == 1) {
5032 return(TRUE);
5033 } else {
5034 return(FALSE);
5035 }
5036 }
5037
5038 void
5039 task_set_memlimit_is_fatal(task_t task, boolean_t memlimit_is_fatal)
5040 {
5041 assert (task != NULL);
5042
5043 if (memlimit_is_fatal) {
5044 task->memlimit_is_fatal = 1;
5045 } else {
5046 task->memlimit_is_fatal = 0;
5047 }
5048 }
5049
5050 boolean_t
5051 task_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
5052 {
5053 boolean_t triggered = FALSE;
5054
5055 assert(task == current_task());
5056
5057 /*
5058 * Returns TRUE if the task has already triggered an exc_resource exception.
5059 */
5060
5061 if (memlimit_is_active) {
5062 triggered = (task->memlimit_active_exc_resource ? TRUE : FALSE);
5063 } else {
5064 triggered = (task->memlimit_inactive_exc_resource ? TRUE : FALSE);
5065 }
5066
5067 return(triggered);
5068 }
5069
5070 void
5071 task_mark_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
5072 {
5073 assert(task == current_task());
5074
5075 /*
5076 * We allow one exc_resource per process per active/inactive limit.
5077 * The limit's fatal attribute does not come into play.
5078 */
5079
5080 if (memlimit_is_active) {
5081 task->memlimit_active_exc_resource = 1;
5082 } else {
5083 task->memlimit_inactive_exc_resource = 1;
5084 }
5085 }
5086
5087 #define HWM_USERCORE_MINSPACE 250 // free space (in MB) required *after* core file creation
5088
5089 void __attribute__((noinline))
5090 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, boolean_t is_fatal)
5091 {
5092 task_t task = current_task();
5093 int pid = 0;
5094 const char *procname = "unknown";
5095 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
5096
5097 #ifdef MACH_BSD
5098 pid = proc_selfpid();
5099
5100 if (pid == 1) {
5101 /*
5102 * Cannot have ReportCrash analyzing
5103 * a suspended initproc.
5104 */
5105 return;
5106 }
5107
5108 if (task->bsd_info != NULL)
5109 procname = proc_name_address(current_task()->bsd_info);
5110 #endif
5111 #if CONFIG_COREDUMP
5112 if (hwm_user_cores) {
5113 int error;
5114 uint64_t starttime, end;
5115 clock_sec_t secs = 0;
5116 uint32_t microsecs = 0;
5117
5118 starttime = mach_absolute_time();
5119 /*
5120 * Trigger a coredump of this process. Don't proceed unless we know we won't
5121 * be filling up the disk, and ignore the core size resource limit for this
5122 * core file.
5123 */
5124 if ((error = coredump(current_task()->bsd_info, HWM_USERCORE_MINSPACE, COREDUMP_IGNORE_ULIMIT)) != 0) {
5125 printf("couldn't take coredump of %s[%d]: %d\n", procname, pid, error);
5126 }
5127 /*
5128 * coredump() leaves the task suspended.
5129 */
5130 task_resume_internal(current_task());
5131
5132 end = mach_absolute_time();
5133 absolutetime_to_microtime(end - starttime, &secs, &microsecs);
5134 printf("coredump of %s[%d] taken in %d secs %d microsecs\n",
5135 proc_name_address(current_task()->bsd_info), pid, (int)secs, microsecs);
5136 }
5137 #endif /* CONFIG_COREDUMP */
5138
5139 if (disable_exc_resource) {
5140 printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
5141 "supressed by a boot-arg.\n", procname, pid, max_footprint_mb);
5142 return;
5143 }
5144
5145 /*
5146 * A task that has triggered an EXC_RESOURCE should not be
5147 * jetsammed when the device is under memory pressure. Here
5148 * we set the P_MEMSTAT_TERMINATED flag so that the process
5149 * will be skipped if the memorystatus_thread wakes up.
5150 */
5151 proc_memstat_terminated(current_task()->bsd_info, TRUE);
5152
5153 code[0] = code[1] = 0;
5154 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY);
5155 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK);
5156 EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb);
5157
5158 /* Do not generate a corpse fork if the violation is a fatal one */
5159 if (is_fatal || exc_via_corpse_forking == 0) {
5160 /* Do not send an EXC_RESOURCE if corpse_for_fatal_memkill is set */
5161 if (corpse_for_fatal_memkill == 0) {
5162 /*
5163 * Use the _internal_ variant so that no user-space
5164 * process can resume our task from under us.
5165 */
5166 task_suspend_internal(task);
5167 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
5168 task_resume_internal(task);
5169 }
5170 } else {
5171 if (audio_active) {
5172 printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
5173 "supressed due to audio playback.\n", procname, pid, max_footprint_mb);
5174 } else {
5175 task_enqueue_exception_with_corpse(task, EXC_RESOURCE,
5176 code, EXCEPTION_CODE_MAX, NULL);
5177 }
5178 }
5179
5180 /*
5181 * After the EXC_RESOURCE has been handled, we must clear the
5182 * P_MEMSTAT_TERMINATED flag so that the process can again be
5183 * considered for jetsam if the memorystatus_thread wakes up.
5184 */
5185 proc_memstat_terminated(current_task()->bsd_info, FALSE); /* clear the flag */
5186 }
5187
5188 /*
5189 * Callback invoked when a task exceeds its physical footprint limit.
5190 */
5191 void
5192 task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1)
5193 {
5194 ledger_amount_t max_footprint, max_footprint_mb;
5195 task_t task;
5196 boolean_t is_warning;
5197 boolean_t memlimit_is_active;
5198 boolean_t memlimit_is_fatal;
5199
5200 if (warning == LEDGER_WARNING_DIPPED_BELOW) {
5201 /*
5202 * Task memory limits only provide a warning on the way up.
5203 */
5204 return;
5205 } else if (warning == LEDGER_WARNING_ROSE_ABOVE) {
5206 /*
5207 * This task is in danger of violating a memory limit:
5208 * it has exceeded a percentage level of the limit.
5209 */
5210 is_warning = TRUE;
5211 } else {
5212 /*
5213 * The task has exceeded the physical footprint limit.
5214 * This is not a warning but a true limit violation.
5215 */
5216 is_warning = FALSE;
5217 }
5218
5219 task = current_task();
5220
5221 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &max_footprint);
5222 max_footprint_mb = max_footprint >> 20;
5223
5224 memlimit_is_active = task_get_memlimit_is_active(task);
5225 memlimit_is_fatal = task_get_memlimit_is_fatal(task);
5226
5227 /*
5228 * If this is an actual violation (not a warning), then generate EXC_RESOURCE exception.
5229 * We only generate the exception once per process per memlimit (active/inactive limit).
5230 * To enforce this, we monitor state based on the memlimit's active/inactive attribute
5231 * and we disable it by marking that memlimit as exception triggered.
5232 */
5233 if ((is_warning == FALSE) && (!task_has_triggered_exc_resource(task, memlimit_is_active))) {
5234 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND((int)max_footprint_mb, memlimit_is_fatal);
5235 memorystatus_log_exception((int)max_footprint_mb, memlimit_is_active, memlimit_is_fatal);
5236 task_mark_has_triggered_exc_resource(task, memlimit_is_active);
5237 }
5238
5239 memorystatus_on_ledger_footprint_exceeded(is_warning, memlimit_is_active, memlimit_is_fatal);
5240 }
5241
5242 extern int proc_check_footprint_priv(void);
5243
5244 kern_return_t
5245 task_set_phys_footprint_limit(
5246 task_t task,
5247 int new_limit_mb,
5248 int *old_limit_mb)
5249 {
5250 kern_return_t error;
5251
5252 boolean_t memlimit_is_active;
5253 boolean_t memlimit_is_fatal;
5254
5255 if ((error = proc_check_footprint_priv())) {
5256 return (KERN_NO_ACCESS);
5257 }
5258
5259 /*
5260 * This call should probably be obsoleted.
5261 * But for now, we default to current state.
5262 */
5263 memlimit_is_active = task_get_memlimit_is_active(task);
5264 memlimit_is_fatal = task_get_memlimit_is_fatal(task);
5265
5266 return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, memlimit_is_active, memlimit_is_fatal);
5267 }
5268
5269 kern_return_t
5270 task_convert_phys_footprint_limit(
5271 int limit_mb,
5272 int *converted_limit_mb)
5273 {
5274 if (limit_mb == -1) {
5275 /*
5276 * No limit
5277 */
5278 if (max_task_footprint != 0) {
5279 *converted_limit_mb = (int)(max_task_footprint / 1024 / 1024); /* bytes to MB */
5280 } else {
5281 *converted_limit_mb = (int)(LEDGER_LIMIT_INFINITY >> 20);
5282 }
5283 } else {
5284 /* nothing to convert */
5285 *converted_limit_mb = limit_mb;
5286 }
5287 return (KERN_SUCCESS);
5288 }
5289
5290
5291 kern_return_t
5292 task_set_phys_footprint_limit_internal(
5293 task_t task,
5294 int new_limit_mb,
5295 int *old_limit_mb,
5296 boolean_t memlimit_is_active,
5297 boolean_t memlimit_is_fatal)
5298 {
5299 ledger_amount_t old;
5300
5301 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &old);
5302
5303 /*
5304 * Check that limit >> 20 will not give an "unexpected" 32-bit
5305 * result. There are, however, implicit assumptions that a -1 MB limit
5306 * equates to LEDGER_LIMIT_INFINITY.
5307 */
5308 assert(((old & 0xFFF0000000000000LL) == 0) || (old == LEDGER_LIMIT_INFINITY));
5309
5310 if (old_limit_mb) {
5311 *old_limit_mb = (int)(old >> 20);
5312 }
5313
5314 if (new_limit_mb == -1) {
5315 /*
5316 * Caller wishes to remove the limit.
5317 */
5318 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
5319 max_task_footprint ? max_task_footprint : LEDGER_LIMIT_INFINITY,
5320 max_task_footprint ? max_task_footprint_warning_level : 0);
5321
5322 task_lock(task);
5323 task_set_memlimit_is_active(task, memlimit_is_active);
5324 task_set_memlimit_is_fatal(task, memlimit_is_fatal);
5325 task_unlock(task);
5326
5327 return (KERN_SUCCESS);
5328 }
5329
5330 #ifdef CONFIG_NOMONITORS
5331 return (KERN_SUCCESS);
5332 #endif /* CONFIG_NOMONITORS */
5333
5334 task_lock(task);
5335
5336 if ((memlimit_is_active == task_get_memlimit_is_active(task)) &&
5337 (memlimit_is_fatal == task_get_memlimit_is_fatal(task)) &&
5338 (((ledger_amount_t)new_limit_mb << 20) == old)) {
5339 /*
5340 * memlimit state is not changing
5341 */
5342 task_unlock(task);
5343 return(KERN_SUCCESS);
5344 }
5345
5346 task_set_memlimit_is_active(task, memlimit_is_active);
5347 task_set_memlimit_is_fatal(task, memlimit_is_fatal);
5348
5349 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
5350 (ledger_amount_t)new_limit_mb << 20, PHYS_FOOTPRINT_WARNING_LEVEL);
5351
5352 if (task == current_task()) {
5353 ledger_check_new_balance(current_thread(), task->ledger,
5354 task_ledgers.phys_footprint);
5355 }
5356
5357 task_unlock(task);
5358
5359 return (KERN_SUCCESS);
5360 }
5361
5362 kern_return_t
5363 task_get_phys_footprint_limit(
5364 task_t task,
5365 int *limit_mb)
5366 {
5367 ledger_amount_t limit;
5368
5369 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &limit);
5370 /*
5371 * Check that limit >> 20 will not give an "unexpected" signed, 32-bit
5372 * result. There are, however, implicit assumptions that a -1 MB limit
5373 * equates to LEDGER_LIMIT_INFINITY.
5374 */
5375 assert(((limit & 0xFFF0000000000000LL) == 0) || (limit == LEDGER_LIMIT_INFINITY));
5376 *limit_mb = (int)(limit >> 20);
5377
5378 return (KERN_SUCCESS);
5379 }
5380 #else /* CONFIG_MEMORYSTATUS */
5381 kern_return_t
5382 task_set_phys_footprint_limit(
5383 __unused task_t task,
5384 __unused int new_limit_mb,
5385 __unused int *old_limit_mb)
5386 {
5387 return (KERN_FAILURE);
5388 }
5389
5390 kern_return_t
5391 task_get_phys_footprint_limit(
5392 __unused task_t task,
5393 __unused int *limit_mb)
5394 {
5395 return (KERN_FAILURE);
5396 }
5397 #endif /* CONFIG_MEMORYSTATUS */
5398
5399 /*
5400 * We need to export some functions to other components that
5401 * are currently implemented in macros within the osfmk
5402 * component. Just export them as functions of the same name.
5403 */
5404 boolean_t is_kerneltask(task_t t)
5405 {
5406 if (t == kernel_task)
5407 return (TRUE);
5408
5409 return (FALSE);
5410 }
5411
5412 boolean_t is_corpsetask(task_t t)
5413 {
5414 return (task_is_a_corpse(t));
5415 }
5416
5417 #undef current_task
5418 task_t current_task(void);
5419 task_t current_task(void)
5420 {
5421 return (current_task_fast());
5422 }
5423
5424 #undef task_reference
5425 void task_reference(task_t task);
5426 void
5427 task_reference(
5428 task_t task)
5429 {
5430 if (task != TASK_NULL)
5431 task_reference_internal(task);
5432 }
5433
5434 /* defined in bsd/kern/kern_prot.c */
5435 extern int get_audit_token_pid(audit_token_t *audit_token);
5436
5437 int task_pid(task_t task)
5438 {
5439 if (task)
5440 return get_audit_token_pid(&task->audit_token);
5441 return -1;
5442 }
5443
5444
5445 /*
5446 * This routine finds a thread in a task by its unique id
5447 * Returns a referenced thread or THREAD_NULL if the thread was not found
5448 *
5449 * TODO: This is super inefficient - it's an O(threads in task) list walk!
5450 * We should make a tid hash, or transition all tid clients to thread ports
5451 *
5452 * Precondition: No locks held (will take task lock)
5453 */
5454 thread_t
5455 task_findtid(task_t task, uint64_t tid)
5456 {
5457 thread_t self = current_thread();
5458 thread_t found_thread = THREAD_NULL;
5459 thread_t iter_thread = THREAD_NULL;
5460
5461 /* Short-circuit the lookup if we're looking up ourselves */
5462 if (tid == self->thread_id || tid == TID_NULL) {
5463 assert(self->task == task);
5464
5465 thread_reference(self);
5466
5467 return self;
5468 }
5469
5470 task_lock(task);
5471
5472 queue_iterate(&task->threads, iter_thread, thread_t, task_threads) {
5473 if (iter_thread->thread_id == tid) {
5474 found_thread = iter_thread;
5475 thread_reference(found_thread);
5476 break;
5477 }
5478 }
5479
5480 task_unlock(task);
5481
5482 return (found_thread);
5483 }
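
/*
 * A minimal sketch of the reference contract documented above: a non-NULL
 * result carries a reference that the caller must drop. The helper name is
 * illustrative.
 */
#if 0
static boolean_t
task_tid_is_live(task_t t, uint64_t tid)
{
	thread_t thread = task_findtid(t, tid);

	if (thread == THREAD_NULL)
		return FALSE;

	thread_deallocate(thread);	/* drop the reference task_findtid took */
	return TRUE;
}
#endif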
5484
5485 int pid_from_task(task_t task)
5486 {
5487 int pid = -1;
5488
5489 if (task->bsd_info) {
5490 pid = proc_pid(task->bsd_info);
5491 } else {
5492 pid = task_pid(task);
5493 }
5494
5495 return pid;
5496 }
5497
5498 /*
5499 * Control the CPU usage monitor for a task.
5500 */
5501 kern_return_t
5502 task_cpu_usage_monitor_ctl(task_t task, uint32_t *flags)
5503 {
5504 int error = KERN_SUCCESS;
5505
5506 if (*flags & CPUMON_MAKE_FATAL) {
5507 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_CPUMON;
5508 } else {
5509 error = KERN_INVALID_ARGUMENT;
5510 }
5511
5512 return error;
5513 }
5514
5515 /*
5516 * Control the wakeups monitor for a task.
5517 */
5518 kern_return_t
5519 task_wakeups_monitor_ctl(task_t task, uint32_t *flags, int32_t *rate_hz)
5520 {
5521 ledger_t ledger = task->ledger;
5522
5523 task_lock(task);
5524 if (*flags & WAKEMON_GET_PARAMS) {
5525 ledger_amount_t limit;
5526 uint64_t period;
5527
5528 ledger_get_limit(ledger, task_ledgers.interrupt_wakeups, &limit);
5529 ledger_get_period(ledger, task_ledgers.interrupt_wakeups, &period);
5530
5531 if (limit != LEDGER_LIMIT_INFINITY) {
5532 /*
5533 * An active limit means the wakeups monitor is enabled.
5534 */
5535 *rate_hz = (int32_t)(limit / (int64_t)(period / NSEC_PER_SEC));
5536 *flags = WAKEMON_ENABLE;
5537 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
5538 *flags |= WAKEMON_MAKE_FATAL;
5539 }
5540 } else {
5541 *flags = WAKEMON_DISABLE;
5542 *rate_hz = -1;
5543 }
5544
5545 /*
5546 * If WAKEMON_GET_PARAMS is present in flags, all other flags are ignored.
5547 */
5548 task_unlock(task);
5549 return KERN_SUCCESS;
5550 }
5551
5552 if (*flags & WAKEMON_ENABLE) {
5553 if (*flags & WAKEMON_SET_DEFAULTS) {
5554 *rate_hz = task_wakeups_monitor_rate;
5555 }
5556
5557 #ifndef CONFIG_NOMONITORS
5558 if (*flags & WAKEMON_MAKE_FATAL) {
5559 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
5560 }
5561 #endif /* CONFIG_NOMONITORS */
5562
5563 if (*rate_hz <= 0) {
5564 task_unlock(task);
5565 return KERN_INVALID_ARGUMENT;
5566 }
5567
5568 #ifndef CONFIG_NOMONITORS
5569 ledger_set_limit(ledger, task_ledgers.interrupt_wakeups, *rate_hz * task_wakeups_monitor_interval,
5570 task_wakeups_monitor_ustackshots_trigger_pct);
5571 ledger_set_period(ledger, task_ledgers.interrupt_wakeups, task_wakeups_monitor_interval * NSEC_PER_SEC);
5572 ledger_enable_callback(ledger, task_ledgers.interrupt_wakeups);
5573 #endif /* CONFIG_NOMONITORS */
5574 } else if (*flags & WAKEMON_DISABLE) {
5575 /*
5576 * Caller wishes to disable wakeups monitor on the task.
5577 *
5578 * Disable telemetry if it was triggered by the wakeups monitor, and
5579 * remove the limit & callback on the wakeups ledger entry.
5580 */
5581 #if CONFIG_TELEMETRY
5582 telemetry_task_ctl_locked(task, TF_WAKEMON_WARNING, 0);
5583 #endif
5584 ledger_disable_refill(ledger, task_ledgers.interrupt_wakeups);
5585 ledger_disable_callback(ledger, task_ledgers.interrupt_wakeups);
5586 }
5587
5588 task_unlock(task);
5589 return KERN_SUCCESS;
5590 }
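
/*
 * A minimal sketch of driving the wakeups monitor through the flag protocol
 * above: WAKEMON_SET_DEFAULTS picks up task_wakeups_monitor_rate, and a
 * later WAKEMON_GET_PARAMS call reads back what is now in effect. The
 * helper name is illustrative.
 */
#if 0
static void
enable_default_wakeups_monitor(task_t t)
{
	uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
	int32_t rate_hz = 0;		/* filled in from the defaults */

	task_wakeups_monitor_ctl(t, &flags, &rate_hz);

	flags = WAKEMON_GET_PARAMS;	/* report the effective limit back */
	task_wakeups_monitor_ctl(t, &flags, &rate_hz);
}
#endif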
5591
5592 void
5593 task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1)
5594 {
5595 if (warning == LEDGER_WARNING_ROSE_ABOVE) {
5596 #if CONFIG_TELEMETRY
5597 /*
5598 * This task is in danger of violating the wakeups monitor. Enable telemetry on this task
5599 * so there are micro-stackshots available if and when EXC_RESOURCE is triggered.
5600 */
5601 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 1);
5602 #endif
5603 return;
5604 }
5605
5606 #if CONFIG_TELEMETRY
5607 /*
5608 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
5609 * exceeded the limit, turn telemetry off for the task.
5610 */
5611 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 0);
5612 #endif
5613
5614 if (warning == 0) {
5615 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS();
5616 }
5617 }
5618
5619 void __attribute__((noinline))
5620 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void)
5621 {
5622 task_t task = current_task();
5623 int pid = 0;
5624 const char *procname = "unknown";
5625 boolean_t fatal;
5626 kern_return_t kr;
5627 #ifdef EXC_RESOURCE_MONITORS
5628 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
5629 #endif /* EXC_RESOURCE_MONITORS */
5630 struct ledger_entry_info lei;
5631
5632 #ifdef MACH_BSD
5633 pid = proc_selfpid();
5634 if (task->bsd_info != NULL)
5635 procname = proc_name_address(current_task()->bsd_info);
5636 #endif
5637
5638 ledger_get_entry_info(task->ledger, task_ledgers.interrupt_wakeups, &lei);
5639
5640 /*
5641 * Disable the exception notification so we don't overwhelm
5642 * the listener with an endless stream of redundant exceptions.
5643 * TODO: detect whether another thread is already reporting the violation.
5644 */
5645 uint32_t flags = WAKEMON_DISABLE;
5646 task_wakeups_monitor_ctl(task, &flags, NULL);
5647
5648 fatal = task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
5649 trace_resource_violation(RMON_CPUWAKES_VIOLATED, &lei);
5650 os_log(OS_LOG_DEFAULT, "process %s[%d] caught waking the CPU %llu times "
5651 "over ~%llu seconds, averaging %llu wakes / second and "
5652 "violating a %slimit of %llu wakes over %llu seconds.\n",
5653 procname, pid,
5654 lei.lei_balance, lei.lei_last_refill / NSEC_PER_SEC,
5655 lei.lei_last_refill == 0 ? 0 :
5656 (NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill),
5657 fatal ? "FATAL " : "",
5658 lei.lei_limit, lei.lei_refill_period / NSEC_PER_SEC);
5659
5660 kr = send_resource_violation(send_cpu_wakes_violation, task, &lei,
5661 fatal ? kRNFatalLimitFlag : 0);
5662 if (kr) {
5663 printf("send_resource_violation(CPU wakes, ...): error %#x\n", kr);
5664 }
5665
5666 #ifdef EXC_RESOURCE_MONITORS
5667 if (disable_exc_resource) {
5668 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
5669 "supressed by a boot-arg\n", procname, pid);
5670 return;
5671 }
5672 if (audio_active) {
5673 os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
5674 "supressed due to audio playback\n", procname, pid);
5675 return;
5676 }
5677 if (lei.lei_last_refill == 0) {
5678 os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
5679 "supressed due to lei.lei_last_refill = 0 \n", procname, pid);
5680 }
5681
5682 code[0] = code[1] = 0;
5683 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_WAKEUPS);
5684 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_WAKEUPS_MONITOR);
5685 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_PERMITTED(code[0],
5686 NSEC_PER_SEC * lei.lei_limit / lei.lei_refill_period);
5687 EXC_RESOURCE_CPUMONITOR_ENCODE_OBSERVATION_INTERVAL(code[0],
5688 lei.lei_last_refill);
5689 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_OBSERVED(code[1],
5690 NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill);
5691 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
5692 #endif /* EXC_RESOURCE_MONITORS */
5693
5694 if (fatal) {
5695 task_terminate_internal(task);
5696 }
5697 }
5698
5699 static boolean_t
5700 global_update_logical_writes(int64_t io_delta)
5701 {
5702 int64_t old_count, new_count;
5703 boolean_t needs_telemetry;
5704
5705 do {
5706 new_count = old_count = global_logical_writes_count;
5707 new_count += io_delta;
5708 if (new_count >= io_telemetry_limit) {
5709 new_count = 0;
5710 needs_telemetry = TRUE;
5711 } else {
5712 needs_telemetry = FALSE;
5713 }
5714 } while(!OSCompareAndSwap64(old_count, new_count, &global_logical_writes_count));
5715 return needs_telemetry;
5716 }
5717
5718 void task_update_logical_writes(task_t task, uint32_t io_size, int flags, void *vp)
5719 {
5720 int64_t io_delta = 0;
5721 boolean_t needs_telemetry = FALSE;
5722
5723 if ((!task) || (!io_size) || (!vp))
5724 return;
5725
5726 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_DATA_WRITE)) | DBG_FUNC_NONE,
5727 task_pid(task), io_size, flags, (uintptr_t)VM_KERNEL_ADDRPERM(vp), 0);
5728 DTRACE_IO4(logical_writes, struct task *, task, uint32_t, io_size, int, flags, vnode *, vp);
5729 switch(flags) {
5730 case TASK_WRITE_IMMEDIATE:
5731 OSAddAtomic64(io_size, (SInt64 *)&(task->task_immediate_writes));
5732 ledger_credit(task->ledger, task_ledgers.logical_writes, io_size);
5733 break;
5734 case TASK_WRITE_DEFERRED:
5735 OSAddAtomic64(io_size, (SInt64 *)&(task->task_deferred_writes));
5736 ledger_credit(task->ledger, task_ledgers.logical_writes, io_size);
5737 break;
5738 case TASK_WRITE_INVALIDATED:
5739 OSAddAtomic64(io_size, (SInt64 *)&(task->task_invalidated_writes));
5740 ledger_debit(task->ledger, task_ledgers.logical_writes, io_size);
5741 break;
5742 case TASK_WRITE_METADATA:
5743 OSAddAtomic64(io_size, (SInt64 *)&(task->task_metadata_writes));
5744 ledger_credit(task->ledger, task_ledgers.logical_writes, io_size);
5745 break;
5746 }
5747
5748 io_delta = (flags == TASK_WRITE_INVALIDATED) ? ((int64_t)io_size * -1ll) : ((int64_t)io_size);
5749 if (io_telemetry_limit != 0) {
5750 /* If io_telemetry_limit is 0, disable global updates and I/O telemetry */
5751 needs_telemetry = global_update_logical_writes(io_delta);
5752 if (needs_telemetry) {
5753 act_set_io_telemetry_ast(current_thread());
5754 }
5755 }
5756 }
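
/*
 * A minimal sketch of how the write flags above pair up: a buffered write is
 * first accounted as deferred, and if its pages are later invalidated before
 * reaching disk the same size is reported with TASK_WRITE_INVALIDATED, which
 * debits the logical_writes ledger again. The helper name is illustrative.
 */
#if 0
static void
account_buffered_write(task_t t, uint32_t io_size, void *vp)
{
	task_update_logical_writes(t, io_size, TASK_WRITE_DEFERRED, vp);
	/* ... later, if the dirty pages never make it to disk ... */
	task_update_logical_writes(t, io_size, TASK_WRITE_INVALIDATED, vp);
}
#endif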
5757
5758 /*
5759 * Control the I/O monitor for a task.
5760 */
5761 kern_return_t
5762 task_io_monitor_ctl(task_t task, uint32_t *flags)
5763 {
5764 ledger_t ledger = task->ledger;
5765
5766 task_lock(task);
5767 if (*flags & IOMON_ENABLE) {
5768 /* Configure the physical I/O ledger */
5769 ledger_set_limit(ledger, task_ledgers.physical_writes, (task_iomon_limit_mb * 1024 * 1024), 0);
5770 ledger_set_period(ledger, task_ledgers.physical_writes, (task_iomon_interval_secs * NSEC_PER_SEC));
5771
5772 /* Configure the logical I/O ledger */
5773 ledger_set_limit(ledger, task_ledgers.logical_writes, (task_iomon_limit_mb * 1024 * 1024), 0);
5774 ledger_set_period(ledger, task_ledgers.logical_writes, (task_iomon_interval_secs * NSEC_PER_SEC));
5775
5776 } else if (*flags & IOMON_DISABLE) {
5777 /*
5778 * Caller wishes to disable I/O monitor on the task.
5779 */
5780 ledger_disable_refill(ledger, task_ledgers.physical_writes);
5781 ledger_disable_callback(ledger, task_ledgers.physical_writes);
5782 ledger_disable_refill(ledger, task_ledgers.logical_writes);
5783 ledger_disable_callback(ledger, task_ledgers.logical_writes);
5784 }
5785
5786 task_unlock(task);
5787 return KERN_SUCCESS;
5788 }
5789
5790 void
5791 task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1)
5792 {
5793 if (warning == 0) {
5794 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO((int)param0);
5795 }
5796 }
5797
5798 void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor)
5799 {
5800 int pid = 0;
5801 task_t task = current_task();
5802 #ifdef EXC_RESOURCE_MONITORS
5803 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
5804 #endif /* EXC_RESOURCE_MONITORS */
5805 struct ledger_entry_info lei;
5806 kern_return_t kr;
5807
5808 #ifdef MACH_BSD
5809 pid = proc_selfpid();
5810 #endif
5811 /*
5812 * Get the ledger entry info. We need to do this before disabling the exception
5813 * to get correct values for all fields.
5814 */
5815 switch(flavor) {
5816 case FLAVOR_IO_PHYSICAL_WRITES:
5817 ledger_get_entry_info(task->ledger, task_ledgers.physical_writes, &lei);
5818 break;
5819 case FLAVOR_IO_LOGICAL_WRITES:
5820 ledger_get_entry_info(task->ledger, task_ledgers.logical_writes, &lei);
5821 break;
5822 }
5823
5824
5825 /*
5826 * Disable the exception notification so we don't overwhelm
5827 * the listener with an endless stream of redundant exceptions.
5828 * TODO: detect whether another thread is already reporting the violation.
5829 */
5830 uint32_t flags = IOMON_DISABLE;
5831 task_io_monitor_ctl(task, &flags);
5832
5833 if (flavor == FLAVOR_IO_LOGICAL_WRITES) {
5834 trace_resource_violation(RMON_LOGWRITES_VIOLATED, &lei);
5835 }
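/*
 * The ledger tracks bytes and nanoseconds; the log line and the
 * EXC_RESOURCE code below report megabytes and seconds.  Illustrative
 * arithmetic (assumed numbers, not taken from this file): a balance of
 * 3 GiB against a 1 GiB limit refilled every 86400 s would log
 * "Task I/O: 3072 MB. [Limit : 1024 MB per 86400 secs]".
 */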
5836 os_log(OS_LOG_DEFAULT, "process [%d] caught causing excessive I/O (flavor: %d). Task I/O: %lld MB. [Limit : %lld MB per %lld secs]\n",
5837 pid, flavor, (lei.lei_balance / (1024 * 1024)), (lei.lei_limit / (1024 * 1024)), (lei.lei_refill_period / NSEC_PER_SEC));
5838
5839 kr = send_resource_violation(send_disk_writes_violation, task, &lei, kRNFlagsNone);
5840 if (kr) {
5841 printf("send_resource_violation(disk_writes, ...): error %#x\n", kr);
5842 }
5843
5844 #ifdef EXC_RESOURCE_MONITORS
5845 code[0] = code[1] = 0;
5846 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_IO);
5847 EXC_RESOURCE_ENCODE_FLAVOR(code[0], flavor);
5848 EXC_RESOURCE_IO_ENCODE_INTERVAL(code[0], (lei.lei_refill_period / NSEC_PER_SEC));
5849 EXC_RESOURCE_IO_ENCODE_LIMIT(code[0], (lei.lei_limit / (1024 * 1024)));
5850 EXC_RESOURCE_IO_ENCODE_OBSERVED(code[1], (lei.lei_balance / (1024 * 1024)));
5851 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
5852 #endif /* EXC_RESOURCE_MONITORS */
5853 }
5854
5855 /* Placeholders for the task set/get voucher interfaces */
5856 kern_return_t
5857 task_get_mach_voucher(
5858 task_t task,
5859 mach_voucher_selector_t __unused which,
5860 ipc_voucher_t *voucher)
5861 {
5862 if (TASK_NULL == task)
5863 return KERN_INVALID_TASK;
5864
5865 *voucher = NULL;
5866 return KERN_SUCCESS;
5867 }
5868
5869 kern_return_t
5870 task_set_mach_voucher(
5871 task_t task,
5872 ipc_voucher_t __unused voucher)
5873 {
5874 if (TASK_NULL == task)
5875 return KERN_INVALID_TASK;
5876
5877 return KERN_SUCCESS;
5878 }
5879
5880 kern_return_t
5881 task_swap_mach_voucher(
5882 task_t task,
5883 ipc_voucher_t new_voucher,
5884 ipc_voucher_t *in_out_old_voucher)
5885 {
5886 if (TASK_NULL == task)
5887 return KERN_INVALID_TASK;
5888
5889 *in_out_old_voucher = new_voucher;
5890 return KERN_SUCCESS;
5891 }
5892
5893 void task_set_gpu_denied(task_t task, boolean_t denied)
5894 {
5895 task_lock(task);
5896
5897 if (denied) {
5898 task->t_flags |= TF_GPU_DENIED;
5899 } else {
5900 task->t_flags &= ~TF_GPU_DENIED;
5901 }
5902
5903 task_unlock(task);
5904 }
5905
5906 boolean_t task_is_gpu_denied(task_t task)
5907 {
5908 /* We don't need the lock to read this flag */
5909 return (task->t_flags & TF_GPU_DENIED) ? TRUE : FALSE;
5910 }
5911
5912
5913 uint64_t get_task_memory_region_count(task_t task)
5914 {
5915 vm_map_t map;
5916 map = (task == kernel_task) ? kernel_map: task->map;
5917 return((uint64_t)get_map_nentries(map));
5918 }
5919
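/*
 * Emit the kdebug events describing one dyld image.  On LP64 the 16-byte
 * UUID travels as two 64-bit arguments of the first event, followed by the
 * load address and the fsid packed into a single 64-bit value; a second
 * event carries the fsobjid (object number in the low 32 bits, generation
 * in the high 32).  The 32-bit path spreads the same information across
 * three events.
 */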
5920 static void
5921 kdebug_trace_dyld_internal(uint32_t base_code,
5922 struct dyld_kernel_image_info *info)
5923 {
5924 static_assert(sizeof(info->uuid) >= 16);
5925
5926 #if defined(__LP64__)
5927 uint64_t *uuid = (uint64_t *)&(info->uuid);
5928
5929 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
5930 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code), uuid[0],
5931 uuid[1], info->load_addr,
5932 (uint64_t)info->fsid.val[0] | ((uint64_t)info->fsid.val[1] << 32),
5933 0);
5934 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
5935 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 1),
5936 (uint64_t)info->fsobjid.fid_objno |
5937 ((uint64_t)info->fsobjid.fid_generation << 32),
5938 0, 0, 0, 0);
5939 #else /* defined(__LP64__) */
5940 uint32_t *uuid = (uint32_t *)&(info->uuid);
5941
5942 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
5943 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 2), uuid[0],
5944 uuid[1], uuid[2], uuid[3], 0);
5945 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
5946 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 3),
5947 (uint32_t)info->load_addr, info->fsid.val[0], info->fsid.val[1],
5948 info->fsobjid.fid_objno, 0);
5949 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
5950 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 4),
5951 info->fsobjid.fid_generation, 0, 0, 0, 0);
5952 #endif /* !defined(__LP64__) */
5953 }
5954
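/*
 * Common helper for the task_register/unregister_dyld_image_infos entry
 * points below.  The helper consumes infos_copy: when dyld tracing is
 * disabled the copy is discarded and KERN_SUCCESS returned; otherwise the
 * copy is copied out into ipc_kernel_map, each entry is traced, and the
 * mapping is deallocated.
 */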
5955 static kern_return_t
5956 kdebug_trace_dyld(task_t task, uint32_t base_code,
5957 vm_map_copy_t infos_copy, mach_msg_type_number_t infos_len)
5958 {
5959 kern_return_t kr;
5960 dyld_kernel_image_info_array_t infos;
5961 vm_map_offset_t map_data;
5962 vm_offset_t data;
5963
5964 if (!infos_copy) {
5965 return KERN_INVALID_ADDRESS;
5966 }
5967
5968 if (!kdebug_enable ||
5969 !kdebug_debugid_enabled(KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, 0)))
5970 {
5971 vm_map_copy_discard(infos_copy);
5972 return KERN_SUCCESS;
5973 }
5974
5975 if (task == NULL || task != current_task()) {
5976 return KERN_INVALID_TASK;
5977 }
5978
5979 kr = vm_map_copyout(ipc_kernel_map, &map_data, (vm_map_copy_t)infos_copy);
5980 if (kr != KERN_SUCCESS) {
5981 return kr;
5982 }
5983
5984 infos = CAST_DOWN(dyld_kernel_image_info_array_t, map_data);
5985
5986 for (mach_msg_type_number_t i = 0; i < infos_len; i++) {
5987 kdebug_trace_dyld_internal(base_code, &(infos[i]));
5988 }
5989
5990 data = CAST_DOWN(vm_offset_t, map_data);
5991 mach_vm_deallocate(ipc_kernel_map, data, infos_len * sizeof(infos[0]));
5992 return KERN_SUCCESS;
5993 }
5994
5995 kern_return_t
5996 task_register_dyld_image_infos(task_t task,
5997 dyld_kernel_image_info_array_t infos_copy,
5998 mach_msg_type_number_t infos_len)
5999 {
6000 return kdebug_trace_dyld(task, DBG_DYLD_UUID_MAP_A,
6001 (vm_map_copy_t)infos_copy, infos_len);
6002 }
6003
6004 kern_return_t
6005 task_unregister_dyld_image_infos(task_t task,
6006 dyld_kernel_image_info_array_t infos_copy,
6007 mach_msg_type_number_t infos_len)
6008 {
6009 return kdebug_trace_dyld(task, DBG_DYLD_UUID_UNMAP_A,
6010 (vm_map_copy_t)infos_copy, infos_len);
6011 }
6012
6013 kern_return_t
6014 task_get_dyld_image_infos(__unused task_t task,
6015 __unused dyld_kernel_image_info_array_t * dyld_images,
6016 __unused mach_msg_type_number_t * dyld_imagesCnt)
6017 {
6018 return KERN_NOT_SUPPORTED;
6019 }
6020
6021 kern_return_t
6022 task_register_dyld_shared_cache_image_info(task_t task,
6023 dyld_kernel_image_info_t cache_img,
6024 __unused boolean_t no_cache,
6025 __unused boolean_t private_cache)
6026 {
6027 if (task == NULL || task != current_task()) {
6028 return KERN_INVALID_TASK;
6029 }
6030
6031 kdebug_trace_dyld_internal(DBG_DYLD_UUID_SHARED_CACHE_A, &cache_img);
6032 return KERN_SUCCESS;
6033 }
6034
6035 kern_return_t
6036 task_register_dyld_set_dyld_state(__unused task_t task,
6037 __unused uint8_t dyld_state)
6038 {
6039 return KERN_NOT_SUPPORTED;
6040 }
6041
6042 kern_return_t
6043 task_register_dyld_get_process_state(__unused task_t task,
6044 __unused dyld_kernel_process_info_t * dyld_process_state)
6045 {
6046 return KERN_NOT_SUPPORTED;
6047 }
6048
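/*
 * Illustrative sketch of a hypothetical in-kernel caller (not part of this
 * file), assuming a MONOTONIC kernel:
 *
 *	struct task_inspect_basic_counts bc;
 *	mach_msg_type_number_t cnt = TASK_INSPECT_BASIC_COUNTS_COUNT;
 *	kern_return_t kr = task_inspect((task_inspect_t)current_task(),
 *	    TASK_INSPECT_BASIC_COUNTS, (task_inspect_info_t)&bc, &cnt);
 *
 * On success bc.cycles (and bc.instructions, where MT_CORE_INSTRS is
 * defined) hold the task's fixed-counter totals and cnt is set to
 * TASK_INSPECT_BASIC_COUNTS_COUNT.
 */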
6049 kern_return_t
6050 task_inspect(task_inspect_t task_insp, task_inspect_flavor_t flavor,
6051 task_inspect_info_t info_out, mach_msg_type_number_t *size_in_out)
6052 {
6053 #if MONOTONIC
6054 task_t task = (task_t)task_insp;
6055 kern_return_t kr = KERN_SUCCESS;
6056 mach_msg_type_number_t size;
6057
6058 if (task == TASK_NULL) {
6059 return KERN_INVALID_ARGUMENT;
6060 }
6061
6062 size = *size_in_out;
6063
6064 switch (flavor) {
6065 case TASK_INSPECT_BASIC_COUNTS: {
6066 struct task_inspect_basic_counts *bc;
6067 uint64_t task_counts[MT_CORE_NFIXED];
6068
6069 if (size < TASK_INSPECT_BASIC_COUNTS_COUNT) {
6070 kr = KERN_INVALID_ARGUMENT;
6071 break;
6072 }
6073
6074 mt_fixed_task_counts(task, task_counts);
6075 bc = (struct task_inspect_basic_counts *)info_out;
6076 #ifdef MT_CORE_INSTRS
6077 bc->instructions = task_counts[MT_CORE_INSTRS];
6078 #else /* defined(MT_CORE_INSTRS) */
6079 bc->instructions = 0;
6080 #endif /* !defined(MT_CORE_INSTRS) */
6081 bc->cycles = task_counts[MT_CORE_CYCLES];
6082 size = TASK_INSPECT_BASIC_COUNTS_COUNT;
6083 break;
6084 }
6085 default:
6086 kr = KERN_INVALID_ARGUMENT;
6087 break;
6088 }
6089
6090 if (kr == KERN_SUCCESS) {
6091 *size_in_out = size;
6092 }
6093 return kr;
6094 #else /* MONOTONIC */
6095 #pragma unused(task_insp, flavor, info_out, size_in_out)
6096 return KERN_NOT_SUPPORTED;
6097 #endif /* !MONOTONIC */
6098 }
6099
6100 #if CONFIG_SECLUDED_MEMORY
6101 int num_tasks_can_use_secluded_mem = 0;
6102
6103 void
6104 task_set_can_use_secluded_mem(
6105 task_t task,
6106 boolean_t can_use_secluded_mem)
6107 {
6108 if (!task->task_could_use_secluded_mem) {
6109 return;
6110 }
6111 task_lock(task);
6112 task_set_can_use_secluded_mem_locked(task, can_use_secluded_mem);
6113 task_unlock(task);
6114 }
6115
6116 void
6117 task_set_can_use_secluded_mem_locked(
6118 task_t task,
6119 boolean_t can_use_secluded_mem)
6120 {
6121 assert(task->task_could_use_secluded_mem);
6122 if (can_use_secluded_mem &&
6123 secluded_for_apps && /* global boot-arg */
6124 !task->task_can_use_secluded_mem) {
6125 assert(num_tasks_can_use_secluded_mem >= 0);
6126 OSAddAtomic(+1,
6127 (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
6128 task->task_can_use_secluded_mem = TRUE;
6129 } else if (!can_use_secluded_mem &&
6130 task->task_can_use_secluded_mem) {
6131 assert(num_tasks_can_use_secluded_mem > 0);
6132 OSAddAtomic(-1,
6133 (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
6134 task->task_can_use_secluded_mem = FALSE;
6135 }
6136 }
6137
6138 void
6139 task_set_could_use_secluded_mem(
6140 task_t task,
6141 boolean_t could_use_secluded_mem)
6142 {
6143 task->task_could_use_secluded_mem = could_use_secluded_mem;
6144 }
6145
6146 void
6147 task_set_could_also_use_secluded_mem(
6148 task_t task,
6149 boolean_t could_also_use_secluded_mem)
6150 {
6151 task->task_could_also_use_secluded_mem = could_also_use_secluded_mem;
6152 }
6153
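/*
 * A task may draw from the secluded pool either because it has been
 * explicitly opted in (task_can_use_secluded_mem, counted in
 * num_tasks_can_use_secluded_mem) or because it is marked "could also use"
 * and at least one opted-in task currently exists.
 */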
6154 boolean_t
6155 task_can_use_secluded_mem(
6156 task_t task)
6157 {
6158 if (task->task_can_use_secluded_mem) {
6159 assert(task->task_could_use_secluded_mem);
6160 assert(num_tasks_can_use_secluded_mem > 0);
6161 return TRUE;
6162 }
6163 if (task->task_could_also_use_secluded_mem &&
6164 num_tasks_can_use_secluded_mem > 0) {
6165 assert(num_tasks_can_use_secluded_mem > 0);
6166 return TRUE;
6167 }
6168 return FALSE;
6169 }
6170
6171 boolean_t
6172 task_could_use_secluded_mem(
6173 task_t task)
6174 {
6175 return task->task_could_use_secluded_mem;
6176 }
6177 #endif /* CONFIG_SECLUDED_MEMORY */
6178
6179 queue_head_t *
6180 task_io_user_clients(task_t task)
6181 {
6182 return (&task->io_user_clients);
6183 }
6184
6185 void
6186 task_copy_fields_for_exec(task_t dst_task, task_t src_task)
6187 {
6188 dst_task->vtimers = src_task->vtimers;
6189 }
6190
6191 #if DEVELOPMENT || DEBUG
6192 int vm_region_footprint = 0;
6193 #endif /* DEVELOPMENT || DEBUG */
6194
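/*
 * Whether VM region queries made by the current task should report
 * footprint accounting: on DEVELOPMENT/DEBUG kernels the global
 * vm_region_footprint flag overrides the per-task setting maintained by
 * task_self_region_footprint_set() below.
 */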
6195 boolean_t
6196 task_self_region_footprint(void)
6197 {
6198 #if DEVELOPMENT || DEBUG
6199 if (vm_region_footprint) {
6200 /* system-wide override */
6201 return TRUE;
6202 }
6203 #endif /* DEVELOPMENT || DEBUG */
6204 return current_task()->task_region_footprint;
6205 }
6206
6207 void
6208 task_self_region_footprint_set(
6209 boolean_t newval)
6210 {
6211 task_t curtask;
6212
6213 curtask = current_task();
6214 task_lock(curtask);
6215 if (newval) {
6216 curtask->task_region_footprint = TRUE;
6217 } else {
6218 curtask->task_region_footprint = FALSE;
6219 }
6220 task_unlock(curtask);
6221 }