2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 * @OSF_FREE_COPYRIGHT@
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
46 * Carnegie Mellon requests users of this software to return to
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
58 * Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub,
61 * Task management primitives implementation.
64 * Copyright (c) 1993 The University of Utah and
65 * the Computer Systems Laboratory (CSL). All rights reserved.
67 * Permission to use, copy, modify and distribute this software and its
68 * documentation is hereby granted, provided that both the copyright
69 * notice and this permission notice appear in all copies of the
70 * software, derivative works or modified versions, and any portions
71 * thereof, and that both notices appear in supporting documentation.
73 * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
74 * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
75 * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
77 * CSL requests users of this software to return to csl-dist@cs.utah.edu any
78 * improvements that they make and grant CSL redistribution rights.
82 * NOTICE: This file was modified by McAfee Research in 2004 to introduce
83 * support for mandatory and extensible security protections. This notice
84 * is included in support of clause 2.2 (b) of the Apple Public License,
86 * Copyright (c) 2005 SPARTA, Inc.
89 #include <mach/mach_types.h>
90 #include <mach/boolean.h>
91 #include <mach/host_priv.h>
92 #include <mach/machine/vm_types.h>
93 #include <mach/vm_param.h>
94 #include <mach/mach_vm.h>
95 #include <mach/semaphore.h>
96 #include <mach/task_info.h>
97 #include <mach/task_inspect.h>
98 #include <mach/task_special_ports.h>
101 #include <ipc/ipc_importance.h>
102 #include <ipc/ipc_types.h>
103 #include <ipc/ipc_space.h>
104 #include <ipc/ipc_entry.h>
105 #include <ipc/ipc_hash.h>
107 #include <kern/kern_types.h>
108 #include <kern/mach_param.h>
109 #include <kern/misc_protos.h>
110 #include <kern/task.h>
111 #include <kern/thread.h>
112 #include <kern/coalition.h>
113 #include <kern/zalloc.h>
114 #include <kern/kalloc.h>
115 #include <kern/kern_cdata.h>
116 #include <kern/processor.h>
117 #include <kern/sched_prim.h> /* for thread_wakeup */
118 #include <kern/ipc_tt.h>
119 #include <kern/host.h>
120 #include <kern/clock.h>
121 #include <kern/timer.h>
122 #include <kern/assert.h>
123 #include <kern/sync_lock.h>
124 #include <kern/affinity.h>
125 #include <kern/exc_resource.h>
126 #include <kern/machine.h>
127 #include <kern/policy_internal.h>
129 #include <corpses/task_corpse.h>
131 #include <kern/telemetry.h>
135 #include <kern/monotonic.h>
136 #include <machine/monotonic.h>
137 #endif /* MONOTONIC */
142 #include <vm/vm_map.h>
143 #include <vm/vm_kern.h> /* for kernel_map, ipc_kernel_map */
144 #include <vm/vm_pageout.h>
145 #include <vm/vm_protos.h>
146 #include <vm/vm_purgeable_internal.h>
148 #include <sys/resource.h>
149 #include <sys/signalvar.h> /* for coredump */
152 * Exported interfaces
155 #include <mach/task_server.h>
156 #include <mach/mach_host_server.h>
157 #include <mach/host_security_server.h>
158 #include <mach/mach_port_server.h>
160 #include <vm/vm_shared_region.h>
162 #include <libkern/OSDebug.h>
163 #include <libkern/OSAtomic.h>
166 #include <atm/atm_internal.h>
169 #include <kern/sfi.h> /* picks up ledger.h */
172 #include <security/mac_mach_internal.h>
176 extern int kpc_force_all_ctrs(task_t
, int);
181 lck_attr_t task_lck_attr
;
182 lck_grp_t task_lck_grp
;
183 lck_grp_attr_t task_lck_grp_attr
;
185 extern int exc_via_corpse_forking
;
186 extern int corpse_for_fatal_memkill
;
188 /* Flag set by core audio when audio is playing. Used to stifle EXC_RESOURCE generation when active. */
189 int audio_active
= 0;
191 zinfo_usage_store_t tasks_tkm_private
;
192 zinfo_usage_store_t tasks_tkm_shared
;
194 /* A container to accumulate statistics for expired tasks */
195 expired_task_statistics_t dead_task_statistics
;
196 lck_spin_t dead_task_statistics_lock
;
198 ledger_template_t task_ledger_template
= NULL
;
200 struct _task_ledger_indices task_ledgers
__attribute__((used
)) =
201 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
203 { 0 /* initialized at runtime */},
204 #endif /* !CONFIG_EMBEDDED */
210 /* System sleep state */
211 boolean_t tasks_suspend_state
;
214 void init_task_ledgers(void);
215 void task_footprint_exceeded(int warning
, __unused
const void *param0
, __unused
const void *param1
);
216 void task_wakeups_rate_exceeded(int warning
, __unused
const void *param0
, __unused
const void *param1
);
217 void task_io_rate_exceeded(int warning
, const void *param0
, __unused
const void *param1
);
218 void __attribute__((noinline
)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void);
219 void __attribute__((noinline
)) PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb
, boolean_t is_fatal
);
220 void __attribute__((noinline
)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor
);
222 kern_return_t
task_suspend_internal(task_t
);
223 kern_return_t
task_resume_internal(task_t
);
224 static kern_return_t
task_start_halt_locked(task_t task
, boolean_t should_mark_corpse
);
226 extern kern_return_t
iokit_task_terminate(task_t task
);
228 extern kern_return_t
exception_deliver(thread_t
, exception_type_t
, mach_exception_data_t
, mach_msg_type_number_t
, struct exception_action
*, lck_mtx_t
*);
229 extern void bsd_copythreadname(void *dst_uth
, void *src_uth
);
230 extern kern_return_t
thread_resume(thread_t thread
);
232 // Warn tasks when they hit 80% of their memory limit.
233 #define PHYS_FOOTPRINT_WARNING_LEVEL 80
235 #define TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT 150 /* wakeups per second */
236 #define TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL 300 /* in seconds. */
239 * Level (in terms of percentage of the limit) at which the wakeups monitor triggers telemetry.
241 * (i.e., when the task's wakeups rate exceeds 70% of the limit, start taking user
242 * stacktraces, aka micro-stackshots)
244 #define TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER 70
246 int task_wakeups_monitor_interval
; /* In seconds. Time period over which the wakeups rate is observed */
247 int task_wakeups_monitor_rate
; /* In Hz. Maximum allowable wakeups per task before EXC_RESOURCE is sent */
249 int task_wakeups_monitor_ustackshots_trigger_pct
; /* Percentage. Level at which we start gathering telemetry. */
251 int disable_exc_resource
; /* Global override to suppress EXC_RESOURCE for resource monitor violations. */
253 ledger_amount_t max_task_footprint
= 0; /* Per-task limit on physical memory consumption in bytes */
254 int max_task_footprint_warning_level
= 0; /* Per-task limit warning percentage */
255 int max_task_footprint_mb
= 0; /* Per-task limit on physical memory consumption in megabytes */
257 /* I/O Monitor Limits */
258 #define IOMON_DEFAULT_LIMIT (20480ull) /* MB of logical/physical I/O */
259 #define IOMON_DEFAULT_INTERVAL (86400ull) /* in seconds */
261 uint64_t task_iomon_limit_mb
; /* Per-task I/O monitor limit in MBs */
262 uint64_t task_iomon_interval_secs
; /* Per-task I/O monitor interval in secs */
264 #define IO_TELEMETRY_DEFAULT_LIMIT (10ll * 1024ll * 1024ll)
265 int64_t io_telemetry_limit
; /* Threshold to take a microstackshot (0 indicates I/O telemetry is turned off) */
266 int64_t global_logical_writes_count
= 0; /* Global count for logical writes */
267 static boolean_t
global_update_logical_writes(int64_t);
270 int pmap_ledgers_panic
= 1;
271 #endif /* MACH_ASSERT */
273 int task_max
= CONFIG_TASK_MAX
; /* Max number of tasks */
276 int hwm_user_cores
= 0; /* high watermark violations generate user core files */
280 extern void proc_getexecutableuuid(void *, unsigned char *, unsigned long);
281 extern int proc_pid(struct proc
*p
);
282 extern int proc_selfpid(void);
283 extern char *proc_name_address(struct proc
*p
);
284 extern uint64_t get_dispatchqueue_offset_from_proc(void *);
285 extern int kevent_proc_copy_uptrs(void *proc
, uint64_t *buf
, int bufsize
);
287 #if CONFIG_MEMORYSTATUS
288 extern void proc_memstat_terminated(struct proc
* p
, boolean_t set
);
289 extern void memorystatus_on_ledger_footprint_exceeded(int warning
, boolean_t memlimit_is_active
, boolean_t memlimit_is_fatal
);
290 extern void memorystatus_log_exception(const int max_footprint_mb
, boolean_t memlimit_is_active
, boolean_t memlimit_is_fatal
);
291 extern boolean_t
memorystatus_allowed_vm_map_fork(__unused task_t task
);
292 #endif /* CONFIG_MEMORYSTATUS */
294 #endif /* MACH_BSD */
298 static void task_hold_locked(task_t task
);
299 static void task_wait_locked(task_t task
, boolean_t until_not_runnable
);
300 static void task_release_locked(task_t task
);
302 static void task_synchronizer_destroy_all(task_t task
);
305 task_backing_store_privileged(
309 task
->priv_flags
|= VM_BACKING_STORE_PRIV
;
320 #if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
322 #endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */
327 if (task_has_64BitAddr(task
))
329 task_set_64BitAddr(task
);
331 if ( !task_has_64BitAddr(task
))
333 task_clear_64BitAddr(task
);
335 /* FIXME: On x86, the thread save state flavor can diverge from the
336 * task's 64-bit feature flag due to the 32-bit/64-bit register save
337 * state dichotomy. Since we can be pre-empted in this interval,
338 * certain routines may observe the thread as being in an inconsistent
339 * state with respect to its task's 64-bitness.
342 #if defined(__x86_64__) || defined(__arm64__)
343 queue_iterate(&task
->threads
, thread
, thread_t
, task_threads
) {
344 thread_mtx_lock(thread
);
345 machine_thread_switch_addrmode(thread
);
346 thread_mtx_unlock(thread
);
348 #if defined(__arm64__)
349 /* specifically, if running on H9 */
350 if (thread
== current_thread()) {
353 spl_t spl
= splsched();
355 * This call reports that the current thread changed its 32-bitness.
356 * Other threads were not on core when the 32-bitness was changed,
357 * but current_thread() is on core, and the previous call to
358 * machine_thread_going_on_core() reported a 32-bitness that is now wrong.
360 * This is needed for bring-up, a different callback should be used
363 * TODO: Remove this callout when we no longer support 32-bit code on H9
366 urgency
= thread_get_urgency(thread
, &arg1
, &arg2
);
367 machine_thread_going_on_core(thread
, urgency
, 0, 0, mach_approximate_time());
368 thread_unlock(thread
);
371 #endif /* defined(__arm64__) */
373 #endif /* defined(__x86_64__) || defined(__arm64__) */
380 task_set_platform_binary(
382 boolean_t is_platform
)
386 task
->t_flags
|= TF_PLATFORM
;
388 task
->t_flags
&= ~(TF_PLATFORM
);
396 mach_vm_address_t addr
,
400 task
->all_image_info_addr
= addr
;
401 task
->all_image_info_size
= size
;
406 task_atm_reset(__unused task_t task
) {
409 if (task
->atm_context
!= NULL
) {
410 atm_task_descriptor_destroy(task
->atm_context
);
411 task
->atm_context
= NULL
;
418 task_bank_reset(__unused task_t task
) {
420 if (task
->bank_context
!= NULL
) {
421 bank_task_destroy(task
);
426 * NOTE: This should only be called when the P_LINTRANSIT
427 * flag is set (the proc_trans lock is held) on the
428 * proc associated with the task.
431 task_bank_init(__unused task_t task
) {
433 if (task
->bank_context
!= NULL
) {
434 panic("Task bank init called with non null bank context for task: %p and bank_context: %p", task
, task
->bank_context
);
436 bank_task_initialize(task
);
440 task_set_did_exec_flag(task_t task
)
442 task
->t_procflags
|= TPF_DID_EXEC
;
446 task_clear_exec_copy_flag(task_t task
)
448 task
->t_procflags
&= ~TPF_EXEC_COPY
;
452 * This wait event is t_procflags instead of t_flags because t_flags is volatile
454 * TODO: store the flags in the same place as the event
455 * rdar://problem/28501994
458 task_get_return_wait_event(task_t task
)
460 return (event_t
)&task
->t_procflags
;
464 task_clear_return_wait(task_t task
)
468 task
->t_flags
&= ~TF_LRETURNWAIT
;
470 if (task
->t_flags
& TF_LRETURNWAITER
) {
471 thread_wakeup(task_get_return_wait_event(task
));
472 task
->t_flags
&= ~TF_LRETURNWAITER
;
479 task_wait_to_return(void)
483 task
= current_task();
486 if (task
->t_flags
& TF_LRETURNWAIT
) {
488 task
->t_flags
|= TF_LRETURNWAITER
;
489 assert_wait(task_get_return_wait_event(task
), THREAD_UNINT
);
492 thread_block(THREAD_CONTINUE_NULL
);
495 } while (task
->t_flags
& TF_LRETURNWAIT
);
500 thread_bootstrap_return();
504 task_is_exec_copy(task_t task
)
506 return task_is_exec_copy_internal(task
);
510 task_did_exec(task_t task
)
512 return task_did_exec_internal(task
);
516 task_is_active(task_t task
)
522 task_is_halting(task_t task
)
524 return task
->halting
;
527 #if TASK_REFERENCE_LEAK_DEBUG
528 #include <kern/btlog.h>
530 static btlog_t
*task_ref_btlog
;
531 #define TASK_REF_OP_INCR 0x1
532 #define TASK_REF_OP_DECR 0x2
534 #define TASK_REF_NUM_RECORDS 100000
535 #define TASK_REF_BTDEPTH 7
538 task_reference_internal(task_t task
)
540 void * bt
[TASK_REF_BTDEPTH
];
543 numsaved
= OSBacktrace(bt
, TASK_REF_BTDEPTH
);
545 (void)hw_atomic_add(&(task
)->ref_count
, 1);
546 btlog_add_entry(task_ref_btlog
, task
, TASK_REF_OP_INCR
,
551 task_deallocate_internal(task_t task
)
553 void * bt
[TASK_REF_BTDEPTH
];
556 numsaved
= OSBacktrace(bt
, TASK_REF_BTDEPTH
);
558 btlog_add_entry(task_ref_btlog
, task
, TASK_REF_OP_DECR
,
560 return hw_atomic_sub(&(task
)->ref_count
, 1);
563 #endif /* TASK_REFERENCE_LEAK_DEBUG */
569 lck_grp_attr_setdefault(&task_lck_grp_attr
);
570 lck_grp_init(&task_lck_grp
, "task", &task_lck_grp_attr
);
571 lck_attr_setdefault(&task_lck_attr
);
572 lck_mtx_init(&tasks_threads_lock
, &task_lck_grp
, &task_lck_attr
);
573 lck_mtx_init(&tasks_corpse_lock
, &task_lck_grp
, &task_lck_attr
);
577 task_max
* sizeof(struct task
),
578 TASK_CHUNK
* sizeof(struct task
),
581 zone_change(task_zone
, Z_NOENCRYPT
, TRUE
);
585 #endif /* CONFIG_EMBEDDED */
588 * Configure per-task memory limit.
589 * The boot-arg is interpreted as Megabytes,
590 * and takes precedence over the device tree.
591 * Setting the boot-arg to 0 disables task limits.
593 if (!PE_parse_boot_argn("max_task_pmem", &max_task_footprint_mb
,
594 sizeof (max_task_footprint_mb
))) {
596 * No limit was found in boot-args, so go look in the device tree.
598 if (!PE_get_default("kern.max_task_pmem", &max_task_footprint_mb
,
599 sizeof(max_task_footprint_mb
))) {
601 * No limit was found in device tree.
603 max_task_footprint_mb
= 0;
607 if (max_task_footprint_mb
!= 0) {
608 #if CONFIG_MEMORYSTATUS
609 if (max_task_footprint_mb
< 50) {
610 printf("Warning: max_task_pmem %d below minimum.\n",
611 max_task_footprint_mb
);
612 max_task_footprint_mb
= 50;
614 printf("Limiting task physical memory footprint to %d MB\n",
615 max_task_footprint_mb
);
617 max_task_footprint
= (ledger_amount_t
)max_task_footprint_mb
* 1024 * 1024; // Convert MB to bytes
620 * Configure the per-task memory limit warning level.
621 * This is computed as a percentage.
623 max_task_footprint_warning_level
= 0;
625 if (max_mem
< 0x40000000) {
627 * On devices with < 1GB of memory:
628 * -- set warnings to 50MB below the per-task limit.
630 if (max_task_footprint_mb
> 50) {
631 max_task_footprint_warning_level
= ((max_task_footprint_mb
- 50) * 100) / max_task_footprint_mb
;
635 * On devices with >= 1GB of memory:
636 * -- set warnings to 100MB below the per-task limit.
638 if (max_task_footprint_mb
> 100) {
639 max_task_footprint_warning_level
= ((max_task_footprint_mb
- 100) * 100) / max_task_footprint_mb
;
644 * Never allow warning level to land below the default.
646 if (max_task_footprint_warning_level
< PHYS_FOOTPRINT_WARNING_LEVEL
) {
647 max_task_footprint_warning_level
= PHYS_FOOTPRINT_WARNING_LEVEL
;
650 printf("Limiting task physical memory warning to %d%%\n", max_task_footprint_warning_level
);
653 printf("Warning: max_task_pmem specified, but jetsam not configured; ignoring.\n");
654 #endif /* CONFIG_MEMORYSTATUS */
658 PE_parse_boot_argn("pmap_ledgers_panic", &pmap_ledgers_panic
,
659 sizeof (pmap_ledgers_panic
));
660 #endif /* MACH_ASSERT */
663 if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores
,
664 sizeof (hwm_user_cores
))) {
669 proc_init_cpumon_params();
671 if (!PE_parse_boot_argn("task_wakeups_monitor_rate", &task_wakeups_monitor_rate
, sizeof (task_wakeups_monitor_rate
))) {
672 task_wakeups_monitor_rate
= TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT
;
675 if (!PE_parse_boot_argn("task_wakeups_monitor_interval", &task_wakeups_monitor_interval
, sizeof (task_wakeups_monitor_interval
))) {
676 task_wakeups_monitor_interval
= TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL
;
679 if (!PE_parse_boot_argn("task_wakeups_monitor_ustackshots_trigger_pct", &task_wakeups_monitor_ustackshots_trigger_pct
,
680 sizeof (task_wakeups_monitor_ustackshots_trigger_pct
))) {
681 task_wakeups_monitor_ustackshots_trigger_pct
= TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER
;
684 if (!PE_parse_boot_argn("disable_exc_resource", &disable_exc_resource
,
685 sizeof (disable_exc_resource
))) {
686 disable_exc_resource
= 0;
689 if (!PE_parse_boot_argn("task_iomon_limit_mb", &task_iomon_limit_mb
, sizeof (task_iomon_limit_mb
))) {
690 task_iomon_limit_mb
= IOMON_DEFAULT_LIMIT
;
693 if (!PE_parse_boot_argn("task_iomon_interval_secs", &task_iomon_interval_secs
, sizeof (task_iomon_interval_secs
))) {
694 task_iomon_interval_secs
= IOMON_DEFAULT_INTERVAL
;
697 if (!PE_parse_boot_argn("io_telemetry_limit", &io_telemetry_limit
, sizeof (io_telemetry_limit
))) {
698 io_telemetry_limit
= IO_TELEMETRY_DEFAULT_LIMIT
;
702 * If we have coalitions, coalition_init() will call init_task_ledgers() as it
703 * sets up the ledgers for the default coalition. If we don't have coalitions,
704 * then we have to call it now.
706 #if CONFIG_COALITIONS
707 assert(task_ledger_template
);
708 #else /* CONFIG_COALITIONS */
710 #endif /* CONFIG_COALITIONS */
712 #if TASK_REFERENCE_LEAK_DEBUG
713 task_ref_btlog
= btlog_create(TASK_REF_NUM_RECORDS
, TASK_REF_BTDEPTH
, TRUE
/* caller_will_remove_entries_for_element? */);
714 assert(task_ref_btlog
);
718 * Create the kernel task as the first task.
721 if (task_create_internal(TASK_NULL
, NULL
, FALSE
, TRUE
, TF_NONE
, TPF_NONE
, &kernel_task
) != KERN_SUCCESS
)
723 if (task_create_internal(TASK_NULL
, NULL
, FALSE
, FALSE
, TF_NONE
, TPF_NONE
, &kernel_task
) != KERN_SUCCESS
)
725 panic("task_init\n");
728 vm_map_deallocate(kernel_task
->map
);
729 kernel_task
->map
= kernel_map
;
730 lck_spin_init(&dead_task_statistics_lock
, &task_lck_grp
, &task_lck_attr
);
734 * Create a task running in the kernel address space. It may
735 * have its own map of size map_size and may have ipc privileges.
739 __unused task_t parent_task
,
740 __unused vm_offset_t map_base
,
741 __unused vm_size_t map_size
,
742 __unused task_t
*child_task
)
744 return (KERN_INVALID_ARGUMENT
);
750 __unused ledger_port_array_t ledger_ports
,
751 __unused mach_msg_type_number_t num_ledger_ports
,
752 __unused boolean_t inherit_memory
,
753 __unused task_t
*child_task
) /* OUT */
755 if (parent_task
== TASK_NULL
)
756 return(KERN_INVALID_ARGUMENT
);
759 * No longer supported: too many calls assume that a task has a valid
762 return(KERN_FAILURE
);
766 host_security_create_task_token(
767 host_security_t host_security
,
769 __unused security_token_t sec_token
,
770 __unused audit_token_t audit_token
,
771 __unused host_priv_t host_priv
,
772 __unused ledger_port_array_t ledger_ports
,
773 __unused mach_msg_type_number_t num_ledger_ports
,
774 __unused boolean_t inherit_memory
,
775 __unused task_t
*child_task
) /* OUT */
777 if (parent_task
== TASK_NULL
)
778 return(KERN_INVALID_ARGUMENT
);
780 if (host_security
== HOST_NULL
)
781 return(KERN_INVALID_SECURITY
);
784 * No longer supported.
786 return(KERN_FAILURE
);
794 * Physical footprint: This is the sum of:
795 * + (internal - alternate_accounting)
796 * + (internal_compressed - alternate_accounting_compressed)
798 * + purgeable_nonvolatile
799 * + purgeable_nonvolatile_compressed
803 * The task's anonymous memory, which on iOS is always resident.
805 * internal_compressed
806 * Amount of this task's internal memory which is held by the compressor.
807 * Such memory is no longer actually resident for the task [i.e., resident in its pmap],
808 * and could be either decompressed back into memory, or paged out to storage, depending
809 * on our implementation.
812 * IOKit mappings: The total size of all IOKit mappings in this task, regardless of
813 * clean/dirty or internal/external state.
815 * alternate_accounting
816 * The number of internal dirty pages which are part of IOKit mappings. By definition, these pages
817 * are counted in both internal *and* iokit_mapped, so we must subtract them from the total to avoid
821 init_task_ledgers(void)
825 assert(task_ledger_template
== NULL
);
826 assert(kernel_task
== TASK_NULL
);
829 PE_parse_boot_argn("pmap_ledgers_panic", &pmap_ledgers_panic
,
830 sizeof (pmap_ledgers_panic
));
831 #endif /* MACH_ASSERT */
833 if ((t
= ledger_template_create("Per-task ledger")) == NULL
)
834 panic("couldn't create task ledger template");
836 task_ledgers
.cpu_time
= ledger_entry_add(t
, "cpu_time", "sched", "ns");
837 task_ledgers
.tkm_private
= ledger_entry_add(t
, "tkm_private",
839 task_ledgers
.tkm_shared
= ledger_entry_add(t
, "tkm_shared", "physmem",
841 task_ledgers
.phys_mem
= ledger_entry_add(t
, "phys_mem", "physmem",
843 task_ledgers
.wired_mem
= ledger_entry_add(t
, "wired_mem", "physmem",
845 task_ledgers
.internal
= ledger_entry_add(t
, "internal", "physmem",
847 task_ledgers
.iokit_mapped
= ledger_entry_add(t
, "iokit_mapped", "mappings",
849 task_ledgers
.alternate_accounting
= ledger_entry_add(t
, "alternate_accounting", "physmem",
851 task_ledgers
.alternate_accounting_compressed
= ledger_entry_add(t
, "alternate_accounting_compressed", "physmem",
853 task_ledgers
.page_table
= ledger_entry_add(t
, "page_table", "physmem",
855 task_ledgers
.phys_footprint
= ledger_entry_add(t
, "phys_footprint", "physmem",
857 task_ledgers
.internal_compressed
= ledger_entry_add(t
, "internal_compressed", "physmem",
859 task_ledgers
.purgeable_volatile
= ledger_entry_add(t
, "purgeable_volatile", "physmem", "bytes");
860 task_ledgers
.purgeable_nonvolatile
= ledger_entry_add(t
, "purgeable_nonvolatile", "physmem", "bytes");
861 task_ledgers
.purgeable_volatile_compressed
= ledger_entry_add(t
, "purgeable_volatile_compress", "physmem", "bytes");
862 task_ledgers
.purgeable_nonvolatile_compressed
= ledger_entry_add(t
, "purgeable_nonvolatile_compress", "physmem", "bytes");
863 task_ledgers
.platform_idle_wakeups
= ledger_entry_add(t
, "platform_idle_wakeups", "power",
865 task_ledgers
.interrupt_wakeups
= ledger_entry_add(t
, "interrupt_wakeups", "power",
869 sfi_class_id_t class_id
, ledger_alias
;
870 for (class_id
= SFI_CLASS_UNSPECIFIED
; class_id
< MAX_SFI_CLASS_ID
; class_id
++) {
871 task_ledgers
.sfi_wait_times
[class_id
] = -1;
874 /* don't account for UNSPECIFIED */
875 for (class_id
= SFI_CLASS_UNSPECIFIED
+ 1; class_id
< MAX_SFI_CLASS_ID
; class_id
++) {
876 ledger_alias
= sfi_get_ledger_alias_for_class(class_id
);
877 if (ledger_alias
!= SFI_CLASS_UNSPECIFIED
) {
878 /* Check to see if alias has been registered yet */
879 if (task_ledgers
.sfi_wait_times
[ledger_alias
] != -1) {
880 task_ledgers
.sfi_wait_times
[class_id
] = task_ledgers
.sfi_wait_times
[ledger_alias
];
882 /* Otherwise, initialize it first */
883 task_ledgers
.sfi_wait_times
[class_id
] = task_ledgers
.sfi_wait_times
[ledger_alias
] = sfi_ledger_entry_add(t
, ledger_alias
);
886 task_ledgers
.sfi_wait_times
[class_id
] = sfi_ledger_entry_add(t
, class_id
);
889 if (task_ledgers
.sfi_wait_times
[class_id
] < 0) {
890 panic("couldn't create entries for task ledger template for SFI class 0x%x", class_id
);
894 assert(task_ledgers
.sfi_wait_times
[MAX_SFI_CLASS_ID
-1] != -1);
895 #endif /* CONFIG_SCHED_SFI */
897 task_ledgers
.cpu_time_billed_to_me
= ledger_entry_add(t
, "cpu_time_billed_to_me", "sched", "ns");
898 task_ledgers
.cpu_time_billed_to_others
= ledger_entry_add(t
, "cpu_time_billed_to_others", "sched", "ns");
899 task_ledgers
.physical_writes
= ledger_entry_add(t
, "physical_writes", "res", "bytes");
900 task_ledgers
.logical_writes
= ledger_entry_add(t
, "logical_writes", "res", "bytes");
901 task_ledgers
.energy_billed_to_me
= ledger_entry_add(t
, "energy_billed_to_me", "power", "nj");
902 task_ledgers
.energy_billed_to_others
= ledger_entry_add(t
, "energy_billed_to_others", "power", "nj");
904 if ((task_ledgers
.cpu_time
< 0) ||
905 (task_ledgers
.tkm_private
< 0) ||
906 (task_ledgers
.tkm_shared
< 0) ||
907 (task_ledgers
.phys_mem
< 0) ||
908 (task_ledgers
.wired_mem
< 0) ||
909 (task_ledgers
.internal
< 0) ||
910 (task_ledgers
.iokit_mapped
< 0) ||
911 (task_ledgers
.alternate_accounting
< 0) ||
912 (task_ledgers
.alternate_accounting_compressed
< 0) ||
913 (task_ledgers
.page_table
< 0) ||
914 (task_ledgers
.phys_footprint
< 0) ||
915 (task_ledgers
.internal_compressed
< 0) ||
916 (task_ledgers
.purgeable_volatile
< 0) ||
917 (task_ledgers
.purgeable_nonvolatile
< 0) ||
918 (task_ledgers
.purgeable_volatile_compressed
< 0) ||
919 (task_ledgers
.purgeable_nonvolatile_compressed
< 0) ||
920 (task_ledgers
.platform_idle_wakeups
< 0) ||
921 (task_ledgers
.interrupt_wakeups
< 0) ||
922 (task_ledgers
.cpu_time_billed_to_me
< 0) || (task_ledgers
.cpu_time_billed_to_others
< 0) ||
923 (task_ledgers
.physical_writes
< 0) ||
924 (task_ledgers
.logical_writes
< 0) ||
925 (task_ledgers
.energy_billed_to_me
< 0) ||
926 (task_ledgers
.energy_billed_to_others
< 0)
928 panic("couldn't create entries for task ledger template");
931 ledger_track_credit_only(t
, task_ledgers
.phys_footprint
);
932 ledger_track_credit_only(t
, task_ledgers
.page_table
);
933 ledger_track_credit_only(t
, task_ledgers
.internal
);
934 ledger_track_credit_only(t
, task_ledgers
.internal_compressed
);
935 ledger_track_credit_only(t
, task_ledgers
.iokit_mapped
);
936 ledger_track_credit_only(t
, task_ledgers
.alternate_accounting
);
937 ledger_track_credit_only(t
, task_ledgers
.alternate_accounting_compressed
);
938 ledger_track_credit_only(t
, task_ledgers
.purgeable_volatile
);
939 ledger_track_credit_only(t
, task_ledgers
.purgeable_nonvolatile
);
940 ledger_track_credit_only(t
, task_ledgers
.purgeable_volatile_compressed
);
941 ledger_track_credit_only(t
, task_ledgers
.purgeable_nonvolatile_compressed
);
943 ledger_track_maximum(t
, task_ledgers
.phys_footprint
, 60);
945 if (pmap_ledgers_panic
) {
946 ledger_panic_on_negative(t
, task_ledgers
.phys_footprint
);
947 ledger_panic_on_negative(t
, task_ledgers
.page_table
);
948 ledger_panic_on_negative(t
, task_ledgers
.internal
);
949 ledger_panic_on_negative(t
, task_ledgers
.internal_compressed
);
950 ledger_panic_on_negative(t
, task_ledgers
.iokit_mapped
);
951 ledger_panic_on_negative(t
, task_ledgers
.alternate_accounting
);
952 ledger_panic_on_negative(t
, task_ledgers
.alternate_accounting_compressed
);
953 ledger_panic_on_negative(t
, task_ledgers
.purgeable_volatile
);
954 ledger_panic_on_negative(t
, task_ledgers
.purgeable_nonvolatile
);
955 ledger_panic_on_negative(t
, task_ledgers
.purgeable_volatile_compressed
);
956 ledger_panic_on_negative(t
, task_ledgers
.purgeable_nonvolatile_compressed
);
958 #endif /* MACH_ASSERT */
960 #if CONFIG_MEMORYSTATUS
961 ledger_set_callback(t
, task_ledgers
.phys_footprint
, task_footprint_exceeded
, NULL
, NULL
);
962 #endif /* CONFIG_MEMORYSTATUS */
964 ledger_set_callback(t
, task_ledgers
.interrupt_wakeups
,
965 task_wakeups_rate_exceeded
, NULL
, NULL
);
966 ledger_set_callback(t
, task_ledgers
.physical_writes
, task_io_rate_exceeded
, (void *)FLAVOR_IO_PHYSICAL_WRITES
, NULL
);
967 ledger_set_callback(t
, task_ledgers
.logical_writes
, task_io_rate_exceeded
, (void *)FLAVOR_IO_LOGICAL_WRITES
, NULL
);
969 ledger_template_complete(t
);
970 task_ledger_template
= t
;
974 task_create_internal(
976 coalition_t
*parent_coalitions __unused
,
977 boolean_t inherit_memory
,
978 __unused boolean_t is_64bit
,
980 uint32_t t_procflags
,
981 task_t
*child_task
) /* OUT */
984 vm_shared_region_t shared_region
;
985 ledger_t ledger
= NULL
;
987 new_task
= (task_t
) zalloc(task_zone
);
989 if (new_task
== TASK_NULL
)
990 return(KERN_RESOURCE_SHORTAGE
);
992 /* one ref for just being alive; one for our caller */
993 new_task
->ref_count
= 2;
995 /* allocate with active entries */
996 assert(task_ledger_template
!= NULL
);
997 if ((ledger
= ledger_instantiate(task_ledger_template
,
998 LEDGER_CREATE_ACTIVE_ENTRIES
)) == NULL
) {
999 zfree(task_zone
, new_task
);
1000 return(KERN_RESOURCE_SHORTAGE
);
1004 new_task
->ledger
= ledger
;
1006 #if defined(CONFIG_SCHED_MULTIQ)
1007 new_task
->sched_group
= sched_group_create();
1010 /* if inherit_memory is true, parent_task MUST not be NULL */
1011 if (!(t_flags
& TF_CORPSE_FORK
) && inherit_memory
)
1012 new_task
->map
= vm_map_fork(ledger
, parent_task
->map
, 0);
1014 new_task
->map
= vm_map_create(pmap_create(ledger
, 0, is_64bit
),
1015 (vm_map_offset_t
)(VM_MIN_ADDRESS
),
1016 (vm_map_offset_t
)(VM_MAX_ADDRESS
), TRUE
);
1018 /* Inherit memlock limit from parent */
1020 vm_map_set_user_wire_limit(new_task
->map
, (vm_size_t
)parent_task
->map
->user_wire_limit
);
1022 lck_mtx_init(&new_task
->lock
, &task_lck_grp
, &task_lck_attr
);
1023 queue_init(&new_task
->threads
);
1024 new_task
->suspend_count
= 0;
1025 new_task
->thread_count
= 0;
1026 new_task
->active_thread_count
= 0;
1027 new_task
->user_stop_count
= 0;
1028 new_task
->legacy_stop_count
= 0;
1029 new_task
->active
= TRUE
;
1030 new_task
->halting
= FALSE
;
1031 new_task
->user_data
= NULL
;
1032 new_task
->priv_flags
= 0;
1033 new_task
->t_flags
= t_flags
;
1034 new_task
->t_procflags
= t_procflags
;
1035 new_task
->importance
= 0;
1036 new_task
->crashed_thread_id
= 0;
1037 new_task
->exec_token
= 0;
1040 new_task
->atm_context
= NULL
;
1042 new_task
->bank_context
= NULL
;
1045 new_task
->bsd_info
= NULL
;
1046 new_task
->corpse_info
= NULL
;
1047 #endif /* MACH_BSD */
1050 new_task
->crash_label
= NULL
;
1053 #if CONFIG_MEMORYSTATUS
1054 if (max_task_footprint
!= 0) {
1055 ledger_set_limit(ledger
, task_ledgers
.phys_footprint
, max_task_footprint
, PHYS_FOOTPRINT_WARNING_LEVEL
);
1057 #endif /* CONFIG_MEMORYSTATUS */
1059 if (task_wakeups_monitor_rate
!= 0) {
1060 uint32_t flags
= WAKEMON_ENABLE
| WAKEMON_SET_DEFAULTS
;
1061 int32_t rate
; // Ignored because of WAKEMON_SET_DEFAULTS
1062 task_wakeups_monitor_ctl(new_task
, &flags
, &rate
);
1065 #if CONFIG_IO_ACCOUNTING
1066 uint32_t flags
= IOMON_ENABLE
;
1067 task_io_monitor_ctl(new_task
, &flags
);
1068 #endif /* CONFIG_IO_ACCOUNTING */
1070 machine_task_init(new_task
, parent_task
, inherit_memory
);
1072 new_task
->task_debug
= NULL
;
1074 #if DEVELOPMENT || DEBUG
1075 new_task
->task_unnested
= FALSE
;
1076 new_task
->task_disconnected_count
= 0;
1078 queue_init(&new_task
->semaphore_list
);
1079 new_task
->semaphores_owned
= 0;
1081 ipc_task_init(new_task
, parent_task
);
1083 new_task
->vtimers
= 0;
1085 new_task
->shared_region
= NULL
;
1087 new_task
->affinity_space
= NULL
;
1089 new_task
->t_chud
= 0;
1091 new_task
->pidsuspended
= FALSE
;
1092 new_task
->frozen
= FALSE
;
1093 new_task
->changing_freeze_state
= FALSE
;
1094 new_task
->rusage_cpu_flags
= 0;
1095 new_task
->rusage_cpu_percentage
= 0;
1096 new_task
->rusage_cpu_interval
= 0;
1097 new_task
->rusage_cpu_deadline
= 0;
1098 new_task
->rusage_cpu_callt
= NULL
;
1100 new_task
->suspends_outstanding
= 0;
1104 new_task
->hv_task_target
= NULL
;
1105 #endif /* HYPERVISOR */
1108 queue_init(&new_task
->task_watchers
);
1109 new_task
->num_taskwatchers
= 0;
1110 new_task
->watchapplying
= 0;
1111 #endif /* CONFIG_EMBEDDED */
1113 new_task
->mem_notify_reserved
= 0;
1114 new_task
->memlimit_attrs_reserved
= 0;
1115 #if IMPORTANCE_INHERITANCE
1116 new_task
->task_imp_base
= NULL
;
1117 #endif /* IMPORTANCE_INHERITANCE */
1119 new_task
->requested_policy
= default_task_requested_policy
;
1120 new_task
->effective_policy
= default_task_effective_policy
;
1122 if (parent_task
!= TASK_NULL
) {
1123 new_task
->sec_token
= parent_task
->sec_token
;
1124 new_task
->audit_token
= parent_task
->audit_token
;
1126 /* inherit the parent's shared region */
1127 shared_region
= vm_shared_region_get(parent_task
);
1128 vm_shared_region_set(new_task
, shared_region
);
1130 if(task_has_64BitAddr(parent_task
))
1131 task_set_64BitAddr(new_task
);
1132 new_task
->all_image_info_addr
= parent_task
->all_image_info_addr
;
1133 new_task
->all_image_info_size
= parent_task
->all_image_info_size
;
1135 if (inherit_memory
&& parent_task
->affinity_space
)
1136 task_affinity_create(parent_task
, new_task
);
1138 new_task
->pset_hint
= parent_task
->pset_hint
= task_choose_pset(parent_task
);
1140 #if IMPORTANCE_INHERITANCE
1141 ipc_importance_task_t new_task_imp
= IIT_NULL
;
1142 boolean_t inherit_receive
= TRUE
;
1144 if (task_is_marked_importance_donor(parent_task
)) {
1145 new_task_imp
= ipc_importance_for_task(new_task
, FALSE
);
1146 assert(IIT_NULL
!= new_task_imp
);
1147 ipc_importance_task_mark_donor(new_task_imp
, TRUE
);
1150 /* Embedded only wants to inherit for exec copy task */
1151 if ((t_procflags
& TPF_EXEC_COPY
) == 0) {
1152 inherit_receive
= FALSE
;
1154 #endif /* CONFIG_EMBEDDED */
1156 if (inherit_receive
) {
1157 if (task_is_marked_importance_receiver(parent_task
)) {
1158 if (IIT_NULL
== new_task_imp
)
1159 new_task_imp
= ipc_importance_for_task(new_task
, FALSE
);
1160 assert(IIT_NULL
!= new_task_imp
);
1161 ipc_importance_task_mark_receiver(new_task_imp
, TRUE
);
1163 if (task_is_marked_importance_denap_receiver(parent_task
)) {
1164 if (IIT_NULL
== new_task_imp
)
1165 new_task_imp
= ipc_importance_for_task(new_task
, FALSE
);
1166 assert(IIT_NULL
!= new_task_imp
);
1167 ipc_importance_task_mark_denap_receiver(new_task_imp
, TRUE
);
1171 if (IIT_NULL
!= new_task_imp
) {
1172 assert(new_task
->task_imp_base
== new_task_imp
);
1173 ipc_importance_task_release(new_task_imp
);
1175 #endif /* IMPORTANCE_INHERITANCE */
1177 new_task
->priority
= BASEPRI_DEFAULT
;
1178 new_task
->max_priority
= MAXPRI_USER
;
1180 task_policy_create(new_task
, parent_task
);
1182 new_task
->sec_token
= KERNEL_SECURITY_TOKEN
;
1183 new_task
->audit_token
= KERNEL_AUDIT_TOKEN
;
1186 task_set_64BitAddr(new_task
);
1188 new_task
->all_image_info_addr
= (mach_vm_address_t
)0;
1189 new_task
->all_image_info_size
= (mach_vm_size_t
)0;
1191 new_task
->pset_hint
= PROCESSOR_SET_NULL
;
1193 if (kernel_task
== TASK_NULL
) {
1194 new_task
->priority
= BASEPRI_KERNEL
;
1195 new_task
->max_priority
= MAXPRI_KERNEL
;
1197 new_task
->priority
= BASEPRI_DEFAULT
;
1198 new_task
->max_priority
= MAXPRI_USER
;
1202 bzero(new_task
->coalition
, sizeof(new_task
->coalition
));
1203 for (int i
= 0; i
< COALITION_NUM_TYPES
; i
++)
1204 queue_chain_init(new_task
->task_coalition
[i
]);
1206 /* Allocate I/O Statistics */
1207 new_task
->task_io_stats
= (io_stat_info_t
)kalloc(sizeof(struct io_stat_info
));
1208 assert(new_task
->task_io_stats
!= NULL
);
1209 bzero(new_task
->task_io_stats
, sizeof(struct io_stat_info
));
1211 bzero(&(new_task
->cpu_time_qos_stats
), sizeof(struct _cpu_time_qos_stats
));
1213 bzero(&new_task
->extmod_statistics
, sizeof(new_task
->extmod_statistics
));
1215 /* Copy resource acc. info from Parent for Corpe Forked task. */
1216 if (parent_task
!= NULL
&& (t_flags
& TF_CORPSE_FORK
)) {
1217 task_rollup_accounting_info(new_task
, parent_task
);
1219 /* Initialize to zero for standard fork/spawn case */
1220 new_task
->total_user_time
= 0;
1221 new_task
->total_system_time
= 0;
1222 new_task
->total_ptime
= 0;
1223 new_task
->faults
= 0;
1224 new_task
->pageins
= 0;
1225 new_task
->cow_faults
= 0;
1226 new_task
->messages_sent
= 0;
1227 new_task
->messages_received
= 0;
1228 new_task
->syscalls_mach
= 0;
1229 new_task
->syscalls_unix
= 0;
1230 new_task
->c_switch
= 0;
1231 new_task
->p_switch
= 0;
1232 new_task
->ps_switch
= 0;
1233 new_task
->low_mem_notified_warn
= 0;
1234 new_task
->low_mem_notified_critical
= 0;
1235 new_task
->purged_memory_warn
= 0;
1236 new_task
->purged_memory_critical
= 0;
1237 new_task
->low_mem_privileged_listener
= 0;
1238 new_task
->memlimit_is_active
= 0;
1239 new_task
->memlimit_is_fatal
= 0;
1240 new_task
->memlimit_active_exc_resource
= 0;
1241 new_task
->memlimit_inactive_exc_resource
= 0;
1242 new_task
->task_timer_wakeups_bin_1
= 0;
1243 new_task
->task_timer_wakeups_bin_2
= 0;
1244 new_task
->task_gpu_ns
= 0;
1245 new_task
->task_immediate_writes
= 0;
1246 new_task
->task_deferred_writes
= 0;
1247 new_task
->task_invalidated_writes
= 0;
1248 new_task
->task_metadata_writes
= 0;
1249 new_task
->task_energy
= 0;
1251 memset(&new_task
->task_monotonic
, 0, sizeof(new_task
->task_monotonic
));
1252 #endif /* MONOTONIC */
1256 #if CONFIG_COALITIONS
1257 if (!(t_flags
& TF_CORPSE_FORK
)) {
1258 /* TODO: there is no graceful failure path here... */
1259 if (parent_coalitions
&& parent_coalitions
[COALITION_TYPE_RESOURCE
]) {
1260 coalitions_adopt_task(parent_coalitions
, new_task
);
1261 } else if (parent_task
&& parent_task
->coalition
[COALITION_TYPE_RESOURCE
]) {
1263 * all tasks at least have a resource coalition, so
1264 * if the parent has one then inherit all coalitions
1265 * the parent is a part of
1267 coalitions_adopt_task(parent_task
->coalition
, new_task
);
1269 /* TODO: assert that new_task will be PID 1 (launchd) */
1270 coalitions_adopt_init_task(new_task
);
1273 * on exec, we need to transfer the coalition roles from the
1274 * parent task to the exec copy task.
1276 if (parent_task
&& (t_procflags
& TPF_EXEC_COPY
)) {
1277 int coal_roles
[COALITION_NUM_TYPES
];
1278 task_coalition_roles(parent_task
, coal_roles
);
1279 (void)coalitions_set_roles(new_task
->coalition
, new_task
, coal_roles
);
1282 coalitions_adopt_corpse_task(new_task
);
1285 if (new_task
->coalition
[COALITION_TYPE_RESOURCE
] == COALITION_NULL
) {
1286 panic("created task is not a member of a resource coalition");
1288 #endif /* CONFIG_COALITIONS */
1290 new_task
->dispatchqueue_offset
= 0;
1291 if (parent_task
!= NULL
) {
1292 new_task
->dispatchqueue_offset
= parent_task
->dispatchqueue_offset
;
1295 if (vm_backing_store_low
&& parent_task
!= NULL
)
1296 new_task
->priv_flags
|= (parent_task
->priv_flags
&VM_BACKING_STORE_PRIV
);
1298 new_task
->task_volatile_objects
= 0;
1299 new_task
->task_nonvolatile_objects
= 0;
1300 new_task
->task_purgeable_disowning
= FALSE
;
1301 new_task
->task_purgeable_disowned
= FALSE
;
1303 #if CONFIG_SECLUDED_MEMORY
1304 new_task
->task_can_use_secluded_mem
= FALSE
;
1305 new_task
->task_could_use_secluded_mem
= FALSE
;
1306 new_task
->task_could_also_use_secluded_mem
= FALSE
;
1307 #endif /* CONFIG_SECLUDED_MEMORY */
1309 queue_init(&new_task
->io_user_clients
);
1311 ipc_task_enable(new_task
);
1313 lck_mtx_lock(&tasks_threads_lock
);
1314 queue_enter(&tasks
, new_task
, task_t
, tasks
);
1316 if (tasks_suspend_state
) {
1317 task_suspend_internal(new_task
);
1319 lck_mtx_unlock(&tasks_threads_lock
);
1321 *child_task
= new_task
;
1322 return(KERN_SUCCESS
);
1326 * task_rollup_accounting_info
1328 * Roll up accounting stats. Used to rollup stats
1329 * for exec copy task and corpse fork.
1332 task_rollup_accounting_info(task_t to_task
, task_t from_task
)
1334 assert(from_task
!= to_task
);
1336 to_task
->total_user_time
= from_task
->total_user_time
;
1337 to_task
->total_system_time
= from_task
->total_system_time
;
1338 to_task
->total_ptime
= from_task
->total_ptime
;
1339 to_task
->faults
= from_task
->faults
;
1340 to_task
->pageins
= from_task
->pageins
;
1341 to_task
->cow_faults
= from_task
->cow_faults
;
1342 to_task
->messages_sent
= from_task
->messages_sent
;
1343 to_task
->messages_received
= from_task
->messages_received
;
1344 to_task
->syscalls_mach
= from_task
->syscalls_mach
;
1345 to_task
->syscalls_unix
= from_task
->syscalls_unix
;
1346 to_task
->c_switch
= from_task
->c_switch
;
1347 to_task
->p_switch
= from_task
->p_switch
;
1348 to_task
->ps_switch
= from_task
->ps_switch
;
1349 to_task
->extmod_statistics
= from_task
->extmod_statistics
;
1350 to_task
->low_mem_notified_warn
= from_task
->low_mem_notified_warn
;
1351 to_task
->low_mem_notified_critical
= from_task
->low_mem_notified_critical
;
1352 to_task
->purged_memory_warn
= from_task
->purged_memory_warn
;
1353 to_task
->purged_memory_critical
= from_task
->purged_memory_critical
;
1354 to_task
->low_mem_privileged_listener
= from_task
->low_mem_privileged_listener
;
1355 *to_task
->task_io_stats
= *from_task
->task_io_stats
;
1356 to_task
->cpu_time_qos_stats
= from_task
->cpu_time_qos_stats
;
1357 to_task
->task_timer_wakeups_bin_1
= from_task
->task_timer_wakeups_bin_1
;
1358 to_task
->task_timer_wakeups_bin_2
= from_task
->task_timer_wakeups_bin_2
;
1359 to_task
->task_gpu_ns
= from_task
->task_gpu_ns
;
1360 to_task
->task_immediate_writes
= from_task
->task_immediate_writes
;
1361 to_task
->task_deferred_writes
= from_task
->task_deferred_writes
;
1362 to_task
->task_invalidated_writes
= from_task
->task_invalidated_writes
;
1363 to_task
->task_metadata_writes
= from_task
->task_metadata_writes
;
1364 to_task
->task_energy
= from_task
->task_energy
;
1366 /* Skip ledger roll up for memory accounting entries */
1367 ledger_rollup_entry(to_task
->ledger
, from_task
->ledger
, task_ledgers
.cpu_time
);
1368 ledger_rollup_entry(to_task
->ledger
, from_task
->ledger
, task_ledgers
.platform_idle_wakeups
);
1369 ledger_rollup_entry(to_task
->ledger
, from_task
->ledger
, task_ledgers
.interrupt_wakeups
);
1370 #if CONFIG_SCHED_SFI
1371 for (sfi_class_id_t class_id
= SFI_CLASS_UNSPECIFIED
; class_id
< MAX_SFI_CLASS_ID
; class_id
++) {
1372 ledger_rollup_entry(to_task
->ledger
, from_task
->ledger
, task_ledgers
.sfi_wait_times
[class_id
]);
1375 ledger_rollup_entry(to_task
->ledger
, from_task
->ledger
, task_ledgers
.cpu_time_billed_to_me
);
1376 ledger_rollup_entry(to_task
->ledger
, from_task
->ledger
, task_ledgers
.cpu_time_billed_to_others
);
1377 ledger_rollup_entry(to_task
->ledger
, from_task
->ledger
, task_ledgers
.physical_writes
);
1378 ledger_rollup_entry(to_task
->ledger
, from_task
->ledger
, task_ledgers
.logical_writes
);
1379 ledger_rollup_entry(to_task
->ledger
, from_task
->ledger
, task_ledgers
.energy_billed_to_me
);
1380 ledger_rollup_entry(to_task
->ledger
, from_task
->ledger
, task_ledgers
.energy_billed_to_others
);
/* File-scope counter, initialised to zero; it is not updated anywhere in the code visible here. */
int task_dropped_imp_count = 0;
1388 * Drop a reference on a task.
1394 ledger_amount_t credit
, debit
, interrupt_wakeups
, platform_idle_wakeups
;
1397 if (task
== TASK_NULL
)
1400 refs
= task_deallocate_internal(task
);
1402 #if IMPORTANCE_INHERITANCE
1406 atomic_load_explicit(&task
->ref_count
, memory_order_acquire
);
1410 * If last ref potentially comes from the task's importance,
1411 * disconnect it. But more task refs may be added before
1412 * that completes, so wait for the reference to go to zero
1413 * naturually (it may happen on a recursive task_deallocate()
1414 * from the ipc_importance_disconnect_task() call).
1416 if (IIT_NULL
!= task
->task_imp_base
)
1417 ipc_importance_disconnect_task(task
);
1424 atomic_load_explicit(&task
->ref_count
, memory_order_acquire
);
1426 #endif /* IMPORTANCE_INHERITANCE */
1428 lck_mtx_lock(&tasks_threads_lock
);
1429 queue_remove(&terminated_tasks
, task
, task_t
, tasks
);
1430 terminated_tasks_count
--;
1431 lck_mtx_unlock(&tasks_threads_lock
);
1434 * remove the reference on atm descriptor
1436 task_atm_reset(task
);
1439 * remove the reference on bank context
1441 task_bank_reset(task
);
1443 if (task
->task_io_stats
)
1444 kfree(task
->task_io_stats
, sizeof(struct io_stat_info
));
1447 * Give the machine dependent code a chance
1448 * to perform cleanup before ripping apart
1451 machine_task_terminate(task
);
1453 ipc_task_terminate(task
);
1455 /* let iokit know */
1456 iokit_task_terminate(task
);
1458 if (task
->affinity_space
)
1459 task_affinity_deallocate(task
);
1462 if (task
->ledger
!= NULL
&&
1463 task
->map
!= NULL
&&
1464 task
->map
->pmap
!= NULL
&&
1465 task
->map
->pmap
->ledger
!= NULL
) {
1466 assert(task
->ledger
== task
->map
->pmap
->ledger
);
1468 #endif /* MACH_ASSERT */
1470 vm_purgeable_disown(task
);
1471 assert(task
->task_purgeable_disowned
);
1472 if (task
->task_volatile_objects
!= 0 ||
1473 task
->task_nonvolatile_objects
!= 0) {
1474 panic("task_deallocate(%p): "
1475 "volatile_objects=%d nonvolatile_objects=%d\n",
1477 task
->task_volatile_objects
,
1478 task
->task_nonvolatile_objects
);
1481 vm_map_deallocate(task
->map
);
1482 is_release(task
->itk_space
);
1484 ledger_get_entries(task
->ledger
, task_ledgers
.interrupt_wakeups
,
1485 &interrupt_wakeups
, &debit
);
1486 ledger_get_entries(task
->ledger
, task_ledgers
.platform_idle_wakeups
,
1487 &platform_idle_wakeups
, &debit
);
1489 #if defined(CONFIG_SCHED_MULTIQ)
1490 sched_group_destroy(task
->sched_group
);
1493 /* Accumulate statistics for dead tasks */
1494 lck_spin_lock(&dead_task_statistics_lock
);
1495 dead_task_statistics
.total_user_time
+= task
->total_user_time
;
1496 dead_task_statistics
.total_system_time
+= task
->total_system_time
;
1498 dead_task_statistics
.task_interrupt_wakeups
+= interrupt_wakeups
;
1499 dead_task_statistics
.task_platform_idle_wakeups
+= platform_idle_wakeups
;
1501 dead_task_statistics
.task_timer_wakeups_bin_1
+= task
->task_timer_wakeups_bin_1
;
1502 dead_task_statistics
.task_timer_wakeups_bin_2
+= task
->task_timer_wakeups_bin_2
;
1503 dead_task_statistics
.total_ptime
+= task
->total_ptime
;
1504 dead_task_statistics
.total_pset_switches
+= task
->ps_switch
;
1505 dead_task_statistics
.task_gpu_ns
+= task
->task_gpu_ns
;
1506 dead_task_statistics
.task_energy
+= task
->task_energy
;
1508 lck_spin_unlock(&dead_task_statistics_lock
);
1509 lck_mtx_destroy(&task
->lock
, &task_lck_grp
);
1511 if (!ledger_get_entries(task
->ledger
, task_ledgers
.tkm_private
, &credit
,
1513 OSAddAtomic64(credit
, (int64_t *)&tasks_tkm_private
.alloc
);
1514 OSAddAtomic64(debit
, (int64_t *)&tasks_tkm_private
.free
);
1516 if (!ledger_get_entries(task
->ledger
, task_ledgers
.tkm_shared
, &credit
,
1518 OSAddAtomic64(credit
, (int64_t *)&tasks_tkm_shared
.alloc
);
1519 OSAddAtomic64(debit
, (int64_t *)&tasks_tkm_shared
.free
);
1521 ledger_dereference(task
->ledger
);
1523 #if TASK_REFERENCE_LEAK_DEBUG
1524 btlog_remove_entries_for_element(task_ref_btlog
, task
);
1527 #if CONFIG_COALITIONS
1528 task_release_coalitions(task
);
1529 #endif /* CONFIG_COALITIONS */
1531 bzero(task
->coalition
, sizeof(task
->coalition
));
1534 /* clean up collected information since last reference to task is gone */
1535 if (task
->corpse_info
) {
1536 void *corpse_info_kernel
= kcdata_memory_get_begin_addr(task
->corpse_info
);
1537 task_crashinfo_destroy(task
->corpse_info
);
1538 task
->corpse_info
= NULL
;
1539 if (corpse_info_kernel
) {
1540 kfree(corpse_info_kernel
, CORPSEINFO_ALLOCATION_SIZE
);
1546 if (task
->crash_label
) {
1547 mac_exc_free_label(task
->crash_label
);
1548 task
->crash_label
= NULL
;
1552 zfree(task_zone
, task
);
1556 * task_name_deallocate:
1558 * Drop a reference on a task name.
1561 task_name_deallocate(
1562 task_name_t task_name
)
1564 return(task_deallocate((task_t
)task_name
));
1568 * task_inspect_deallocate:
1570 * Drop a task inspection reference.
1573 task_inspect_deallocate(
1574 task_inspect_t task_inspect
)
1576 return(task_deallocate((task_t
)task_inspect
));
1580 * task_suspension_token_deallocate:
1582 * Drop a reference on a task suspension token.
1585 task_suspension_token_deallocate(
1586 task_suspension_token_t token
)
1588 return(task_deallocate((task_t
)token
));
1593 * task_collect_crash_info:
1595 * collect crash info from bsd and mach based data
1598 task_collect_crash_info(
1601 struct label
*crash_label
,
1605 kern_return_t kr
= KERN_SUCCESS
;
1607 kcdata_descriptor_t crash_data
= NULL
;
1608 kcdata_descriptor_t crash_data_release
= NULL
;
1609 mach_msg_type_number_t size
= CORPSEINFO_ALLOCATION_SIZE
;
1610 mach_vm_offset_t crash_data_ptr
= 0;
1611 void *crash_data_kernel
= NULL
;
1612 void *crash_data_kernel_release
= NULL
;
1614 struct label
*label
, *free_label
;
1617 if (!corpses_enabled()) {
1618 return KERN_NOT_SUPPORTED
;
1622 free_label
= label
= mac_exc_create_label();
1627 assert(is_corpse_fork
|| task
->bsd_info
!= NULL
);
1628 if (task
->corpse_info
== NULL
&& (is_corpse_fork
|| task
->bsd_info
!= NULL
)) {
1630 /* Set the crash label, used by the exception delivery mac hook */
1631 free_label
= task
->crash_label
; // Most likely NULL.
1632 task
->crash_label
= label
;
1633 mac_exc_update_task_crash_label(task
, crash_label
);
1637 crash_data_kernel
= (void *) kalloc(CORPSEINFO_ALLOCATION_SIZE
);
1638 if (crash_data_kernel
== NULL
) {
1639 kr
= KERN_RESOURCE_SHORTAGE
;
1642 bzero(crash_data_kernel
, CORPSEINFO_ALLOCATION_SIZE
);
1643 crash_data_ptr
= (mach_vm_offset_t
) crash_data_kernel
;
1645 /* Do not get a corpse ref for corpse fork */
1646 crash_data
= task_crashinfo_alloc_init((mach_vm_address_t
)crash_data_ptr
, size
,
1647 is_corpse_fork
? 0 : CORPSE_CRASHINFO_HAS_REF
,
1648 KCFLAG_USE_MEMCOPY
);
1651 crash_data_release
= task
->corpse_info
;
1652 crash_data_kernel_release
= kcdata_memory_get_begin_addr(crash_data_release
);
1653 task
->corpse_info
= crash_data
;
1658 kfree(crash_data_kernel
, CORPSEINFO_ALLOCATION_SIZE
);
1662 if (crash_data_release
!= NULL
) {
1663 task_crashinfo_destroy(crash_data_release
);
1665 if (crash_data_kernel_release
!= NULL
) {
1666 kfree(crash_data_kernel_release
, CORPSEINFO_ALLOCATION_SIZE
);
1674 if (free_label
!= NULL
) {
1675 mac_exc_free_label(free_label
);
1682 * task_deliver_crash_notification:
1684 * Makes outcall to registered host port for a corpse.
1687 task_deliver_crash_notification(
1690 exception_type_t etype
,
1691 mach_exception_subcode_t subcode
)
1693 kcdata_descriptor_t crash_info
= task
->corpse_info
;
1694 thread_t th_iter
= NULL
;
1695 kern_return_t kr
= KERN_SUCCESS
;
1696 wait_interrupt_t wsave
;
1697 mach_exception_data_type_t code
[EXCEPTION_CODE_MAX
];
1698 ipc_port_t task_port
, old_notify
;
1700 if (crash_info
== NULL
)
1701 return KERN_FAILURE
;
1704 if (task_is_a_corpse_fork(task
)) {
1705 /* Populate code with EXC_{RESOURCE,GUARD} for corpse fork */
1709 /* Populate code with EXC_CRASH for corpses */
1710 code
[0] = EXC_CRASH
;
1712 /* Update the code[1] if the boot-arg corpse_for_fatal_memkill is set */
1713 if (corpse_for_fatal_memkill
) {
1718 queue_iterate(&task
->threads
, th_iter
, thread_t
, task_threads
)
1720 if (th_iter
->corpse_dup
== FALSE
) {
1721 ipc_thread_reset(th_iter
);
1726 /* Arm the no-sender notification for taskport */
1727 task_reference(task
);
1728 task_port
= convert_task_to_port(task
);
1730 assert(ip_active(task_port
));
1731 ipc_port_nsrequest(task_port
, task_port
->ip_mscount
, ipc_port_make_sonce_locked(task_port
), &old_notify
);
1733 assert(IP_NULL
== old_notify
);
1735 wsave
= thread_interrupt_level(THREAD_UNINT
);
1736 kr
= exception_triage_thread(EXC_CORPSE_NOTIFY
, code
, EXCEPTION_CODE_MAX
, thread
);
1737 if (kr
!= KERN_SUCCESS
) {
1738 printf("Failed to send exception EXC_CORPSE_NOTIFY. error code: %d for pid %d\n", kr
, task_pid(task
));
1741 (void)thread_interrupt_level(wsave
);
1744 * Drop the send right on task port, will fire the
1745 * no-sender notification if exception deliver failed.
1747 ipc_port_release_send(task_port
);
1754 * Terminate the specified task. See comments on thread_terminate
1755 * (kern/thread.c) about problems with terminating the "current task."
1762 if (task
== TASK_NULL
)
1763 return (KERN_INVALID_ARGUMENT
);
1766 return (KERN_FAILURE
);
1768 return (task_terminate_internal(task
));
/*
 * Prototypes for BSD-side helpers, declared locally for MACH_ASSERT builds.
 * NOTE(review): the opening "#if MACH_ASSERT" was missing from the damaged
 * listing and is restored to pair with the visible "#endif".
 */
#if MACH_ASSERT
extern int proc_pid(struct proc *);
extern void proc_name_kdp(task_t t, char *buf, int size);
#endif /* MACH_ASSERT */
1776 #define VM_MAP_PARTIAL_REAP 0x54 /* 0x150 */
1778 __unused
task_partial_reap(task_t task
, __unused
int pid
)
1780 unsigned int reclaimed_resident
= 0;
1781 unsigned int reclaimed_compressed
= 0;
1782 uint64_t task_page_count
;
1784 task_page_count
= (get_task_phys_footprint(task
) / PAGE_SIZE_64
);
1786 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM
, VM_MAP_PARTIAL_REAP
) | DBG_FUNC_START
),
1787 pid
, task_page_count
, 0, 0, 0);
1789 vm_map_partial_reap(task
->map
, &reclaimed_resident
, &reclaimed_compressed
);
1791 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM
, VM_MAP_PARTIAL_REAP
) | DBG_FUNC_END
),
1792 pid
, reclaimed_resident
, reclaimed_compressed
, 0, 0);
1796 task_mark_corpse(task_t task
)
1798 kern_return_t kr
= KERN_SUCCESS
;
1799 thread_t self_thread
;
1801 wait_interrupt_t wsave
;
1803 struct label
*crash_label
= NULL
;
1806 assert(task
!= kernel_task
);
1807 assert(task
== current_task());
1808 assert(!task_is_a_corpse(task
));
1811 crash_label
= mac_exc_create_label_for_proc((struct proc
*)task
->bsd_info
);
1814 kr
= task_collect_crash_info(task
,
1819 if (kr
!= KERN_SUCCESS
) {
1823 self_thread
= current_thread();
1825 wsave
= thread_interrupt_level(THREAD_UNINT
);
1828 task_set_corpse_pending_report(task
);
1829 task_set_corpse(task
);
1830 task
->crashed_thread_id
= thread_tid(self_thread
);
1832 kr
= task_start_halt_locked(task
, TRUE
);
1833 assert(kr
== KERN_SUCCESS
);
1835 ipc_task_reset(task
);
1836 /* Remove the naked send right for task port, needed to arm no sender notification */
1837 task_set_special_port(task
, TASK_KERNEL_PORT
, IPC_PORT_NULL
);
1838 ipc_task_enable(task
);
1841 /* terminate the ipc space */
1842 ipc_space_terminate(task
->itk_space
);
1844 /* Add it to global corpse task list */
1845 task_add_to_corpse_task_list(task
);
1847 task_start_halt(task
);
1848 thread_terminate_internal(self_thread
);
1850 (void) thread_interrupt_level(wsave
);
1851 assert(task
->halting
== TRUE
);
1855 mac_exc_free_label(crash_label
);
1863 * Clears the corpse pending bit on task.
1864 * Removes inspection bit on the threads.
1867 task_clear_corpse(task_t task
)
1869 thread_t th_iter
= NULL
;
1872 queue_iterate(&task
->threads
, th_iter
, thread_t
, task_threads
)
1874 thread_mtx_lock(th_iter
);
1875 th_iter
->inspection
= FALSE
;
1876 thread_mtx_unlock(th_iter
);
1879 thread_terminate_crashed_threads();
1880 /* remove the pending corpse report flag */
1881 task_clear_corpse_pending_report(task
);
1889 * Called whenever the Mach port system detects no-senders on
1890 * the task port of a corpse.
1891 * Each notification that comes in should terminate the task (corpse).
1894 task_port_notify(mach_msg_header_t
*msg
)
1896 mach_no_senders_notification_t
*notification
= (void *)msg
;
1897 ipc_port_t port
= notification
->not_header
.msgh_remote_port
;
1900 assert(ip_active(port
));
1901 assert(IKOT_TASK
== ip_kotype(port
));
1902 task
= (task_t
) port
->ip_kobject
;
1904 assert(task_is_a_corpse(task
));
1906 /* Remove the task from global corpse task list */
1907 task_remove_from_corpse_task_list(task
);
1909 task_clear_corpse(task
);
1910 task_terminate_internal(task
);
1914 * task_wait_till_threads_terminate_locked
1916 * Wait till all the threads in the task are terminated.
1917 * Might release the task lock and re-acquire it.
1920 task_wait_till_threads_terminate_locked(task_t task
)
1922 /* wait for all the threads in the task to terminate */
1923 while (task
->active_thread_count
!= 0) {
1924 assert_wait((event_t
)&task
->active_thread_count
, THREAD_UNINT
);
1926 thread_block(THREAD_CONTINUE_NULL
);
1933 * task_duplicate_map_and_threads
1935 * Copy vmmap of source task.
1936 * Copy active threads from source task to destination task.
1937 * Source task would be suspended during the copy.
1940 task_duplicate_map_and_threads(
1944 thread_t
*thread_ret
,
1945 uint64_t **udata_buffer
,
1949 kern_return_t kr
= KERN_SUCCESS
;
1951 thread_t thread
, self
, thread_return
= THREAD_NULL
;
1952 thread_t new_thread
= THREAD_NULL
;
1953 thread_t
*thread_array
;
1954 uint32_t active_thread_count
= 0, array_count
= 0, i
;
1956 uint64_t *buffer
= NULL
;
1958 int est_knotes
= 0, num_knotes
= 0;
1960 self
= current_thread();
1963 * Suspend the task to copy thread state, use the internal
1964 * variant so that no user-space process can resume
1965 * the task from under us
1967 kr
= task_suspend_internal(task
);
1968 if (kr
!= KERN_SUCCESS
) {
1972 if (task
->map
->disable_vmentry_reuse
== TRUE
) {
1974 * Quite likely GuardMalloc (or some debugging tool)
1975 * is being used on this task. And it has gone through
1976 * its limit. Making a corpse will likely encounter
1977 * a lot of VM entries that will need COW.
1981 task_resume_internal(task
);
1982 return KERN_FAILURE
;
1985 /* Check with VM if vm_map_fork is allowed for this task */
1986 if (task_allowed_vm_map_fork(task
)) {
1988 /* Setup new task's vmmap, switch from parent task's map to it COW map */
1989 oldmap
= new_task
->map
;
1990 new_task
->map
= vm_map_fork(new_task
->ledger
,
1992 (VM_MAP_FORK_SHARE_IF_INHERIT_NONE
|
1993 VM_MAP_FORK_PRESERVE_PURGEABLE
));
1994 vm_map_deallocate(oldmap
);
1996 /* Get all the udata pointers from kqueue */
1997 est_knotes
= kevent_proc_copy_uptrs(p
, NULL
, 0);
1998 if (est_knotes
> 0) {
1999 buf_size
= (est_knotes
+ 32) * sizeof(uint64_t);
2000 buffer
= (uint64_t *) kalloc(buf_size
);
2001 num_knotes
= kevent_proc_copy_uptrs(p
, buffer
, buf_size
);
2002 if (num_knotes
> est_knotes
+ 32) {
2003 num_knotes
= est_knotes
+ 32;
2008 active_thread_count
= task
->active_thread_count
;
2009 if (active_thread_count
== 0) {
2010 if (buffer
!= NULL
) {
2011 kfree(buffer
, buf_size
);
2013 task_resume_internal(task
);
2014 return KERN_FAILURE
;
2017 thread_array
= (thread_t
*) kalloc(sizeof(thread_t
) * active_thread_count
);
2019 /* Iterate all the threads and drop the task lock before calling thread_create_with_continuation */
2021 queue_iterate(&task
->threads
, thread
, thread_t
, task_threads
) {
2022 /* Skip inactive threads */
2023 active
= thread
->active
;
2028 if (array_count
>= active_thread_count
) {
2032 thread_array
[array_count
++] = thread
;
2033 thread_reference(thread
);
2037 for (i
= 0; i
< array_count
; i
++) {
2039 kr
= thread_create_with_continuation(new_task
, &new_thread
, (thread_continue_t
)thread_corpse_continue
);
2040 if (kr
!= KERN_SUCCESS
) {
2044 /* Equivalent of current thread in corpse */
2045 if (thread_array
[i
] == self
) {
2046 thread_return
= new_thread
;
2047 new_task
->crashed_thread_id
= thread_tid(new_thread
);
2049 /* drop the extra ref returned by thread_create_with_continuation */
2050 thread_deallocate(new_thread
);
2053 kr
= thread_dup2(thread_array
[i
], new_thread
);
2054 if (kr
!= KERN_SUCCESS
) {
2055 thread_mtx_lock(new_thread
);
2056 new_thread
->corpse_dup
= TRUE
;
2057 thread_mtx_unlock(new_thread
);
2061 /* Copy thread name */
2062 bsd_copythreadname(new_thread
->uthread
, thread_array
[i
]->uthread
);
2063 thread_copy_resource_info(new_thread
, thread_array
[i
]);
2066 task_resume_internal(task
);
2068 for (i
= 0; i
< array_count
; i
++) {
2069 thread_deallocate(thread_array
[i
]);
2071 kfree(thread_array
, sizeof(thread_t
) * active_thread_count
);
2073 if (kr
== KERN_SUCCESS
) {
2074 *thread_ret
= thread_return
;
2075 *udata_buffer
= buffer
;
2077 *num_udata
= num_knotes
;
2079 if (thread_return
!= THREAD_NULL
) {
2080 thread_deallocate(thread_return
);
2082 if (buffer
!= NULL
) {
2083 kfree(buffer
, buf_size
);
2091 * Place holder function to be filled by VM to return
2092 * TRUE if vm_map_fork is allowed on the given task.
2095 task_allowed_vm_map_fork(task_t task __unused
)
2097 return memorystatus_allowed_vm_map_fork(task
);
/*
 * Prototype for the helper that task_terminate_internal() calls to revoke
 * secluded-memory use.
 * NOTE(review): the "task_t task" parameter line was missing from the
 * damaged listing; it is restored from the call site
 * task_set_can_use_secluded_mem_locked(task, FALSE).
 */
#if CONFIG_SECLUDED_MEMORY
extern void task_set_can_use_secluded_mem_locked(
	task_t		task,
	boolean_t	can_use_secluded_mem);
#endif /* CONFIG_SECLUDED_MEMORY */
2107 task_terminate_internal(
2110 thread_t thread
, self
;
2112 boolean_t interrupt_save
;
2115 assert(task
!= kernel_task
);
2117 self
= current_thread();
2118 self_task
= self
->task
;
2121 * Get the task locked and make sure that we are not racing
2122 * with someone else trying to terminate us.
2124 if (task
== self_task
)
2127 if (task
< self_task
) {
2129 task_lock(self_task
);
2132 task_lock(self_task
);
2136 #if CONFIG_SECLUDED_MEMORY
2137 if (task
->task_can_use_secluded_mem
) {
2138 task_set_can_use_secluded_mem_locked(task
, FALSE
);
2140 task
->task_could_use_secluded_mem
= FALSE
;
2141 task
->task_could_also_use_secluded_mem
= FALSE
;
2142 #endif /* CONFIG_SECLUDED_MEMORY */
2144 if (!task
->active
) {
2146 * Task is already being terminated.
2147 * Just return an error. If we are dying, this will
2148 * just get us to our AST special handler and that
2149 * will get us to finalize the termination of ourselves.
2152 if (self_task
!= task
)
2153 task_unlock(self_task
);
2155 return (KERN_FAILURE
);
2158 if (task_corpse_pending_report(task
)) {
2160 * Task is marked for reporting as corpse.
2161 * Just return an error. This will
2162 * just get us to our AST special handler and that
2163 * will get us to finish the path to death
2166 if (self_task
!= task
)
2167 task_unlock(self_task
);
2169 return (KERN_FAILURE
);
2172 if (self_task
!= task
)
2173 task_unlock(self_task
);
2176 * Make sure the current thread does not get aborted out of
2177 * the waits inside these operations.
2179 interrupt_save
= thread_interrupt_level(THREAD_UNINT
);
2182 * Indicate that we want all the threads to stop executing
2183 * at user space by holding the task (we would have held
2184 * each thread independently in thread_terminate_internal -
2185 * but this way we may be more likely to already find it
2186 * held there). Mark the task inactive, and prevent
2187 * further task operations via the task port.
2189 task_hold_locked(task
);
2190 task
->active
= FALSE
;
2191 ipc_task_disable(task
);
2193 #if CONFIG_TELEMETRY
2195 * Notify telemetry that this task is going away.
2197 telemetry_task_ctl_locked(task
, TF_TELEMETRY
, 0);
2201 * Terminate each thread in the task.
2203 queue_iterate(&task
->threads
, thread
, thread_t
, task_threads
) {
2204 thread_terminate_internal(thread
);
2208 if (task
->bsd_info
!= NULL
&& !task_is_exec_copy(task
)) {
2209 pid
= proc_pid(task
->bsd_info
);
2211 #endif /* MACH_BSD */
2215 proc_set_task_policy(task
, TASK_POLICY_ATTRIBUTE
,
2216 TASK_POLICY_TERMINATED
, TASK_POLICY_ENABLE
);
2218 /* Early object reap phase */
2220 // PR-17045188: Revisit implementation
2221 // task_partial_reap(task, pid);
2225 * remove all task watchers
2227 task_removewatchers(task
);
2229 #endif /* CONFIG_EMBEDDED */
2232 * Destroy all synchronizers owned by the task.
2234 task_synchronizer_destroy_all(task
);
2237 * Destroy the IPC space, leaving just a reference for it.
2239 ipc_space_terminate(task
->itk_space
);
2242 /* if some ledgers go negative on tear-down again... */
2243 ledger_disable_panic_on_negative(task
->map
->pmap
->ledger
,
2244 task_ledgers
.phys_footprint
);
2245 ledger_disable_panic_on_negative(task
->map
->pmap
->ledger
,
2246 task_ledgers
.internal
);
2247 ledger_disable_panic_on_negative(task
->map
->pmap
->ledger
,
2248 task_ledgers
.internal_compressed
);
2249 ledger_disable_panic_on_negative(task
->map
->pmap
->ledger
,
2250 task_ledgers
.iokit_mapped
);
2251 ledger_disable_panic_on_negative(task
->map
->pmap
->ledger
,
2252 task_ledgers
.alternate_accounting
);
2253 ledger_disable_panic_on_negative(task
->map
->pmap
->ledger
,
2254 task_ledgers
.alternate_accounting_compressed
);
2258 * If the current thread is a member of the task
2259 * being terminated, then the last reference to
2260 * the task will not be dropped until the thread
2261 * is finally reaped. To avoid incurring the
2262 * expense of removing the address space regions
2263 * at reap time, we do it explicitly here.
2266 vm_map_lock(task
->map
);
2267 vm_map_disable_hole_optimization(task
->map
);
2268 vm_map_unlock(task
->map
);
2272 * Identify the pmap's process, in case the pmap ledgers drift
2273 * and we have to report it.
2276 if (task
->bsd_info
&& !task_is_exec_copy(task
)) {
2277 pid
= proc_pid(task
->bsd_info
);
2278 proc_name_kdp(task
, procname
, sizeof (procname
));
2281 strlcpy(procname
, "<unknown>", sizeof (procname
));
2283 pmap_set_process(task
->map
->pmap
, pid
, procname
);
2284 #endif /* MACH_ASSERT */
2286 vm_map_remove(task
->map
,
2287 task
->map
->min_offset
,
2288 task
->map
->max_offset
,
2292 * + remove immutable mappings
2294 (VM_MAP_REMOVE_NO_UNNESTING
|
2295 VM_MAP_REMOVE_IMMUTABLE
));
2297 /* release our shared region */
2298 vm_shared_region_set(task
, NULL
);
2301 lck_mtx_lock(&tasks_threads_lock
);
2302 queue_remove(&tasks
, task
, task_t
, tasks
);
2303 queue_enter(&terminated_tasks
, task
, task_t
, tasks
);
2305 terminated_tasks_count
++;
2306 lck_mtx_unlock(&tasks_threads_lock
);
2309 * We no longer need to guard against being aborted, so restore
2310 * the previous interruptible state.
2312 thread_interrupt_level(interrupt_save
);
2315 /* force the task to release all ctrs */
2316 if (task
->t_chud
& TASK_KPC_FORCED_ALL_CTRS
)
2317 kpc_force_all_ctrs(task
, 0);
2320 #if CONFIG_COALITIONS
2322 * Leave our coalitions. (drop activation but not reference)
2324 coalitions_remove_task(task
);
2328 * Get rid of the task active reference on itself.
2330 task_deallocate(task
);
2332 return (KERN_SUCCESS
);
/*
 * tasks_system_suspend:
 *
 * Applies a system-wide suspend (suspend != 0) or resume (suspend == 0) by
 * walking the global tasks queue and calling task_suspend_internal() or
 * task_resume_internal() on each entry.  The walk runs with
 * tasks_threads_lock held.  The requested state is recorded in
 * tasks_suspend_state, and the assert requires that it differs from the
 * state recorded previously (no double suspend / double resume).
 *
 * NOTE(review): kernel_task is special-cased inside the loop, but the
 * statement in that branch is not visible in this listing; presumably it
 * skips the kernel task - confirm against the full source.
 */
2336 tasks_system_suspend(boolean_t suspend
)
2340 lck_mtx_lock(&tasks_threads_lock
);
2341 assert(tasks_suspend_state
!= suspend
);
2342 tasks_suspend_state
= suspend
;
2343 queue_iterate(&tasks
, task
, task_t
, tasks
) {
2344 if (task
== kernel_task
) {
2347 suspend
? task_suspend_internal(task
) : task_resume_internal(task
);
2349 lck_mtx_unlock(&tasks_threads_lock
);
2355 * Shut the current task down (except for the current thread) in
2356 * preparation for dramatic changes to the task (probably exec).
2357 * We hold the task and mark all other threads in the task for
/*
 * task_start_halt:
 *
 * Thin wrapper that runs task_start_halt_locked(task, FALSE), i.e. the halt
 * path without corpse marking, and keeps the status in kr (pre-set to
 * KERN_SUCCESS).
 * NOTE(review): the locking and return statements of this routine are not
 * visible in this listing - confirm against the full source.
 */
2361 task_start_halt(task_t task
)
2363 kern_return_t kr
= KERN_SUCCESS
;
2365 kr
= task_start_halt_locked(task
, FALSE
);
/*
 * task_start_halt_locked:
 *
 * First phase of halting a task.
 *
 *   task                - task to halt; asserted not to be kernel_task.  It
 *                         must be the task of the current thread or satisfy
 *                         task_is_a_corpse_fork(), otherwise
 *                         KERN_INVALID_ARGUMENT is returned.
 *   should_mark_corpse  - when TRUE, every thread in the task gets its
 *                         inspection flag set under the thread mutex.
 *
 * Returns KERN_FAILURE when the task is already halting, the task is not
 * active, or the current thread is not active.  Otherwise it sets
 * task->halting, places a hold with task_hold_locked(), reads the dispatch
 * queue offset from the proc (task->bsd_info), walks task->threads calling
 * thread_terminate_internal(), stores the offset in
 * task->dispatchqueue_offset, drops the hold with task_release_locked() and
 * returns KERN_SUCCESS.
 *
 * NOTE(review): the _locked suffix suggests the caller holds the task lock.
 * The in-body comment speaks of terminating the OTHER threads, but no
 * current-thread test is visible in this listing (a line appears to be
 * missing before the thread_terminate_internal call) - confirm against the
 * full source.
 */
2370 static kern_return_t
2371 task_start_halt_locked(task_t task
, boolean_t should_mark_corpse
)
2373 thread_t thread
, self
;
2374 uint64_t dispatchqueue_offset
;
2376 assert(task
!= kernel_task
);
2378 self
= current_thread();
2380 if (task
!= self
->task
&& !task_is_a_corpse_fork(task
))
2381 return (KERN_INVALID_ARGUMENT
);
2383 if (task
->halting
|| !task
->active
|| !self
->active
) {
2385 * Task or current thread is already being terminated.
2386 * Hurry up and return out of the current kernel context
2387 * so that we run our AST special handler to terminate
2390 return (KERN_FAILURE
);
2393 task
->halting
= TRUE
;
2396 * Mark all the threads to keep them from starting any more
2397 * user-level execution. The thread_terminate_internal code
2398 * would do this on a thread by thread basis anyway, but this
2399 * gives us a better chance of not having to wait there.
2401 task_hold_locked(task
);
2402 dispatchqueue_offset
= get_dispatchqueue_offset_from_proc(task
->bsd_info
);
2405 * Terminate all the other threads in the task.
2407 queue_iterate(&task
->threads
, thread
, thread_t
, task_threads
)
2409 if (should_mark_corpse
) {
2410 thread_mtx_lock(thread
);
2411 thread
->inspection
= TRUE
;
2412 thread_mtx_unlock(thread
);
2415 thread_terminate_internal(thread
);
2417 task
->dispatchqueue_offset
= dispatchqueue_offset
;
2419 task_release_locked(task
);
2421 return KERN_SUCCESS
;
2426 * task_complete_halt:
2428 * Complete task halt by waiting for threads to terminate, then clean
2429 * up task resources (VM, port namespace, etc...) and then let the
2430 * current thread go in the (practically empty) task context.
2432 * Note: task->halting flag is not cleared in order to avoid creation
2433 * of new thread in old exec'ed task.
/*
 * task_complete_halt (summary of the visible code):
 *
 * Second phase of a halt.  Asserts that task->halting is set and that task
 * is current_task().  When task->thread_count > 1 it waits uninterruptibly
 * (THREAD_UNINT) on the address of task->halting.  It then tears the task
 * down in this order:
 *   1. machine_task_terminate()
 *   2. task_synchronizer_destroy_all()
 *   3. ipc_space_clean() on task->itk_space
 *   4. vm_map_remove() over [min_offset, max_offset] of task->map with
 *      VM_MAP_REMOVE_NO_UNNESTING | VM_MAP_REMOVE_IMMUTABLE
 *   5. iokit_task_terminate()
 *
 * NOTE(review): the task lock/unlock calls around the wait are not visible
 * in this listing - confirm against the full source.
 */
2436 task_complete_halt(task_t task
)
2439 assert(task
->halting
);
2440 assert(task
== current_task());
2443 * Wait for the other threads to get shut down.
2444 * When the last other thread is reaped, we'll be
2447 if (task
->thread_count
> 1) {
2448 assert_wait((event_t
)&task
->halting
, THREAD_UNINT
);
2450 thread_block(THREAD_CONTINUE_NULL
);
2456 * Give the machine dependent code a chance
2457 * to perform cleanup of task-level resources
2458 * associated with the current thread before
2459 * ripping apart the task.
2461 machine_task_terminate(task
);
2464 * Destroy all synchronizers owned by the task.
2466 task_synchronizer_destroy_all(task
);
2469 * Destroy the contents of the IPC space, leaving just
2470 * a reference for it.
2472 ipc_space_clean(task
->itk_space
);
2475 * Clean out the address space, as we are going to be
2476 * getting a new one.
2478 vm_map_remove(task
->map
, task
->map
->min_offset
,
2479 task
->map
->max_offset
,
2483 * + remove immutable mappings
2485 (VM_MAP_REMOVE_NO_UNNESTING
|
2486 VM_MAP_REMOVE_IMMUTABLE
));
2489 * Kick out any IOKitUser handles to the task. At best they're stale,
2490 * at worst someone is racing a SUID exec.
2492 iokit_task_terminate(task
);
2498 * Suspend execution of the specified task.
2499 * This is a recursive-style suspension of the task, a count of
2500 * suspends is maintained.
2502 * CONDITIONS: the task is locked and active.
2510 assert(task
->active
);
2512 if (task
->suspend_count
++ > 0)
2516 * Iterate through all the threads and hold them.
2518 queue_iterate(&task
->threads
, thread
, thread_t
, task_threads
) {
2519 thread_mtx_lock(thread
);
2520 thread_hold(thread
);
2521 thread_mtx_unlock(thread
);
2528 * Same as the internal routine above, except that it must lock
2529 * and verify that the task is active. This differs from task_suspend
2530 * in that it places a kernel hold on the task rather than just a
2531 * user-level hold. This keeps users from over resuming and setting
2532 * it running out from under the kernel.
2534 * CONDITIONS: the caller holds a reference on the task
2540 if (task
== TASK_NULL
)
2541 return (KERN_INVALID_ARGUMENT
);
2545 if (!task
->active
) {
2548 return (KERN_FAILURE
);
2551 task_hold_locked(task
);
2554 return (KERN_SUCCESS
);
2560 boolean_t until_not_runnable
)
2562 if (task
== TASK_NULL
)
2563 return (KERN_INVALID_ARGUMENT
);
2567 if (!task
->active
) {
2570 return (KERN_FAILURE
);
2573 task_wait_locked(task
, until_not_runnable
);
2576 return (KERN_SUCCESS
);
2582 * Wait for all threads in task to stop.
2585 * Called with task locked, active, and held.
2590 boolean_t until_not_runnable
)
2592 thread_t thread
, self
;
2594 assert(task
->active
);
2595 assert(task
->suspend_count
> 0);
2597 self
= current_thread();
2600 * Iterate through all the threads and wait for them to
2601 * stop. Do not wait for the current thread if it is within
2604 queue_iterate(&task
->threads
, thread
, thread_t
, task_threads
) {
2606 thread_wait(thread
, until_not_runnable
);
2611 * task_release_locked:
2613 * Release a kernel hold on a task.
2615 * CONDITIONS: the task is locked and active
/*
 * Summary of the visible code: drops one kernel hold on the task.  Asserts
 * that the task is active and that suspend_count is positive, pre-decrements
 * suspend_count, then walks task->threads calling thread_release() on each
 * thread while holding that thread mutex.
 *
 * NOTE(review): the statement guarded by the count-still-positive test is
 * not visible in this listing; presumably an early return, so that threads
 * are released only when the count reaches zero - confirm.
 */
2618 task_release_locked(
2623 assert(task
->active
);
2624 assert(task
->suspend_count
> 0);
2626 if (--task
->suspend_count
> 0)
2629 queue_iterate(&task
->threads
, thread
, thread_t
, task_threads
) {
2630 thread_mtx_lock(thread
);
2631 thread_release(thread
);
2632 thread_mtx_unlock(thread
);
2639 * Same as the internal routine above, except that it must lock
2640 * and verify that the task is active.
2642 * CONDITIONS: The caller holds a reference to the task
2648 if (task
== TASK_NULL
)
2649 return (KERN_INVALID_ARGUMENT
);
2653 if (!task
->active
) {
2656 return (KERN_FAILURE
);
2659 task_release_locked(task
);
2662 return (KERN_SUCCESS
);
2668 thread_act_array_t
*threads_out
,
2669 mach_msg_type_number_t
*count
)
2671 mach_msg_type_number_t actual
;
2672 thread_t
*thread_list
;
2674 vm_size_t size
, size_needed
;
2678 if (task
== TASK_NULL
)
2679 return (KERN_INVALID_ARGUMENT
);
2681 size
= 0; addr
= NULL
;
2685 if (!task
->active
) {
2691 return (KERN_FAILURE
);
2694 actual
= task
->thread_count
;
2696 /* do we have the memory we need? */
2697 size_needed
= actual
* sizeof (mach_port_t
);
2698 if (size_needed
<= size
)
2701 /* unlock the task and allocate more memory */
2707 assert(size_needed
> 0);
2710 addr
= kalloc(size
);
2712 return (KERN_RESOURCE_SHORTAGE
);
2715 /* OK, have memory and the task is locked & active */
2716 thread_list
= (thread_t
*)addr
;
2720 for (thread
= (thread_t
)queue_first(&task
->threads
); i
< actual
;
2721 ++i
, thread
= (thread_t
)queue_next(&thread
->task_threads
)) {
2722 thread_reference_internal(thread
);
2723 thread_list
[j
++] = thread
;
2726 assert(queue_end(&task
->threads
, (queue_entry_t
)thread
));
2729 size_needed
= actual
* sizeof (mach_port_t
);
2731 /* can unlock task now that we've got the thread refs */
2735 /* no threads, so return null pointer and deallocate memory */
2737 *threads_out
= NULL
;
2744 /* if we allocated too much, must copy */
2746 if (size_needed
< size
) {
2749 newaddr
= kalloc(size_needed
);
2751 for (i
= 0; i
< actual
; ++i
)
2752 thread_deallocate(thread_list
[i
]);
2754 return (KERN_RESOURCE_SHORTAGE
);
2757 bcopy(addr
, newaddr
, size_needed
);
2759 thread_list
= (thread_t
*)newaddr
;
2762 *threads_out
= thread_list
;
2765 /* do the conversion that Mig should handle */
2767 for (i
= 0; i
< actual
; ++i
)
2768 ((ipc_port_t
*) thread_list
)[i
] = convert_thread_to_port(thread_list
[i
]);
2771 return (KERN_SUCCESS
);
/*
 * Mode selectors passed to place_task_hold() / release_task_hold():
 *   TASK_HOLD_NORMAL      used by task_suspend_internal(),
 *                         task_resume_internal() and the send-once case of
 *                         task_suspension_notify()
 *   TASK_HOLD_PIDSUSPEND  used by the pidsuspend / pidresume paths; release
 *                         in this mode requires task->pidsuspended to be set
 *   TASK_HOLD_LEGACY      legacy suspend / resume; also adjusts
 *                         task->legacy_stop_count
 *   TASK_HOLD_LEGACY_ALL  release-only mode that subtracts every outstanding
 *                         legacy hold from user_stop_count at once
 */
2774 #define TASK_HOLD_NORMAL 0
2775 #define TASK_HOLD_PIDSUSPEND 1
2776 #define TASK_HOLD_LEGACY 2
2777 #define TASK_HOLD_LEGACY_ALL 3
2779 static kern_return_t
2784 if (!task
->active
&& !task_is_a_corpse(task
)) {
2785 return (KERN_FAILURE
);
2788 /* Return success for corpse task */
2789 if (task_is_a_corpse(task
)) {
2790 return KERN_SUCCESS
;
2793 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE
,
2794 MACHDBG_CODE(DBG_MACH_IPC
,MACH_TASK_SUSPEND
) | DBG_FUNC_NONE
,
2795 task_pid(task
), ((thread_t
)queue_first(&task
->threads
))->thread_id
,
2796 task
->user_stop_count
, task
->user_stop_count
+ 1, 0);
2799 current_task()->suspends_outstanding
++;
2802 if (mode
== TASK_HOLD_LEGACY
)
2803 task
->legacy_stop_count
++;
2805 if (task
->user_stop_count
++ > 0) {
2807 * If the stop count was positive, the task is
2808 * already stopped and we can exit.
2810 return (KERN_SUCCESS
);
2814 * Put a kernel-level hold on the threads in the task (all
2815 * user-level task suspensions added together represent a
2816 * single kernel-level hold). We then wait for the threads
2817 * to stop executing user code.
2819 task_hold_locked(task
);
2820 task_wait_locked(task
, FALSE
);
2822 return (KERN_SUCCESS
);
2825 static kern_return_t
2830 boolean_t release
= FALSE
;
2832 if (!task
->active
&& !task_is_a_corpse(task
)) {
2833 return (KERN_FAILURE
);
2836 /* Return success for corpse task */
2837 if (task_is_a_corpse(task
)) {
2838 return KERN_SUCCESS
;
2841 if (mode
== TASK_HOLD_PIDSUSPEND
) {
2842 if (task
->pidsuspended
== FALSE
) {
2843 return (KERN_FAILURE
);
2845 task
->pidsuspended
= FALSE
;
2848 if (task
->user_stop_count
> (task
->pidsuspended
? 1 : 0)) {
2850 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE
,
2851 MACHDBG_CODE(DBG_MACH_IPC
,MACH_TASK_RESUME
) | DBG_FUNC_NONE
,
2852 task_pid(task
), ((thread_t
)queue_first(&task
->threads
))->thread_id
,
2853 task
->user_stop_count
, mode
, task
->legacy_stop_count
);
2857 * This is obviously not robust; if we suspend one task and then resume a different one,
2858 * we'll fly under the radar. This is only meant to catch the common case of a crashed
2859 * or buggy suspender.
2861 current_task()->suspends_outstanding
--;
2864 if (mode
== TASK_HOLD_LEGACY_ALL
) {
2865 if (task
->legacy_stop_count
>= task
->user_stop_count
) {
2866 task
->user_stop_count
= 0;
2869 task
->user_stop_count
-= task
->legacy_stop_count
;
2871 task
->legacy_stop_count
= 0;
2873 if (mode
== TASK_HOLD_LEGACY
&& task
->legacy_stop_count
> 0)
2874 task
->legacy_stop_count
--;
2875 if (--task
->user_stop_count
== 0)
2880 return (KERN_FAILURE
);
2884 * Release the task if necessary.
2887 task_release_locked(task
);
2889 return (KERN_SUCCESS
);
2896 * Implement an (old-fashioned) user-level suspension on a task.
2898 * Because the user isn't expecting to have to manage a suspension
2899 * token, we'll track it for him in the kernel in the form of a naked
2900 * send right to the task's resume port. All such send rights
2901 * account for a single suspension against the task (unlike task_suspend2()
2902 * where each caller gets a unique suspension count represented by a
2903 * unique send-once right).
2906 * The caller holds a reference to the task
2913 mach_port_t port
, send
, old_notify
;
2914 mach_port_name_t name
;
2916 if (task
== TASK_NULL
|| task
== kernel_task
)
2917 return (KERN_INVALID_ARGUMENT
);
2922 * Claim a send right on the task resume port, and request a no-senders
2923 * notification on that port (if none outstanding).
2925 if (task
->itk_resume
== IP_NULL
) {
2926 task
->itk_resume
= ipc_port_alloc_kernel();
2927 if (!IP_VALID(task
->itk_resume
))
2928 panic("failed to create resume port");
2929 ipc_kobject_set(task
->itk_resume
, (ipc_kobject_t
)task
, IKOT_TASK_RESUME
);
2932 port
= task
->itk_resume
;
2934 assert(ip_active(port
));
2936 send
= ipc_port_make_send_locked(port
);
2937 assert(IP_VALID(send
));
2939 if (port
->ip_nsrequest
== IP_NULL
) {
2940 ipc_port_nsrequest(port
, port
->ip_mscount
, ipc_port_make_sonce_locked(port
), &old_notify
);
2941 assert(old_notify
== IP_NULL
);
2948 * place a legacy hold on the task.
2950 kr
= place_task_hold(task
, TASK_HOLD_LEGACY
);
2951 if (kr
!= KERN_SUCCESS
) {
2953 ipc_port_release_send(send
);
2960 * Copyout the send right into the calling task's IPC space. It won't know it is there,
2961 * but we'll look it up when calling a traditional resume. Any IPC operations that
2962 * deallocate the send right will auto-release the suspension.
2964 if ((kr
= ipc_kmsg_copyout_object(current_task()->itk_space
, (ipc_object_t
)send
,
2965 MACH_MSG_TYPE_MOVE_SEND
, &name
)) != KERN_SUCCESS
) {
2966 printf("warning: %s(%d) failed to copyout suspension token for pid %d with error: %d\n",
2967 proc_name_address(current_task()->bsd_info
), proc_pid(current_task()->bsd_info
),
2968 task_pid(task
), kr
);
2977 * Release a user hold on a task.
2980 * The caller holds a reference to the task
2987 mach_port_name_t resume_port_name
;
2988 ipc_entry_t resume_port_entry
;
2989 ipc_space_t space
= current_task()->itk_space
;
2991 if (task
== TASK_NULL
|| task
== kernel_task
)
2992 return (KERN_INVALID_ARGUMENT
);
2994 /* release a legacy task hold */
2996 kr
= release_task_hold(task
, TASK_HOLD_LEGACY
);
2999 is_write_lock(space
);
3000 if (is_active(space
) && IP_VALID(task
->itk_resume
) &&
3001 ipc_hash_lookup(space
, (ipc_object_t
)task
->itk_resume
, &resume_port_name
, &resume_port_entry
) == TRUE
) {
3003 * We found a suspension token in the caller's IPC space. Release a send right to indicate that
3004 * we are holding one less legacy hold on the task from this caller. If the release failed,
3005 * go ahead and drop all the rights, as someone either already released our holds or the task
3008 if (kr
== KERN_SUCCESS
)
3009 ipc_right_dealloc(space
, resume_port_name
, resume_port_entry
);
3011 ipc_right_destroy(space
, resume_port_name
, resume_port_entry
, FALSE
, 0);
3012 /* space unlocked */
3014 is_write_unlock(space
);
3015 if (kr
== KERN_SUCCESS
)
3016 printf("warning: %s(%d) performed out-of-band resume on pid %d\n",
3017 proc_name_address(current_task()->bsd_info
), proc_pid(current_task()->bsd_info
),
3025 * Suspend the target task.
3026 * Making/holding a token/reference/port is the caller's responsibility.
/*
 * task_suspend_internal (summary of the visible code):
 *
 * Rejects TASK_NULL and kernel_task with KERN_INVALID_ARGUMENT; otherwise
 * places a TASK_HOLD_NORMAL hold through place_task_hold() and stores the
 * result in kr.
 * NOTE(review): locking and the final return are not visible in this
 * listing - confirm against the full source.
 */
3029 task_suspend_internal(task_t task
)
3033 if (task
== TASK_NULL
|| task
== kernel_task
)
3034 return (KERN_INVALID_ARGUMENT
);
3037 kr
= place_task_hold(task
, TASK_HOLD_NORMAL
);
3043 * Suspend the target task, and return a suspension token. The token
3044 * represents a reference on the suspended task.
3049 task_suspension_token_t
*suspend_token
)
3053 kr
= task_suspend_internal(task
);
3054 if (kr
!= KERN_SUCCESS
) {
3055 *suspend_token
= TASK_NULL
;
3060 * Take a reference on the target task and return that to the caller
3061 * as a "suspension token," which can be converted into an SO right to
3062 * the now-suspended task's resume port.
3064 task_reference_internal(task
);
3065 *suspend_token
= task
;
3067 return (KERN_SUCCESS
);
3072 * (reference/token/port management is caller's responsibility).
/*
 * task_resume_internal (summary of the visible code):
 *
 * Counterpart of task_suspend_internal().  Rejects TASK_NULL and kernel_task
 * with KERN_INVALID_ARGUMENT; otherwise releases one TASK_HOLD_NORMAL hold
 * through release_task_hold() and stores the result in kr.  The parameter is
 * typed task_suspension_token_t but is used directly as the task.
 * NOTE(review): locking and the final return are not visible in this
 * listing - confirm against the full source.
 */
3075 task_resume_internal(
3076 task_suspension_token_t task
)
3080 if (task
== TASK_NULL
|| task
== kernel_task
)
3081 return (KERN_INVALID_ARGUMENT
);
3084 kr
= release_task_hold(task
, TASK_HOLD_NORMAL
);
3090 * Resume the task using a suspension token. Consumes the token's ref.
3094 task_suspension_token_t task
)
3098 kr
= task_resume_internal(task
);
3099 task_suspension_token_deallocate(task
);
/*
 * task_suspension_notify:
 *
 * Handles a Mach notification addressed to a task resume port.  The port is
 * taken from request_header->msgh_remote_port and converted to a task with
 * convert_port_to_task_suspension_token(); a TASK_NULL or kernel_task result
 * returns TRUE immediately.
 *
 *   MACH_NOTIFY_SEND_ONCE:
 *       releases one TASK_HOLD_NORMAL hold.
 *   MACH_NOTIFY_NO_SENDERS:
 *       if the port make-send count (ip_mscount) still equals the not_count
 *       carried in the message, releases every outstanding legacy hold
 *       (TASK_HOLD_LEGACY_ALL); otherwise, when no no-senders request is
 *       currently armed, re-arms one at the current make-send count.
 *
 * The token reference is dropped with task_suspension_token_deallocate()
 * before leaving.
 *
 * NOTE(review): lock/unlock calls, break statements, any default case and
 * the final return are not visible in this listing - confirm against the
 * full source.
 */
3105 task_suspension_notify(mach_msg_header_t
*request_header
)
3107 ipc_port_t port
= (ipc_port_t
) request_header
->msgh_remote_port
;
3108 task_t task
= convert_port_to_task_suspension_token(port
);
3109 mach_msg_type_number_t not_count
;
3111 if (task
== TASK_NULL
|| task
== kernel_task
)
3112 return TRUE
; /* nothing to do */
3114 switch (request_header
->msgh_id
) {
3116 case MACH_NOTIFY_SEND_ONCE
:
3117 /* release the hold held by this specific send-once right */
3119 release_task_hold(task
, TASK_HOLD_NORMAL
);
3123 case MACH_NOTIFY_NO_SENDERS
:
3124 not_count
= ((mach_no_senders_notification_t
*)request_header
)->not_count
;
3128 if (port
->ip_mscount
== not_count
) {
3130 /* release all the [remaining] outstanding legacy holds */
3131 assert(port
->ip_nsrequest
== IP_NULL
);
3133 release_task_hold(task
, TASK_HOLD_LEGACY_ALL
);
3136 } else if (port
->ip_nsrequest
== IP_NULL
) {
3137 ipc_port_t old_notify
;
3140 /* new send rights, re-arm notification at current make-send count */
3141 ipc_port_nsrequest(port
, port
->ip_mscount
, ipc_port_make_sonce_locked(port
), &old_notify
);
3142 assert(old_notify
== IP_NULL
);
3154 task_suspension_token_deallocate(task
); /* drop token reference */
/*
 * task_pidsuspend_locked:
 *
 * Sets task->pidsuspended to TRUE and places a TASK_HOLD_PIDSUSPEND hold via
 * place_task_hold(); if placing the hold fails, the flag is reset to FALSE.
 * A task whose pidsuspended flag is already set takes the first branch
 * instead.
 *
 * NOTE(review): the body of that first branch and the return statement are
 * not visible in this listing; the _locked suffix suggests the caller holds
 * the task lock - confirm against the full source.
 */
3159 task_pidsuspend_locked(task_t task
)
3163 if (task
->pidsuspended
) {
3168 task
->pidsuspended
= TRUE
;
3170 kr
= place_task_hold(task
, TASK_HOLD_PIDSUSPEND
);
3171 if (kr
!= KERN_SUCCESS
) {
3172 task
->pidsuspended
= FALSE
;
3182 * Suspends a task by placing a hold on its threads.
3185 * The caller holds a reference to the task
3193 if (task
== TASK_NULL
|| task
== kernel_task
)
3194 return (KERN_INVALID_ARGUMENT
);
3198 kr
= task_pidsuspend_locked(task
);
3207 * Resumes a previously suspended task.
3210 * The caller holds a reference to the task
3218 if (task
== TASK_NULL
|| task
== kernel_task
)
3219 return (KERN_INVALID_ARGUMENT
);
3225 while (task
->changing_freeze_state
) {
3227 assert_wait((event_t
)&task
->changing_freeze_state
, THREAD_UNINT
);
3229 thread_block(THREAD_CONTINUE_NULL
);
3233 task
->changing_freeze_state
= TRUE
;
3236 kr
= release_task_hold(task
, TASK_HOLD_PIDSUSPEND
);
3244 if (kr
== KERN_SUCCESS
)
3245 task
->frozen
= FALSE
;
3246 task
->changing_freeze_state
= FALSE
;
3247 thread_wakeup(&task
->changing_freeze_state
);
3256 #if DEVELOPMENT || DEBUG
3258 extern void IOSleep(int);
/*
 * task_disconnect_page_mappings:
 *
 * Working-set sampling aid (see the in-body description).  Rejects TASK_NULL
 * and kernel_task with KERN_INVALID_ARGUMENT.  Otherwise it makes up to 100
 * passes; each pass
 *   - scans task->threads for a thread whose state has TH_RUN set,
 *   - increments task->task_disconnected_count,
 *   - sets task->task_unnested to TRUE the first time a runnable thread is
 *     seen while the flag is still FALSE,
 *   - calls vm_map_disconnect_page_mappings(task->map, do_unnest), bracketed
 *     by DBG_FUNC_START / DBG_FUNC_END kdebug trace points.
 * Returns KERN_SUCCESS at the end.
 *
 * NOTE(review): several statements are not visible in this listing: the
 * declarations and assignments of runnable, do_unnest and page_count, the
 * statement taken when no thread is runnable (presumably leaving the loop),
 * and any lock or delay calls - confirm against the full source.
 */
3261 task_disconnect_page_mappings(task_t task
)
3265 if (task
== TASK_NULL
|| task
== kernel_task
)
3266 return (KERN_INVALID_ARGUMENT
);
3269 * this function is used to strip all of the mappings from
3270 * the pmap for the specified task to force the task to
3271 * re-fault all of the pages it is actively using... this
3272 * allows us to approximate the true working set of the
3273 * specified task. We only engage if at least 1 of the
3274 * threads in the task is runnable, but we want to continuously
3275 * sweep (at least for a while - I've arbitrarily set the limit at
3276 * 100 sweeps to be re-looked at as we gain experience) to get a better
3277 * view into what areas within a page are being visited (as opposed to only
3278 * seeing the first fault of a page after the task becomes
3279 * runnable)... in the future I may
3280 * try to block until awakened by a thread in this task
3281 * being made runnable, but for now we'll periodically poll from the
3282 * user level debug tool driving the sysctl
3284 for (n
= 0; n
< 100; n
++) {
3287 boolean_t do_unnest
;
3295 queue_iterate(&task
->threads
, thread
, thread_t
, task_threads
) {
3297 if (thread
->state
& TH_RUN
) {
3303 task
->task_disconnected_count
++;
3305 if (task
->task_unnested
== FALSE
) {
3306 if (runnable
== TRUE
) {
3307 task
->task_unnested
= TRUE
;
3313 if (runnable
== FALSE
)
3316 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE
, (MACHDBG_CODE(DBG_MACH_WORKINGSET
, VM_DISCONNECT_TASK_PAGE_MAPPINGS
)) | DBG_FUNC_START
,
3317 task
, do_unnest
, task
->task_disconnected_count
, 0, 0);
3319 page_count
= vm_map_disconnect_page_mappings(task
->map
, do_unnest
);
3321 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE
, (MACHDBG_CODE(DBG_MACH_WORKINGSET
, VM_DISCONNECT_TASK_PAGE_MAPPINGS
)) | DBG_FUNC_END
,
3322 task
, page_count
, 0, 0, 0);
3327 return (KERN_SUCCESS
);
3341 * The caller holds a reference to the task
3343 extern void vm_wake_compactor_swapper(void);
3344 extern queue_head_t c_swapout_list_head
;
3349 uint32_t *purgeable_count
,
3350 uint32_t *wired_count
,
3351 uint32_t *clean_count
,
3352 uint32_t *dirty_count
,
3353 uint32_t dirty_budget
,
3355 boolean_t walk_only
)
3357 kern_return_t kr
= KERN_SUCCESS
;
3359 if (task
== TASK_NULL
|| task
== kernel_task
)
3360 return (KERN_INVALID_ARGUMENT
);
3364 while (task
->changing_freeze_state
) {
3366 assert_wait((event_t
)&task
->changing_freeze_state
, THREAD_UNINT
);
3368 thread_block(THREAD_CONTINUE_NULL
);
3374 return (KERN_FAILURE
);
3376 task
->changing_freeze_state
= TRUE
;
3381 panic("task_freeze - walk_only == TRUE");
3383 kr
= vm_map_freeze(task
->map
, purgeable_count
, wired_count
, clean_count
, dirty_count
, dirty_budget
, shared
);
3388 if (walk_only
== FALSE
&& kr
== KERN_SUCCESS
)
3389 task
->frozen
= TRUE
;
3390 task
->changing_freeze_state
= FALSE
;
3391 thread_wakeup(&task
->changing_freeze_state
);
3395 if (VM_CONFIG_COMPRESSOR_IS_PRESENT
) {
3396 vm_wake_compactor_swapper();
3398 * We do an explicit wakeup of the swapout thread here
3399 * because the compact_and_swap routines don't have
3400 * knowledge about these kinds of "per-task packed c_segs"
3401 * and so will not be evaluating whether we need to do
3404 thread_wakeup((event_t
)&c_swapout_list_head
);
3413 * Thaw a currently frozen task.
3416 * The caller holds a reference to the task
3422 if (task
== TASK_NULL
|| task
== kernel_task
)
3423 return (KERN_INVALID_ARGUMENT
);
3427 while (task
->changing_freeze_state
) {
3429 assert_wait((event_t
)&task
->changing_freeze_state
, THREAD_UNINT
);
3431 thread_block(THREAD_CONTINUE_NULL
);
3435 if (!task
->frozen
) {
3437 return (KERN_FAILURE
);
3439 task
->frozen
= FALSE
;
3443 return (KERN_SUCCESS
);
3446 #endif /* CONFIG_FREEZE */
/*
 * host_security_set_task_token:
 *
 * Installs sec_token and audit_token on the task, then sets the task
 * TASK_HOST_PORT special port: the host-priv port when host_priv is not
 * HOST_PRIV_NULL, otherwise the plain host port looked up through
 * host_priv_self().
 *
 * Returns KERN_INVALID_ARGUMENT for TASK_NULL and KERN_INVALID_SECURITY when
 * host_security is HOST_NULL.  The port lookup is asserted to succeed; kr
 * last receives the result of task_set_special_port().
 *
 * NOTE(review): the task parameter line, the locking around the token
 * stores and the return statement are not visible in this listing - confirm
 * against the full source.
 */
3449 host_security_set_task_token(
3450 host_security_t host_security
,
3452 security_token_t sec_token
,
3453 audit_token_t audit_token
,
3454 host_priv_t host_priv
)
3456 ipc_port_t host_port
;
3459 if (task
== TASK_NULL
)
3460 return(KERN_INVALID_ARGUMENT
);
3462 if (host_security
== HOST_NULL
)
3463 return(KERN_INVALID_SECURITY
);
3466 task
->sec_token
= sec_token
;
3467 task
->audit_token
= audit_token
;
3471 if (host_priv
!= HOST_PRIV_NULL
) {
3472 kr
= host_get_host_priv_port(host_priv
, &host_port
);
3474 kr
= host_get_host_port(host_priv_self(), &host_port
);
3476 assert(kr
== KERN_SUCCESS
);
3477 kr
= task_set_special_port(task
, TASK_HOST_PORT
, host_port
);
/*
 * task_send_trace_memory:
 *
 * Returns KERN_INVALID_ARGUMENT for a TASK_NULL target; otherwise kr (which
 * starts as KERN_INVALID_ARGUMENT) receives the result of
 * atm_send_proc_inspect_notification() for target_task.  The pid and
 * uniqueid parameters are marked __unused.
 *
 * NOTE(review): the target_task parameter line, the remaining arguments of
 * that call, any conditional compilation around it and the return statement
 * are not visible in this listing - confirm against the full source.
 */
3482 task_send_trace_memory(
3484 __unused
uint32_t pid
,
3485 __unused
uint64_t uniqueid
)
3487 kern_return_t kr
= KERN_INVALID_ARGUMENT
;
3488 if (target_task
== TASK_NULL
)
3489 return (KERN_INVALID_ARGUMENT
);
3492 kr
= atm_send_proc_inspect_notification(target_task
,
3500 * This routine was added, pretty much exclusively, for registering the
3501 * RPC glue vector for in-kernel short circuited tasks. Rather than
3502 * removing it completely, I have only disabled that feature (which was
3503 * the only feature at the time). It just appears that we are going to
3504 * want to add some user data to tasks in the future (i.e. bsd info,
3505 * task names, etc...), so I left it in the formal task interface.
3510 task_flavor_t flavor
,
3511 __unused task_info_t task_info_in
, /* pointer to IN array */
3512 __unused mach_msg_type_number_t task_info_count
)
3514 if (task
== TASK_NULL
)
3515 return(KERN_INVALID_ARGUMENT
);
3520 case TASK_TRACE_MEMORY_INFO
:
3522 if (task_info_count
!= TASK_TRACE_MEMORY_INFO_COUNT
)
3523 return (KERN_INVALID_ARGUMENT
);
3525 assert(task_info_in
!= NULL
);
3526 task_trace_memory_info_t mem_info
;
3527 mem_info
= (task_trace_memory_info_t
) task_info_in
;
3528 kern_return_t kr
= atm_register_trace_memory(task
,
3529 mem_info
->user_memory_address
,
3530 mem_info
->buffer_size
);
3536 return (KERN_INVALID_ARGUMENT
);
3538 return (KERN_SUCCESS
);
/*
 * File-scope int initialized to 1.
 * NOTE(review): nothing in the visible part of this listing reads it;
 * presumably a behaviour switch tied to the radar number in its name -
 * confirm its consumers before changing the value.
 */
3541 int radar_20146450
= 1;
3545 task_flavor_t flavor
,
3546 task_info_t task_info_out
,
3547 mach_msg_type_number_t
*task_info_count
)
3549 kern_return_t error
= KERN_SUCCESS
;
3550 mach_msg_type_number_t original_task_info_count
;
3552 if (task
== TASK_NULL
)
3553 return (KERN_INVALID_ARGUMENT
);
3555 original_task_info_count
= *task_info_count
;
3558 if ((task
!= current_task()) && (!task
->active
)) {
3560 return (KERN_INVALID_ARGUMENT
);
3565 case TASK_BASIC_INFO_32
:
3566 case TASK_BASIC2_INFO_32
:
3567 #if defined(__arm__) || defined(__arm64__)
3568 case TASK_BASIC_INFO_64
:
3571 task_basic_info_32_t basic_info
;
3576 if (*task_info_count
< TASK_BASIC_INFO_32_COUNT
) {
3577 error
= KERN_INVALID_ARGUMENT
;
3581 basic_info
= (task_basic_info_32_t
)task_info_out
;
3583 map
= (task
== kernel_task
)? kernel_map
: task
->map
;
3584 basic_info
->virtual_size
= (typeof(basic_info
->virtual_size
))map
->size
;
3585 if (flavor
== TASK_BASIC2_INFO_32
) {
3587 * The "BASIC2" flavor gets the maximum resident
3588 * size instead of the current resident size...
3590 basic_info
->resident_size
= pmap_resident_max(map
->pmap
);
3592 basic_info
->resident_size
= pmap_resident_count(map
->pmap
);
3594 basic_info
->resident_size
*= PAGE_SIZE
;
3596 basic_info
->policy
= ((task
!= kernel_task
)?
3597 POLICY_TIMESHARE
: POLICY_RR
);
3598 basic_info
->suspend_count
= task
->user_stop_count
;
3600 absolutetime_to_microtime(task
->total_user_time
, &secs
, &usecs
);
3601 basic_info
->user_time
.seconds
=
3602 (typeof(basic_info
->user_time
.seconds
))secs
;
3603 basic_info
->user_time
.microseconds
= usecs
;
3605 absolutetime_to_microtime(task
->total_system_time
, &secs
, &usecs
);
3606 basic_info
->system_time
.seconds
=
3607 (typeof(basic_info
->system_time
.seconds
))secs
;
3608 basic_info
->system_time
.microseconds
= usecs
;
3610 *task_info_count
= TASK_BASIC_INFO_32_COUNT
;
3614 #if defined(__arm__) || defined(__arm64__)
3615 case TASK_BASIC_INFO_64_2
:
3617 task_basic_info_64_2_t basic_info
;
3622 if (*task_info_count
< TASK_BASIC_INFO_64_2_COUNT
) {
3623 error
= KERN_INVALID_ARGUMENT
;
3627 basic_info
= (task_basic_info_64_2_t
)task_info_out
;
3629 map
= (task
== kernel_task
)? kernel_map
: task
->map
;
3630 basic_info
->virtual_size
= map
->size
;
3631 basic_info
->resident_size
=
3632 (mach_vm_size_t
)(pmap_resident_count(map
->pmap
))
3635 basic_info
->policy
= ((task
!= kernel_task
)?
3636 POLICY_TIMESHARE
: POLICY_RR
);
3637 basic_info
->suspend_count
= task
->user_stop_count
;
3639 absolutetime_to_microtime(task
->total_user_time
, &secs
, &usecs
);
3640 basic_info
->user_time
.seconds
=
3641 (typeof(basic_info
->user_time
.seconds
))secs
;
3642 basic_info
->user_time
.microseconds
= usecs
;
3644 absolutetime_to_microtime(task
->total_system_time
, &secs
, &usecs
);
3645 basic_info
->system_time
.seconds
=
3646 (typeof(basic_info
->system_time
.seconds
))secs
;
3647 basic_info
->system_time
.microseconds
= usecs
;
3649 *task_info_count
= TASK_BASIC_INFO_64_2_COUNT
;
3653 #else /* defined(__arm__) || defined(__arm64__) */
3654 case TASK_BASIC_INFO_64
:
3656 task_basic_info_64_t basic_info
;
3661 if (*task_info_count
< TASK_BASIC_INFO_64_COUNT
) {
3662 error
= KERN_INVALID_ARGUMENT
;
3666 basic_info
= (task_basic_info_64_t
)task_info_out
;
3668 map
= (task
== kernel_task
)? kernel_map
: task
->map
;
3669 basic_info
->virtual_size
= map
->size
;
3670 basic_info
->resident_size
=
3671 (mach_vm_size_t
)(pmap_resident_count(map
->pmap
))
3674 basic_info
->policy
= ((task
!= kernel_task
)?
3675 POLICY_TIMESHARE
: POLICY_RR
);
3676 basic_info
->suspend_count
= task
->user_stop_count
;
3678 absolutetime_to_microtime(task
->total_user_time
, &secs
, &usecs
);
3679 basic_info
->user_time
.seconds
=
3680 (typeof(basic_info
->user_time
.seconds
))secs
;
3681 basic_info
->user_time
.microseconds
= usecs
;
3683 absolutetime_to_microtime(task
->total_system_time
, &secs
, &usecs
);
3684 basic_info
->system_time
.seconds
=
3685 (typeof(basic_info
->system_time
.seconds
))secs
;
3686 basic_info
->system_time
.microseconds
= usecs
;
3688 *task_info_count
= TASK_BASIC_INFO_64_COUNT
;
3691 #endif /* defined(__arm__) || defined(__arm64__) */
3693 case MACH_TASK_BASIC_INFO
:
3695 mach_task_basic_info_t basic_info
;
3700 if (*task_info_count
< MACH_TASK_BASIC_INFO_COUNT
) {
3701 error
= KERN_INVALID_ARGUMENT
;
3705 basic_info
= (mach_task_basic_info_t
)task_info_out
;
3707 map
= (task
== kernel_task
) ? kernel_map
: task
->map
;
3709 basic_info
->virtual_size
= map
->size
;
3711 basic_info
->resident_size
=
3712 (mach_vm_size_t
)(pmap_resident_count(map
->pmap
));
3713 basic_info
->resident_size
*= PAGE_SIZE_64
;
3715 basic_info
->resident_size_max
=
3716 (mach_vm_size_t
)(pmap_resident_max(map
->pmap
));
3717 basic_info
->resident_size_max
*= PAGE_SIZE_64
;
3719 basic_info
->policy
= ((task
!= kernel_task
) ?
3720 POLICY_TIMESHARE
: POLICY_RR
);
3722 basic_info
->suspend_count
= task
->user_stop_count
;
3724 absolutetime_to_microtime(task
->total_user_time
, &secs
, &usecs
);
3725 basic_info
->user_time
.seconds
=
3726 (typeof(basic_info
->user_time
.seconds
))secs
;
3727 basic_info
->user_time
.microseconds
= usecs
;
3729 absolutetime_to_microtime(task
->total_system_time
, &secs
, &usecs
);
3730 basic_info
->system_time
.seconds
=
3731 (typeof(basic_info
->system_time
.seconds
))secs
;
3732 basic_info
->system_time
.microseconds
= usecs
;
3734 *task_info_count
= MACH_TASK_BASIC_INFO_COUNT
;
3738 case TASK_THREAD_TIMES_INFO
:
3740 task_thread_times_info_t times_info
;
3743 if (*task_info_count
< TASK_THREAD_TIMES_INFO_COUNT
) {
3744 error
= KERN_INVALID_ARGUMENT
;
3748 times_info
= (task_thread_times_info_t
) task_info_out
;
3749 times_info
->user_time
.seconds
= 0;
3750 times_info
->user_time
.microseconds
= 0;
3751 times_info
->system_time
.seconds
= 0;
3752 times_info
->system_time
.microseconds
= 0;
3755 queue_iterate(&task
->threads
, thread
, thread_t
, task_threads
) {
3756 time_value_t user_time
, system_time
;
3758 if (thread
->options
& TH_OPT_IDLE_THREAD
)
3761 thread_read_times(thread
, &user_time
, &system_time
);
3763 time_value_add(&times_info
->user_time
, &user_time
);
3764 time_value_add(&times_info
->system_time
, &system_time
);
3767 *task_info_count
= TASK_THREAD_TIMES_INFO_COUNT
;
3771 case TASK_ABSOLUTETIME_INFO
:
3773 task_absolutetime_info_t info
;
3776 if (*task_info_count
< TASK_ABSOLUTETIME_INFO_COUNT
) {
3777 error
= KERN_INVALID_ARGUMENT
;
3781 info
= (task_absolutetime_info_t
)task_info_out
;
3782 info
->threads_user
= info
->threads_system
= 0;
3785 info
->total_user
= task
->total_user_time
;
3786 info
->total_system
= task
->total_system_time
;
3788 queue_iterate(&task
->threads
, thread
, thread_t
, task_threads
) {
3792 if (thread
->options
& TH_OPT_IDLE_THREAD
)
3796 thread_lock(thread
);
3798 tval
= timer_grab(&thread
->user_timer
);
3799 info
->threads_user
+= tval
;
3800 info
->total_user
+= tval
;
3802 tval
= timer_grab(&thread
->system_timer
);
3803 if (thread
->precise_user_kernel_time
) {
3804 info
->threads_system
+= tval
;
3805 info
->total_system
+= tval
;
3807 /* system_timer may represent either sys or user */
3808 info
->threads_user
+= tval
;
3809 info
->total_user
+= tval
;
3812 thread_unlock(thread
);
3817 *task_info_count
= TASK_ABSOLUTETIME_INFO_COUNT
;
3821 case TASK_DYLD_INFO
:
3823 task_dyld_info_t info
;
3826 * We added the format field to TASK_DYLD_INFO output. For
3827 * temporary backward compatibility, accept the fact that
3828 * clients may ask for the old version - distinguished by the
3829 * size of the expected result structure.
3831 #define TASK_LEGACY_DYLD_INFO_COUNT \
3832 offsetof(struct task_dyld_info, all_image_info_format)/sizeof(natural_t)
3834 if (*task_info_count
< TASK_LEGACY_DYLD_INFO_COUNT
) {
3835 error
= KERN_INVALID_ARGUMENT
;
3839 info
= (task_dyld_info_t
)task_info_out
;
3840 info
->all_image_info_addr
= task
->all_image_info_addr
;
3841 info
->all_image_info_size
= task
->all_image_info_size
;
3843 /* only set format on output for those expecting it */
3844 if (*task_info_count
>= TASK_DYLD_INFO_COUNT
) {
3845 info
->all_image_info_format
= task_has_64BitAddr(task
) ?
3846 TASK_DYLD_ALL_IMAGE_INFO_64
:
3847 TASK_DYLD_ALL_IMAGE_INFO_32
;
3848 *task_info_count
= TASK_DYLD_INFO_COUNT
;
3850 *task_info_count
= TASK_LEGACY_DYLD_INFO_COUNT
;
3855 case TASK_EXTMOD_INFO
:
3857 task_extmod_info_t info
;
3860 if (*task_info_count
< TASK_EXTMOD_INFO_COUNT
) {
3861 error
= KERN_INVALID_ARGUMENT
;
3865 info
= (task_extmod_info_t
)task_info_out
;
3867 p
= get_bsdtask_info(task
);
3869 proc_getexecutableuuid(p
, info
->task_uuid
, sizeof(info
->task_uuid
));
3871 bzero(info
->task_uuid
, sizeof(info
->task_uuid
));
3873 info
->extmod_statistics
= task
->extmod_statistics
;
3874 *task_info_count
= TASK_EXTMOD_INFO_COUNT
;
3879 case TASK_KERNELMEMORY_INFO
:
3881 task_kernelmemory_info_t tkm_info
;
3882 ledger_amount_t credit
, debit
;
3884 if (*task_info_count
< TASK_KERNELMEMORY_INFO_COUNT
) {
3885 error
= KERN_INVALID_ARGUMENT
;
3889 tkm_info
= (task_kernelmemory_info_t
) task_info_out
;
3890 tkm_info
->total_palloc
= 0;
3891 tkm_info
->total_pfree
= 0;
3892 tkm_info
->total_salloc
= 0;
3893 tkm_info
->total_sfree
= 0;
3895 if (task
== kernel_task
) {
3897 * All shared allocs/frees from other tasks count against
3898 * the kernel private memory usage. If we are looking up
3899 * info for the kernel task, gather from everywhere.
3903 /* start by accounting for all the terminated tasks against the kernel */
3904 tkm_info
->total_palloc
= tasks_tkm_private
.alloc
+ tasks_tkm_shared
.alloc
;
3905 tkm_info
->total_pfree
= tasks_tkm_private
.free
+ tasks_tkm_shared
.free
;
3907 /* count all other task/thread shared alloc/free against the kernel */
3908 lck_mtx_lock(&tasks_threads_lock
);
3910 /* XXX this really shouldn't be using the function parameter 'task' as a local var! */
3911 queue_iterate(&tasks
, task
, task_t
, tasks
) {
3912 if (task
== kernel_task
) {
3913 if (ledger_get_entries(task
->ledger
,
3914 task_ledgers
.tkm_private
, &credit
,
3915 &debit
) == KERN_SUCCESS
) {
3916 tkm_info
->total_palloc
+= credit
;
3917 tkm_info
->total_pfree
+= debit
;
3920 if (!ledger_get_entries(task
->ledger
,
3921 task_ledgers
.tkm_shared
, &credit
, &debit
)) {
3922 tkm_info
->total_palloc
+= credit
;
3923 tkm_info
->total_pfree
+= debit
;
3926 lck_mtx_unlock(&tasks_threads_lock
);
3928 if (!ledger_get_entries(task
->ledger
,
3929 task_ledgers
.tkm_private
, &credit
, &debit
)) {
3930 tkm_info
->total_palloc
= credit
;
3931 tkm_info
->total_pfree
= debit
;
3933 if (!ledger_get_entries(task
->ledger
,
3934 task_ledgers
.tkm_shared
, &credit
, &debit
)) {
3935 tkm_info
->total_salloc
= credit
;
3936 tkm_info
->total_sfree
= debit
;
3941 *task_info_count
= TASK_KERNELMEMORY_INFO_COUNT
;
3942 return KERN_SUCCESS
;
3946 case TASK_SCHED_FIFO_INFO
:
3949 if (*task_info_count
< POLICY_FIFO_BASE_COUNT
) {
3950 error
= KERN_INVALID_ARGUMENT
;
3954 error
= KERN_INVALID_POLICY
;
3959 case TASK_SCHED_RR_INFO
:
3961 policy_rr_base_t rr_base
;
3962 uint32_t quantum_time
;
3963 uint64_t quantum_ns
;
3965 if (*task_info_count
< POLICY_RR_BASE_COUNT
) {
3966 error
= KERN_INVALID_ARGUMENT
;
3970 rr_base
= (policy_rr_base_t
) task_info_out
;
3972 if (task
!= kernel_task
) {
3973 error
= KERN_INVALID_POLICY
;
3977 rr_base
->base_priority
= task
->priority
;
3979 quantum_time
= SCHED(initial_quantum_size
)(THREAD_NULL
);
3980 absolutetime_to_nanoseconds(quantum_time
, &quantum_ns
);
3982 rr_base
->quantum
= (uint32_t)(quantum_ns
/ 1000 / 1000);
3984 *task_info_count
= POLICY_RR_BASE_COUNT
;
3989 case TASK_SCHED_TIMESHARE_INFO
:
3991 policy_timeshare_base_t ts_base
;
3993 if (*task_info_count
< POLICY_TIMESHARE_BASE_COUNT
) {
3994 error
= KERN_INVALID_ARGUMENT
;
3998 ts_base
= (policy_timeshare_base_t
) task_info_out
;
4000 if (task
== kernel_task
) {
4001 error
= KERN_INVALID_POLICY
;
4005 ts_base
->base_priority
= task
->priority
;
4007 *task_info_count
= POLICY_TIMESHARE_BASE_COUNT
;
4011 case TASK_SECURITY_TOKEN
:
4013 security_token_t
*sec_token_p
;
4015 if (*task_info_count
< TASK_SECURITY_TOKEN_COUNT
) {
4016 error
= KERN_INVALID_ARGUMENT
;
4020 sec_token_p
= (security_token_t
*) task_info_out
;
4022 *sec_token_p
= task
->sec_token
;
4024 *task_info_count
= TASK_SECURITY_TOKEN_COUNT
;
4028 case TASK_AUDIT_TOKEN
:
4030 audit_token_t
*audit_token_p
;
4032 if (*task_info_count
< TASK_AUDIT_TOKEN_COUNT
) {
4033 error
= KERN_INVALID_ARGUMENT
;
4037 audit_token_p
= (audit_token_t
*) task_info_out
;
4039 *audit_token_p
= task
->audit_token
;
4041 *task_info_count
= TASK_AUDIT_TOKEN_COUNT
;
4045 case TASK_SCHED_INFO
:
4046 error
= KERN_INVALID_ARGUMENT
;
4049 case TASK_EVENTS_INFO
:
4051 task_events_info_t events_info
;
4054 if (*task_info_count
< TASK_EVENTS_INFO_COUNT
) {
4055 error
= KERN_INVALID_ARGUMENT
;
4059 events_info
= (task_events_info_t
) task_info_out
;
4062 events_info
->faults
= task
->faults
;
4063 events_info
->pageins
= task
->pageins
;
4064 events_info
->cow_faults
= task
->cow_faults
;
4065 events_info
->messages_sent
= task
->messages_sent
;
4066 events_info
->messages_received
= task
->messages_received
;
4067 events_info
->syscalls_mach
= task
->syscalls_mach
;
4068 events_info
->syscalls_unix
= task
->syscalls_unix
;
4070 events_info
->csw
= task
->c_switch
;
4072 queue_iterate(&task
->threads
, thread
, thread_t
, task_threads
) {
4073 events_info
->csw
+= thread
->c_switch
;
4074 events_info
->syscalls_mach
+= thread
->syscalls_mach
;
4075 events_info
->syscalls_unix
+= thread
->syscalls_unix
;
4079 *task_info_count
= TASK_EVENTS_INFO_COUNT
;
4082 case TASK_AFFINITY_TAG_INFO
:
4084 if (*task_info_count
< TASK_AFFINITY_TAG_INFO_COUNT
) {
4085 error
= KERN_INVALID_ARGUMENT
;
4089 error
= task_affinity_info(task
, task_info_out
, task_info_count
);
4092 case TASK_POWER_INFO
:
4094 if (*task_info_count
< TASK_POWER_INFO_COUNT
) {
4095 error
= KERN_INVALID_ARGUMENT
;
4099 task_power_info_locked(task
, (task_power_info_t
)task_info_out
, NULL
, NULL
);
4103 case TASK_POWER_INFO_V2
:
4105 if (*task_info_count
< TASK_POWER_INFO_V2_COUNT_OLD
) {
4106 error
= KERN_INVALID_ARGUMENT
;
4109 task_power_info_v2_t tpiv2
= (task_power_info_v2_t
) task_info_out
;
4110 task_power_info_locked(task
, &tpiv2
->cpu_energy
, &tpiv2
->gpu_energy
, tpiv2
);
4115 case TASK_VM_INFO_PURGEABLE
:
4117 task_vm_info_t vm_info
;
4120 if (*task_info_count
< TASK_VM_INFO_REV0_COUNT
) {
4121 error
= KERN_INVALID_ARGUMENT
;
4125 vm_info
= (task_vm_info_t
)task_info_out
;
4127 if (task
== kernel_task
) {
4132 vm_map_lock_read(map
);
4135 vm_info
->virtual_size
= (typeof(vm_info
->virtual_size
))map
->size
;
4136 vm_info
->region_count
= map
->hdr
.nentries
;
4137 vm_info
->page_size
= vm_map_page_size(map
);
4139 vm_info
->resident_size
= pmap_resident_count(map
->pmap
);
4140 vm_info
->resident_size
*= PAGE_SIZE
;
4141 vm_info
->resident_size_peak
= pmap_resident_max(map
->pmap
);
4142 vm_info
->resident_size_peak
*= PAGE_SIZE
;
4144 #define _VM_INFO(_name) \
4145 vm_info->_name = ((mach_vm_size_t) map->pmap->stats._name) * PAGE_SIZE
4148 _VM_INFO(device_peak
);
4150 _VM_INFO(external_peak
);
4152 _VM_INFO(internal_peak
);
4154 _VM_INFO(reusable_peak
);
4155 _VM_INFO(compressed
);
4156 _VM_INFO(compressed_peak
);
4157 _VM_INFO(compressed_lifetime
);
4159 vm_info
->purgeable_volatile_pmap
= 0;
4160 vm_info
->purgeable_volatile_resident
= 0;
4161 vm_info
->purgeable_volatile_virtual
= 0;
4162 if (task
== kernel_task
) {
4164 * We do not maintain the detailed stats for the
4165 * kernel_pmap, so just count everything as
4168 vm_info
->internal
= vm_info
->resident_size
;
4170 * ... but since the memory held by the VM compressor
4171 * in the kernel address space ought to be attributed
4172 * to user-space tasks, we subtract it from "internal"
4173 * to give memory reporting tools a more accurate idea
4174 * of what the kernel itself is actually using, instead
4175 * of making it look like the kernel is leaking memory
4176 * when the system is under memory pressure.
4178 vm_info
->internal
-= (VM_PAGE_COMPRESSOR_COUNT
*
4181 mach_vm_size_t volatile_virtual_size
;
4182 mach_vm_size_t volatile_resident_size
;
4183 mach_vm_size_t volatile_compressed_size
;
4184 mach_vm_size_t volatile_pmap_size
;
4185 mach_vm_size_t volatile_compressed_pmap_size
;
4188 if (flavor
== TASK_VM_INFO_PURGEABLE
) {
4189 kr
= vm_map_query_volatile(
4191 &volatile_virtual_size
,
4192 &volatile_resident_size
,
4193 &volatile_compressed_size
,
4194 &volatile_pmap_size
,
4195 &volatile_compressed_pmap_size
);
4196 if (kr
== KERN_SUCCESS
) {
4197 vm_info
->purgeable_volatile_pmap
=
4199 if (radar_20146450
) {
4200 vm_info
->compressed
-=
4201 volatile_compressed_pmap_size
;
4203 vm_info
->purgeable_volatile_resident
=
4204 volatile_resident_size
;
4205 vm_info
->purgeable_volatile_virtual
=
4206 volatile_virtual_size
;
4210 *task_info_count
= TASK_VM_INFO_REV0_COUNT
;
4212 if (original_task_info_count
>= TASK_VM_INFO_REV1_COUNT
) {
4213 vm_info
->phys_footprint
=
4214 (mach_vm_size_t
) get_task_phys_footprint(task
);
4215 *task_info_count
= TASK_VM_INFO_REV1_COUNT
;
4217 if (original_task_info_count
>= TASK_VM_INFO_REV2_COUNT
) {
4218 vm_info
->min_address
= map
->min_offset
;
4219 vm_info
->max_address
= map
->max_offset
;
4220 *task_info_count
= TASK_VM_INFO_REV2_COUNT
;
4223 if (task
!= kernel_task
) {
4224 vm_map_unlock_read(map
);
4230 case TASK_WAIT_STATE_INFO
:
4233 * Deprecated flavor. Currently allowing some results until all users
4234 * stop calling it. The results may not be accurate.
4236 task_wait_state_info_t wait_state_info
;
4237 uint64_t total_sfi_ledger_val
= 0;
4239 if (*task_info_count
< TASK_WAIT_STATE_INFO_COUNT
) {
4240 error
= KERN_INVALID_ARGUMENT
;
4244 wait_state_info
= (task_wait_state_info_t
) task_info_out
;
4246 wait_state_info
->total_wait_state_time
= 0;
4247 bzero(wait_state_info
->_reserved
, sizeof(wait_state_info
->_reserved
));
4249 #if CONFIG_SCHED_SFI
4250 int i
, prev_lentry
= -1;
4251 int64_t val_credit
, val_debit
;
4253 for (i
= 0; i
< MAX_SFI_CLASS_ID
; i
++){
4256 * checking with prev_lentry != entry ensures adjacent classes
4257 * which share the same ledger do not add wait times twice.
4258 * Note: Use ledger() call to get data for each individual sfi class.
4260 if (prev_lentry
!= task_ledgers
.sfi_wait_times
[i
] &&
4261 KERN_SUCCESS
== ledger_get_entries(task
->ledger
,
4262 task_ledgers
.sfi_wait_times
[i
], &val_credit
, &val_debit
)) {
4263 total_sfi_ledger_val
+= val_credit
;
4265 prev_lentry
= task_ledgers
.sfi_wait_times
[i
];
4268 #endif /* CONFIG_SCHED_SFI */
4269 wait_state_info
->total_wait_sfi_state_time
= total_sfi_ledger_val
;
4270 *task_info_count
= TASK_WAIT_STATE_INFO_COUNT
;
4274 case TASK_VM_INFO_PURGEABLE_ACCOUNT
:
4276 #if DEVELOPMENT || DEBUG
4277 pvm_account_info_t acnt_info
;
4279 if (*task_info_count
< PVM_ACCOUNT_INFO_COUNT
) {
4280 error
= KERN_INVALID_ARGUMENT
;
4284 if (task_info_out
== NULL
) {
4285 error
= KERN_INVALID_ARGUMENT
;
4289 acnt_info
= (pvm_account_info_t
) task_info_out
;
4291 error
= vm_purgeable_account(task
, acnt_info
);
4293 *task_info_count
= PVM_ACCOUNT_INFO_COUNT
;
4296 #else /* DEVELOPMENT || DEBUG */
4297 error
= KERN_NOT_SUPPORTED
;
4299 #endif /* DEVELOPMENT || DEBUG */
4301 case TASK_FLAGS_INFO
:
4303 task_flags_info_t flags_info
;
4305 if (*task_info_count
< TASK_FLAGS_INFO_COUNT
) {
4306 error
= KERN_INVALID_ARGUMENT
;
4310 flags_info
= (task_flags_info_t
)task_info_out
;
4312 /* only publish the 64-bit flag of the task */
4313 flags_info
->flags
= task
->t_flags
& TF_64B_ADDR
;
4315 *task_info_count
= TASK_FLAGS_INFO_COUNT
;
4319 case TASK_DEBUG_INFO_INTERNAL
:
4321 #if DEVELOPMENT || DEBUG
4322 task_debug_info_internal_t dbg_info
;
4323 if (*task_info_count
< TASK_DEBUG_INFO_INTERNAL_COUNT
) {
4324 error
= KERN_NOT_SUPPORTED
;
4328 if (task_info_out
== NULL
) {
4329 error
= KERN_INVALID_ARGUMENT
;
4332 dbg_info
= (task_debug_info_internal_t
) task_info_out
;
4333 dbg_info
->ipc_space_size
= 0;
4334 if (task
->itk_space
){
4335 dbg_info
->ipc_space_size
= task
->itk_space
->is_table_size
;
4338 error
= KERN_SUCCESS
;
4339 *task_info_count
= TASK_DEBUG_INFO_INTERNAL_COUNT
;
4341 #else /* DEVELOPMENT || DEBUG */
4342 error
= KERN_NOT_SUPPORTED
;
4344 #endif /* DEVELOPMENT || DEBUG */
4347 error
= KERN_INVALID_ARGUMENT
;
4355 * task_info_from_user
4357 * When calling task_info from user space,
4358 * this function will be executed as mig server side
4359 * instead of calling directly into task_info.
4360 * This gives the possibility to perform more security
4361 * checks on task_port.
4363 * In the case of TASK_DYLD_INFO, we require the more
4364 * privileged task_port not the less-privileged task_name_port.
4368 task_info_from_user(
4369 mach_port_t task_port
,
4370 task_flavor_t flavor
,
4371 task_info_t task_info_out
,
4372 mach_msg_type_number_t
*task_info_count
)
4377 if (flavor
== TASK_DYLD_INFO
)
4378 task
= convert_port_to_task(task_port
);
4380 task
= convert_port_to_task_name(task_port
);
4382 ret
= task_info(task
, flavor
, task_info_out
, task_info_count
);
4384 task_deallocate(task
);
4392 * Returns power stats for the task.
4393 * Note: Called with task locked.
4396 task_power_info_locked(
4398 task_power_info_t info
,
4399 gpu_energy_data_t ginfo
,
4400 task_power_info_v2_t infov2
)
4403 ledger_amount_t tmp
;
4405 task_lock_assert_owned(task
);
4407 ledger_get_entries(task
->ledger
, task_ledgers
.interrupt_wakeups
,
4408 (ledger_amount_t
*)&info
->task_interrupt_wakeups
, &tmp
);
4409 ledger_get_entries(task
->ledger
, task_ledgers
.platform_idle_wakeups
,
4410 (ledger_amount_t
*)&info
->task_platform_idle_wakeups
, &tmp
);
4412 info
->task_timer_wakeups_bin_1
= task
->task_timer_wakeups_bin_1
;
4413 info
->task_timer_wakeups_bin_2
= task
->task_timer_wakeups_bin_2
;
4415 info
->total_user
= task
->total_user_time
;
4416 info
->total_system
= task
->total_system_time
;
4420 infov2
->task_energy
= task
->task_energy
;
4425 ginfo
->task_gpu_utilisation
= task
->task_gpu_ns
;
4429 infov2
->task_ptime
= task
->total_ptime
;
4430 infov2
->task_pset_switches
= task
->ps_switch
;
4433 queue_iterate(&task
->threads
, thread
, thread_t
, task_threads
) {
4437 if (thread
->options
& TH_OPT_IDLE_THREAD
)
4441 thread_lock(thread
);
4443 info
->task_timer_wakeups_bin_1
+= thread
->thread_timer_wakeups_bin_1
;
4444 info
->task_timer_wakeups_bin_2
+= thread
->thread_timer_wakeups_bin_2
;
4448 infov2
->task_energy
+= ml_energy_stat(thread
);
4452 tval
= timer_grab(&thread
->user_timer
);
4453 info
->total_user
+= tval
;
4456 tval
= timer_grab(&thread
->ptime
);
4457 infov2
->task_ptime
+= tval
;
4458 infov2
->task_pset_switches
+= thread
->ps_switch
;
4461 tval
= timer_grab(&thread
->system_timer
);
4462 if (thread
->precise_user_kernel_time
) {
4463 info
->total_system
+= tval
;
4465 /* system_timer may represent either sys or user */
4466 info
->total_user
+= tval
;
4470 ginfo
->task_gpu_utilisation
+= ml_gpu_stat(thread
);
4472 thread_unlock(thread
);
4478 * task_gpu_utilisation
4480 * Returns the total gpu time used by all the threads of the task
4481 * (both dead and alive)
4484 task_gpu_utilisation(
4487 uint64_t gpu_time
= 0;
4488 #if !CONFIG_EMBEDDED
4492 gpu_time
+= task
->task_gpu_ns
;
4494 queue_iterate(&task
->threads
, thread
, thread_t
, task_threads
) {
4497 thread_lock(thread
);
4498 gpu_time
+= ml_gpu_stat(thread
);
4499 thread_unlock(thread
);
4504 #else /* CONFIG_EMBEDDED */
4505 /* silence compiler warning */
4507 #endif /* !CONFIG_EMBEDDED */
4514 * Returns the total energy used by all the threads of the task
4515 * (both dead and alive)
4521 uint64_t energy
= 0;
4525 energy
+= task
->task_energy
;
4527 queue_iterate(&task
->threads
, thread
, thread_t
, task_threads
) {
4530 thread_lock(thread
);
4531 energy
+= ml_energy_stat(thread
);
4532 thread_unlock(thread
);
4543 __unused task_t task
)
4552 task_purgable_info_t
*stats
)
4554 if (task
== TASK_NULL
|| stats
== NULL
)
4555 return KERN_INVALID_ARGUMENT
;
4556 /* Take task reference */
4557 task_reference(task
);
4558 vm_purgeable_stats((vm_purgeable_info_t
)stats
, task
);
4559 /* Drop task reference */
4560 task_deallocate(task
);
4561 return KERN_SUCCESS
;
4574 task
->vtimers
|= which
;
4578 case TASK_VTIMER_USER
:
4579 queue_iterate(&task
->threads
, thread
, thread_t
, task_threads
) {
4581 thread_lock(thread
);
4582 if (thread
->precise_user_kernel_time
)
4583 thread
->vtimer_user_save
= timer_grab(&thread
->user_timer
);
4585 thread
->vtimer_user_save
= timer_grab(&thread
->system_timer
);
4586 thread_unlock(thread
);
4591 case TASK_VTIMER_PROF
:
4592 queue_iterate(&task
->threads
, thread
, thread_t
, task_threads
) {
4594 thread_lock(thread
);
4595 thread
->vtimer_prof_save
= timer_grab(&thread
->user_timer
);
4596 thread
->vtimer_prof_save
+= timer_grab(&thread
->system_timer
);
4597 thread_unlock(thread
);
4602 case TASK_VTIMER_RLIM
:
4603 queue_iterate(&task
->threads
, thread
, thread_t
, task_threads
) {
4605 thread_lock(thread
);
4606 thread
->vtimer_rlim_save
= timer_grab(&thread
->user_timer
);
4607 thread
->vtimer_rlim_save
+= timer_grab(&thread
->system_timer
);
4608 thread_unlock(thread
);
4622 assert(task
== current_task());
4626 task
->vtimers
&= ~which
;
4636 uint32_t *microsecs
)
4638 thread_t thread
= current_thread();
4640 clock_sec_t secs
= 0;
4643 assert(task
== current_task());
4645 spl_t s
= splsched();
4646 thread_lock(thread
);
4648 if ((task
->vtimers
& which
) != (uint32_t)which
) {
4649 thread_unlock(thread
);
4656 case TASK_VTIMER_USER
:
4657 if (thread
->precise_user_kernel_time
) {
4658 tdelt
= (uint32_t)timer_delta(&thread
->user_timer
,
4659 &thread
->vtimer_user_save
);
4661 tdelt
= (uint32_t)timer_delta(&thread
->system_timer
,
4662 &thread
->vtimer_user_save
);
4664 absolutetime_to_microtime(tdelt
, &secs
, microsecs
);
4667 case TASK_VTIMER_PROF
:
4668 tsum
= timer_grab(&thread
->user_timer
);
4669 tsum
+= timer_grab(&thread
->system_timer
);
4670 tdelt
= (uint32_t)(tsum
- thread
->vtimer_prof_save
);
4671 absolutetime_to_microtime(tdelt
, &secs
, microsecs
);
4672 /* if the time delta is smaller than a usec, ignore */
4673 if (*microsecs
!= 0)
4674 thread
->vtimer_prof_save
= tsum
;
4677 case TASK_VTIMER_RLIM
:
4678 tsum
= timer_grab(&thread
->user_timer
);
4679 tsum
+= timer_grab(&thread
->system_timer
);
4680 tdelt
= (uint32_t)(tsum
- thread
->vtimer_rlim_save
);
4681 thread
->vtimer_rlim_save
= tsum
;
4682 absolutetime_to_microtime(tdelt
, &secs
, microsecs
);
4686 thread_unlock(thread
);
4693 * Change the assigned processor set for the task
4697 __unused task_t task
,
4698 __unused processor_set_t new_pset
,
4699 __unused boolean_t assign_threads
)
4701 return(KERN_FAILURE
);
4705 * task_assign_default:
4707 * Version of task_assign to assign to default processor set.
4710 task_assign_default(
4712 boolean_t assign_threads
)
4714 return (task_assign(task
, &pset0
, assign_threads
));
4718 * task_get_assignment
4720 * Return name of processor set that task is assigned to.
4723 task_get_assignment(
4725 processor_set_t
*pset
)
4727 if (!task
|| !task
->active
)
4728 return KERN_FAILURE
;
4732 return KERN_SUCCESS
;
4736 get_task_dispatchqueue_offset(
4739 return task
->dispatchqueue_offset
;
4745 * Set scheduling policy and parameters, both base and limit, for
4746 * the given task. Policy must be a policy which is enabled for the
4747 * processor set. Change contained threads if requested.
4751 __unused task_t task
,
4752 __unused policy_t policy_id
,
4753 __unused policy_base_t base
,
4754 __unused mach_msg_type_number_t count
,
4755 __unused boolean_t set_limit
,
4756 __unused boolean_t change
)
4758 return(KERN_FAILURE
);
4764 * Set scheduling policy and parameters, both base and limit, for
4765 * the given task. Policy can be any policy implemented by the
4766 * processor set, whether enabled or not. Change contained threads
4771 __unused task_t task
,
4772 __unused processor_set_t pset
,
4773 __unused policy_t policy_id
,
4774 __unused policy_base_t base
,
4775 __unused mach_msg_type_number_t base_count
,
4776 __unused policy_limit_t limit
,
4777 __unused mach_msg_type_number_t limit_count
,
4778 __unused boolean_t change
)
4780 return(KERN_FAILURE
);
4785 __unused task_t task
,
4786 __unused vm_offset_t pc
,
4787 __unused vm_offset_t endpc
)
4789 return KERN_FAILURE
;
4793 task_synchronizer_destroy_all(task_t task
)
4796 * Destroy owned semaphores
4798 semaphore_destroy_all(task
);
4802 * Install default (machine-dependent) initial thread state
4803 * on the task. Subsequent thread creation will have this initial
4804 * state set on the thread by machine_thread_inherit_taskwide().
4805 * Flavors and structures are exactly the same as those to thread_set_state()
4811 thread_state_t state
,
4812 mach_msg_type_number_t state_count
)
4816 if (task
== TASK_NULL
) {
4817 return (KERN_INVALID_ARGUMENT
);
4822 if (!task
->active
) {
4824 return (KERN_FAILURE
);
4827 ret
= machine_task_set_state(task
, flavor
, state
, state_count
);
4834 * Examine the default (machine-dependent) initial thread state
4835 * on the task, as set by task_set_state(). Flavors and structures
4836 * are exactly the same as those passed to thread_get_state().
4842 thread_state_t state
,
4843 mach_msg_type_number_t
*state_count
)
4847 if (task
== TASK_NULL
) {
4848 return (KERN_INVALID_ARGUMENT
);
4853 if (!task
->active
) {
4855 return (KERN_FAILURE
);
4858 ret
= machine_task_get_state(task
, flavor
, state
, state_count
);
4865 static kern_return_t
__attribute__((noinline
,not_tail_called
))
4866 PROC_VIOLATED_GUARD__SEND_EXC_GUARD_AND_SUSPEND(
4867 mach_exception_code_t code
,
4868 mach_exception_subcode_t subcode
,
4872 if (1 == proc_selfpid())
4873 return KERN_NOT_SUPPORTED
; // initproc is immune
4875 mach_exception_data_type_t codes
[EXCEPTION_CODE_MAX
] = {
4879 task_t task
= current_task();
4882 /* (See jetsam-related comments below) */
4884 proc_memstat_terminated(task
->bsd_info
, TRUE
);
4885 kr
= task_enqueue_exception_with_corpse(task
, EXC_GUARD
, codes
, 2, reason
);
4886 proc_memstat_terminated(task
->bsd_info
, FALSE
);
4890 extern kern_return_t
4891 task_violated_guard(mach_exception_code_t
, mach_exception_subcode_t
, void *);
4894 task_violated_guard(
4895 mach_exception_code_t code
,
4896 mach_exception_subcode_t subcode
,
4899 return PROC_VIOLATED_GUARD__SEND_EXC_GUARD_AND_SUSPEND(code
, subcode
, reason
);
4903 #if CONFIG_MEMORYSTATUS
4906 task_get_memlimit_is_active(task_t task
)
4908 assert (task
!= NULL
);
4910 if (task
->memlimit_is_active
== 1) {
4918 task_set_memlimit_is_active(task_t task
, boolean_t memlimit_is_active
)
4920 assert (task
!= NULL
);
4922 if (memlimit_is_active
) {
4923 task
->memlimit_is_active
= 1;
4925 task
->memlimit_is_active
= 0;
4930 task_get_memlimit_is_fatal(task_t task
)
4932 assert(task
!= NULL
);
4934 if (task
->memlimit_is_fatal
== 1) {
4942 task_set_memlimit_is_fatal(task_t task
, boolean_t memlimit_is_fatal
)
4944 assert (task
!= NULL
);
4946 if (memlimit_is_fatal
) {
4947 task
->memlimit_is_fatal
= 1;
4949 task
->memlimit_is_fatal
= 0;
4954 task_has_triggered_exc_resource(task_t task
, boolean_t memlimit_is_active
)
4956 boolean_t triggered
= FALSE
;
4958 assert(task
== current_task());
4961 * Returns true, if task has already triggered an exc_resource exception.
4964 if (memlimit_is_active
) {
4965 triggered
= (task
->memlimit_active_exc_resource
? TRUE
: FALSE
);
4967 triggered
= (task
->memlimit_inactive_exc_resource
? TRUE
: FALSE
);
4974 task_mark_has_triggered_exc_resource(task_t task
, boolean_t memlimit_is_active
)
4976 assert(task
== current_task());
4979 * We allow one exc_resource per process per active/inactive limit.
4980 * The limit's fatal attribute does not come into play.
4983 if (memlimit_is_active
) {
4984 task
->memlimit_active_exc_resource
= 1;
4986 task
->memlimit_inactive_exc_resource
= 1;
4990 #define HWM_USERCORE_MINSPACE 250 // free space (in MB) required *after* core file creation
4992 void __attribute__((noinline
))
4993 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb
, boolean_t is_fatal
)
4995 task_t task
= current_task();
4997 const char *procname
= "unknown";
4998 mach_exception_data_type_t code
[EXCEPTION_CODE_MAX
];
5001 pid
= proc_selfpid();
5005 * Cannot have ReportCrash analyzing
5006 * a suspended initproc.
5011 if (task
->bsd_info
!= NULL
)
5012 procname
= proc_name_address(current_task()->bsd_info
);
5015 if (hwm_user_cores
) {
5017 uint64_t starttime
, end
;
5018 clock_sec_t secs
= 0;
5019 uint32_t microsecs
= 0;
5021 starttime
= mach_absolute_time();
5023 * Trigger a coredump of this process. Don't proceed unless we know we won't
5024 * be filling up the disk; and ignore the core size resource limit for this
5027 if ((error
= coredump(current_task()->bsd_info
, HWM_USERCORE_MINSPACE
, COREDUMP_IGNORE_ULIMIT
)) != 0) {
5028 printf("couldn't take coredump of %s[%d]: %d\n", procname
, pid
, error
);
5031 * coredump() leaves the task suspended.
5033 task_resume_internal(current_task());
5035 end
= mach_absolute_time();
5036 absolutetime_to_microtime(end
- starttime
, &secs
, µsecs
);
5037 printf("coredump of %s[%d] taken in %d secs %d microsecs\n",
5038 proc_name_address(current_task()->bsd_info
), pid
, (int)secs
, microsecs
);
5040 #endif /* CONFIG_COREDUMP */
5042 if (disable_exc_resource
) {
5043 printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
5044 "supressed by a boot-arg.\n", procname
, pid
, max_footprint_mb
);
5049 * A task that has triggered an EXC_RESOURCE, should not be
5050 * jetsammed when the device is under memory pressure. Here
5051 * we set the P_MEMSTAT_TERMINATED flag so that the process
5052 * will be skipped if the memorystatus_thread wakes up.
5054 proc_memstat_terminated(current_task()->bsd_info
, TRUE
);
5056 code
[0] = code
[1] = 0;
5057 EXC_RESOURCE_ENCODE_TYPE(code
[0], RESOURCE_TYPE_MEMORY
);
5058 EXC_RESOURCE_ENCODE_FLAVOR(code
[0], FLAVOR_HIGH_WATERMARK
);
5059 EXC_RESOURCE_HWM_ENCODE_LIMIT(code
[0], max_footprint_mb
);
5061 /* Do not generate a corpse fork if the violation is a fatal one */
5062 if (is_fatal
|| exc_via_corpse_forking
== 0) {
5063 /* Do not send an EXC_RESOURCE if corpse_for_fatal_memkill is set */
5064 if (corpse_for_fatal_memkill
== 0) {
5066 * Use the _internal_ variant so that no user-space
5067 * process can resume our task from under us.
5069 task_suspend_internal(task
);
5070 exception_triage(EXC_RESOURCE
, code
, EXCEPTION_CODE_MAX
);
5071 task_resume_internal(task
);
5075 printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
5076 "supressed due to audio playback.\n", procname
, pid
, max_footprint_mb
);
5078 task_enqueue_exception_with_corpse(task
, EXC_RESOURCE
,
5079 code
, EXCEPTION_CODE_MAX
, NULL
);
5084 * After the EXC_RESOURCE has been handled, we must clear the
5085 * P_MEMSTAT_TERMINATED flag so that the process can again be
5086 * considered for jetsam if the memorystatus_thread wakes up.
5088 proc_memstat_terminated(current_task()->bsd_info
, FALSE
); /* clear the flag */
5092 * Callback invoked when a task exceeds its physical footprint limit.
5095 task_footprint_exceeded(int warning
, __unused
const void *param0
, __unused
const void *param1
)
5097 ledger_amount_t max_footprint
, max_footprint_mb
;
5099 boolean_t is_warning
;
5100 boolean_t memlimit_is_active
;
5101 boolean_t memlimit_is_fatal
;
5103 if (warning
== LEDGER_WARNING_DIPPED_BELOW
) {
5105 * Task memory limits only provide a warning on the way up.
5108 } else if (warning
== LEDGER_WARNING_ROSE_ABOVE
) {
5110 * This task is in danger of violating a memory limit,
5111 * It has exceeded a percentage level of the limit.
5116 * The task has exceeded the physical footprint limit.
5117 * This is not a warning but a true limit violation.
5122 task
= current_task();
5124 ledger_get_limit(task
->ledger
, task_ledgers
.phys_footprint
, &max_footprint
);
5125 max_footprint_mb
= max_footprint
>> 20;
5127 memlimit_is_active
= task_get_memlimit_is_active(task
);
5128 memlimit_is_fatal
= task_get_memlimit_is_fatal(task
);
5131 * If this is an actual violation (not a warning), then generate EXC_RESOURCE exception.
5132 * We only generate the exception once per process per memlimit (active/inactive limit).
5133 * To enforce this, we monitor state based on the memlimit's active/inactive attribute
5134 * and we disable it by marking that memlimit as exception triggered.
5136 if ((is_warning
== FALSE
) && (!task_has_triggered_exc_resource(task
, memlimit_is_active
))) {
5137 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND((int)max_footprint_mb
, memlimit_is_fatal
);
5138 memorystatus_log_exception((int)max_footprint_mb
, memlimit_is_active
, memlimit_is_fatal
);
5139 task_mark_has_triggered_exc_resource(task
, memlimit_is_active
);
5142 memorystatus_on_ledger_footprint_exceeded(is_warning
, memlimit_is_active
, memlimit_is_fatal
);
5145 extern int proc_check_footprint_priv(void);
5148 task_set_phys_footprint_limit(
5153 kern_return_t error
;
5155 boolean_t memlimit_is_active
;
5156 boolean_t memlimit_is_fatal
;
5158 if ((error
= proc_check_footprint_priv())) {
5159 return (KERN_NO_ACCESS
);
5163 * This call should probably be obsoleted.
5164 * But for now, we default to current state.
5166 memlimit_is_active
= task_get_memlimit_is_active(task
);
5167 memlimit_is_fatal
= task_get_memlimit_is_fatal(task
);
5169 return task_set_phys_footprint_limit_internal(task
, new_limit_mb
, old_limit_mb
, memlimit_is_active
, memlimit_is_fatal
);
5173 task_convert_phys_footprint_limit(
5175 int *converted_limit_mb
)
5177 if (limit_mb
== -1) {
5181 if (max_task_footprint
!= 0) {
5182 *converted_limit_mb
= (int)(max_task_footprint
/ 1024 / 1024); /* bytes to MB */
5184 *converted_limit_mb
= (int)(LEDGER_LIMIT_INFINITY
>> 20);
5187 /* nothing to convert */
5188 *converted_limit_mb
= limit_mb
;
5190 return (KERN_SUCCESS
);
5195 task_set_phys_footprint_limit_internal(
5199 boolean_t memlimit_is_active
,
5200 boolean_t memlimit_is_fatal
)
5202 ledger_amount_t old
;
5204 ledger_get_limit(task
->ledger
, task_ledgers
.phys_footprint
, &old
);
5207 * Check that limit >> 20 will not give an "unexpected" 32-bit
5208 * result. There are, however, implicit assumptions that -1 mb limit
5209 * equates to LEDGER_LIMIT_INFINITY.
5211 assert(((old
& 0xFFF0000000000000LL
) == 0) || (old
== LEDGER_LIMIT_INFINITY
));
5214 *old_limit_mb
= (int)(old
>> 20);
5217 if (new_limit_mb
== -1) {
5219 * Caller wishes to remove the limit.
5221 ledger_set_limit(task
->ledger
, task_ledgers
.phys_footprint
,
5222 max_task_footprint
? max_task_footprint
: LEDGER_LIMIT_INFINITY
,
5223 max_task_footprint
? max_task_footprint_warning_level
: 0);
5226 task_set_memlimit_is_active(task
, memlimit_is_active
);
5227 task_set_memlimit_is_fatal(task
, memlimit_is_fatal
);
5230 return (KERN_SUCCESS
);
5233 #ifdef CONFIG_NOMONITORS
5234 return (KERN_SUCCESS
);
5235 #endif /* CONFIG_NOMONITORS */
5239 if ((memlimit_is_active
== task_get_memlimit_is_active(task
)) &&
5240 (memlimit_is_fatal
== task_get_memlimit_is_fatal(task
)) &&
5241 (((ledger_amount_t
)new_limit_mb
<< 20) == old
)) {
5243 * memlimit state is not changing
5246 return(KERN_SUCCESS
);
5249 task_set_memlimit_is_active(task
, memlimit_is_active
);
5250 task_set_memlimit_is_fatal(task
, memlimit_is_fatal
);
5252 ledger_set_limit(task
->ledger
, task_ledgers
.phys_footprint
,
5253 (ledger_amount_t
)new_limit_mb
<< 20, PHYS_FOOTPRINT_WARNING_LEVEL
);
5255 if (task
== current_task()) {
5256 ledger_check_new_balance(current_thread(), task
->ledger
,
5257 task_ledgers
.phys_footprint
);
5262 return (KERN_SUCCESS
);
5266 task_get_phys_footprint_limit(
5270 ledger_amount_t limit
;
5272 ledger_get_limit(task
->ledger
, task_ledgers
.phys_footprint
, &limit
);
5274 * Check that limit >> 20 will not give an "unexpected" signed, 32-bit
5275 * result. There are, however, implicit assumptions that -1 mb limit
5276 * equates to LEDGER_LIMIT_INFINITY.
5278 assert(((limit
& 0xFFF0000000000000LL
) == 0) || (limit
== LEDGER_LIMIT_INFINITY
));
5279 *limit_mb
= (int)(limit
>> 20);
5281 return (KERN_SUCCESS
);
5283 #else /* CONFIG_MEMORYSTATUS */
5285 task_set_phys_footprint_limit(
5286 __unused task_t task
,
5287 __unused
int new_limit_mb
,
5288 __unused
int *old_limit_mb
)
5290 return (KERN_FAILURE
);
5294 task_get_phys_footprint_limit(
5295 __unused task_t task
,
5296 __unused
int *limit_mb
)
5298 return (KERN_FAILURE
);
5300 #endif /* CONFIG_MEMORYSTATUS */
5303 * We need to export some functions to other components that
5304 * are currently implemented in macros within the osfmk
5305 * component. Just export them as functions of the same name.
5307 boolean_t
is_kerneltask(task_t t
)
5309 if (t
== kernel_task
)
5315 boolean_t
is_corpsetask(task_t t
)
5317 return (task_is_a_corpse(t
));
5321 task_t
current_task(void);
5322 task_t
current_task(void)
5324 return (current_task_fast());
5327 #undef task_reference
5328 void task_reference(task_t task
);
5333 if (task
!= TASK_NULL
)
5334 task_reference_internal(task
);
5337 /* defined in bsd/kern/kern_prot.c */
5338 extern int get_audit_token_pid(audit_token_t
*audit_token
);
5340 int task_pid(task_t task
)
5343 return get_audit_token_pid(&task
->audit_token
);
5349 * This routine finds a thread in a task by its unique id
5350 * Returns a referenced thread or THREAD_NULL if the thread was not found
5352 * TODO: This is super inefficient - it's an O(threads in task) list walk!
5353 * We should make a tid hash, or transition all tid clients to thread ports
5355 * Precondition: No locks held (will take task lock)
5358 task_findtid(task_t task
, uint64_t tid
)
5360 thread_t self
= current_thread();
5361 thread_t found_thread
= THREAD_NULL
;
5362 thread_t iter_thread
= THREAD_NULL
;
5364 /* Short-circuit the lookup if we're looking up ourselves */
5365 if (tid
== self
->thread_id
|| tid
== TID_NULL
) {
5366 assert(self
->task
== task
);
5368 thread_reference(self
);
5375 queue_iterate(&task
->threads
, iter_thread
, thread_t
, task_threads
) {
5376 if (iter_thread
->thread_id
== tid
) {
5377 found_thread
= iter_thread
;
5378 thread_reference(found_thread
);
5385 return (found_thread
);
5388 int pid_from_task(task_t task
)
5392 if (task
->bsd_info
) {
5393 pid
= proc_pid(task
->bsd_info
);
5395 pid
= task_pid(task
);
5402 * Control the CPU usage monitor for a task.
5405 task_cpu_usage_monitor_ctl(task_t task
, uint32_t *flags
)
5407 int error
= KERN_SUCCESS
;
5409 if (*flags
& CPUMON_MAKE_FATAL
) {
5410 task
->rusage_cpu_flags
|= TASK_RUSECPU_FLAGS_FATAL_CPUMON
;
5412 error
= KERN_INVALID_ARGUMENT
;
5419 * Control the wakeups monitor for a task.
5422 task_wakeups_monitor_ctl(task_t task
, uint32_t *flags
, int32_t *rate_hz
)
5424 ledger_t ledger
= task
->ledger
;
5427 if (*flags
& WAKEMON_GET_PARAMS
) {
5428 ledger_amount_t limit
;
5431 ledger_get_limit(ledger
, task_ledgers
.interrupt_wakeups
, &limit
);
5432 ledger_get_period(ledger
, task_ledgers
.interrupt_wakeups
, &period
);
5434 if (limit
!= LEDGER_LIMIT_INFINITY
) {
5436 * An active limit means the wakeups monitor is enabled.
5438 *rate_hz
= (int32_t)(limit
/ (int64_t)(period
/ NSEC_PER_SEC
));
5439 *flags
= WAKEMON_ENABLE
;
5440 if (task
->rusage_cpu_flags
& TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON
) {
5441 *flags
|= WAKEMON_MAKE_FATAL
;
5444 *flags
= WAKEMON_DISABLE
;
5449 * If WAKEMON_GET_PARAMS is present in flags, all other flags are ignored.
5452 return KERN_SUCCESS
;
5455 if (*flags
& WAKEMON_ENABLE
) {
5456 if (*flags
& WAKEMON_SET_DEFAULTS
) {
5457 *rate_hz
= task_wakeups_monitor_rate
;
5460 #ifndef CONFIG_NOMONITORS
5461 if (*flags
& WAKEMON_MAKE_FATAL
) {
5462 task
->rusage_cpu_flags
|= TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON
;
5464 #endif /* CONFIG_NOMONITORS */
5466 if (*rate_hz
<= 0) {
5468 return KERN_INVALID_ARGUMENT
;
5471 #ifndef CONFIG_NOMONITORS
5472 ledger_set_limit(ledger
, task_ledgers
.interrupt_wakeups
, *rate_hz
* task_wakeups_monitor_interval
,
5473 task_wakeups_monitor_ustackshots_trigger_pct
);
5474 ledger_set_period(ledger
, task_ledgers
.interrupt_wakeups
, task_wakeups_monitor_interval
* NSEC_PER_SEC
);
5475 ledger_enable_callback(ledger
, task_ledgers
.interrupt_wakeups
);
5476 #endif /* CONFIG_NOMONITORS */
5477 } else if (*flags
& WAKEMON_DISABLE
) {
5479 * Caller wishes to disable wakeups monitor on the task.
5481 * Disable telemetry if it was triggered by the wakeups monitor, and
5482 * remove the limit & callback on the wakeups ledger entry.
5484 #if CONFIG_TELEMETRY
5485 telemetry_task_ctl_locked(task
, TF_WAKEMON_WARNING
, 0);
5487 ledger_disable_refill(ledger
, task_ledgers
.interrupt_wakeups
);
5488 ledger_disable_callback(ledger
, task_ledgers
.interrupt_wakeups
);
5492 return KERN_SUCCESS
;
5496 task_wakeups_rate_exceeded(int warning
, __unused
const void *param0
, __unused
const void *param1
)
5498 if (warning
== LEDGER_WARNING_ROSE_ABOVE
) {
5499 #if CONFIG_TELEMETRY
5501 * This task is in danger of violating the wakeups monitor. Enable telemetry on this task
5502 * so there are micro-stackshots available if and when EXC_RESOURCE is triggered.
5504 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING
, 1);
5509 #if CONFIG_TELEMETRY
5511 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
5512 * exceeded the limit, turn telemetry off for the task.
5514 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING
, 0);
5518 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS();
5522 void __attribute__((noinline
))
5523 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void)
5525 task_t task
= current_task();
5527 const char *procname
= "unknown";
5530 #ifdef EXC_RESOURCE_MONITORS
5531 mach_exception_data_type_t code
[EXCEPTION_CODE_MAX
];
5532 #endif /* EXC_RESOURCE_MONITORS */
5533 struct ledger_entry_info lei
;
5536 pid
= proc_selfpid();
5537 if (task
->bsd_info
!= NULL
)
5538 procname
= proc_name_address(current_task()->bsd_info
);
5541 ledger_get_entry_info(task
->ledger
, task_ledgers
.interrupt_wakeups
, &lei
);
5544 * Disable the exception notification so we don't overwhelm
5545 * the listener with an endless stream of redundant exceptions.
5546 * TODO: detect whether another thread is already reporting the violation.
5548 uint32_t flags
= WAKEMON_DISABLE
;
5549 task_wakeups_monitor_ctl(task
, &flags
, NULL
);
5551 fatal
= task
->rusage_cpu_flags
& TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON
;
5552 trace_resource_violation(RMON_CPUWAKES_VIOLATED
, &lei
);
5553 os_log(OS_LOG_DEFAULT
, "process %s[%d] caught waking the CPU %llu times "
5554 "over ~%llu seconds, averaging %llu wakes / second and "
5555 "violating a %slimit of %llu wakes over %llu seconds.\n",
5557 lei
.lei_balance
, lei
.lei_last_refill
/ NSEC_PER_SEC
,
5558 lei
.lei_last_refill
== 0 ? 0 :
5559 (NSEC_PER_SEC
* lei
.lei_balance
/ lei
.lei_last_refill
),
5560 fatal
? "FATAL " : "",
5561 lei
.lei_limit
, lei
.lei_refill_period
/ NSEC_PER_SEC
);
5563 kr
= send_resource_violation(send_cpu_wakes_violation
, task
, &lei
,
5564 fatal
? kRNFatalLimitFlag
: 0);
5566 printf("send_resource_violation(CPU wakes, ...): error %#x\n", kr
);
5569 #ifdef EXC_RESOURCE_MONITORS
5570 if (disable_exc_resource
) {
5571 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
5572 "supressed by a boot-arg\n", procname
, pid
);
5576 os_log(OS_LOG_DEFAULT
, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
5577 "supressed due to audio playback\n", procname
, pid
);
5580 if (lei
.lei_last_refill
== 0) {
5581 os_log(OS_LOG_DEFAULT
, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
5582 "supressed due to lei.lei_last_refill = 0 \n", procname
, pid
);
5585 code
[0] = code
[1] = 0;
5586 EXC_RESOURCE_ENCODE_TYPE(code
[0], RESOURCE_TYPE_WAKEUPS
);
5587 EXC_RESOURCE_ENCODE_FLAVOR(code
[0], FLAVOR_WAKEUPS_MONITOR
);
5588 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_PERMITTED(code
[0],
5589 NSEC_PER_SEC
* lei
.lei_limit
/ lei
.lei_refill_period
);
5590 EXC_RESOURCE_CPUMONITOR_ENCODE_OBSERVATION_INTERVAL(code
[0],
5591 lei
.lei_last_refill
);
5592 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_OBSERVED(code
[1],
5593 NSEC_PER_SEC
* lei
.lei_balance
/ lei
.lei_last_refill
);
5594 exception_triage(EXC_RESOURCE
, code
, EXCEPTION_CODE_MAX
);
5595 #endif /* EXC_RESOURCE_MONITORS */
5598 task_terminate_internal(task
);
5603 global_update_logical_writes(int64_t io_delta
)
5605 int64_t old_count
, new_count
;
5606 boolean_t needs_telemetry
;
5609 new_count
= old_count
= global_logical_writes_count
;
5610 new_count
+= io_delta
;
5611 if (new_count
>= io_telemetry_limit
) {
5613 needs_telemetry
= TRUE
;
5615 needs_telemetry
= FALSE
;
5617 } while(!OSCompareAndSwap64(old_count
, new_count
, &global_logical_writes_count
));
5618 return needs_telemetry
;
5621 void task_update_logical_writes(task_t task
, uint32_t io_size
, int flags
, void *vp
)
5623 int64_t io_delta
= 0;
5624 boolean_t needs_telemetry
= FALSE
;
5626 if ((!task
) || (!io_size
) || (!vp
))
5629 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM
, VM_DATA_WRITE
)) | DBG_FUNC_NONE
,
5630 task_pid(task
), io_size
, flags
, (uintptr_t)VM_KERNEL_ADDRPERM(vp
), 0);
5631 DTRACE_IO4(logical_writes
, struct task
*, task
, uint32_t, io_size
, int, flags
, vnode
*, vp
);
5633 case TASK_WRITE_IMMEDIATE
:
5634 OSAddAtomic64(io_size
, (SInt64
*)&(task
->task_immediate_writes
));
5635 ledger_credit(task
->ledger
, task_ledgers
.logical_writes
, io_size
);
5637 case TASK_WRITE_DEFERRED
:
5638 OSAddAtomic64(io_size
, (SInt64
*)&(task
->task_deferred_writes
));
5639 ledger_credit(task
->ledger
, task_ledgers
.logical_writes
, io_size
);
5641 case TASK_WRITE_INVALIDATED
:
5642 OSAddAtomic64(io_size
, (SInt64
*)&(task
->task_invalidated_writes
));
5643 ledger_debit(task
->ledger
, task_ledgers
.logical_writes
, io_size
);
5645 case TASK_WRITE_METADATA
:
5646 OSAddAtomic64(io_size
, (SInt64
*)&(task
->task_metadata_writes
));
5647 ledger_credit(task
->ledger
, task_ledgers
.logical_writes
, io_size
);
5651 io_delta
= (flags
== TASK_WRITE_INVALIDATED
) ? ((int64_t)io_size
* -1ll) : ((int64_t)io_size
);
5652 if (io_telemetry_limit
!= 0) {
5653 /* If io_telemetry_limit is 0, disable global updates and I/O telemetry */
5654 needs_telemetry
= global_update_logical_writes(io_delta
);
5655 if (needs_telemetry
) {
5656 act_set_io_telemetry_ast(current_thread());
5662 * Control the I/O monitor for a task.
5665 task_io_monitor_ctl(task_t task
, uint32_t *flags
)
5667 ledger_t ledger
= task
->ledger
;
5670 if (*flags
& IOMON_ENABLE
) {
5671 /* Configure the physical I/O ledger */
5672 ledger_set_limit(ledger
, task_ledgers
.physical_writes
, (task_iomon_limit_mb
* 1024 * 1024), 0);
5673 ledger_set_period(ledger
, task_ledgers
.physical_writes
, (task_iomon_interval_secs
* NSEC_PER_SEC
));
5675 /* Configure the logical I/O ledger */
5676 ledger_set_limit(ledger
, task_ledgers
.logical_writes
, (task_iomon_limit_mb
* 1024 * 1024), 0);
5677 ledger_set_period(ledger
, task_ledgers
.logical_writes
, (task_iomon_interval_secs
* NSEC_PER_SEC
));
5679 } else if (*flags
& IOMON_DISABLE
) {
5681 * Caller wishes to disable I/O monitor on the task.
5683 ledger_disable_refill(ledger
, task_ledgers
.physical_writes
);
5684 ledger_disable_callback(ledger
, task_ledgers
.physical_writes
);
5685 ledger_disable_refill(ledger
, task_ledgers
.logical_writes
);
5686 ledger_disable_callback(ledger
, task_ledgers
.logical_writes
);
5690 return KERN_SUCCESS
;
5694 task_io_rate_exceeded(int warning
, const void *param0
, __unused
const void *param1
)
5697 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO((int)param0
);
5701 void __attribute__((noinline
)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor
)
5704 task_t task
= current_task();
5705 #ifdef EXC_RESOURCE_MONITORS
5706 mach_exception_data_type_t code
[EXCEPTION_CODE_MAX
];
5707 #endif /* EXC_RESOURCE_MONITORS */
5708 struct ledger_entry_info lei
;
5712 pid
= proc_selfpid();
5715 * Get the ledger entry info. We need to do this before disabling the exception
5716 * to get correct values for all fields.
5719 case FLAVOR_IO_PHYSICAL_WRITES
:
5720 ledger_get_entry_info(task
->ledger
, task_ledgers
.physical_writes
, &lei
);
5722 case FLAVOR_IO_LOGICAL_WRITES
:
5723 ledger_get_entry_info(task
->ledger
, task_ledgers
.logical_writes
, &lei
);
5729 * Disable the exception notification so we don't overwhelm
5730 * the listener with an endless stream of redundant exceptions.
5731 * TODO: detect whether another thread is already reporting the violation.
5733 uint32_t flags
= IOMON_DISABLE
;
5734 task_io_monitor_ctl(task
, &flags
);
5736 if (flavor
== FLAVOR_IO_LOGICAL_WRITES
) {
5737 trace_resource_violation(RMON_LOGWRITES_VIOLATED
, &lei
);
5739 os_log(OS_LOG_DEFAULT
, "process [%d] caught causing excessive I/O (flavor: %d). Task I/O: %lld MB. [Limit : %lld MB per %lld secs]\n",
5740 pid
, flavor
, (lei
.lei_balance
/ (1024 * 1024)), (lei
.lei_limit
/ (1024 * 1024)), (lei
.lei_refill_period
/ NSEC_PER_SEC
));
5742 kr
= send_resource_violation(send_disk_writes_violation
, task
, &lei
, kRNFlagsNone
);
5744 printf("send_resource_violation(disk_writes, ...): error %#x\n", kr
);
5747 #ifdef EXC_RESOURCE_MONITORS
5748 code
[0] = code
[1] = 0;
5749 EXC_RESOURCE_ENCODE_TYPE(code
[0], RESOURCE_TYPE_IO
);
5750 EXC_RESOURCE_ENCODE_FLAVOR(code
[0], flavor
);
5751 EXC_RESOURCE_IO_ENCODE_INTERVAL(code
[0], (lei
.lei_refill_period
/ NSEC_PER_SEC
));
5752 EXC_RESOURCE_IO_ENCODE_LIMIT(code
[0], (lei
.lei_limit
/ (1024 * 1024)));
5753 EXC_RESOURCE_IO_ENCODE_OBSERVED(code
[1], (lei
.lei_balance
/ (1024 * 1024)));
5754 exception_triage(EXC_RESOURCE
, code
, EXCEPTION_CODE_MAX
);
5755 #endif /* EXC_RESOURCE_MONITORS */
5758 /* Placeholders for the task set/get voucher interfaces */
5760 task_get_mach_voucher(
5762 mach_voucher_selector_t __unused which
,
5763 ipc_voucher_t
*voucher
)
5765 if (TASK_NULL
== task
)
5766 return KERN_INVALID_TASK
;
5769 return KERN_SUCCESS
;
5773 task_set_mach_voucher(
5775 ipc_voucher_t __unused voucher
)
5777 if (TASK_NULL
== task
)
5778 return KERN_INVALID_TASK
;
5780 return KERN_SUCCESS
;
5784 task_swap_mach_voucher(
5786 ipc_voucher_t new_voucher
,
5787 ipc_voucher_t
*in_out_old_voucher
)
5789 if (TASK_NULL
== task
)
5790 return KERN_INVALID_TASK
;
5792 *in_out_old_voucher
= new_voucher
;
5793 return KERN_SUCCESS
;
5796 void task_set_gpu_denied(task_t task
, boolean_t denied
)
5801 task
->t_flags
|= TF_GPU_DENIED
;
5803 task
->t_flags
&= ~TF_GPU_DENIED
;
5809 boolean_t
task_is_gpu_denied(task_t task
)
5811 /* We don't need the lock to read this flag */
5812 return (task
->t_flags
& TF_GPU_DENIED
) ? TRUE
: FALSE
;
5816 uint64_t get_task_memory_region_count(task_t task
)
5819 map
= (task
== kernel_task
) ? kernel_map
: task
->map
;
5820 return((uint64_t)get_map_nentries(map
));
5824 kdebug_trace_dyld_internal(uint32_t base_code
,
5825 struct dyld_kernel_image_info
*info
)
5827 static_assert(sizeof(info
->uuid
) >= 16);
5829 #if defined(__LP64__)
5830 uint64_t *uuid
= (uint64_t *)&(info
->uuid
);
5832 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE
,
5833 KDBG_EVENTID(DBG_DYLD
, DBG_DYLD_UUID
, base_code
), uuid
[0],
5834 uuid
[1], info
->load_addr
,
5835 (uint64_t)info
->fsid
.val
[0] | ((uint64_t)info
->fsid
.val
[1] << 32),
5837 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE
,
5838 KDBG_EVENTID(DBG_DYLD
, DBG_DYLD_UUID
, base_code
+ 1),
5839 (uint64_t)info
->fsobjid
.fid_objno
|
5840 ((uint64_t)info
->fsobjid
.fid_generation
<< 32),
5842 #else /* defined(__LP64__) */
5843 uint32_t *uuid
= (uint32_t *)&(info
->uuid
);
5845 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE
,
5846 KDBG_EVENTID(DBG_DYLD
, DBG_DYLD_UUID
, base_code
+ 2), uuid
[0],
5847 uuid
[1], uuid
[2], uuid
[3], 0);
5848 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE
,
5849 KDBG_EVENTID(DBG_DYLD
, DBG_DYLD_UUID
, base_code
+ 3),
5850 (uint32_t)info
->load_addr
, info
->fsid
.val
[0], info
->fsid
.val
[1],
5851 info
->fsobjid
.fid_objno
, 0);
5852 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE
,
5853 KDBG_EVENTID(DBG_DYLD
, DBG_DYLD_UUID
, base_code
+ 4),
5854 info
->fsobjid
.fid_generation
, 0, 0, 0, 0);
5855 #endif /* !defined(__LP64__) */
5858 static kern_return_t
5859 kdebug_trace_dyld(task_t task
, uint32_t base_code
,
5860 vm_map_copy_t infos_copy
, mach_msg_type_number_t infos_len
)
5863 dyld_kernel_image_info_array_t infos
;
5864 vm_map_offset_t map_data
;
5868 return KERN_INVALID_ADDRESS
;
5871 if (!kdebug_enable
||
5872 !kdebug_debugid_enabled(KDBG_EVENTID(DBG_DYLD
, DBG_DYLD_UUID
, 0)))
5874 vm_map_copy_discard(infos_copy
);
5875 return KERN_SUCCESS
;
5878 if (task
== NULL
|| task
!= current_task()) {
5879 return KERN_INVALID_TASK
;
5882 kr
= vm_map_copyout(ipc_kernel_map
, &map_data
, (vm_map_copy_t
)infos_copy
);
5883 if (kr
!= KERN_SUCCESS
) {
5887 infos
= CAST_DOWN(dyld_kernel_image_info_array_t
, map_data
);
5889 for (mach_msg_type_number_t i
= 0; i
< infos_len
; i
++) {
5890 kdebug_trace_dyld_internal(base_code
, &(infos
[i
]));
5893 data
= CAST_DOWN(vm_offset_t
, map_data
);
5894 mach_vm_deallocate(ipc_kernel_map
, data
, infos_len
* sizeof(infos
[0]));
5895 return KERN_SUCCESS
;
5899 task_register_dyld_image_infos(task_t task
,
5900 dyld_kernel_image_info_array_t infos_copy
,
5901 mach_msg_type_number_t infos_len
)
5903 return kdebug_trace_dyld(task
, DBG_DYLD_UUID_MAP_A
,
5904 (vm_map_copy_t
)infos_copy
, infos_len
);
5908 task_unregister_dyld_image_infos(task_t task
,
5909 dyld_kernel_image_info_array_t infos_copy
,
5910 mach_msg_type_number_t infos_len
)
5912 return kdebug_trace_dyld(task
, DBG_DYLD_UUID_UNMAP_A
,
5913 (vm_map_copy_t
)infos_copy
, infos_len
);
5917 task_get_dyld_image_infos(__unused task_t task
,
5918 __unused dyld_kernel_image_info_array_t
* dyld_images
,
5919 __unused mach_msg_type_number_t
* dyld_imagesCnt
)
5921 return KERN_NOT_SUPPORTED
;
5925 task_register_dyld_shared_cache_image_info(task_t task
,
5926 dyld_kernel_image_info_t cache_img
,
5927 __unused boolean_t no_cache
,
5928 __unused boolean_t private_cache
)
5930 if (task
== NULL
|| task
!= current_task()) {
5931 return KERN_INVALID_TASK
;
5934 kdebug_trace_dyld_internal(DBG_DYLD_UUID_SHARED_CACHE_A
, &cache_img
);
5935 return KERN_SUCCESS
;
5939 task_register_dyld_set_dyld_state(__unused task_t task
,
5940 __unused
uint8_t dyld_state
)
5942 return KERN_NOT_SUPPORTED
;
5946 task_register_dyld_get_process_state(__unused task_t task
,
5947 __unused dyld_kernel_process_info_t
* dyld_process_state
)
5949 return KERN_NOT_SUPPORTED
;
5953 task_inspect(task_inspect_t task_insp
, task_inspect_flavor_t flavor
,
5954 task_inspect_info_t info_out
, mach_msg_type_number_t
*size_in_out
)
5957 task_t task
= (task_t
)task_insp
;
5958 kern_return_t kr
= KERN_SUCCESS
;
5959 mach_msg_type_number_t size
;
5961 if (task
== TASK_NULL
) {
5962 return KERN_INVALID_ARGUMENT
;
5965 size
= *size_in_out
;
5968 case TASK_INSPECT_BASIC_COUNTS
: {
5969 struct task_inspect_basic_counts
*bc
;
5970 uint64_t task_counts
[MT_CORE_NFIXED
];
5972 if (size
< TASK_INSPECT_BASIC_COUNTS_COUNT
) {
5973 kr
= KERN_INVALID_ARGUMENT
;
5977 mt_fixed_task_counts(task
, task_counts
);
5978 bc
= (struct task_inspect_basic_counts
*)info_out
;
5979 #ifdef MT_CORE_INSTRS
5980 bc
->instructions
= task_counts
[MT_CORE_INSTRS
];
5981 #else /* defined(MT_CORE_INSTRS) */
5982 bc
->instructions
= 0;
5983 #endif /* !defined(MT_CORE_INSTRS) */
5984 bc
->cycles
= task_counts
[MT_CORE_CYCLES
];
5985 size
= TASK_INSPECT_BASIC_COUNTS_COUNT
;
5989 kr
= KERN_INVALID_ARGUMENT
;
5993 if (kr
== KERN_SUCCESS
) {
5994 *size_in_out
= size
;
5997 #else /* MONOTONIC */
5998 #pragma unused(task_insp, flavor, info_out, size_in_out)
5999 return KERN_NOT_SUPPORTED
;
6000 #endif /* !MONOTONIC */
6003 #if CONFIG_SECLUDED_MEMORY
6004 int num_tasks_can_use_secluded_mem
= 0;
6007 task_set_can_use_secluded_mem(
6009 boolean_t can_use_secluded_mem
)
6011 if (!task
->task_could_use_secluded_mem
) {
6015 task_set_can_use_secluded_mem_locked(task
, can_use_secluded_mem
);
6020 task_set_can_use_secluded_mem_locked(
6022 boolean_t can_use_secluded_mem
)
6024 assert(task
->task_could_use_secluded_mem
);
6025 if (can_use_secluded_mem
&&
6026 secluded_for_apps
&& /* global boot-arg */
6027 !task
->task_can_use_secluded_mem
) {
6028 assert(num_tasks_can_use_secluded_mem
>= 0);
6030 (volatile SInt32
*)&num_tasks_can_use_secluded_mem
);
6031 task
->task_can_use_secluded_mem
= TRUE
;
6032 } else if (!can_use_secluded_mem
&&
6033 task
->task_can_use_secluded_mem
) {
6034 assert(num_tasks_can_use_secluded_mem
> 0);
6036 (volatile SInt32
*)&num_tasks_can_use_secluded_mem
);
6037 task
->task_can_use_secluded_mem
= FALSE
;
6042 task_set_could_use_secluded_mem(
6044 boolean_t could_use_secluded_mem
)
6046 task
->task_could_use_secluded_mem
= could_use_secluded_mem
;
6050 task_set_could_also_use_secluded_mem(
6052 boolean_t could_also_use_secluded_mem
)
6054 task
->task_could_also_use_secluded_mem
= could_also_use_secluded_mem
;
6058 task_can_use_secluded_mem(
6061 if (task
->task_can_use_secluded_mem
) {
6062 assert(task
->task_could_use_secluded_mem
);
6063 assert(num_tasks_can_use_secluded_mem
> 0);
6066 if (task
->task_could_also_use_secluded_mem
&&
6067 num_tasks_can_use_secluded_mem
> 0) {
6068 assert(num_tasks_can_use_secluded_mem
> 0);
6075 task_could_use_secluded_mem(
6078 return task
->task_could_use_secluded_mem
;
6080 #endif /* CONFIG_SECLUDED_MEMORY */
6083 task_io_user_clients(task_t task
)
6085 return (&task
->io_user_clients
);
6089 task_copy_fields_for_exec(task_t dst_task
, task_t src_task
)
6091 dst_task
->vtimers
= src_task
->vtimers
;